Upload Python Script

The following script uploads a backup file and a backup directory to S3 using the s3cmd tool. This version selects the newest file and the newest directory whose timestamped names fall on the most recent Sunday.

import os
import datetime
import re
import subprocess
import argparse

def get_newest_in_sunday(path, s3_target_path, s3_config_path="./s3config.txt", today=None):
    """
    Upload the newest file and directory whose names are dated on the most recent Sunday.

    Only entries directly inside ``path`` are considered, and only if their name
    is a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp, optionally followed by ``.pbm.json``.
    The date is read from the name, not from filesystem metadata. The most
    recent Sunday is today itself when today is a Sunday. The newest regular
    file and the newest directory for that day are uploaded with ``s3cmd put``,
    but only when both were found.

    Args:
        path (str): The path of the directory to scan.
        s3_target_path (str): The S3 path to upload the file and directory to.
        s3_config_path (str): The s3cmd configuration file passed via ``--config``.
            The default is resolved relative to the current working directory.
        today (datetime.datetime, optional): Reference moment used to determine
            the most recent Sunday. Defaults to the current local time.

    Returns:
        Tuple[Optional[str], Optional[str]]: The names of the newest file and
        directory. Either element is None when no matching entry exists.

    Raises:
        OSError: If ``path`` cannot be listed or the s3cmd executable cannot be started.
    """
    if today is None:
        # NOTE(review): the names end in "Z", which suggests UTC, while this
        # default is local time - pass a UTC `today` if the two must agree.
        today = datetime.datetime.now()

    # weekday() is 0 for Monday and 6 for Sunday, so this is 0 on a Sunday.
    days_since_sunday = (today.weekday() - 6) % 7
    last_sunday = (today - datetime.timedelta(days=days_since_sunday)).date()

    pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z(\.pbm\.json)?$")
    timestamp_length = len("YYYY-MM-DDTHH:MM:SSZ")

    sunday_files = []
    sunday_dirs = []

    for name in os.listdir(path):
        # fullmatch also rejects a name that merely ends in a newline.
        if not pattern.fullmatch(name):
            continue

        try:
            # Parse the whole timestamp so "newest" is decided by time of day too.
            timestamp = datetime.datetime.strptime(name[:timestamp_length], "%Y-%m-%dT%H:%M:%SZ")
        except ValueError:
            # Right shape but not a real date/time (e.g. month 13): ignore it
            # instead of aborting the whole scan.
            continue

        if timestamp.date() != last_sunday:
            continue

        full_path = os.path.join(path, name)
        if os.path.isfile(full_path):
            sunday_files.append((timestamp, name))
        elif os.path.isdir(full_path):
            sunday_dirs.append((timestamp, name))

    newest_file = max(sunday_files)[1] if sunday_files else None
    newest_dir = max(sunday_dirs)[1] if sunday_dirs else None

    # The file and the directory are only uploaded as a pair.
    if newest_file is None or newest_dir is None:
        print("Nothing was uploaded.")
        return newest_file, newest_dir

    file_status = subprocess.call(
        ["s3cmd", "put", "--config", s3_config_path, os.path.join(path, newest_file), s3_target_path]
    )
    dir_status = subprocess.call(
        ["s3cmd", "put", "--recursive", "--config", s3_config_path, os.path.join(path, newest_dir), s3_target_path]
    )

    # Report success only when s3cmd actually exited with status 0.
    if file_status == 0:
        print(f"Uploaded file: {newest_file}")
    else:
        print(f"Failed to upload file: {newest_file} (s3cmd exit code {file_status})")

    if dir_status == 0:
        print(f"Uploaded directory: {newest_dir}")
    else:
        print(f"Failed to upload directory: {newest_dir} (s3cmd exit code {dir_status})")

    return newest_file, newest_dir

def main():
    """Read the scan directory and S3 target from the command line and run the Sunday upload."""
    cli = argparse.ArgumentParser(description='Process some paths.')

    # Both arguments are required positionals, declared in call order.
    positionals = (
        ('path', 'The path of the directory to scan'),
        ('s3_target_path', 'The S3 path to upload the file and directory to'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, type=str, help=help_text)

    options = cli.parse_args()
    get_newest_in_sunday(options.path, options.s3_target_path)


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

How to call from the command line

We can then run the script from the command line, passing the directory to scan followed by the S3 target path:

python3 scan_upload.py /mnt/backups/server/rs /weekly-backup-folder/

First of the month version

The following version selects the newest file and the newest directory whose timestamped names fall on the first day of the current month.

import os
import datetime
import re
import subprocess
import argparse

def get_newest_from_first_of_month(path, s3_target_path, s3_config_path="./s3config.txt", today=None):
    """
    Upload the newest file and directory whose names are dated on the first of the current month.

    Only entries directly inside ``path`` are considered, and only if their name
    is a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp, optionally followed by ``.pbm.json``.
    The date is read from the name, not from filesystem metadata. The newest
    regular file and the newest directory dated on the first day of the current
    month are uploaded with ``s3cmd put``, but only when both were found.

    Args:
        path (str): The path of the directory to scan.
        s3_target_path (str): The S3 path to upload the file and directory to.
        s3_config_path (str): The s3cmd configuration file passed via ``--config``.
            The default is resolved relative to the current working directory.
        today (datetime.datetime, optional): Reference moment used to determine
            the current month. Defaults to the current local time.

    Returns:
        Tuple[Optional[str], Optional[str]]: The names of the newest file and
        directory. Either element is None when no matching entry exists.

    Raises:
        OSError: If ``path`` cannot be listed or the s3cmd executable cannot be started.
    """
    if today is None:
        # NOTE(review): the names end in "Z", which suggests UTC, while this
        # default is local time - pass a UTC `today` if the two must agree.
        today = datetime.datetime.now()

    first_of_month = today.replace(day=1).date()

    pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z(\.pbm\.json)?$")
    timestamp_length = len("YYYY-MM-DDTHH:MM:SSZ")

    monthly_files = []
    monthly_dirs = []

    for name in os.listdir(path):
        # fullmatch also rejects a name that merely ends in a newline.
        if not pattern.fullmatch(name):
            continue

        try:
            # Parse the whole timestamp so "newest" is decided by time of day too.
            timestamp = datetime.datetime.strptime(name[:timestamp_length], "%Y-%m-%dT%H:%M:%SZ")
        except ValueError:
            # Right shape but not a real date/time (e.g. month 13): ignore it
            # instead of aborting the whole scan.
            continue

        if timestamp.date() != first_of_month:
            continue

        full_path = os.path.join(path, name)
        if os.path.isfile(full_path):
            monthly_files.append((timestamp, name))
        elif os.path.isdir(full_path):
            monthly_dirs.append((timestamp, name))

    newest_file = max(monthly_files)[1] if monthly_files else None
    newest_dir = max(monthly_dirs)[1] if monthly_dirs else None

    # The file and the directory are only uploaded as a pair.
    if newest_file is None or newest_dir is None:
        print("Nothing was uploaded.")
        return newest_file, newest_dir

    file_status = subprocess.call(
        ["s3cmd", "put", "--config", s3_config_path, os.path.join(path, newest_file), s3_target_path]
    )
    dir_status = subprocess.call(
        ["s3cmd", "put", "--recursive", "--config", s3_config_path, os.path.join(path, newest_dir), s3_target_path]
    )

    # Report success only when s3cmd actually exited with status 0.
    if file_status == 0:
        print(f"Uploaded file: {newest_file}")
    else:
        print(f"Failed to upload file: {newest_file} (s3cmd exit code {file_status})")

    if dir_status == 0:
        print(f"Uploaded directory: {newest_dir}")
    else:
        print(f"Failed to upload directory: {newest_dir} (s3cmd exit code {dir_status})")

    return newest_file, newest_dir

def main():
    """Read the scan directory and S3 target from the command line and run the first-of-month upload."""
    cli = argparse.ArgumentParser(description='Process some paths.')

    # Both arguments are required positionals, declared in call order.
    positionals = (
        ('path', 'The path of the directory to scan'),
        ('s3_target_path', 'The S3 path to upload the file and directory to'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, type=str, help=help_text)

    options = cli.parse_args()
    get_newest_from_first_of_month(options.path, options.s3_target_path)


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

By Rudy