Upload Python Script
The following code uploads files and directories to S3 using the s3cmd tool. This version selects the files and directories whose names carry the date of the most recent Sunday and uploads the newest of each.
import os
import datetime
import re
import subprocess
import argparse
def get_newest_in_sunday(path, s3_target_path, s3_config_path="./s3config.txt"):
    """
    Upload the newest file and directory dated the most recent Sunday.

    Scans ``path`` for entries named ``YYYY-MM-DDTHH:MM:SSZ`` (optionally
    followed by ``.pbm.json``) and keeps those whose date part equals the
    most recent Sunday (today itself when today is a Sunday). The date is
    taken from the entry name, not from filesystem metadata. Only when both
    a matching file and a matching directory exist are the newest of each
    uploaded with ``s3cmd put``.

    Args:
        path (str): The path of the directory to scan.
        s3_target_path (str): The destination handed to ``s3cmd put``.
        s3_config_path (str): The s3cmd configuration file passed via
            ``--config``. Defaults to ``"./s3config.txt"``.

    Returns:
        Tuple[Optional[str], Optional[str]]: The names of the newest matching
        file and directory; ``None`` for whichever was not found. The names
        are returned whether or not the upload succeeded.
    """
    # NOTE(review): names end in "Z" (presumably UTC) while "today" is local
    # time; around midnight the two can disagree -- confirm which is intended.
    today = datetime.datetime.now()
    # weekday() is Monday=0 .. Sunday=6, so this is 0 on a Sunday.
    days_since_sunday = (today.weekday() - 6) % 7
    last_sunday = (today - datetime.timedelta(days=days_since_sunday)).date()
    pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z(\.pbm\.json)?$")

    sunday_files = []
    sunday_dirs = []
    for name in os.listdir(path):
        # Skip names that do not match the pattern
        if not pattern.match(name):
            continue
        try:
            entry_date = datetime.datetime.strptime(name.split('T')[0], "%Y-%m-%d").date()
        except ValueError:
            # The pattern only checks digit counts; a name such as
            # "2023-99-99T..." is not a real date and must not abort the scan.
            continue
        if entry_date != last_sunday:
            continue
        full_path = os.path.join(path, name)
        if os.path.isfile(full_path):
            sunday_files.append(name)
        elif os.path.isdir(full_path):
            sunday_dirs.append(name)

    # Every candidate shares the same date and the names are fixed-width
    # timestamps, so the lexicographically largest name is the newest.
    newest_file = max(sunday_files, default=None)
    newest_dir = max(sunday_dirs, default=None)

    # If both the file and directory exist, upload them
    if newest_file and newest_dir:
        file_status = subprocess.call(
            ["s3cmd", "put", "--config", s3_config_path,
             os.path.join(path, newest_file), s3_target_path]
        )
        dir_status = subprocess.call(
            ["s3cmd", "put", "--recursive", "--config", s3_config_path,
             os.path.join(path, newest_dir), s3_target_path]
        )
        # Report success only when s3cmd actually exited cleanly.
        if file_status == 0:
            print(f"Uploaded file: {newest_file}")
        else:
            print(f"Failed to upload file: {newest_file} (s3cmd exit code {file_status})")
        if dir_status == 0:
            print(f"Uploaded directory: {newest_dir}")
        else:
            print(f"Failed to upload directory: {newest_dir} (s3cmd exit code {dir_status})")
    else:
        print("Nothing was uploaded.")
    return newest_file, newest_dir
def main():
    """Read the scan directory and S3 target from the command line and run the Sunday upload."""
    cli = argparse.ArgumentParser(description='Process some paths.')
    positional_help = (
        ('path', 'The path of the directory to scan'),
        ('s3_target_path', 'The S3 path to upload the file and directory to'),
    )
    for arg_name, help_text in positional_help:
        cli.add_argument(arg_name, type=str, help=help_text)
    options = cli.parse_args()
    get_newest_in_sunday(options.path, options.s3_target_path)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
How to call from the command line
The script can then be run from the command line, passing the directory to scan followed by the S3 target path:
python3 scan_upload.py /mnt/backups/server/rs /weekly-backup-folder/
First of the month version
The following version selects and uploads the newest file and directory whose names carry the date of the first day of the current month.
import os
import datetime
import re
import subprocess
import argparse
def get_newest_from_first_of_month(path, s3_target_path, s3_config_path="./s3config.txt"):
    """
    Upload the newest file and directory dated the first of the current month.

    Scans ``path`` for entries named ``YYYY-MM-DDTHH:MM:SSZ`` (optionally
    followed by ``.pbm.json``) and keeps those whose date part equals the
    first day of the current month. The date is taken from the entry name,
    not from filesystem metadata. Only when both a matching file and a
    matching directory exist are the newest of each uploaded with
    ``s3cmd put``.

    Args:
        path (str): The path of the directory to scan.
        s3_target_path (str): The destination handed to ``s3cmd put``.
        s3_config_path (str): The s3cmd configuration file passed via
            ``--config``. Defaults to ``"./s3config.txt"``.

    Returns:
        Tuple[Optional[str], Optional[str]]: The names of the newest matching
        file and directory; ``None`` for whichever was not found. The names
        are returned whether or not the upload succeeded.
    """
    # NOTE(review): names end in "Z" (presumably UTC) while "today" is local
    # time; around a month boundary the two can disagree -- confirm intent.
    first_of_month = datetime.datetime.now().replace(day=1).date()
    pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z(\.pbm\.json)?$")

    monthly_files = []
    monthly_dirs = []
    for name in os.listdir(path):
        # Skip names that do not match the pattern
        if not pattern.match(name):
            continue
        try:
            entry_date = datetime.datetime.strptime(name.split('T')[0], "%Y-%m-%d").date()
        except ValueError:
            # The pattern only checks digit counts; a name such as
            # "2023-99-99T..." is not a real date and must not abort the scan.
            continue
        if entry_date != first_of_month:
            continue
        full_path = os.path.join(path, name)
        if os.path.isfile(full_path):
            monthly_files.append(name)
        elif os.path.isdir(full_path):
            monthly_dirs.append(name)

    # Every candidate shares the same date and the names are fixed-width
    # timestamps, so the lexicographically largest name is the newest.
    newest_file = max(monthly_files, default=None)
    newest_dir = max(monthly_dirs, default=None)

    # If both the file and directory exist, upload them
    if newest_file and newest_dir:
        file_status = subprocess.call(
            ["s3cmd", "put", "--config", s3_config_path,
             os.path.join(path, newest_file), s3_target_path]
        )
        dir_status = subprocess.call(
            ["s3cmd", "put", "--recursive", "--config", s3_config_path,
             os.path.join(path, newest_dir), s3_target_path]
        )
        # Report success only when s3cmd actually exited cleanly.
        if file_status == 0:
            print(f"Uploaded file: {newest_file}")
        else:
            print(f"Failed to upload file: {newest_file} (s3cmd exit code {file_status})")
        if dir_status == 0:
            print(f"Uploaded directory: {newest_dir}")
        else:
            print(f"Failed to upload directory: {newest_dir} (s3cmd exit code {dir_status})")
    else:
        print("Nothing was uploaded.")
    return newest_file, newest_dir
def main():
    """Read the scan directory and S3 target from the command line and run the first-of-month upload."""
    cli = argparse.ArgumentParser(description='Process some paths.')
    cli.add_argument(
        'path',
        type=str,
        help='The path of the directory to scan',
    )
    cli.add_argument(
        's3_target_path',
        type=str,
        help='The S3 path to upload the file and directory to',
    )
    parsed = cli.parse_args()
    scan_dir, s3_destination = parsed.path, parsed.s3_target_path
    get_newest_from_first_of_month(scan_dir, s3_destination)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()