From d02a2fe42cffdcf4188abb427209da5de65faae0 Mon Sep 17 00:00:00 2001
From: "ashutosh.nehete"
Date: Sat, 6 Dec 2025 17:36:05 +0530
Subject: [PATCH] Add a script to back up a MongoDB database to S3

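The new script chains four stages: mongodump into a timestamped temp
directory, zip compression via shutil.make_archive, upload to S3 with
boto3, and cleanup of the local artifacts. Connection, bucket, and
logging settings are read from config.json. Note that CONFIG_FILE =
"config.json" is resolved against the current working directory, so the
script should be launched from its own folder; a minimal invocation
(paths are an assumption about the checkout layout):

    cd "mongoDB script"
    python backup-mongodb-to-s3.py
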
---
 mongoDB script/backup-mongodb-to-s3.py | 216 +++++++++++++++++++++++++
 mongoDB script/backup_execution.log    |  12 ++
 mongoDB script/config.json             |  23 ++-
 mongoDB script/delete-old-logs.py      |  18 ++-
 4 files changed, 262 insertions(+), 7 deletions(-)
 create mode 100644 mongoDB script/backup-mongodb-to-s3.py
 create mode 100644 mongoDB script/backup_execution.log

diff --git a/mongoDB script/backup-mongodb-to-s3.py b/mongoDB script/backup-mongodb-to-s3.py
new file mode 100644
index 0000000..c5de377
--- /dev/null
+++ b/mongoDB script/backup-mongodb-to-s3.py
@@ -0,0 +1,216 @@
+import os
+import json
+import sys
+import subprocess
+import shutil
+import boto3
+import logging
+from datetime import datetime
+from botocore.exceptions import ClientError, NoCredentialsError
+from typing import Optional, Dict, Any
+
+class BackupManager:
+    """
+    Manages the lifecycle of a MongoDB backup: Dump -> Compress -> Upload -> Cleanup.
+    """
+
+    def __init__(self, config_path: str):
+        self.config_path = config_path
+        self.timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        self.logger = self._setup_logging()
+        self.config = self._load_and_validate_config()
+
+        # Setup path variables
+        self.db_name = self.config["MONGODB_BACKUP"].get("DATABASE_NAME")
+        self.backup_dir = os.path.join(os.getcwd(), f"temp_backup_{self.timestamp}")
+        self.archive_name_base = f"backup_{self.db_name if self.db_name else 'all'}_{self.timestamp}"
+        self.archive_path_zip = f"{self.archive_name_base}.zip"
+
+    def _setup_logging(self) -> logging.Logger:
+        """Sets up console and file logging."""
+        logger = logging.getLogger("BackupAgent")
+        logger.setLevel(logging.INFO)
+
+        # Console Handler
+        c_handler = logging.StreamHandler()
+        c_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        c_handler.setFormatter(c_format)
+        logger.addHandler(c_handler)
+
+        # File Handler (Optional: Appends to a log file)
+        f_handler = logging.FileHandler("backup_execution.log")
+        f_handler.setFormatter(c_format)
+        logger.addHandler(f_handler)
+
+        return logger
+
+    def _load_and_validate_config(self) -> Dict[str, Any]:
+        """Loads JSON config and validates required keys."""
+        if not os.path.exists(self.config_path):
+            self.logger.critical(f"Configuration file '{self.config_path}' not found.")
+            sys.exit(1)
+
+        try:
+            with open(self.config_path, "r", encoding="utf-8") as f:
+                config = json.load(f)
+        except json.JSONDecodeError as e:
+            self.logger.critical(f"Invalid JSON format in config file: {e}")
+            sys.exit(1)
+
+        # Basic Validation
+        required_sections = ["MONGODB_BACKUP", "AWS_S3_CONFIGURATION"]
+        for section in required_sections:
+            if section not in config:
+                self.logger.critical(f"Missing required config section: {section}")
+                sys.exit(1)
+
+        # Security Warning
+        if config["AWS_S3_CONFIGURATION"].get("ACCESS_KEY"):
+            self.logger.warning("Security Warning: AWS Keys found in plain text config. Consider using IAM Roles or Environment Variables.")
+
+        return config
+
+    def check_prerequisites(self) -> bool:
+        """Checks if mongodump is installed and accessible."""
+        if shutil.which("mongodump") is None:
+            self.logger.error("'mongodump' executable not found in system PATH.")
+            return False
+        return True
+
+    def dump_mongodb(self) -> bool:
+        """Runs mongodump to back up data locally."""
+        mongo_config = self.config["MONGODB_BACKUP"]
+        mongo_uri = mongo_config.get("MONGO_CONNECTION_STRING")
+
+        if not mongo_uri:
+            self.logger.error("MongoDB Connection String is missing.")
+            return False
+
+        self.logger.info(f"Starting MongoDB backup for: {self.db_name if self.db_name else 'All Databases'}")
+
+        # Build the command as an argument list (no shell) to avoid injection
+        cmd = ["mongodump", "--uri", mongo_uri, "--out", self.backup_dir]
+        if self.db_name:
+            cmd.extend(["--db", self.db_name])
+
+        try:
+            # Capture output for logging if needed
+            process = subprocess.run(
+                cmd,
+                check=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True
+            )
+            self.logger.info("MongoDB dump completed successfully.")
+            self.logger.debug(process.stderr)  # mongodump writes its progress log to stderr
+            return True
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"MongoDB dump failed with return code {e.returncode}")
+            self.logger.error(f"Error details: {e.stderr}")
+            return False
+        except Exception as e:
+            self.logger.error(f"Unexpected error during dump: {e}")
+            return False
+
+    def compress_backup(self) -> Optional[str]:
+        """Compresses the backup directory into a zip file."""
+        self.logger.info(f"Compressing folder: {self.backup_dir}")
+        try:
+            # shutil.make_archive expects the base name without extension
+            shutil.make_archive(self.archive_name_base, 'zip', self.backup_dir)
+
+            if os.path.exists(self.archive_path_zip):
+                size_mb = os.path.getsize(self.archive_path_zip) / (1024 * 1024)
+                self.logger.info(f"Compression successful. File: {self.archive_path_zip} ({size_mb:.2f} MB)")
+                return self.archive_path_zip
+            else:
+                self.logger.error("Compression finished but file was not found.")
+                return None
+        except Exception as e:
+            self.logger.error(f"Compression failed: {e}")
+            return None
+
+    def upload_to_s3(self, file_path: str) -> bool:
+        """Uploads the zip file to AWS S3 using boto3."""
+        s3_config = self.config["AWS_S3_CONFIGURATION"]
+        bucket_name = s3_config.get("S3_BUCKET_NAME")
+        region = s3_config.get("S3_REGION", "us-east-1")
+        folder = s3_config.get("S3_FOLDER", "backups")
+
+        self.logger.info(f"Initializing S3 upload to bucket: {bucket_name}")
+
+        try:
+            s3_client = boto3.client(
+                's3',
+                aws_access_key_id=s3_config.get("ACCESS_KEY"),
+                aws_secret_access_key=s3_config.get("SECRET_KEY"),
+                region_name=region
+            )
+
+            file_name = os.path.basename(file_path)
+            s3_key = f"{folder}/{file_name}"
+
+            s3_client.upload_file(file_path, bucket_name, s3_key)
+            self.logger.info(f"Upload successful. S3 URI: s3://{bucket_name}/{s3_key}")
+            return True
+
+        except ClientError as e:
+            self.logger.error(f"AWS ClientError: {e}")
+            return False
+        except NoCredentialsError:
+            self.logger.error("AWS Credentials not found or invalid.")
+            return False
+        except Exception as e:
+            self.logger.error(f"Unexpected error during upload: {e}")
+            return False
+
+    def cleanup(self):
+        """Removes local artifacts to save space."""
+        self.logger.info("Starting cleanup of local temporary files...")
+        try:
+            # Remove Zip
+            if os.path.exists(self.archive_path_zip):
+                os.remove(self.archive_path_zip)
+                self.logger.info(f"Deleted file: {self.archive_path_zip}")
+
+            # Remove Temp Directory
+            if os.path.exists(self.backup_dir):
+                shutil.rmtree(self.backup_dir)
+                self.logger.info(f"Deleted directory: {self.backup_dir}")
+
+        except Exception as e:
+            self.logger.warning(f"Cleanup encountered issues: {e}")
+
+    def run(self):
+        """Main execution flow."""
+        self.logger.info("=== Backup Process Started ===")
+
+        if not self.check_prerequisites():
+            sys.exit(1)
+
+        if self.dump_mongodb():
+            zip_file = self.compress_backup()
+
+            upload_success = False
+            if zip_file:
+                upload_success = self.upload_to_s3(zip_file)
+
+            self.cleanup()
+
+            if upload_success:
+                self.logger.info("=== Backup Process Completed Successfully ===")
+            else:
+                self.logger.error("=== Backup Process Failed during Upload stage ===")
+                sys.exit(1)
+        else:
+            self.logger.error("=== Backup Process Failed during Dump stage ===")
+            self.cleanup()  # Attempt to clean up partial dumps
+            sys.exit(1)
+
+if __name__ == "__main__":
+    # Point this to your config file
+    CONFIG_FILE = "config.json"
+
+    agent = BackupManager(CONFIG_FILE)
+    agent.run()
\ No newline at end of file
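Note on the script's own security warning: if ACCESS_KEY/SECRET_KEY are
removed from config.json, boto3 can resolve credentials itself. A
minimal sketch (not wired into the script) relying on the default
credential chain:

    import boto3

    # With no explicit keys, boto3 falls back to its default chain:
    # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables,
    # the shared ~/.aws/credentials file, or an attached IAM role.
    s3_client = boto3.client("s3", region_name="us-east-1")
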
diff --git a/mongoDB script/backup_execution.log b/mongoDB script/backup_execution.log
new file mode 100644
index 0000000..3549a1b
--- /dev/null
+++ b/mongoDB script/backup_execution.log
@@ -0,0 +1,12 @@
+2025-12-06 17:35:03,431 - WARNING - Security Warning: AWS Keys found in plain text config. Consider using IAM Roles or Environment Variables.
+2025-12-06 17:35:03,432 - INFO - === Backup Process Started ===
+2025-12-06 17:35:03,435 - INFO - Starting MongoDB backup for: MarcoBMSLocalDev
+2025-12-06 17:35:05,290 - INFO - MongoDB dump completed successfully.
+2025-12-06 17:35:05,291 - INFO - Compressing folder: D:\Marco AIOT\gita\release-scripts\mongoDB script\temp_backup_2025-12-06_17-35-03
+2025-12-06 17:35:05,379 - INFO - Compression successful. File: backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip (0.48 MB)
+2025-12-06 17:35:05,379 - INFO - Initializing S3 upload to bucket: testenv-marco-pms-documents
+2025-12-06 17:35:08,588 - INFO - Upload successful. S3 URI: s3://testenv-marco-pms-documents/mongodb_backups/backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip
+2025-12-06 17:35:08,594 - INFO - Starting cleanup of local temporary files...
+2025-12-06 17:35:08,595 - INFO - Deleted file: backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip
+2025-12-06 17:35:08,616 - INFO - Deleted directory: D:\Marco AIOT\gita\release-scripts\mongoDB script\temp_backup_2025-12-06_17-35-03
+2025-12-06 17:35:08,616 - INFO - === Backup Process Completed Successfully ===
diff --git a/mongoDB script/config.json b/mongoDB script/config.json
index 1a7b129..51fa57d 100644
--- a/mongoDB script/config.json
+++ b/mongoDB script/config.json
@@ -1,8 +1,23 @@
 {
-    "MONGODB":{
-        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017",
-        "DATABASE_NAME": "DotNetLogsDev",
+    "MONGODB_LOGS": {
+        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017/?authSource=admin&replicaSet=rs01&directConnection=true",
+        "DATABASE_NAME": "DotNetLogsProd",
         "COLLECTION_NAME": "api-logs"
     },
-    "DAYS_THRESHOLD": 30
+    "MONGODB_BACKUP": {
+        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017/?authSource=admin&replicaSet=rs01&directConnection=true",
+        "DATABASE_NAME": "MarcoBMSLocalDev"
+    },
+    "DAYS_THRESHOLD": 30,
+    "AWS_S3_CONFIGURATION": {
+        "ACCESS_KEY": "xxxxx",
+        "SECRET_KEY": "xxxxx",
+        "S3_BUCKET_NAME": "xxxxx",
+        "S3_REGION": "us-east-1",
+        "S3_FOLDER": "mongodb_backups"
+    },
+    "LOGGING": {
+        "LOG_FILE": "backup_process.log",
+        "LOG_LEVEL": "INFO"
+    }
 }
\ No newline at end of file
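Note: the new LOGGING block is not yet consumed by
backup-mongodb-to-s3.py, which hardcodes backup_execution.log at INFO
level. A sketch of honoring it inside _setup_logging (this assumes the
config dict is loaded before the logger is built, the reverse of the
current __init__ order):

    log_cfg = config.get("LOGGING", {})
    # Map the configured level name to a logging constant, defaulting to INFO.
    level = getattr(logging, log_cfg.get("LOG_LEVEL", "INFO").upper(), logging.INFO)
    logger.setLevel(level)
    f_handler = logging.FileHandler(log_cfg.get("LOG_FILE", "backup_execution.log"))
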
diff --git a/mongoDB script/delete-old-logs.py b/mongoDB script/delete-old-logs.py
index 288ced8..782ee3c 100644
--- a/mongoDB script/delete-old-logs.py
+++ b/mongoDB script/delete-old-logs.py
@@ -49,11 +49,23 @@ def delete_old_documents(mongo_uri, db_name, collection_name, days_threshold):
     # We explicitly cast to int to prevent type errors
     cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=int(days_threshold))
 
-    logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")
+    # logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")
 
     # 3. Define the query
     # Using "Timestamp" as requested. Ensure this matches your DB schema.
-    query = {"Timestamp": {"$lt": cutoff_date}}
+    # query = {"Timestamp": {"$lt": cutoff_date}}
+
+    start_of_day = datetime.datetime.fromisoformat("2025-05-03T00:00:00")
+    end_of_day = start_of_day + datetime.timedelta(days=1)
+
+    logger.info(f"Deleting documents between: {start_of_day} and {end_of_day}")
+
+    query = {
+        "Timestamp": {
+            "$gte": start_of_day,
+            "$lt": end_of_day
+        }
+    }
 
     # 4. Perform the deletion
     logger.info(f"Executing delete query on {db_name}.{collection_name}...")
@@ -82,7 +94,7 @@ if __name__ == "__main__":
     config = load_config(GLOBAL_CONFIG_PATH)
 
     # --- CONFIGURATION ---
-    MONGODB_CONFIG = config.get("MONGODB", {})
+    MONGODB_CONFIG = config.get("MONGODB_LOGS", {})
 
     # Use .get() with defaults for safety
     MONGO_URI = MONGODB_CONFIG.get("MONGO_CONNECTION_STRING", "mongodb://localhost:27017/")
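Note: the first hunk above pins deletion to the fixed day 2025-05-03
rather than the DAYS_THRESHOLD-based retention window, which reads like
a one-off cleanup. The rolling cutoff it replaces survives in the
comments and can be restored verbatim:

    cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=int(days_threshold))
    query = {"Timestamp": {"$lt": cutoff_date}}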