Added a script to back up a MongoDB database to S3

ashutosh.nehete 2025-12-06 17:36:05 +05:30
parent b67d0e495f
commit d02a2fe42c
4 changed files with 262 additions and 7 deletions


@@ -0,0 +1,216 @@
import os
import json
import sys
import subprocess
import shutil
import boto3
import logging
from datetime import datetime
from botocore.exceptions import ClientError, NoCredentialsError
from typing import Optional, Dict, Any
class BackupManager:
"""
Manages the lifecycle of a MongoDB backup: Dump -> Compress -> Upload -> Cleanup.
"""
def __init__(self, config_path: str):
self.config_path = config_path
self.timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
self.logger = self._setup_logging()
self.config = self._load_and_validate_config()
# Setup path variables
self.db_name = self.config["MONGODB_BACKUP"].get("DATABASE_NAME")
self.backup_dir = os.path.join(os.getcwd(), f"temp_backup_{self.timestamp}")
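# mongodump creates this output directory when the dump runs if it does not already exist.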
self.archive_name_base = f"backup_{self.db_name if self.db_name else 'all'}_{self.timestamp}"
self.archive_path_zip = f"{self.archive_name_base}.zip"
def _setup_logging(self) -> logging.Logger:
"""Sets up console and file logging."""
logger = logging.getLogger("BackupAgent")
logger.setLevel(logging.INFO)
# Console Handler
c_handler = logging.StreamHandler()
c_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
c_handler.setFormatter(c_format)
logger.addHandler(c_handler)
# File Handler (Optional: Appends to a log file)
f_handler = logging.FileHandler("backup_execution.log")
f_handler.setFormatter(c_format)
logger.addHandler(f_handler)
return logger
def _load_and_validate_config(self) -> Dict[str, Any]:
"""Loads JSON config and validates required keys."""
if not os.path.exists(self.config_path):
self.logger.critical(f"Configuration file '{self.config_path}' not found.")
sys.exit(1)
try:
with open(self.config_path, "r", encoding="utf-8") as f:
config = json.load(f)
except json.JSONDecodeError as e:
self.logger.critical(f"Invalid JSON format in config file: {e}")
sys.exit(1)
# Basic Validation
required_sections = ["MONGODB_BACKUP", "AWS_S3_CONFIGURATION"]
for section in required_sections:
if section not in config:
self.logger.critical(f"Missing required config section: {section}")
sys.exit(1)
# Security Warning
if config["AWS_S3_CONFIGURATION"].get("ACCESS_KEY"):
self.logger.warning("Security Warning: AWS Keys found in plain text config. Consider using IAM Roles or Environment Variables.")
return config
def check_prerequisites(self) -> bool:
"""Checks if mongodump is installed and accessible."""
if shutil.which("mongodump") is None:
self.logger.error("'mongodump' executable not found in system PATH.")
return False
return True
def dump_mongodb(self) -> bool:
"""Runs mongodump to backup data locally."""
mongo_config = self.config["MONGODB_BACKUP"]
mongo_uri = mongo_config.get("MONGO_CONNECTION_STRING")
if not mongo_uri:
self.logger.error("MongoDB Connection String is missing.")
return False
self.logger.info(f"Starting MongoDB backup for: {self.db_name if self.db_name else 'All Databases'}")
# Construct command securely
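# Passing the arguments as a list (without shell=True) keeps the credential-bearing URI out of shell parsing.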
cmd = ["mongodump", "--uri", mongo_uri, "--out", self.backup_dir]
if self.db_name:
cmd.extend(["--db", self.db_name])
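# Note: mongodump accepts --db only when the connection string does not already name a database.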
try:
# Capture output for logging if needed
process = subprocess.run(
cmd,
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
self.logger.info("MongoDB dump completed successfully.")
self.logger.debug(process.stderr) # mongodump writes logs to stderr usually
return True
except subprocess.CalledProcessError as e:
self.logger.error(f"MongoDB dump failed with return code {e.returncode}")
self.logger.error(f"Error details: {e.stderr}")
return False
except Exception as e:
self.logger.error(f"Unexpected error during dump: {e}")
return False
def compress_backup(self) -> Optional[str]:
"""Compresses the backup directory into a zip file."""
self.logger.info(f"Compressing folder: {self.backup_dir}")
try:
# shutil.make_archive expects the base name without extension
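# Since archive_name_base has no directory component, the zip is written to the current working directory.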
shutil.make_archive(self.archive_name_base, 'zip', self.backup_dir)
if os.path.exists(self.archive_path_zip):
size_mb = os.path.getsize(self.archive_path_zip) / (1024 * 1024)
self.logger.info(f"Compression successful. File: {self.archive_path_zip} ({size_mb:.2f} MB)")
return self.archive_path_zip
else:
self.logger.error("Compression finished but file was not found.")
return None
except Exception as e:
self.logger.error(f"Compression failed: {e}")
return None
def upload_to_s3(self, file_path: str) -> bool:
"""Uploads the zip file to AWS S3 using boto3."""
s3_config = self.config["AWS_S3_CONFIGURATION"]
bucket_name = s3_config.get("S3_BUCKET_NAME")
region = s3_config.get("S3_REGION", "us-east-1")
folder = s3_config.get("S3_FOLDER", "backups")
self.logger.info(f"Initializing S3 upload to bucket: {bucket_name}")
try:
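# If ACCESS_KEY/SECRET_KEY are absent from the config, .get() returns None and boto3
# falls back to its default credential chain (environment variables, shared config, or an IAM role).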
s3_client = boto3.client(
's3',
aws_access_key_id=s3_config.get("ACCESS_KEY"),
aws_secret_access_key=s3_config.get("SECRET_KEY"),
region_name=region
)
file_name = os.path.basename(file_path)
s3_key = f"{folder}/{file_name}"
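# upload_file uses boto3's managed transfer, so large archives are uploaded in multiple parts automatically.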
s3_client.upload_file(file_path, bucket_name, s3_key)
self.logger.info(f"Upload successful. S3 URI: s3://{bucket_name}/{s3_key}")
return True
except ClientError as e:
self.logger.error(f"AWS ClientError: {e}")
return False
except NoCredentialsError:
self.logger.error("AWS Credentials not found or invalid.")
return False
except Exception as e:
self.logger.error(f"Unexpected error during upload: {e}")
return False
def cleanup(self):
"""Removes local artifacts to save space."""
self.logger.info("Starting cleanup of local temporary files...")
try:
# Remove Zip
if os.path.exists(self.archive_path_zip):
os.remove(self.archive_path_zip)
self.logger.info(f"Deleted file: {self.archive_path_zip}")
# Remove Temp Directory
if os.path.exists(self.backup_dir):
shutil.rmtree(self.backup_dir)
self.logger.info(f"Deleted directory: {self.backup_dir}")
except Exception as e:
self.logger.warning(f"Cleanup encountered issues: {e}")
def run(self):
"""Main execution flow."""
self.logger.info("=== Backup Process Started ===")
if not self.check_prerequisites():
sys.exit(1)
if self.dump_mongodb():
zip_file = self.compress_backup()
upload_success = False
if zip_file:
upload_success = self.upload_to_s3(zip_file)
self.cleanup()
if upload_success:
self.logger.info("=== Backup Process Completed Successfully ===")
else:
self.logger.error("=== Backup Process Failed during Upload stage ===")
sys.exit(1)
else:
self.logger.error("=== Backup Process Failed during Dump stage ===")
self.cleanup() # Attempt to clean up partial dumps
sys.exit(1)
if __name__ == "__main__":
# Point this to your config file
CONFIG_FILE = "config.json"
agent = BackupManager(CONFIG_FILE)
agent.run()


@@ -0,0 +1,12 @@
2025-12-06 17:35:03,431 - WARNING - Security Warning: AWS Keys found in plain text config. Consider using IAM Roles or Environment Variables.
2025-12-06 17:35:03,432 - INFO - === Backup Process Started ===
2025-12-06 17:35:03,435 - INFO - Starting MongoDB backup for: MarcoBMSLocalDev
2025-12-06 17:35:05,290 - INFO - MongoDB dump completed successfully.
2025-12-06 17:35:05,291 - INFO - Compressing folder: D:\Marco AIOT\gita\release-scripts\mongoDB script\temp_backup_2025-12-06_17-35-03
2025-12-06 17:35:05,379 - INFO - Compression successful. File: backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip (0.48 MB)
2025-12-06 17:35:05,379 - INFO - Initializing S3 upload to bucket: testenv-marco-pms-documents
2025-12-06 17:35:08,588 - INFO - Upload successful. S3 URI: s3://testenv-marco-pms-documents/mongodb_backups/backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip
2025-12-06 17:35:08,594 - INFO - Starting cleanup of local temporary files...
2025-12-06 17:35:08,595 - INFO - Deleted file: backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip
2025-12-06 17:35:08,616 - INFO - Deleted directory: D:\Marco AIOT\gita\release-scripts\mongoDB script\temp_backup_2025-12-06_17-35-03
2025-12-06 17:35:08,616 - INFO - === Backup Process Completed Successfully ===


@@ -1,8 +1,23 @@
 {
-    "MONGODB":{
-        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017",
-        "DATABASE_NAME": "DotNetLogsDev",
+    "MONGODB_LOGS": {
+        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017/?authSource=admin&replicaSet=rs01&directConnection=true",
+        "DATABASE_NAME": "DotNetLogsProd",
         "COLLECTION_NAME": "api-logs"
     },
-    "DAYS_THRESHOLD": 30
+    "MONGODB_BACKUP": {
+        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017/?authSource=admin&replicaSet=rs01&directConnection=true",
+        "DATABASE_NAME": "MarcoBMSLocalDev"
+    },
+    "DAYS_THRESHOLD": 30,
+    "AWS_S3_CONFIGURATION": {
+        "ACCESS_KEY": "xxxxx",
+        "SECRET_KEY": "xxxxx",
+        "S3_BUCKET_NAME": "xxxxx",
+        "S3_REGION": "us-east-1",
+        "S3_FOLDER": "mongodb_backups"
+    },
+    "LOGGING": {
+        "LOG_FILE": "backup_process.log",
+        "LOG_LEVEL": "INFO"
+    }
 }


@@ -49,11 +49,23 @@ def delete_old_documents(mongo_uri, db_name, collection_name, days_threshold):
     # We explicitly cast to int to prevent type errors
     cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=int(days_threshold))
-    logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")
+    # logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")
     # 3. Define the query
     # Using "Timestamp" as requested. Ensure this matches your DB schema.
-    query = {"Timestamp": {"$lt": cutoff_date}}
+    # query = {"Timestamp": {"$lt": cutoff_date}}
+    start_of_day = datetime.datetime.fromisoformat("2025-05-03T00:00:00")
+    end_of_day = start_of_day + datetime.timedelta(days=1)
+    logger.info(f"Deleting documents between: {start_of_day} and {end_of_day}")
+    query = {
+        "Timestamp": {
+            "$gte": start_of_day,
+            "$lt": end_of_day
+        }
+    }
     # 4. Perform the deletion
     logger.info(f"Executing delete query on {db_name}.{collection_name}...")
@@ -82,7 +94,7 @@ if __name__ == "__main__":
     config = load_config(GLOBAL_CONFIG_PATH)
     # --- CONFIGURATION ---
-    MONGODB_CONFIG = config.get("MONGODB", {})
+    MONGODB_CONFIG = config.get("MONGODB_LOGS", {})
     # Use .get() with defaults for safety
     MONGO_URI = MONGODB_CONFIG.get("MONGO_CONNECTION_STRING", "mongodb://localhost:27017/")