Added a script to store a MongoDB database backup in S3
This commit is contained in:
parent b67d0e495f
commit d02a2fe42c
216
mongoDB script/backup-mongodb-to-s3.py
Normal file
@@ -0,0 +1,216 @@
import os
import json
import sys
import subprocess
import shutil
import logging
from datetime import datetime
from typing import Optional, Dict, Any

import boto3
from botocore.exceptions import ClientError, NoCredentialsError

class BackupManager:
    """
    Manages the lifecycle of a MongoDB backup: Dump -> Compress -> Upload -> Cleanup.
    """

    def __init__(self, config_path: str):
        self.config_path = config_path
        self.timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.logger = self._setup_logging()
        self.config = self._load_and_validate_config()

        # Setup path variables
        self.db_name = self.config["MONGODB_BACKUP"].get("DATABASE_NAME")
        self.backup_dir = os.path.join(os.getcwd(), f"temp_backup_{self.timestamp}")
        self.archive_name_base = f"backup_{self.db_name if self.db_name else 'all'}_{self.timestamp}"
        self.archive_path_zip = f"{self.archive_name_base}.zip"

    def _setup_logging(self) -> logging.Logger:
        """Sets up console and file logging."""
        logger = logging.getLogger("BackupAgent")
        logger.setLevel(logging.INFO)

        # Console Handler
        c_handler = logging.StreamHandler()
        c_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        c_handler.setFormatter(c_format)
        logger.addHandler(c_handler)

        # File Handler (Optional: Appends to a log file)
        f_handler = logging.FileHandler("backup_execution.log")
        f_handler.setFormatter(c_format)
        logger.addHandler(f_handler)

        return logger

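    # Sketch, not part of this commit: logging.getLogger("BackupAgent") returns a
    # process-wide singleton, so constructing BackupManager twice in one process
    # would attach duplicate handlers and double every log line. A guarded variant
    # (the name _setup_logging_once is hypothetical) avoids that:
    def _setup_logging_once(self) -> logging.Logger:
        logger = logging.getLogger("BackupAgent")
        logger.setLevel(logging.INFO)
        if not logger.handlers:  # attach handlers only on first construction
            fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            for handler in (logging.StreamHandler(), logging.FileHandler("backup_execution.log")):
                handler.setFormatter(fmt)
                logger.addHandler(handler)
        return logger
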
    def _load_and_validate_config(self) -> Dict[str, Any]:
        """Loads JSON config and validates required keys."""
        if not os.path.exists(self.config_path):
            self.logger.critical(f"Configuration file '{self.config_path}' not found.")
            sys.exit(1)

        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                config = json.load(f)
        except json.JSONDecodeError as e:
            self.logger.critical(f"Invalid JSON format in config file: {e}")
            sys.exit(1)

        # Basic Validation
        required_sections = ["MONGODB_BACKUP", "AWS_S3_CONFIGURATION"]
        for section in required_sections:
            if section not in config:
                self.logger.critical(f"Missing required config section: {section}")
                sys.exit(1)

        # Security Warning
        if config["AWS_S3_CONFIGURATION"].get("ACCESS_KEY"):
            self.logger.warning("Security Warning: AWS Keys found in plain text config. Consider using IAM Roles or Environment Variables.")

        return config

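    # Sketch, not part of this commit: to act on the security warning above without
    # changing the config schema, credentials could fall back to the standard AWS
    # environment variables when absent from the file. Returning None for both lets
    # boto3 use its normal credential chain (IAM role, ~/.aws/credentials). The name
    # _resolve_aws_credentials is hypothetical:
    def _resolve_aws_credentials(self) -> Dict[str, Optional[str]]:
        s3_cfg = self.config["AWS_S3_CONFIGURATION"]
        return {
            "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID") or s3_cfg.get("ACCESS_KEY"),
            "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY") or s3_cfg.get("SECRET_KEY"),
        }
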
    def check_prerequisites(self) -> bool:
        """Checks if mongodump is installed and accessible."""
        if shutil.which("mongodump") is None:
            self.logger.error("'mongodump' executable not found in system PATH.")
            return False
        return True

    def dump_mongodb(self) -> bool:
        """Runs mongodump to back up data locally."""
        mongo_config = self.config["MONGODB_BACKUP"]
        mongo_uri = mongo_config.get("MONGO_CONNECTION_STRING")

        if not mongo_uri:
            self.logger.error("MongoDB Connection String is missing.")
            return False

        self.logger.info(f"Starting MongoDB backup for: {self.db_name if self.db_name else 'All Databases'}")

        # Construct the command as an argument list (no shell interpolation)
        cmd = ["mongodump", "--uri", mongo_uri, "--out", self.backup_dir]
        if self.db_name:
            cmd.extend(["--db", self.db_name])

        try:
            # Capture output for logging if needed
            process = subprocess.run(
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            self.logger.info("MongoDB dump completed successfully.")
            self.logger.debug(process.stderr)  # mongodump usually writes its progress log to stderr
            return True
        except subprocess.CalledProcessError as e:
            self.logger.error(f"MongoDB dump failed with return code {e.returncode}")
            self.logger.error(f"Error details: {e.stderr}")
            return False
        except Exception as e:
            self.logger.error(f"Unexpected error during dump: {e}")
            return False

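    # Sketch, not part of this commit: a dump produced by dump_mongodb can be restored
    # by pointing mongorestore at the same --out directory. The method name and the
    # target_uri parameter are hypothetical:
    def restore_from_dump(self, target_uri: str) -> None:
        cmd = ["mongorestore", "--uri", target_uri, self.backup_dir]
        subprocess.run(cmd, check=True)
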
    def compress_backup(self) -> Optional[str]:
        """Compresses the backup directory into a zip file."""
        self.logger.info(f"Compressing folder: {self.backup_dir}")
        try:
            # shutil.make_archive expects the base name without extension
            shutil.make_archive(self.archive_name_base, 'zip', self.backup_dir)

            if os.path.exists(self.archive_path_zip):
                size_mb = os.path.getsize(self.archive_path_zip) / (1024 * 1024)
                self.logger.info(f"Compression successful. File: {self.archive_path_zip} ({size_mb:.2f} MB)")
                return self.archive_path_zip
            else:
                self.logger.error("Compression finished but file was not found.")
                return None
        except Exception as e:
            self.logger.error(f"Compression failed: {e}")
            return None

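    # Sketch, not part of this commit: shutil.make_archive also supports gzipped
    # tarballs ('gztar'), which often compress dump directories better than zip at
    # the cost of being less convenient to open on Windows. The method name is
    # hypothetical:
    def compress_backup_gztar(self) -> str:
        # Produces <archive_name_base>.tar.gz and returns its path
        return shutil.make_archive(self.archive_name_base, 'gztar', self.backup_dir)
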
    def upload_to_s3(self, file_path: str) -> bool:
        """Uploads the zip file to AWS S3 using boto3."""
        s3_config = self.config["AWS_S3_CONFIGURATION"]
        bucket_name = s3_config.get("S3_BUCKET_NAME")
        region = s3_config.get("S3_REGION", "us-east-1")
        folder = s3_config.get("S3_FOLDER", "backups")

        self.logger.info(f"Initializing S3 upload to bucket: {bucket_name}")

        try:
            s3_client = boto3.client(
                's3',
                aws_access_key_id=s3_config.get("ACCESS_KEY"),
                aws_secret_access_key=s3_config.get("SECRET_KEY"),
                region_name=region
            )

            file_name = os.path.basename(file_path)
            s3_key = f"{folder}/{file_name}"

            s3_client.upload_file(file_path, bucket_name, s3_key)
            self.logger.info(f"Upload successful. S3 URI: s3://{bucket_name}/{s3_key}")
            return True

        except ClientError as e:
            self.logger.error(f"AWS ClientError: {e}")
            return False
        except NoCredentialsError:
            self.logger.error("AWS Credentials not found or invalid.")
            return False
        except Exception as e:
            self.logger.error(f"Unexpected error during upload: {e}")
            return False

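    # Sketch, not part of this commit: upload_file already performs managed multipart
    # uploads and accepts a Config argument for tuning; for multi-GB dumps a variant
    # like this can help (the method name and the threshold/concurrency values are
    # illustrative):
    def upload_large_file(self, s3_client, file_path: str, bucket_name: str, s3_key: str) -> None:
        from boto3.s3.transfer import TransferConfig
        cfg = TransferConfig(
            multipart_threshold=64 * 1024 * 1024,  # switch to multipart above 64 MB
            multipart_chunksize=64 * 1024 * 1024,
            max_concurrency=8,                     # upload up to 8 parts in parallel
        )
        s3_client.upload_file(file_path, bucket_name, s3_key, Config=cfg)
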
    def cleanup(self):
        """Removes local artifacts to save space."""
        self.logger.info("Starting cleanup of local temporary files...")
        try:
            # Remove Zip
            if os.path.exists(self.archive_path_zip):
                os.remove(self.archive_path_zip)
                self.logger.info(f"Deleted file: {self.archive_path_zip}")

            # Remove Temp Directory
            if os.path.exists(self.backup_dir):
                shutil.rmtree(self.backup_dir)
                self.logger.info(f"Deleted directory: {self.backup_dir}")

        except Exception as e:
            self.logger.warning(f"Cleanup encountered issues: {e}")

    def run(self):
        """Main execution flow."""
        self.logger.info("=== Backup Process Started ===")

        if not self.check_prerequisites():
            sys.exit(1)

        if self.dump_mongodb():
            zip_file = self.compress_backup()

            upload_success = False
            if zip_file:
                upload_success = self.upload_to_s3(zip_file)

            self.cleanup()

            if upload_success:
                self.logger.info("=== Backup Process Completed Successfully ===")
            else:
                self.logger.error("=== Backup Process Failed during Upload stage ===")
                sys.exit(1)
        else:
            self.logger.error("=== Backup Process Failed during Dump stage ===")
            self.cleanup()  # Attempt to clean up partial dumps
            sys.exit(1)

if __name__ == "__main__":
    # Point this to your config file
    CONFIG_FILE = "config.json"

    agent = BackupManager(CONFIG_FILE)
    agent.run()
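# Sketch, not part of this commit: because CONFIG_FILE is hardcoded, running the script
# from cron or another working directory means editing the source. An optional CLI
# argument would avoid that:
#
#     if __name__ == "__main__":
#         # Usage: python backup-mongodb-to-s3.py [path/to/config.json]
#         config_file = sys.argv[1] if len(sys.argv) > 1 else "config.json"
#         BackupManager(config_file).run()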
12
mongoDB script/backup_execution.log
Normal file
@@ -0,0 +1,12 @@
2025-12-06 17:35:03,431 - WARNING - Security Warning: AWS Keys found in plain text config. Consider using IAM Roles or Environment Variables.
2025-12-06 17:35:03,432 - INFO - === Backup Process Started ===
2025-12-06 17:35:03,435 - INFO - Starting MongoDB backup for: MarcoBMSLocalDev
2025-12-06 17:35:05,290 - INFO - MongoDB dump completed successfully.
2025-12-06 17:35:05,291 - INFO - Compressing folder: D:\Marco AIOT\gita\release-scripts\mongoDB script\temp_backup_2025-12-06_17-35-03
2025-12-06 17:35:05,379 - INFO - Compression successful. File: backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip (0.48 MB)
2025-12-06 17:35:05,379 - INFO - Initializing S3 upload to bucket: testenv-marco-pms-documents
2025-12-06 17:35:08,588 - INFO - Upload successful. S3 URI: s3://testenv-marco-pms-documents/mongodb_backups/backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip
2025-12-06 17:35:08,594 - INFO - Starting cleanup of local temporary files...
2025-12-06 17:35:08,595 - INFO - Deleted file: backup_MarcoBMSLocalDev_2025-12-06_17-35-03.zip
2025-12-06 17:35:08,616 - INFO - Deleted directory: D:\Marco AIOT\gita\release-scripts\mongoDB script\temp_backup_2025-12-06_17-35-03
2025-12-06 17:35:08,616 - INFO - === Backup Process Completed Successfully ===
@@ -1,8 +1,23 @@
 {
-    "MONGODB":{
-        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017",
-        "DATABASE_NAME": "DotNetLogsDev",
+    "MONGODB_LOGS": {
+        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017/?authSource=admin&replicaSet=rs01&directConnection=true",
+        "DATABASE_NAME": "DotNetLogsProd",
         "COLLECTION_NAME": "api-logs"
     },
-    "DAYS_THRESHOLD": 30
+    "MONGODB_BACKUP": {
+        "MONGO_CONNECTION_STRING": "mongodb://devuser:DevPass123@147.93.98.152:27017/?authSource=admin&replicaSet=rs01&directConnection=true",
+        "DATABASE_NAME": "MarcoBMSLocalDev"
+    },
+    "DAYS_THRESHOLD": 30,
+    "AWS_S3_CONFIGURATION": {
+        "ACCESS_KEY": "xxxxx",
+        "SECRET_KEY": "xxxxx",
+        "S3_BUCKET_NAME": "xxxxx",
+        "S3_REGION": "us-east-1",
+        "S3_FOLDER": "mongodb_backups"
+    },
+    "LOGGING": {
+        "LOG_FILE": "backup_process.log",
+        "LOG_LEVEL": "INFO"
+    }
 }
@@ -49,11 +49,23 @@ def delete_old_documents(mongo_uri, db_name, collection_name, days_threshold):
     # We explicitly cast to int to prevent type errors
     cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=int(days_threshold))
 
-    logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")
+    # logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")
 
     # 3. Define the query
     # Using "Timestamp" as requested. Ensure this matches your DB schema.
-    query = {"Timestamp": {"$lt": cutoff_date}}
+    # query = {"Timestamp": {"$lt": cutoff_date}}
+
+    start_of_day = datetime.datetime.fromisoformat("2025-05-03T00:00:00")
+    end_of_day = start_of_day + datetime.timedelta(days=1)
+
+    logger.info(f"Deleting documents between: {start_of_day} and {end_of_day}")
+
+    query = {
+        "Timestamp": {
+            "$gte": start_of_day,
+            "$lt": end_of_day
+        }
+    }
 
     # 4. Perform the deletion
     logger.info(f"Executing delete query on {db_name}.{collection_name}...")
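The hunk above builds the new date-range query, but the delete call itself sits outside the hunk. For context, a minimal standalone sketch of the same deletion with pymongo (the URI is a placeholder; the database and collection names are taken from the config above, not from this diff):

import datetime
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")  # placeholder URI
start_of_day = datetime.datetime.fromisoformat("2025-05-03T00:00:00")
end_of_day = start_of_day + datetime.timedelta(days=1)
result = client["DotNetLogsProd"]["api-logs"].delete_many(
    {"Timestamp": {"$gte": start_of_day, "$lt": end_of_day}}
)
print(f"Deleted {result.deleted_count} documents")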
@@ -82,7 +94,7 @@ if __name__ == "__main__":
     config = load_config(GLOBAL_CONFIG_PATH)
 
     # --- CONFIGURATION ---
-    MONGODB_CONFIG = config.get("MONGODB", {})
+    MONGODB_CONFIG = config.get("MONGODB_LOGS", {})
 
     # Use .get() with defaults for safety
     MONGO_URI = MONGODB_CONFIG.get("MONGO_CONNECTION_STRING", "mongodb://localhost:27017/")