import datetime
import json
import logging
import sys

from pymongo import MongoClient, errors

# Setup logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        # logging.FileHandler("mongo_cleanup.log")  # Uncomment to log to a file
    ]
)
logger = logging.getLogger(__name__)


def load_config(config_path):
    """Load configuration from a JSON file.

    Args:
        config_path: Path of the JSON configuration file.

    Returns:
        The parsed top-level JSON object as a dict, or an empty dict when
        the file does not exist (a warning is logged in that case).

    Exits the process with status 1 when the file exists but cannot be read
    or parsed, or when its top-level value is not a JSON object.
    """
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError:
        logger.warning(
            "'%s' file not found. Using defaults/environment variables if available.",
            config_path,
        )
        return {}
    except Exception as e:
        logger.critical("Failed to load config: %s", e)
        sys.exit(1)

    # Callers use dict lookups on the result; anything else (list, string,
    # number) is a malformed config and should fail here with a clear message.
    if not isinstance(config, dict):
        logger.critical(
            "Failed to load config: top-level JSON value must be an object, got %s",
            type(config).__name__,
        )
        sys.exit(1)
    return config


def _redact_uri(mongo_uri):
    """Return *mongo_uri* with any ``user:password@`` part masked for logging."""
    text = str(mongo_uri)
    scheme, sep, rest = text.partition("://")
    if not sep:
        # No scheme separator: treat the whole string as the part to inspect.
        scheme, rest = "", text
    # Split on the LAST "@" so that a stray, unescaped "@" inside the
    # password can never leave part of the secret in the output.
    _userinfo, at, location = rest.rpartition("@")
    if not at:
        return text
    return f"{scheme}{sep}***@{location}"


def _parse_days_threshold(days_threshold):
    """Validate *days_threshold* and return it as a non-negative int.

    Raises:
        ValueError: If the value is missing, a boolean, not convertible to
            an integer, or negative.
    """
    if days_threshold is None:
        raise ValueError("DAYS_THRESHOLD configuration is missing or None.")
    # bool is a subclass of int; a JSON `true` must not silently mean "1 day".
    if isinstance(days_threshold, bool):
        raise ValueError(
            f"DAYS_THRESHOLD must be a number of days, got {days_threshold!r}."
        )
    try:
        days = int(days_threshold)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(
            f"DAYS_THRESHOLD must be a whole number of days, got {days_threshold!r}."
        ) from exc
    # A negative threshold would move the cutoff into the future and delete
    # every document in the collection.
    if days < 0:
        raise ValueError(f"DAYS_THRESHOLD must not be negative, got {days}.")
    return days


def delete_old_documents(mongo_uri, db_name, collection_name, days_threshold):
    """Delete documents whose "Timestamp" is older than *days_threshold* days.

    Connects to MongoDB, pings the server, computes a UTC cutoff of
    ``now - days_threshold`` and runs a single ``delete_many`` with the
    filter ``{"Timestamp": {"$lt": cutoff}}``.

    Args:
        mongo_uri: MongoDB connection string.
        db_name: Name of the database holding the collection.
        collection_name: Name of the collection to clean up.
        days_threshold: Number of days to keep; must be convertible to a
            non-negative integer.

    Returns:
        The number of deleted documents on success, or ``None`` when the
        operation failed. Failures are logged, not raised.
    """
    client = None
    try:
        # Validation
        retention_days = _parse_days_threshold(days_threshold)

        # 1. Connect to MongoDB (credentials are masked before logging)
        logger.info("Connecting to MongoDB at %s...", _redact_uri(mongo_uri))
        client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)

        # MongoClient connects lazily; the ping forces server selection so a
        # bad URI or unreachable server fails here, before any delete runs.
        client.admin.command('ping')
        logger.info("Connected successfully.")

        db = client[db_name]
        collection = db[collection_name]

        # 2. Calculate the cutoff date (timezone-aware UTC for consistency)
        cutoff_date = (
            datetime.datetime.now(datetime.timezone.utc)
            - datetime.timedelta(days=retention_days)
        )
        logger.info("Targeting documents created before: %s (UTC)", cutoff_date)

        # 3. Define the query
        # Using "Timestamp" as requested. Ensure this matches your DB schema.
        query = {"Timestamp": {"$lt": cutoff_date}}

        # 4. Perform the deletion
        logger.info("Executing delete query on %s.%s...", db_name, collection_name)
        result = collection.delete_many(query)

        logger.info("Operation complete. Deleted %s documents.", result.deleted_count)
        return result.deleted_count

    except errors.ServerSelectionTimeoutError:
        logger.error("Could not connect to MongoDB. Check URI and network status.")
    except errors.PyMongoError as e:
        logger.error("MongoDB error occurred: %s", e)
    except ValueError as ve:
        logger.error("Configuration error: %s", ve)
    except Exception as e:
        # Top-level boundary: keep the traceback so the cause can be diagnosed.
        logger.exception("An unexpected error occurred: %s", e)
    finally:
        # Close the connection
        if client is not None:
            client.close()
            logger.debug("MongoDB connection closed.")
    return None


def main():
    """Load ``config.json`` and run the cleanup.

    Returns:
        Process exit status: 0 on success, 1 when the cleanup failed.
    """
    global_config_path = "config.json"

    # Load Configuration
    config = load_config(global_config_path)

    # --- CONFIGURATION ---
    # `or {}` also covers a "MONGODB": null entry, not just a missing key.
    mongodb_config = config.get("MONGODB") or {}

    # Use .get() with defaults for safety
    mongo_uri = mongodb_config.get("MONGO_CONNECTION_STRING", "mongodb://localhost:27017/")
    db_name = mongodb_config.get("DATABASE_NAME", "DotNetLogsProd")
    collection_name = mongodb_config.get("COLLECTION_NAME", "api-logs")

    # Days to keep (anything older than this gets deleted); read from the
    # root level of the config.
    days_threshold = config.get("DAYS_THRESHOLD")

    # Fallback: If DAYS_THRESHOLD is missing in config, use a default
    if days_threshold is None:
        logger.warning("DAYS_THRESHOLD not found in config. Defaulting to 30 days.")
        days_threshold = 30
    # ---------------------

    # Pass configuration as arguments
    deleted = delete_old_documents(mongo_uri, db_name, collection_name, days_threshold)

    # Report failure through the exit status so schedulers can detect it.
    return 0 if deleted is not None else 1


if __name__ == "__main__":
    sys.exit(main())