# Source: release-scripts/mongoDB script/delete-old-logs.py
# (Repository viewer metadata: 115 lines, 4.1 KiB, Python)

import json
import sys
import datetime
import logging
from pymongo import MongoClient, errors
# Logging: INFO and above go to stdout, one timestamped line per record.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_LOG_HANDLERS = [
    logging.StreamHandler(sys.stdout),
    # logging.FileHandler("mongo_cleanup.log")  # Uncomment to log to a file
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_LOG_HANDLERS)

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
def load_config(config_path):
    """Load the script configuration from a JSON file.

    Args:
        config_path: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The parsed top-level JSON object as a dict, or an empty dict when the
        file does not exist (a warning is logged in that case).

    Raises:
        SystemExit: With status 1 when the file exists but cannot be read or
            parsed, or when its top-level value is not a JSON object.
    """
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        # Callers use config.get(...); a list/str/number would only fail later
        # with an unrelated AttributeError, so reject it here with a clear
        # message via the generic handler below.
        if not isinstance(config, dict):
            raise ValueError(
                f"top-level JSON value must be an object, got {type(config).__name__}"
            )
        return config
    except FileNotFoundError:
        logger.warning(f"'{config_path}' file not found. Using defaults/environment variables if available.")
        return {}
    except Exception as e:
        # Top-level boundary of the script: report the problem and stop.
        logger.critical(f"Failed to load config: {e}")
        sys.exit(1)
def _redact_credentials(mongo_uri):
    """Return the URI as text with any ``user:password@`` part replaced by ``***@``."""
    uri = str(mongo_uri)
    head, at_sign, tail = uri.rpartition("@")
    if not at_sign:
        # No userinfo section, nothing sensitive to hide.
        return uri
    scheme, separator, _userinfo = head.partition("://")
    if separator:
        return f"{scheme}{separator}***@{tail}"
    return f"***@{tail}"


def delete_old_documents(mongo_uri, db_name, collection_name, days_threshold):
    """Delete documents whose ``Timestamp`` is older than ``days_threshold`` days.

    The cutoff is ``now (UTC) - days_threshold days``; every document in
    ``db_name.collection_name`` with ``Timestamp`` strictly before the cutoff
    is removed with a single ``delete_many`` call.

    Args:
        mongo_uri: MongoDB connection string passed to ``MongoClient``.
        db_name: Name of the database holding the collection.
        collection_name: Name of the collection to clean up.
        days_threshold: Number of days of documents to keep. Anything accepted
            by ``int()``; must not be ``None`` or negative.

    Returns:
        The number of deleted documents, or ``None`` when the run failed.
        Failures (bad configuration, connection or server errors) are logged
        and not raised.
    """
    client = None
    try:
        # Validate the retention window before any network I/O so that a bad
        # configuration fails fast.
        if days_threshold is None:
            raise ValueError("DAYS_THRESHOLD configuration is missing or None.")
        retention_days = int(days_threshold)
        if retention_days < 0:
            # A negative window would move the cutoff into the future and
            # wipe the whole collection.
            raise ValueError(
                f"DAYS_THRESHOLD must not be negative (got {days_threshold!r})."
            )

        # 1. Connect to MongoDB (credentials are kept out of the log).
        logger.info(f"Connecting to MongoDB at {_redact_credentials(mongo_uri)}...")
        client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
        # MongoClient connects lazily; ping forces server selection now.
        client.admin.command('ping')
        logger.info("Connected successfully.")
        collection = client[db_name][collection_name]

        # 2. Calculate the cutoff date (timezone-aware UTC).
        cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
            days=retention_days
        )
        logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")

        # 3. Define the query.
        # NOTE(review): assumes "Timestamp" is stored as a BSON date; confirm
        # against the collection schema.
        query = {"Timestamp": {"$lt": cutoff_date}}

        # 4. Perform the deletion.
        logger.info(f"Executing delete query on {db_name}.{collection_name}...")
        result = collection.delete_many(query)
        logger.info(f"Operation complete. Deleted {result.deleted_count} documents.")
        return result.deleted_count
    except errors.ServerSelectionTimeoutError:
        logger.error("Could not connect to MongoDB. Check URI and network status.")
    except errors.PyMongoError as e:
        logger.error(f"MongoDB error occurred: {e}")
    except ValueError as ve:
        logger.error(f"Configuration error: {ve}")
    except Exception as e:
        # Keep the traceback for anything we did not anticipate.
        logger.exception(f"An unexpected error occurred: {e}")
    finally:
        # Close the connection even when the deletion failed part-way.
        if client is not None:
            client.close()
            logger.debug("MongoDB connection closed.")
    return None
if __name__ == "__main__":
    # All settings are read from config.json in the current working directory.
    GLOBAL_CONFIG_PATH = "config.json"
    config = load_config(GLOBAL_CONFIG_PATH)

    # Connection settings live under the "MONGODB_LOGS" section; each key
    # falls back to a built-in default when it is absent.
    MONGODB_CONFIG = config.get("MONGODB_LOGS", {})
    MONGO_URI = MONGODB_CONFIG.get(
        "MONGO_CONNECTION_STRING",
        "mongodb://localhost:27017/",
    )
    DB_NAME = MONGODB_CONFIG.get("DATABASE_NAME", "DotNetLogsProd")
    COLLECTION_NAME = MONGODB_CONFIG.get("COLLECTION_NAME", "api-logs")

    # The retention window (days to keep) is read from the root of the config,
    # not from the MONGODB_LOGS section.
    DAYS_THRESHOLD = config.get("DAYS_THRESHOLD")
    if DAYS_THRESHOLD is None:
        # Never hand None to the cleanup routine; use a 30-day window instead.
        logger.warning("DAYS_THRESHOLD not found in config. Defaulting to 30 days.")
        DAYS_THRESHOLD = 30

    # Run the cleanup with the resolved settings.
    delete_old_documents(
        MONGO_URI,
        DB_NAME,
        COLLECTION_NAME,
        DAYS_THRESHOLD,
    )