115 lines
4.1 KiB
Python
115 lines
4.1 KiB
Python
import json
|
|
import sys
|
|
import datetime
|
|
import logging
|
|
from pymongo import MongoClient, errors
|
|
|
|
# Configure the root logger once at import time: INFO and above, timestamped,
# written to standard output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        # logging.FileHandler("mongo_cleanup.log")  # Uncomment to log to a file
    ],
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
def load_config(config_path):
    """Read the JSON document at *config_path* and return the parsed value.

    Args:
        config_path: Path of the JSON configuration file to read (UTF-8).

    Returns:
        Whatever ``json.load`` produces for the file, or an empty dict when
        the file does not exist (a warning is logged in that case).

    Any other failure (unreadable file, malformed JSON, ...) is logged at
    CRITICAL level and terminates the process via ``sys.exit(1)``.
    """
    try:
        with open(config_path, "r", encoding="utf-8") as config_file:
            parsed = json.load(config_file)
    except FileNotFoundError:
        # A missing file is tolerated: callers fall back to their defaults.
        logger.warning(f"'{config_path}' file not found. Using defaults/environment variables if available.")
        return {}
    except Exception as e:
        # Anything else means the configuration cannot be trusted; abort.
        logger.critical(f"Failed to load config: {e}")
        sys.exit(1)
    return parsed
|
|
|
|
def _redact_mongo_uri(mongo_uri):
    """Return *mongo_uri* with any ``user:password@`` part replaced by ``***@``."""
    text = str(mongo_uri)
    scheme, separator, remainder = text.partition("://")
    if not separator:
        return text
    authority, slash, tail = remainder.partition("/")
    if "@" not in authority:
        return text
    hosts = authority.rpartition("@")[2]
    return f"{scheme}{separator}***@{hosts}{slash}{tail}"


def _parse_days_threshold(days_threshold):
    """Return *days_threshold* as a non-negative int or raise ``ValueError``."""
    if days_threshold is None:
        raise ValueError("DAYS_THRESHOLD configuration is missing or None.")
    try:
        days = int(days_threshold)
    except (TypeError, ValueError) as exc:
        raise ValueError(
            f"DAYS_THRESHOLD must be a whole number of days, got {days_threshold!r}."
        ) from exc
    if days < 0:
        # A negative value would move the cutoff into the future and match
        # every document in the collection.
        raise ValueError(f"DAYS_THRESHOLD must not be negative, got {days}.")
    return days


def _day_window(target_day):
    """Return ``(start, end)`` datetimes covering the calendar day of *target_day*."""
    if isinstance(target_day, str):
        target_day = datetime.datetime.fromisoformat(target_day)
    if isinstance(target_day, datetime.datetime):
        start = target_day.replace(hour=0, minute=0, second=0, microsecond=0)
    else:
        start = datetime.datetime.combine(target_day, datetime.time.min)
    return start, start + datetime.timedelta(days=1)


def delete_old_documents(mongo_uri, db_name, collection_name, days_threshold, *, target_day=None):
    """Delete documents from a MongoDB collection based on their "Timestamp" field.

    By default every document whose "Timestamp" is older than
    ``days_threshold`` days (measured from the current UTC time) is removed.
    Previously the threshold was validated but ignored and the query was
    hard-wired to a single calendar day; that one-off mode is still available
    through ``target_day``.

    Args:
        mongo_uri: MongoDB connection string. Credentials embedded in it are
            masked before the URI is logged.
        db_name: Name of the database holding the collection.
        collection_name: Name of the collection to clean up.
        days_threshold: Number of days to keep; must convert to a
            non-negative int.
        target_day: Optional ISO date string, ``date`` or ``datetime``. When
            given, only documents with "Timestamp" in
            ``[that day 00:00, next day 00:00)`` are deleted and the age
            cutoff is not applied.

    Returns:
        The number of deleted documents, or ``None`` when the operation
        failed. Failures are logged, not raised.

    NOTE(review): the query compares "Timestamp" against Python datetimes, so
    it presumably expects that field to be stored as a BSON date -- confirm
    against the collection schema before running on production data.
    """
    client = None
    try:
        # Validate and build the query first so that a configuration mistake
        # is reported without opening a connection.
        days = _parse_days_threshold(days_threshold)

        if target_day is None:
            cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
            logger.info(f"Targeting documents created before: {cutoff_date} (UTC)")
            query = {"Timestamp": {"$lt": cutoff_date}}
        else:
            start_of_day, end_of_day = _day_window(target_day)
            logger.info(f"Deleting documents between: {start_of_day} and {end_of_day}")
            query = {"Timestamp": {"$gte": start_of_day, "$lt": end_of_day}}

        # Connect; the ping forces server selection so that an unreachable
        # server is detected here rather than inside delete_many.
        logger.info(f"Connecting to MongoDB at {_redact_mongo_uri(mongo_uri)}...")
        client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
        client.admin.command('ping')
        logger.info("Connected successfully.")

        collection = client[db_name][collection_name]

        logger.info(f"Executing delete query on {db_name}.{collection_name}...")
        result = collection.delete_many(query)

        logger.info(f"Operation complete. Deleted {result.deleted_count} documents.")
        return result.deleted_count

    except errors.ServerSelectionTimeoutError:
        logger.error("Could not connect to MongoDB. Check URI and network status.")
    except errors.PyMongoError as e:
        logger.error(f"MongoDB error occurred: {e}")
    except ValueError as ve:
        logger.error(f"Configuration error: {ve}")
    except Exception as e:
        # Top-level boundary: keep the traceback so the failure can be diagnosed.
        logger.exception(f"An unexpected error occurred: {e}")
    finally:
        if client is not None:
            client.close()
            logger.debug("MongoDB connection closed.")
    return None
|
|
|
|
if __name__ == "__main__":
    # Settings are read from this JSON file, resolved against the current
    # working directory.
    GLOBAL_CONFIG_PATH = "config.json"
    config = load_config(GLOBAL_CONFIG_PATH)

    # --- CONFIGURATION ---
    # Connection settings live under the "MONGODB_LOGS" section; every lookup
    # has a built-in fallback so a missing key does not raise.
    MONGODB_CONFIG = config.get("MONGODB_LOGS", {})
    MONGO_URI = MONGODB_CONFIG.get("MONGO_CONNECTION_STRING", "mongodb://localhost:27017/")
    DB_NAME = MONGODB_CONFIG.get("DATABASE_NAME", "DotNetLogsProd")
    COLLECTION_NAME = MONGODB_CONFIG.get("COLLECTION_NAME", "api-logs")

    # The threshold is read from the root of the config, not from the
    # "MONGODB_LOGS" section. A missing (or null) value falls back to 30 so
    # that None is never handed to delete_old_documents.
    DAYS_THRESHOLD = config.get("DAYS_THRESHOLD")
    if DAYS_THRESHOLD is None:
        logger.warning("DAYS_THRESHOLD not found in config. Defaulting to 30 days.")
        DAYS_THRESHOLD = 30
    # ---------------------

    # Hand the resolved settings to the cleanup routine.
    delete_old_documents(
        mongo_uri=MONGO_URI,
        db_name=DB_NAME,
        collection_name=COLLECTION_NAME,
        days_threshold=DAYS_THRESHOLD,
    )