add dated migration for s3
This commit is contained in:
parent
3564e14e80
commit
71d4ae1820
176
aws-s3-migration/aws-s3-migrate-missing-documents.py
Normal file
@@ -0,0 +1,176 @@
import json
import os
import sys

import boto3
import mysql.connector
from botocore.exceptions import ClientError

import utils

# --- Configuration Loading ---

CONFIG_FILE = 'config.json'

def load_config():
    """Loads configuration from config.json."""
    if not os.path.exists(CONFIG_FILE):
        print(f"Error: Configuration file '{CONFIG_FILE}' not found.")
        sys.exit(1)
    try:
        with open(CONFIG_FILE, 'r') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in config file: {e}")
        sys.exit(1)

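# Expected config.json shape, sketched from the keys this script reads and the
# config.json diff later in this commit (values are placeholders, not real):
#
# {
#     "mysql": {"host": "...", "user": "...", "password": "...",
#               "database": "...", "table": "Documents"},
#     "aws": {"aws_access_key_id": "...", "aws_secret_access_key": "...",
#             "aws_region": "...", "source_bucket": "...",
#             "destination_bucket": "...", "destination_key_prefix": ""}
# }
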
def fetch_documents(db_config_full):
    """
    Connects to MySQL and fetches documents (ID and S3Key) for the dated window.
    NOTE: db_config_full includes the 'table' key, which must be filtered out for the connection.
    """
    conn = None
    cursor = None

    # Separate connection parameters from the table name
    table_name = db_config_full['table']
    connection_config = {k: v for k, v in db_config_full.items() if k != 'table'}

    try:
        # Connect using only valid connection parameters
        conn = mysql.connector.connect(**connection_config)
        cursor = conn.cursor()

        print(f"Fetching document IDs and S3Keys from table: {table_name}...")

        # Select ID and S3Key (or whatever column holds the key) for the dated window
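        # NOTE: the UploadedAt window below is hardcoded for this dated run.
        # Assuming UploadedAt is a DATETIME, BETWEEN is inclusive only up to
        # '2025-11-26 00:00:00', so documents uploaded later that day are skipped.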
query = f"SELECT ID, S3Key FROM {table_name} where UploadedAt between '2025-11-01' and '2025-11-26' order by UploadedAt desc"
|
||||||
|
print(query)
|
||||||
|
cursor.execute(query)
|
||||||
|
|
||||||
|
# Fetch results as a list of dictionaries/tuples
|
||||||
|
documents = cursor.fetchall()
|
||||||
|
print(f"Found {len(documents)} documents to process.")
|
||||||
|
return documents
|
||||||
|
|
||||||
|
except mysql.connector.Error as err:
|
||||||
|
print(f"MySQL Error: {err}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
finally:
|
||||||
|
if cursor:
|
||||||
|
cursor.close()
|
||||||
|
if conn and conn.is_connected():
|
||||||
|
conn.close()
|
||||||
|
|
||||||
def check_s3_key_exists(s3_client, bucket_name, key):
    """Returns True if the key exists in the bucket, False if it does not."""
    try:
        # Use head_object to retrieve metadata. It's faster than list_objects.
        s3_client.head_object(Bucket=bucket_name, Key=key)
        return True
    except ClientError as e:
        # If a client error is thrown, check if it was a 404 (Not Found) error.
        # If it was, then the key does not exist.
        error_code = e.response['Error']['Code']
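        # NOTE: without s3:ListBucket permission on the bucket, S3 reports a
        # missing key as 403 rather than 404, which this check re-raises.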
        if error_code == '404':
            return False
        else:
            # Handle other errors, like permission denied (403) or bucket not found.
            print(f"Error checking key '{key}' in bucket '{bucket_name}': {e}")
            # Re-raise the exception for other issues
            raise
    except Exception as e:
        # Handle other unexpected errors
        print(f"An unexpected error occurred: {e}")
        raise

def migrate_object(s3_client, aws_config, old_s3_key, doc_id):
    """Moves one object to the destination bucket and updates its DB row.

    Returns the new S3 key on success, or None on any failure.
    """
    # 1. Generate new key, preserving path structure
    key_prefix = aws_config['destination_key_prefix']
    new_s3_key = utils.generate_new_s3_key(old_s3_key, key_prefix)
    if not new_s3_key:
        print(f" -> Skipping ID {doc_id}: Could not generate new key from old key.")
        return None

    print(f" -> Calculated New Key: {new_s3_key}")

    # 2. Move S3 object (copy; the delete half of the move is currently disabled in utils.move_s3_object)
    move_successful = utils.move_s3_object(
        s3_client,
        aws_config['source_bucket'],
        old_s3_key,
        aws_config['destination_bucket'],
        new_s3_key
    )

    if move_successful:
        # 3. Update database
        config = load_config()
        db_update_successful = utils.update_document_key(
            config['mysql'],
            doc_id,
            new_s3_key
        )

        if db_update_successful:
            return new_s3_key
        else:
            # If the DB update fails, the S3 object is already MOVED. Log a critical error.
            print(f"CRITICAL: DB update failed for ID {doc_id}. Object is MOVED to {aws_config['destination_bucket']}/{new_s3_key}. Manual DB correction needed.")
            return None
    else:
        # If the S3 move failed, the object remains in the source bucket.
        print(f"S3 move failed for ID {doc_id}. Object remains in {aws_config['source_bucket']}/{old_s3_key}. DB not updated.")
        return None

def migrate_documents(s3_client, aws_config, documents):
    dest_bucket = aws_config['destination_bucket']

    for doc_id, s3_key in documents:
        if check_s3_key_exists(s3_client, dest_bucket, s3_key):
            print(f"✅ Key '{s3_key}' exists in bucket '{dest_bucket}'.")
        else:
            new_s3_key = migrate_object(s3_client, aws_config, s3_key, doc_id)
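            # S3 offers strong read-after-write consistency, so a HEAD request
            # immediately after the copy reliably reflects the migrated object.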
            if new_s3_key and check_s3_key_exists(s3_client, dest_bucket, new_s3_key):
                print(f"✅ ✅ Key '{new_s3_key}' migrated and now exists.")
            else:
                print(f"❌ Key '{s3_key}' still does not exist after the migration attempt.")

def main():
    """Executes the S3 migration and database update workflow."""
    config = load_config()

    # Initialize S3 client
    aws_config = config['aws']
    try:
        s3_client = boto3.client(
            's3',
            aws_access_key_id=aws_config['aws_access_key_id'],
            aws_secret_access_key=aws_config['aws_secret_access_key'],
            region_name=aws_config['aws_region']
        )
        print("S3 client initialized successfully.")
    except Exception as e:
        print(f"Failed to initialize S3 client: {e}")
        return

    # Fetch documents
    documents = fetch_documents(config['mysql'])
    if not documents:
        print("No documents found or failed to connect to the database. Exiting.")
        return
    else:
        print(f"Documents found: {len(documents)}")

    migrate_documents(s3_client, aws_config, documents)

if __name__ == "__main__":
    main()
aws-s3-migration/config.json
@@ -3,7 +3,7 @@
     "host": "147.93.98.152",
     "user": "devuser",
     "password": "AppUser@123$",
-    "database": "MarcoBMSStage",
+    "database": "MarcoBMSProd",
     "table": "Documents"
 },
 "aws": {
@@ -11,7 +11,7 @@
     "aws_secret_access_key": "ZEnwQuN9vWi+KodD+gvNEz46VCZ6/4ue3UKLbTTs",
     "aws_region": "us-east-1",
     "source_bucket": "testenv-marco-pms-documents",
-    "destination_bucket": "tryenv-marco-pms-documents",
+    "destination_bucket": "prodenv-marco-pms-documents",
     "destination_key_prefix": "",
     "destination_key_prefix_how": "IMPORTANT: The original S3Key (e.g., 'reports/2024/doc.pdf') will be APPENDED to this prefix, preserving the path structure. Resulting Key: 'migrated_files/reports/2024/doc.pdf'"
 }
95
aws-s3-migration/utils.py
Normal file
@@ -0,0 +1,95 @@
import mysql.connector


def generate_new_s3_key(old_key, prefix):
    """
    Generates a new S3 key by prepending the destination prefix to the old key,
    thereby preserving the original path structure.
    """
    if not isinstance(old_key, str) or not old_key:
        return None

    # Ensure the prefix ends with a slash if it's not empty and doesn't already end with one.
    prefixed_path = prefix
    if prefix and not prefix.endswith('/'):
        prefixed_path += '/'

    # Combine the prefix and the old key. If the old key starts with a slash, strip it.
    final_key = old_key.lstrip('/')

    return f"{prefixed_path}{final_key}"

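# Illustrative examples of the key mapping (matching the behaviour described by
# 'destination_key_prefix_how' in config.json):
#   generate_new_s3_key('reports/2024/doc.pdf', 'migrated_files')
#       -> 'migrated_files/reports/2024/doc.pdf'
#   generate_new_s3_key('/reports/2024/doc.pdf', '')
#       -> 'reports/2024/doc.pdf'
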
def move_s3_object(s3_client, source_bucket, source_key, dest_bucket, dest_key):
    """
    Performs the S3 'move' operation (currently Copy only; the Delete step
    below is commented out, so the source object is retained).

    Returns True on success, False otherwise.
    """
    copy_source = {
        'Bucket': source_bucket,
        'Key': source_key
    }

    try:
        print(f" -> Copying '{source_key}' to '{dest_key}' in '{dest_bucket}'...")
        # 1. Copy the object
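        # NOTE: copy_object is a single-request server-side copy, which S3
        # caps at 5 GB per object. For larger documents, the managed transfer
        # s3_client.copy(copy_source, dest_bucket, dest_key) would be needed,
        # as it falls back to multipart copy automatically.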
        s3_client.copy_object(
            CopySource=copy_source,
            Bucket=dest_bucket,
            Key=dest_key
        )
        print(" -> Copy object successful.")

        # 2. Delete the original object (currently disabled)
        # print(f" -> Deleting original object from '{source_bucket}/{source_key}'...")
        # s3_client.delete_object(
        #     Bucket=source_bucket,
        #     Key=source_key
        # )
        # print(" -> Delete object successful.")
        return True

    except Exception as e:
        print(f" -> S3 Move FAILED for {source_key}: {e}")
        return False

def update_document_key(db_config_full, doc_id, new_s3_key):
    """
    Updates the S3Key for a specific document ID in the database.
    NOTE: db_config_full includes the 'table' key, which must be filtered out for the connection.
    """
    conn = None
    cursor = None

    # Separate connection parameters from the table name
    table_name = db_config_full['table']
    connection_config = {k: v for k, v in db_config_full.items() if k != 'table'}

    try:
        # Connect using only valid connection parameters
        conn = mysql.connector.connect(**connection_config)
        cursor = conn.cursor()

        # Prepare the UPDATE query
        update_query = (
            f"UPDATE {table_name} SET S3Key = %s WHERE ID = %s"
        )
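        # The %s placeholders are bound by the connector, which escapes the
        # values; only the table name is interpolated from trusted config.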
        cursor.execute(update_query, (new_s3_key, doc_id))

        # Commit the transaction to apply the changes
        conn.commit()
        print(f" -> DB Update SUCCESS for ID {doc_id}.")
        return True

    except mysql.connector.Error as err:
        print(f" -> DB Update FAILED for ID {doc_id}: {err}. Rolling back.")
        if conn:
            conn.rollback()
        return False

    finally:
        if cursor:
            cursor.close()
        if conn and conn.is_connected():
            conn.close()