From a654136ae940cffbc3622708eaf2def325d81c8e Mon Sep 17 00:00:00 2001
From: Vikas Nale
Date: Sat, 5 Jul 2025 11:25:30 +0530
Subject: [PATCH] Initial version of thumbnail generation

---
 image-thumbnail/.env                      |  10 +
 image-thumbnail/image-processor.py        | 236 ++++++++++++++++++++++
 image-thumbnail/image_processing_utils.py |  45 +++++
 image-thumbnail/s3_utils.py               |  71 +++++++
 4 files changed, 362 insertions(+)
 create mode 100644 image-thumbnail/.env
 create mode 100644 image-thumbnail/image-processor.py
 create mode 100644 image-thumbnail/image_processing_utils.py
 create mode 100644 image-thumbnail/s3_utils.py

diff --git a/image-thumbnail/.env b/image-thumbnail/.env
new file mode 100644
index 0000000..118f3d5
--- /dev/null
+++ b/image-thumbnail/.env
@@ -0,0 +1,10 @@
+## Database Configuration
+DB_HOST=147.93.98.152
+DB_USER=devuser
+DB_PASSWORD=xxxxxxx
+
+## AWS S3 Configuration
+ACCESS_KEY=xxxxxxx
+SECRET_KEY=xxxxxx
+S3_BUCKET_NAME=xxxxxxxx
+S3_REGION=us-east-1
\ No newline at end of file
diff --git a/image-thumbnail/image-processor.py b/image-thumbnail/image-processor.py
new file mode 100644
index 0000000..bca77ff
--- /dev/null
+++ b/image-thumbnail/image-processor.py
@@ -0,0 +1,236 @@
+import mysql.connector
+import os
+import base64
+import logging  # Import logging
+from datetime import datetime
+from dotenv import load_dotenv  # Import load_dotenv
+
+# Import the utility functions
+from s3_utils import get_s3_client, download_image_from_s3, upload_object_to_s3, generate_thumbnail_s3_key
+from image_processing_utils import create_thumbnail_from_bytes, create_thumbnail_from_base64
+
+# --- Logging Configuration ---
+# Create a logger object
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)  # Set the default logging level
+
+# Create handlers
+# Console handler
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.INFO)  # Console shows INFO and above
+
+# File handler
+log_file_name = f"image_processor_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
+file_handler = logging.FileHandler(log_file_name)
+file_handler.setLevel(logging.DEBUG)  # File captures all DEBUG messages and above
+
+# Create formatters and add them to handlers
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+console_handler.setFormatter(formatter)
+file_handler.setFormatter(formatter)
+
+# Add handlers to the logger
+logger.addHandler(console_handler)
+logger.addHandler(file_handler)
+
+logger.info(f"Logging initialized. Full logs available in '{log_file_name}'")
+
+# --- Configuration ---
+load_dotenv()  # Load DB and AWS settings from the accompanying .env file
+DB_CONFIG = {
+    'host': os.getenv('DB_HOST'),
+    'user': os.getenv('DB_USER'),
+    'password': os.getenv('DB_PASSWORD'),
+    'database': os.getenv('DB_NAME', 'your_mysql_database')  # NOTE: DB_NAME is not in the sample .env; set it or replace this default
+}
+
+AWS_CONFIG = {
+    'aws_access_key_id': os.getenv('ACCESS_KEY'),
+    'aws_secret_access_key': os.getenv('SECRET_KEY'),
+    'region_name': os.getenv('S3_REGION', 'us-east-1')  # e.g., 'us-east-1'
+}
+
+S3_BUCKET_NAME = os.getenv('S3_BUCKET_NAME')
+THUMBNAIL_PREFIX = 'thumbnails/'  # Folder where thumbnails will be stored in S3
+THUMBNAIL_SIZE = (128, 128)  # Width, Height for thumbnails
+THUMBNAIL_OUTPUT_FORMAT = 'JPEG'  # Output format for thumbnails
+
+# --- Helpers to connect to DB and S3 ---
+def get_db_connection():
+    try:
+        mydb = mysql.connector.connect(**DB_CONFIG)
+        logger.info("Successfully connected to MySQL database.")
+        return mydb
+    except mysql.connector.Error as err:
+        logger.error(f"Error connecting to MySQL: {err}")
+        return None
+
+def get_s3_connection():
+    s3_client = get_s3_client(**AWS_CONFIG)
+    if s3_client:
+        logger.info("Successfully obtained S3 client.")
+    else:
+        logger.error("Failed to obtain S3 client.")
+    return s3_client
+
+# --- Core Processing Logic ---
+def process_single_image(mydb, mycursor, s3_client, image_id, original_image_source_key, base64_image_data=None):
+    """
+    Creates and uploads a thumbnail for a single image, then updates the database.
+    Takes the open connection (mydb) so the UPDATE can be committed or rolled back.
+    """
+    logger.info(f"Processing image ID: {image_id}")
+    image_data_bytes = None
+    source_type = "unknown"
+
+    if original_image_source_key:
+        source_type = "S3"
+        logger.debug(f"Attempting to download image from S3: '{original_image_source_key}'")
+        image_data_bytes = download_image_from_s3(s3_client, S3_BUCKET_NAME, original_image_source_key)
+    elif base64_image_data:
+        source_type = "Base64"
+        logger.debug(f"Attempting to decode base64 image data for image ID: {image_id}")
+        image_data_bytes = base64.b64decode(base64_image_data)  # Decode here, then pass raw bytes to create_thumbnail_from_bytes
+    else:
+        logger.warning(f"No valid image source (S3 link or base64) for image ID {image_id}. Skipping.")
+        return
+
+    if image_data_bytes:
+        logger.debug(f"Image data ({source_type}) obtained for ID: {image_id}. Creating thumbnail...")
+        thumbnail_bytes = create_thumbnail_from_bytes(
+            image_data_bytes,
+            size=THUMBNAIL_SIZE,
+            output_format=THUMBNAIL_OUTPUT_FORMAT
+        )
+
+        if thumbnail_bytes:
+            # Determine the key for the thumbnail
+            if original_image_source_key:
+                thumbnail_s3_key = generate_thumbnail_s3_key(original_image_source_key, THUMBNAIL_PREFIX)
+            else:
+                # If only base64 data is available, we need a way to derive a unique key.
+                # A simple approach: use the ID and a timestamp or hash.
+                # In a real scenario, you might want to store the original filename
+                # or a hash of the base64 data to avoid collisions.
+                thumbnail_s3_key = f"{THUMBNAIL_PREFIX}base64_image_{image_id}.{THUMBNAIL_OUTPUT_FORMAT.lower()}"
+                logger.debug(f"Generated base64 thumbnail S3 key: '{thumbnail_s3_key}' for ID: {image_id}")
+
+            content_type = f'image/{THUMBNAIL_OUTPUT_FORMAT.lower()}'  # e.g., image/jpeg
+            if upload_object_to_s3(s3_client, S3_BUCKET_NAME, thumbnail_s3_key, thumbnail_bytes, content_type=content_type):
+                # Update database with new thumbnailLink
+                update_sql = "UPDATE images SET thumbnailLink = %s WHERE id = %s"
+                try:
+                    mycursor.execute(update_sql, (thumbnail_s3_key, image_id))
+                    mydb.commit()
+                    logger.info(f"Successfully updated database for image ID {image_id} with thumbnail: '{thumbnail_s3_key}'")
+                except mysql.connector.Error as err:
+                    logger.error(f"Failed to update database for image ID {image_id}: {err}")
+                    mydb.rollback()  # Roll back if the update fails
+            else:
+                logger.error(f"Failed to upload thumbnail for image ID {image_id} to S3.")
+        else:
+            logger.error(f"Failed to create thumbnail bytes for image ID {image_id}.")
+    else:
+        logger.error(f"No image data retrieved from {source_type} for image ID {image_id}. Skipping thumbnail creation.")
+
+def process_images_from_s3_link():
+    """
+    Processes images where `imageLink` is present and `thumbnailLink` is NULL.
+    """
+    logger.info("Starting processing of images from S3 links...")
+    mydb = get_db_connection()
+    if not mydb:
+        logger.critical("Could not establish database connection. Aborting S3 link processing.")
+        return
+
+    s3 = get_s3_connection()
+    if not s3:
+        logger.critical("Could not establish S3 connection. Aborting S3 link processing.")
+        mydb.close()
+        return
+
+    mycursor = mydb.cursor()
+
+    try:
+        sql = "SELECT id, imageLink FROM images WHERE imageLink IS NOT NULL AND thumbnailLink IS NULL"
+        mycursor.execute(sql)
+        records = mycursor.fetchall()
+
+        if not records:
+            logger.info("No images found with S3 link and null thumbnailLink. Skipping this step.")
+            return
+
+        logger.info(f"Found {len(records)} images with S3 link and null thumbnailLink. Beginning processing...")
+
+        for image_id, image_link in records:
+            logger.info(f"Initiating processing for image ID: {image_id}, S3 Link: '{image_link}'")
+            process_single_image(mydb, mycursor, s3, image_id, image_link)
+
+    except mysql.connector.Error as err:
+        logger.error(f"MySQL Error during S3 link processing: {err}")
+    except Exception as e:
+        logger.error(f"An unexpected error occurred during S3 link processing: {e}", exc_info=True)  # exc_info=True logs the traceback
+    finally:
+        if 'mycursor' in locals() and mycursor:
+            mycursor.close()
+        if mydb.is_connected():
+            mydb.close()
+        logger.info("MySQL connection closed after S3 link processing.")
+
+def process_images_from_base64_string():
+    """
+    Processes images where `imageDataBase64` is present and `thumbnailLink` is NULL.
+    """
+    logger.info("Starting processing of images from Base64 strings...")
+    mydb = get_db_connection()
+    if not mydb:
+        logger.critical("Could not establish database connection. Aborting Base64 processing.")
+        return
+
+    s3 = get_s3_connection()
+    if not s3:
+        logger.critical("Could not establish S3 connection. Aborting Base64 processing.")
+        mydb.close()
+        return
+
+    mycursor = mydb.cursor()
+
+    try:
+        sql = "SELECT id, imageLink, imageDataBase64 FROM images WHERE imageDataBase64 IS NOT NULL AND thumbnailLink IS NULL"
+        mycursor.execute(sql)
+        records = mycursor.fetchall()
+
+        if not records:
+            logger.info("No images found with base64 data and null thumbnailLink. Skipping this step.")
+            return
+
+        logger.info(f"Found {len(records)} images with base64 data and null thumbnailLink. Beginning processing...")
+
+        for image_id, image_link, base64_data in records:
+            logger.info(f"Initiating processing for image ID: {image_id} from Base64 data.")
+            if base64_data:
+                process_single_image(mydb, mycursor, s3, image_id, None, base64_data)
+            else:
+                logger.warning(f"Base64 data for image ID {image_id} is unexpectedly NULL after query. Skipping.")
+
+    except mysql.connector.Error as err:
+        logger.error(f"MySQL Error during Base64 processing: {err}")
+    except Exception as e:
+        logger.error(f"An unexpected error occurred during Base64 processing: {e}", exc_info=True)  # exc_info=True logs the traceback
+    finally:
+        if 'mycursor' in locals() and mycursor:
+            mycursor.close()
+        if mydb.is_connected():
+            mydb.close()
+        logger.info("MySQL connection closed after Base64 processing.")
+
+if __name__ == "__main__":
+    logger.info("Application started.")
+    # Process images from S3 links first
+    process_images_from_s3_link()
+    logger.info("--- Finished processing images from S3 links ---")
+
+    # Then process images from base64 strings
+    process_images_from_base64_string()
+    logger.info("--- Finished processing images from base64 strings ---")
+    logger.info("All image thumbnail processing complete. Application exiting.")
\ No newline at end of file
diff --git a/image-thumbnail/image_processing_utils.py b/image-thumbnail/image_processing_utils.py
new file mode 100644
index 0000000..c46ffb2
--- /dev/null
+++ b/image-thumbnail/image_processing_utils.py
@@ -0,0 +1,45 @@
+from PIL import Image
+import io
+import base64
+import logging  # Import logging
+
+# Get a logger instance for this module
+logger = logging.getLogger(__name__)
+
+def create_thumbnail_from_bytes(image_data_bytes, size=(128, 128), output_format='JPEG'):
+    """
+    Creates a thumbnail from image data (bytes).
+    Returns the thumbnail as bytes.
+    """
+    try:
+        img = Image.open(io.BytesIO(image_data_bytes))
+        img.thumbnail(size)
+
+        thumbnail_buffer = io.BytesIO()
+        # Ensure the output format is supported by Pillow and the original image
+        if img.mode == 'RGBA' and output_format == 'JPEG':
+            # Convert RGBA to RGB for JPEG output to avoid errors with the alpha channel
+            img = img.convert('RGB')
+            logger.debug("Converted RGBA image to RGB for JPEG output.")
+        img.save(thumbnail_buffer, format=output_format)
+        thumbnail_buffer.seek(0)
+        logger.info(f"Thumbnail created from bytes. Size: {size}, Format: {output_format}")
+        return thumbnail_buffer.getvalue()
+    except Exception as e:
+        logger.error(f"Error creating thumbnail from bytes: {e}")
+        return None
+
+def create_thumbnail_from_base64(base64_string, size=(128, 128), output_format='JPEG'):
+    """
+    Decodes a base64 image string, creates a thumbnail, and returns the thumbnail as bytes.
+    """
+    try:
+        decoded_image_data = base64.b64decode(base64_string)
+        logger.debug("Base64 string decoded successfully.")
+        return create_thumbnail_from_bytes(decoded_image_data, size, output_format)
+    except base64.binascii.Error as e:
+        logger.error(f"Invalid base64 string provided: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Error creating thumbnail from base64 string: {e}")
+        return None
\ No newline at end of file
diff --git a/image-thumbnail/s3_utils.py b/image-thumbnail/s3_utils.py
new file mode 100644
index 0000000..d9d8779
--- /dev/null
+++ b/image-thumbnail/s3_utils.py
@@ -0,0 +1,71 @@
+import boto3
+import os
+import logging  # Import logging
+
+# Get a logger instance for this module
+logger = logging.getLogger(__name__)
+
+def get_s3_client(aws_access_key_id, aws_secret_access_key, region_name):
+    """
+    Initializes and returns an S3 client.
+    """
+    try:
+        s3_client = boto3.client(
+            's3',
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            region_name=region_name
+        )
+        logger.debug("S3 client initialized successfully.")
+        return s3_client
+    except Exception as e:
+        logger.error(f"Failed to initialize S3 client: {e}")
+        return None
+
+def download_image_from_s3(s3_client, bucket_name, object_key):
+    """
+    Downloads an image from S3 and returns its binary data.
+    """
+    try:
+        response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
+        image_data = response['Body'].read()
+        logger.info(f"Downloaded '{object_key}' from S3 bucket '{bucket_name}'.")
+        return image_data
+    except s3_client.exceptions.NoSuchKey:
+        logger.warning(f"S3 object '{object_key}' not found in bucket '{bucket_name}'.")
+        return None
+    except Exception as e:
+        logger.error(f"Error downloading '{object_key}' from S3 bucket '{bucket_name}': {e}")
+        return None
+
+def upload_object_to_s3(s3_client, bucket_name, object_key, data_bytes, content_type='application/octet-stream'):
+    """
+    Uploads binary data to S3.
+    """
+    try:
+        s3_client.put_object(
+            Bucket=bucket_name,
+            Key=object_key,
+            Body=data_bytes,
+            ContentType=content_type
+        )
+        logger.info(f"Uploaded object to s3://{bucket_name}/{object_key} with content type '{content_type}'.")
+        return True
+    except Exception as e:
+        logger.error(f"Error uploading '{object_key}' to S3 bucket '{bucket_name}': {e}")
+        return False
+
+def generate_thumbnail_s3_key(original_s3_key, thumbnail_prefix):
+    """
+    Generates a suitable S3 key for a thumbnail based on the original key.
+    E.g., original/path/image.jpg -> thumbnails/original/path/image.jpg
+    """
+    thumbnail_name = os.path.basename(original_s3_key)
+    thumbnail_directory = os.path.dirname(original_s3_key)
+
+    if thumbnail_directory:
+        thumbnail_key = f"{thumbnail_prefix}{thumbnail_directory}/{thumbnail_name}"
+    else:
+        thumbnail_key = f"{thumbnail_prefix}{thumbnail_name}"
+    logger.debug(f"Generated thumbnail S3 key: '{thumbnail_key}' from original: '{original_s3_key}'")
+    return thumbnail_key
\ No newline at end of file
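A minimal usage sketch (not part of the patch) showing how the shipped utilities compose for a single S3 object, assuming the same .env keys that image-processor.py loads; the object key 'uploads/sample.jpg' and the 'thumbnails/' prefix are illustrative placeholders only:

import os
from dotenv import load_dotenv

from s3_utils import get_s3_client, download_image_from_s3, upload_object_to_s3, generate_thumbnail_s3_key
from image_processing_utils import create_thumbnail_from_bytes

load_dotenv()  # expects the same .env keys as image-processor.py

s3 = get_s3_client(os.getenv('ACCESS_KEY'), os.getenv('SECRET_KEY'), os.getenv('S3_REGION'))
bucket = os.getenv('S3_BUCKET_NAME')
original_key = 'uploads/sample.jpg'  # hypothetical object key

image_bytes = download_image_from_s3(s3, bucket, original_key)
if image_bytes:
    thumb = create_thumbnail_from_bytes(image_bytes, size=(128, 128), output_format='JPEG')
    if thumb:
        thumb_key = generate_thumbnail_s3_key(original_key, 'thumbnails/')
        upload_object_to_s3(s3, bucket, thumb_key, thumb, content_type='image/jpeg')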