release-scripts/python-process-monitor/python-process-monitor.py

115 lines
3.8 KiB
Python

#!/usr/bin/env python3
import subprocess
import smtplib
import os
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from dotenv import load_dotenv
load_dotenv()
# ----------------- CONFIG -----------------
# Every setting is read from the process environment; missing variables
# fall back to an empty value.
def _env_list(name):
    """Return the comma-separated environment variable *name* as a list.

    Items are stripped of surrounding whitespace and blank items are dropped,
    so an unset variable (or a trailing comma) yields no phantom "" entry.
    """
    raw = os.getenv(name, "")
    return [item.strip() for item in raw.split(",") if item.strip()]


services = _env_list("SERVICES")  # names passed to `systemctl`
containers = _env_list("CONTAINERS")  # update with your container names
sender_email = os.getenv("SENDER_EMAIL", "")
receiver_email = _env_list("RECEIVER_EMAIL")  # list of recipient addresses
smtp_server = os.getenv("SMTP_SERVER", "")
smtp_port = os.getenv("SMTP_PORT", "")
smtp_user = os.getenv("SMTP_USER", "")
smtp_password = os.getenv("SMTP_PASSWORD", "")
# ------------------------------------------
def run_cmd(cmd):
    """Run *cmd* and return its combined stdout/stderr as text.

    Args:
        cmd: Either a shell command line (``str``), executed through the
            shell exactly as before, or a sequence of arguments, executed
            directly without a shell so no quoting of the items is needed.

    Returns:
        The command output with one trailing newline removed. For the
        argument-sequence form, a description of the error is returned if
        the program cannot be started.
    """
    if isinstance(cmd, str):
        return subprocess.getoutput(cmd)

    try:
        completed = subprocess.run(
            list(cmd),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except OSError as exc:
        # e.g. the executable does not exist; report it as output rather
        # than raising, matching the never-raising string form.
        return str(exc)

    output = completed.stdout
    # Mirror subprocess.getoutput(), which drops a single trailing newline.
    if output.endswith("\n"):
        output = output[:-1]
    return output
def check_service(service):
    """Check a systemd unit and try to restart it if it is not active.

    The unit is probed with ``systemctl is-active``. If it is not active,
    ``systemctl restart`` is issued up to two times, re-probing after each
    attempt.

    Args:
        service: Name of the unit, passed verbatim to ``systemctl`` and
            ``journalctl`` as a single argument (no shell is involved, so
            names with special characters are safe).

    Returns:
        ``(True, "")`` if the unit is active, either initially or after a
        restart. Otherwise ``(False, logs)`` where ``logs`` is the output of
        ``journalctl -u <service> -n 20 --no-pager``, or a short error
        description if ``systemctl``/``journalctl`` could not be executed.
    """

    def is_active():
        # Exit status 0 from `systemctl is-active` is treated as "active".
        probe = subprocess.run(
            ["systemctl", "is-active", service],
            capture_output=True,
            text=True,
        )
        return probe.returncode == 0

    try:
        if is_active():
            return True, ""
        for _ in range(2):
            print("Attempting to start: " + service)
            subprocess.run(["systemctl", "restart", service])
            if is_active():
                return True, ""
    except OSError as exc:
        # systemctl missing or not executable: report it as a failure so the
        # caller can still alert, instead of crashing the whole monitor.
        return False, f"Could not run systemctl for {service}: {exc}"

    try:
        journal = subprocess.run(
            ["journalctl", "-u", service, "-n", "20", "--no-pager"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # keep error text alongside the logs
            text=True,
        )
    except OSError as exc:
        return False, f"Could not run journalctl for {service}: {exc}"

    logs = journal.stdout
    if logs.endswith("\n"):
        logs = logs[:-1]
    return False, logs
def check_container(container):
    """Check a Docker container and try to restart it if it is not running.

    The container state is read with
    ``docker inspect -f '{{.State.Running}}'``. If it is not ``true``,
    ``docker restart`` is issued up to two times, re-reading the state after
    each attempt.

    Args:
        container: Container name, passed to ``docker`` as a single
            argument without going through a shell.

    Returns:
        ``(True, "")`` if the container is running, either initially or
        after a restart. Otherwise ``(False, logs)`` where ``logs`` is the
        output of ``docker logs --tail 20 <container>``, or a short error
        description if ``docker`` could not be executed.
    """

    def docker(*args):
        # Run `docker <args>` and return stdout and stderr combined as text.
        try:
            completed = subprocess.run(
                ["docker", *args],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
        except OSError as exc:
            # docker binary missing or not executable.
            return f"Could not run docker: {exc}"
        output = completed.stdout
        if output.endswith("\n"):
            output = output[:-1]
        return output

    def is_running():
        state = docker("inspect", "-f", "{{.State.Running}}", container)
        return state.strip() == "true"

    if is_running():
        return True, ""

    for _ in range(2):
        docker("restart", container)
        if is_running():
            return True, ""

    return False, docker("logs", "--tail", "20", container)
def _build_failure_html(failures):
    """Render the HTML report body for *failures* (name -> log text)."""
    # Local import: the module-level imports are outside this block.
    from html import escape

    parts = [
        """
<html>
<body style="font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px;">
<h2 style="color: #dc3545;">⚠️ Failure Report</h2>
<p>The following services/containers failed even after restart attempts:</p>
<hr>
"""
    ]
    for name, logs in failures.items():
        # Escape both values: raw log output routinely contains '<', '>' and
        # '&', which would otherwise be interpreted as markup.
        safe_name = escape(str(name))
        safe_logs = escape(str(logs))
        parts.append(
            f"""
<div style="margin-bottom: 20px; padding: 10px; background-color: #fff3cd; border-left: 5px solid #dc3545;">
<h3 style="margin: 0; color: #721c24;">❌ {safe_name} Failed</h3>
<p><b>Last 20 log lines:</b></p>
<pre style="background-color: #f1f1f1; padding: 10px; border-radius: 5px; max-height: 300px; overflow-y: auto; font-size: 12px; color: #212529;">{safe_logs}</pre>
</div>
"""
        )
    parts.append(
        """
<hr>
<p style="color: #6c757d;">This is an automated alert. Please check the server immediately.</p>
</body>
</html>
"""
    )
    return "".join(parts)


def send_email(failures):
    """Send an HTML failure report by e-mail.

    Uses the module-level settings ``sender_email``, ``receiver_email``,
    ``smtp_server``, ``smtp_port``, ``smtp_user`` and ``smtp_password``.
    The connection is upgraded with STARTTLS before logging in.

    Args:
        failures: Mapping of a failed service/container name to its log
            text. If it is empty (or otherwise falsy) nothing is sent.

    Returns:
        None.
    """
    if not failures:
        return

    # Ignore blank recipient entries (e.g. from an empty or trailing-comma
    # configuration value) so they are not handed to the SMTP server.
    recipients = [addr.strip() for addr in receiver_email if addr.strip()]

    msg = MIMEMultipart("alternative")
    msg["From"] = sender_email
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = "Service/Container Failure Report"
    msg.attach(MIMEText(_build_failure_html(failures), "html"))

    with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.starttls()
        server.login(smtp_user, smtp_password)
        server.sendmail(sender_email, recipients, msg.as_string())
def main():
    """Check every configured service and container and e-mail the failures.

    Each name in ``services`` is checked with ``check_service`` and each name
    in ``containers`` with ``check_container``. Anything still failing is
    collected (containers are keyed as ``"Docker: <name>"``) and passed to
    ``send_email``.
    """
    failures = {}
    print("inside __main__")

    for service in services:
        service = service.strip()
        if not service:
            # Blank entry (unset variable or stray comma) - nothing to check.
            continue
        print("looping services: " + service)
        ok, logs = check_service(service)
        print(ok)
        print(logs)
        if not ok:
            failures[service] = logs

    for container in containers:
        container = container.strip()
        if not container:
            continue
        print("looping containers: " + container)
        ok, logs = check_container(container)
        print(ok)
        print(logs)
        if not ok:
            failures[f"Docker: {container}"] = logs

    print(failures)
    send_email(failures)


if __name__ == "__main__":
    main()