#!/usr/bin/env python3
"""Check systemd services and Docker containers; restart and alert on failure.

For every configured service (``systemctl is-active``) and container
(``docker inspect``) the script checks whether it is running, tries to restart
it when it is not, and e-mails an HTML report containing the most recent log
lines of everything that is still down after the restart attempts.

Configuration is read from environment variables (optionally loaded from a
``.env`` file via python-dotenv):

    SERVICES        comma-separated systemd unit names
    CONTAINERS      comma-separated Docker container names or IDs
    SENDER_EMAIL    address used in the From header and as envelope sender
    RECEIVER_EMAIL  comma-separated recipient addresses
    SMTP_SERVER     SMTP host name
    SMTP_PORT       SMTP port (blank lets smtplib use its default)
    SMTP_USER       SMTP login name
    SMTP_PASSWORD   SMTP login password

NOTE: the ``.env`` file holds the SMTP password; keep it out of version
control and restrict its file permissions.
"""

import html
import os
import smtplib
import subprocess
import sys
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional: without it the script still works with
    # variables exported in the process environment, but say so loudly
    # because an unnoticed empty configuration means nothing is monitored.
    def load_dotenv(*args, **kwargs):
        print(
            "python-dotenv is not installed; using the process environment only.",
            file=sys.stderr,
        )
        return False

load_dotenv()

# Number of trailing log lines collected for each failed unit.
LOG_TAIL_LINES = 20
# Upper bound for SMTP socket operations so a dead mail server cannot hang the run.
SMTP_TIMEOUT_SECONDS = 30


def _split_csv(raw):
    """Split a comma-separated string, trimming blanks and dropping empty items.

    ``"".split(",")`` returns ``[""]``, which would otherwise be treated as a
    unit with an empty name.
    """
    return [item.strip() for item in raw.split(",") if item.strip()]


def _parse_port(raw):
    """Convert the SMTP_PORT value to int; blank becomes 0 (smtplib default port)."""
    raw = raw.strip()
    if not raw:
        return 0
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"SMTP_PORT must be an integer, got {raw!r}") from None


# ----------------- CONFIG -----------------
services = _split_csv(os.getenv("SERVICES", ""))
containers = _split_csv(os.getenv("CONTAINERS", ""))

sender_email = os.getenv("SENDER_EMAIL", "")
receiver_email = _split_csv(os.getenv("RECEIVER_EMAIL", ""))
smtp_server = os.getenv("SMTP_SERVER", "")
smtp_port = _parse_port(os.getenv("SMTP_PORT", ""))
smtp_user = os.getenv("SMTP_USER", "")
smtp_password = os.getenv("SMTP_PASSWORD", "")
# ------------------------------------------


def _run(args):
    """Run *args* without a shell and return ``(returncode, output)``.

    stdout and stderr are merged. One trailing newline is removed, matching
    ``subprocess.getoutput``. Undecodable bytes are replaced instead of
    raising. If the executable cannot be started, the OS error text is
    returned with exit code 127 so a missing tool counts as a failed check
    rather than aborting the whole run.
    """
    try:
        proc = subprocess.run(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
        )
    except OSError as err:
        return 127, str(err)
    output = proc.stdout
    if output.endswith("\n"):
        output = output[:-1]
    return proc.returncode, output


def run_cmd(cmd):
    """Run a command and return its combined stdout/stderr as text.

    Args:
        cmd: a shell command line (str, executed through the shell exactly as
            before) or a sequence of arguments (executed without a shell).

    Returns:
        The command output with one trailing newline removed.
    """
    if isinstance(cmd, str):
        return subprocess.getoutput(cmd)
    return _run(list(cmd))[1]


def _service_active(service):
    """Return True when ``systemctl is-active`` exits with status 0."""
    return _run(["systemctl", "is-active", service])[0] == 0


def check_service(service, retries=2):
    """Ensure a systemd service is active, restarting it if necessary.

    Args:
        service: systemd unit name.
        retries: maximum number of restart attempts.

    Returns:
        ``(True, "")`` when the service is active (possibly after a restart),
        otherwise ``(False, logs)`` where *logs* is the tail of the unit's
        journal.
    """
    if _service_active(service):
        return True, ""
    for _ in range(retries):
        print("Attempting to start: " + service)
        _run(["systemctl", "restart", service])
        if _service_active(service):
            return True, ""
    _, logs = _run(
        ["journalctl", "-u", service, "-n", str(LOG_TAIL_LINES), "--no-pager"]
    )
    return False, logs


def _container_running(container):
    """Return True when ``docker inspect`` reports ``State.Running`` as true."""
    _, output = _run(["docker", "inspect", "-f", "{{.State.Running}}", container])
    return output.strip() == "true"


def check_container(container, retries=2):
    """Ensure a Docker container is running, restarting it if necessary.

    Args:
        container: container name or ID.
        retries: maximum number of restart attempts.

    Returns:
        ``(True, "")`` when the container is running (possibly after a
        restart), otherwise ``(False, logs)`` where *logs* is the tail of the
        container's log output.
    """
    if _container_running(container):
        return True, ""
    for _ in range(retries):
        _run(["docker", "restart", container])
        if _container_running(container):
            return True, ""
    _, logs = _run(["docker", "logs", "--tail", str(LOG_TAIL_LINES), container])
    return False, logs


def _render_report(failures):
    """Build the HTML body: one section (name + escaped logs) per failure."""
    sections = "".join(
        f"<h3>{html.escape(name)}</h3>\n"
        f"<p>Last {LOG_TAIL_LINES} log lines:</p>\n"
        # Logs are arbitrary text; escape them so '<' or '&' cannot break the markup.
        f"<pre>{html.escape(logs)}</pre>\n"
        for name, logs in failures.items()
    )
    return (
        "<html>\n<body>\n"
        "<p>The following services/containers failed even after restart attempts:</p>\n"
        f"{sections}"
        "<p>This is an automated alert. Please check the server immediately.</p>\n"
        "</body>\n</html>\n"
    )


def send_email(failures):
    """E-mail an HTML failure report; do nothing when *failures* is empty.

    Args:
        failures: mapping of unit display name to its collected log text.

    Raises:
        ValueError: if SMTP_SERVER or RECEIVER_EMAIL is not configured.
        smtplib.SMTPException / OSError: if the message cannot be delivered.
    """
    if not failures:
        return
    if not smtp_server or not receiver_email:
        raise ValueError(
            "Cannot send the failure report: SMTP_SERVER and RECEIVER_EMAIL must be set"
        )

    msg = MIMEMultipart("alternative")
    msg["From"] = sender_email
    msg["To"] = ", ".join(receiver_email)
    msg["Subject"] = "Service/Container Failure Report"
    msg.attach(MIMEText(_render_report(failures), "html"))

    with smtplib.SMTP(smtp_server, smtp_port, timeout=SMTP_TIMEOUT_SECONDS) as server:
        server.starttls()
        server.login(smtp_user, smtp_password)
        server.sendmail(sender_email, receiver_email, msg.as_string())


def main():
    """Check every configured service and container, then report failures."""
    if not services and not containers:
        print(
            "No services or containers configured; nothing to check.",
            file=sys.stderr,
        )

    failures = {}
    for service in services:
        print("looping services: " + service)
        ok, logs = check_service(service)
        print(ok)
        print(logs)
        if not ok:
            failures[service] = logs

    for container in containers:
        print("looping containers: " + container)
        ok, logs = check_container(container)
        print(ok)
        print(logs)
        if not ok:
            failures[f"Docker: {container}"] = logs

    print(failures)
    send_email(failures)


if __name__ == "__main__":
    main()