Refer to https://wiki.marcoaiot.com/index.php/Systemd_Monitor for more details about setup and maintenance.
115 lines
3.8 KiB
Python
115 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import html
import os
import shlex
import smtplib
import subprocess
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

from dotenv import load_dotenv
|
|
|
|
load_dotenv()
|
|
|
|
# ----------------- CONFIG -----------------
def _env_list(name):
    """Return the comma-separated environment variable *name* as a list.

    Items are stripped of surrounding whitespace and blank items are dropped,
    so an unset variable or a trailing comma yields no empty entry.
    """
    pieces = (piece.strip() for piece in os.getenv(name, "").split(","))
    return [piece for piece in pieces if piece]


# Names of the systemd units to monitor.
services = _env_list("SERVICES")
# Names of the Docker containers to monitor (update with your container names).
containers = _env_list("CONTAINERS")

# Mail settings used for the failure report.
sender_email = os.getenv("SENDER_EMAIL", "")
receiver_email = _env_list("RECEIVER_EMAIL")
smtp_server = os.getenv("SMTP_SERVER", "")
# Kept as the raw string read from the environment.
smtp_port = os.getenv("SMTP_PORT", "")
smtp_user = os.getenv("SMTP_USER", "")
smtp_password = os.getenv("SMTP_PASSWORD", "")
# ------------------------------------------
|
|
|
|
def run_cmd(cmd):
    """Run *cmd* through the shell and return its captured output as text.

    The exit status is discarded; only the output string (stdout and stderr
    combined, trailing newline removed) is returned.
    """
    _exit_status, captured = subprocess.getstatusoutput(cmd)
    return captured
|
|
|
|
def check_service(service):
    """Check a systemd unit and try to restart it if it is not active.

    Args:
        service: Name of the systemd unit to check.

    Returns:
        A ``(ok, logs)`` tuple. ``ok`` is True when ``systemctl is-active``
        exits with status 0, either immediately or after one of up to two
        restart attempts; ``logs`` is then an empty string. Otherwise ``ok``
        is False and ``logs`` holds the output of ``journalctl`` for the
        unit's last 20 entries.
    """

    def is_active():
        probe = subprocess.run(
            ["systemctl", "is-active", service],
            capture_output=True,
            text=True,
        )
        return probe.returncode == 0

    if is_active():
        return True, ""

    for _ in range(2):
        print("Attempting to start: " + service)
        subprocess.run(["systemctl", "restart", service])
        if is_active():
            return True, ""

    # run_cmd hands the string to a shell, so quote the unit name to keep
    # names containing shell metacharacters from being misinterpreted.
    logs = run_cmd(f"journalctl -u {shlex.quote(service)} -n 20 --no-pager")
    return False, logs
|
|
|
|
def check_container(container):
    """Check a Docker container and try to restart it if it is not running.

    Args:
        container: Name of the Docker container to check.

    Returns:
        A ``(ok, logs)`` tuple. ``ok`` is True when ``docker inspect`` reports
        ``true`` for ``.State.Running``, either immediately or after one of up
        to two ``docker restart`` attempts; ``logs`` is then an empty string.
        Otherwise ``ok`` is False and ``logs`` holds the output of
        ``docker logs --tail 20`` for the container.
    """
    # run_cmd hands each string to a shell, so quote the name once and reuse it.
    target = shlex.quote(container)
    inspect_cmd = f"docker inspect -f '{{{{.State.Running}}}}' {target}"

    def is_running():
        return run_cmd(inspect_cmd).strip() == "true"

    if is_running():
        return True, ""

    for _ in range(2):
        run_cmd(f"docker restart {target}")
        if is_running():
            return True, ""

    logs = run_cmd(f"docker logs --tail 20 {target}")
    return False, logs
|
|
|
|
def _build_failure_html(failures):
    """Render the ``name -> logs`` mapping as the HTML body of the report."""
    header = """
    <html>
    <body style="font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px;">
    <h2 style="color: #dc3545;">⚠️ Failure Report</h2>
    <p>The following services/containers failed even after restart attempts:</p>
    <hr>
    """

    sections = []
    for name, logs in failures.items():
        # Escape both values: log output routinely contains '<', '>' and '&',
        # which would otherwise be parsed as markup and garble the report.
        safe_name = html.escape(str(name))
        safe_logs = html.escape(str(logs))
        sections.append(f"""
    <div style="margin-bottom: 20px; padding: 10px; background-color: #fff3cd; border-left: 5px solid #dc3545;">
    <h3 style="margin: 0; color: #721c24;">❌ {safe_name} Failed</h3>
    <p><b>Last 20 log lines:</b></p>
    <pre style="background-color: #f1f1f1; padding: 10px; border-radius: 5px; max-height: 300px; overflow-y: auto; font-size: 12px; color: #212529;">{safe_logs}</pre>
    </div>
    """)

    footer = """
    <hr>
    <p style="color: #6c757d;">This is an automated alert. Please check the server immediately.</p>
    </body>
    </html>
    """

    return header + "".join(sections) + footer


def send_email(failures):
    """Send an HTML failure report by e-mail.

    Does nothing when *failures* is empty. Otherwise builds one report section
    per entry and sends the message over SMTP with STARTTLS, using the
    module-level ``sender_email``, ``receiver_email``, ``smtp_server``,
    ``smtp_port``, ``smtp_user`` and ``smtp_password`` settings.

    Args:
        failures: Mapping of a display name to the log text collected for it.

    Returns:
        None.

    Exceptions raised by ``smtplib`` while connecting, authenticating or
    sending are not caught here and propagate to the caller.
    """
    if not failures:
        return

    msg = MIMEMultipart("alternative")
    msg["From"] = sender_email
    msg["To"] = ", ".join(receiver_email)
    msg["Subject"] = "Service/Container Failure Report"
    msg.attach(MIMEText(_build_failure_html(failures), "html"))

    with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.starttls()
        server.login(smtp_user, smtp_password)
        server.sendmail(sender_email, receiver_email, msg.as_string())
|
|
|
|
if __name__ == "__main__":
    # Maps a display name to the log text of every item that is still down
    # after the restart attempts.
    failures = {}
    print("inside __main__")

    for service in services:
        service = service.strip()
        if not service:
            # Skip blank names (e.g. from an empty or trailing-comma list)
            # instead of trying to restart and report a unit called "".
            continue
        print("looping services: " + service)
        ok, logs = check_service(service)
        print(ok)
        print(logs)
        if not ok:
            failures[service] = logs

    for container in containers:
        container = container.strip()
        if not container:
            continue
        # Announce the container before checking it, as the services loop does.
        print("looping containers: " + container)
        ok, logs = check_container(container)
        print(ok)
        print(logs)
        if not ok:
            failures[f"Docker: {container}"] = logs

    print(failures)
    send_email(failures)
|