Add a systemd monitor for all our services (e.g. mysql, mongod) and some containers
Refer to https://wiki.marcoaiot.com/index.php/Systemd_Monitor for more details about setup and maintenance.
This commit is contained in:
parent
3c379a9bf2
commit
023691d062
11
python-process-monitor/.env
Normal file
11
python-process-monitor/.env
Normal file
@ -0,0 +1,11 @@
|
||||
# Services & Containers
|
||||
SERVICES=nginx,rocketchat,mongod,mysql
|
||||
CONTAINERS=redmine-app,mediawiki-app,sonarqube,postgres-sonar,ecc269bb3ba3
|
||||
|
||||
# Mail settings (do not commit real credentials: rotate SMTP_PASSWORD and supply it from an untracked file)
|
||||
SENDER_EMAIL=marcoioitsoft@gmail.com
|
||||
RECEIVER_EMAIL=umesh@marcoaiot.com,vikas@marcoaiot.com
|
||||
SMTP_SERVER=smtp.gmail.com
|
||||
SMTP_PORT=587
|
||||
SMTP_USER=marcoioitsoft@gmail.com
|
||||
SMTP_PASSWORD=qrtq wfuj hwpp fhqr
|
114
python-process-monitor/python-process-monitor.py
Normal file
114
python-process-monitor/python-process-monitor.py
Normal file
@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import subprocess
|
||||
import smtplib
|
||||
import os
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load settings from a .env file (python-dotenv) so the os.getenv() lookups
# below can see them. NOTE(review): which .env is picked up depends on
# python-dotenv's search rules - confirm it matches how the script is launched.
load_dotenv()
|
||||
|
||||
# ----------------- CONFIG -----------------
def _csv_env(name):
    """Return the comma-separated environment variable *name* as a list.

    Items are stripped of surrounding whitespace and empty items are dropped,
    so an unset or empty variable yields ``[]`` rather than ``[""]`` (which
    would otherwise be treated as a real service/container/recipient name).
    """
    return [item.strip() for item in os.getenv(name, "").split(",") if item.strip()]


# systemd unit names and Docker container names/IDs to monitor.
services = _csv_env("SERVICES")
containers = _csv_env("CONTAINERS")

# Mail settings used by send_email().
sender_email = os.getenv("SENDER_EMAIL", "")
receiver_email = _csv_env("RECEIVER_EMAIL")
smtp_server = os.getenv("SMTP_SERVER", "")
smtp_port = os.getenv("SMTP_PORT", "")
smtp_user = os.getenv("SMTP_USER", "")
smtp_password = os.getenv("SMTP_PASSWORD", "")
# ------------------------------------------
|
||||
|
||||
def run_cmd(cmd):
    """Run *cmd* through the shell and return its combined stdout/stderr text.

    The exit status is ignored and a single trailing newline, if present,
    is removed from the captured output.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    captured = completed.stdout
    if captured[-1:] == "\n":
        captured = captured[:-1]
    return captured
|
||||
|
||||
def check_service(service):
    """Check a systemd unit and try to restart it if it is not active.

    Returns ``(True, "")`` when ``systemctl is-active`` exits with status 0,
    either immediately or after one of up to two ``systemctl restart``
    attempts. Otherwise returns ``(False, logs)`` where *logs* is the output
    of ``journalctl -u <service> -n 20 --no-pager``.
    """
    probe = ["systemctl", "is-active", service]

    def is_active():
        # Exit status 0 from `systemctl is-active` is the health signal.
        result = subprocess.run(probe, capture_output=True, text=True)
        return result.returncode == 0

    if is_active():
        return True, ""

    for _ in range(2):
        print("Attempting to start: " + service)
        subprocess.run(["systemctl", "restart", service])
        if is_active():
            return True, ""

    # Both restart attempts failed: hand back the journal tail for the report.
    return False, run_cmd(f"journalctl -u {service} -n 20 --no-pager")
|
||||
|
||||
def check_container(container):
    """Check a Docker container and try to restart it if it is not running.

    Returns ``(True, "")`` when ``docker inspect`` reports
    ``.State.Running`` as ``true``, either immediately or after one of up to
    two ``docker restart`` attempts. Otherwise returns ``(False, logs)``
    where *logs* is the output of ``docker logs --tail 20 <container>``.
    """
    inspect_cmd = f"docker inspect -f '{{{{.State.Running}}}}' {container}"

    def is_running():
        # Any output other than "true" (including error text) counts as down.
        return run_cmd(inspect_cmd).strip() == "true"

    if is_running():
        return True, ""

    for _ in range(2):
        run_cmd(f"docker restart {container}")
        if is_running():
            return True, ""

    # Still down after both restarts: return the log tail for the report.
    return False, run_cmd(f"docker logs --tail 20 {container}")
|
||||
|
||||
def send_email(failures):
    """Email an HTML report of services/containers that could not be recovered.

    Args:
        failures: Mapping of display name -> captured log text. If it is
            empty (or otherwise falsy) nothing is sent.

    The message is sent from ``sender_email`` to every address in
    ``receiver_email`` through ``smtp_server``/``smtp_port`` using STARTTLS
    and a login with ``smtp_user``/``smtp_password``. Errors raised by
    ``smtplib`` propagate to the caller.
    """
    if not failures:
        return

    # Local import: the file does not import the ``html`` module at top level.
    from html import escape

    msg = MIMEMultipart("alternative")
    msg["From"] = sender_email
    msg["To"] = ", ".join(receiver_email)
    msg["Subject"] = "Service/Container Failure Report"

    # HTML Email Body
    header = """
    <html>
    <body style="font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px;">
    <h2 style="color: #dc3545;">⚠️ Failure Report</h2>
    <p>The following services/containers failed even after restart attempts:</p>
    <hr>
    """

    # Names and log text are escaped: raw logs routinely contain <, > and &
    # which would otherwise be interpreted as markup and garble the report.
    sections = [
        f"""
    <div style="margin-bottom: 20px; padding: 10px; background-color: #fff3cd; border-left: 5px solid #dc3545;">
    <h3 style="margin: 0; color: #721c24;">❌ {escape(str(name), quote=False)} Failed</h3>
    <p><b>Last 20 log lines:</b></p>
    <pre style="background-color: #f1f1f1; padding: 10px; border-radius: 5px; max-height: 300px; overflow-y: auto; font-size: 12px; color: #212529;">{escape(str(logs), quote=False)}</pre>
    </div>
    """
        for name, logs in failures.items()
    ]

    footer = """
    <hr>
    <p style="color: #6c757d;">This is an automated alert. Please check the server immediately.</p>
    </body>
    </html>
    """

    body = header + "".join(sections) + footer
    msg.attach(MIMEText(body, "html"))

    with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.starttls()
        server.login(smtp_user, smtp_password)
        server.sendmail(sender_email, receiver_email, msg.as_string())
|
||||
|
||||
if __name__ == "__main__":
    # Display name -> log tail for every target that stayed down after the
    # restart attempts; handed to send_email() at the end.
    failures = {}
    print("inside __main__")

    # systemd units: the progress line is printed before the check runs.
    for service in services:
        print("looping services: " + service)
        healthy, log_tail = check_service(service)
        print(healthy)
        print(log_tail)
        if healthy:
            continue
        failures[service] = log_tail

    # Docker containers: the check runs first, then the progress line.
    for container in containers:
        healthy, log_tail = check_container(container)
        print("looping containers: " + container)
        print(healthy)
        print(log_tail)
        if healthy:
            continue
        failures[f"Docker: {container}"] = log_tail

    print(failures)
    send_email(failures)
|
Loading…
x
Reference in New Issue
Block a user