1 unstable release
0.0.0 | Aug 30, 2024 |
---|
#34 in #nginx
9KB
83 lines
Ramon
⚠️ This project is a WIP and not yet functional. All configuration is subject to change.
Ramon is a lightweight server monitoring framework. It runs actions (e.g. sending emails) when certain conditions are met. For example, you can configure Ramon to send an email every time an SSH connection from a new IP address is established. Or send an email when a service goes down. Perhaps you would like daily emails of any 5xx status codes that occurred. Ramon makes this easy.
Ramon's design is heavily inspired by fail2ban and Tasker.
Examples
Setup
[notify.default]
# Sender, recipient, and the SMTP server used for delivery.
from = "ramon@{{host}}"
to = "you@{{host}}"
smtp = "localhost:587"
# NOTE(review): presumably a rate limit of 10 notifications per minute — confirm.
limit = "10/m"
# Batching: once a notification is dispatched, hold it for 10 seconds
# in case another action of the same type fires, and fold them into a
# single notification. If that keeps happening for longer than
# 1 minute, stop waiting and send the notification immediately.
aggregate = "10s"
aggregate_timeout = "1m"
# NOTE(review): the role of this key inside [notify.default] is not evident from this example — confirm.
notify = "default"
# Per-type aggregation settings.
[notify.type]
# Info: collect every info notification and deliver them together
# in a single email at 8:00AM each day.
info.aggregate = "* * * 8:00AM"
# Critical: never aggregate.
critical.aggregate = "0s"
SSH
# Log each login from a new IP
# The ssh_ips variable, which the ssh_login monitor checks and pushes to.
# NOTE(review): `length` presumably caps how many entries are kept and
# `store` presumably persists them — confirm.
[var.ssh_ips]
length = 64
store = true
# Send a critical notification for an SSH login from an IP address
# that is not yet recorded in ssh_ips.
[monitor.ssh_login]
service = "ssh"
# Captures the account as `user` and the remote address as `ip`.
# The `ip` group must match at least one character; an empty group
# would always yield an empty {{ip}}.
match_log = '^.*\]: Accepted \S+ for (?<user>\S+) from (?<ip>\S+)'
# NOTE(review): the "!" prefix presumably negates the check, i.e. the
# action runs only when {{ip}} is not in ssh_ips — confirm.
if = { "!ssh_ips" = "{{ip}}" }
# Record the address in ssh_ips.
push = { ssh_ips = "{{ip}}" }
notify = { type = "critical", title = "New SSH login from {{ip}} to {{user}}@{{host}}" }
# Alternatively, log every login: known IPs produce an info
# notification, new IPs a critical one.
[monitor.ssh_login]
service = "ssh"
# Captures the account as `user` and the remote address as `ip`.
# The `ip` group must match at least one character; an empty group
# would always yield an empty {{ip}}.
match_log = '^.*\]: Accepted \S+ for (?<user>\S+) from (?<ip>\S+)'

# Action for an IP that is already in ssh_ips.
[[monitor.ssh_login.actions]]
if = { ssh_ips = "{{ip}}" }
# We only send these info emails once per day.
notify = { type = "info", title = "SSH login to {{user}}@{{host}}" }

# Action for an IP that is not yet in ssh_ips: record it and alert.
# NOTE(review): the "!" prefix presumably negates the check — confirm.
[[monitor.ssh_login.actions]]
if = { "!ssh_ips" = "{{ip}}" }
push = { ssh_ips = "{{ip}}" }
notify = { type = "critical", title = "New SSH login from {{ip}} to {{user}}@{{host}}" }
System resources
# Warn about sustained high CPU usage (above 90% for 2 minutes,
# per the notification title).
[monitor.cpu]
cpu = ">90"
duration = "2m"
# NOTE(review): presumably the minimum gap between repeated notifications — confirm.
cooldown = "1h"
notify.type = "warn"
notify.title = "[{{host}}] CPU > 90% for 2m"
# Warn about memory pressure, reporting current RAM and swap values.
# NOTE(review): whether both thresholds must be exceeded, or either
# one, is not evident from this example — confirm.
[monitor.ram]
ram = ">90"
swap = ">50"
# NOTE(review): presumably the minimum gap between repeated notifications — confirm.
cooldown = "1h"
notify.type = "warn"
notify.title = "[{{host}}] RAM: {{ram}}, swap: {{swap}}"
Nginx
# Path of the Nginx access log; the Nginx monitors reference it
# as {{nginx_log}}.
[var]
nginx_log = "/var/log/nginx/access.log"
# Send an error notification for every access-log line whose status
# code is 5xx.
[monitor.nginx_5xx]
log = "{{nginx_log}}"
# Captures the quoted request as `path` and the status as `code`.
match_log = '^\S+ \S+ \S+ \[.+\] "(?<path>.*)" (?<code>5\d{2})'
notify.type = "error"
notify.title = "Server error: {{code}} at {{path}}"
# Report 404s generated by browsers.
# We can determine browsers by seeing if
# they successfully GET a .css file.
# IPs treated as real browsers. The name must be human_ips, because
# that is what mark_human pushes to and nginx_404 checks. Declared as
# a sub-table so that [var] is not opened a second time.
# NOTE(review): `length` presumably caps how many entries are kept — confirm.
[var.human_ips]
length = 64
# Record the client IP of any request for a .css file that
# returned 200.
[monitor.mark_human]
log = "{{nginx_log}}"
# Captures the client address as `ip`.
match_log = '^(?<ip>\S+) \S+ \S+ \[.+\] ".*\.css" 200'
push.human_ips = "{{ip}}"
# Send an info notification for a 404 response when the client IP
# is in human_ips.
[monitor.nginx_404]
log = "{{nginx_log}}"
# Captures the client address as `ip` and the quoted request as `path`.
match_log = '^(?<ip>\S+) \S+ \S+ \[.+\] "(?<path>.*)" 404'
if.human_ips = "{{ip}}"
notify.type = "info"
notify.title = "404 at {{path}}"
HTTP
# Request an HTTP endpoint every 5 minutes and send an error
# notification carrying the error text.
[monitor.example_endpoint]
every = "5m"
get = "https://example.com/endpoint"
# NOTE(review): presumably "err is not empty", i.e. the request failed — confirm.
if."!err" = ""
notify.type = "error"
notify.title = "{{url}}: {{err}}"
systemd
# Send an error notification on a service_fail event, naming the
# service in the title.
[monitor.services]
on = ["service_fail"]
notify.type = "error"
notify.title = "Service failed: {{service}}"
# Send a critical notification on a service_fail event for the
# "criticald" service.
# NOTE(review): the title says "Restarting..." but no restart action
# is configured in this example — confirm.
[monitor.critical_service]
service = "criticald"
on = ["service_fail"]
notify.type = "critical"
notify.title = "Critical service failed! Restarting..."
System integrity
# Watch /etc/passwd and send a critical notification when the file
# changes.
[monitor.etc_passwd]
watch = "/etc/passwd"
notify.type = "critical"
notify.title = "File changed: /etc/passwd"
# Send a critical notification on a port_open event, naming the
# port in the title.
[monitor.ports]
on = ["port_open"]
notify.type = "critical"
notify.title = "New port opened: {{port}}"
Dependencies
~490–770KB
~16K SLoC