- Introduced health checks for Nextcloud, Immich, and Jitsi in the .env.example file. - Implemented Prometheus metrics for HTTP requests, IMAP sync, outbox processing, and webhook executions. - Added Grafana configuration files for dashboards and data sources. - Updated Docker Compose to include Prometheus and Grafana services. - Enhanced logging middleware to include request IDs and metrics tracking. - Created health checker for monitoring database and external service statuses. - Updated README with observability setup instructions and service URLs.
46 lines
1.6 KiB
YAML
46 lines
1.6 KiB
YAML
# Prometheus alerting rules for ultid.
# Load via prometheus.yml rule_files or a Prometheus Operator PrometheusRule.
groups:
  - name: ultid
    rules:
      # Fires when the last successful IMAP sync timestamp is more than 900 s
      # (15 min) in the past, sustained for 5m. The `> 0` guard drops series
      # whose timestamp is zero (presumably "no successful sync recorded yet" —
      # confirm against the exporter).
      - alert: UltidIMAPSyncBlocked
        expr: |
          ultid_imap_sync_last_success_timestamp_seconds > 0
          and (time() - ultid_imap_sync_last_success_timestamp_seconds) > 900
        for: 5m
        labels:
          severity: critical
          service: ultid
        annotations:
          summary: IMAP mail sync has not succeeded recently
          description: >-
            Mail sync has not completed successfully in over 15 minutes.
            Check IMAP connectivity and mail account credentials.

      # Fires when the outbox queue depth gauge stays above 50 for 10m.
      # $value in the description is the current queue depth.
      - alert: UltidOutboxBacklogHigh
        expr: ultid_outbox_queue_depth > 50
        for: 10m
        labels:
          severity: warning
          service: ultid
        annotations:
          summary: Outbox queue backlog is high
          description: >-
            {{ printf "%.0f" $value }} outbox items are queued/sending/scheduled
            (threshold 50 for 10m). Outbound mail may be delayed.

      # Fires when errors/requests over 5m exceeds 0.05 (5%) while total
      # traffic is above 0.5 req/s, sustained for 5m.
      #
      # Operand order matters: PromQL `and` keeps the sample value of its
      # left-hand side, so the error ratio must come first for $value in the
      # description to be the ratio rather than the total request rate. The
      # right-hand side is only a low-traffic guard.
      # NOTE(review): assumes ultid_http_errors_total counts only 5xx
      # responses — confirm against the metrics middleware.
      - alert: UltidHTTP5xxRateHigh
        expr: |
          (
            sum(rate(ultid_http_errors_total[5m]))
            / sum(rate(ultid_http_requests_total[5m]))
          ) > 0.05
          and sum(rate(ultid_http_requests_total[5m])) > 0.5
        for: 5m
        labels:
          severity: warning
          service: ultid
        annotations:
          summary: Elevated HTTP 5xx error rate
          description: >-
            5xx rate is {{ $value | humanizePercentage }} (threshold 5% over 5m
            with sufficient traffic). Check ultid logs and downstream
            dependencies.