diff --git a/.env.example b/.env.example index fd286ca..2ac947b 100644 --- a/.env.example +++ b/.env.example @@ -156,6 +156,19 @@ IMMICH_DB_NAME=immich IMMICH_UPLOAD_LOCATION=/upload IMMICH_ML_URL=http://immich-ml:3003 +# ----------------------------------------------------------------------------- +# Health checks / Observability +# ----------------------------------------------------------------------------- +# Endpoints vérifiés par /healthz (surchargables pour déploiements externes) +HEALTH_NEXTCLOUD_URL={{NEXTCLOUD_URL}}/status.php +HEALTH_IMMICH_URL={{IMMICH_API_URL}}/server-info/ping +# Par défaut on retire /meet de JITSI_PUBLIC_URL pour viser /about/health +HEALTH_JITSI_URL=https://{{DOMAIN}}/about/health +HEALTH_HTTP_TIMEOUT=3s +# Grafana local (monitoring) +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin + # ----------------------------------------------------------------------------- # Mail (Ultimail) — toujours gere par ultid # ----------------------------------------------------------------------------- diff --git a/README.md b/README.md index 9ad1b46..ab7779d 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,26 @@ Secret rotation policy is enforced through: - `MAIL_CREDENTIAL_KEY_ROTATED_AT` - `MAIL_WEBHOOK_SHARED_SECRET_ROTATED_AT` +### Observability (Prometheus / Grafana) + +ultid exposes Prometheus metrics at `/metrics` (see `internal/observability/metrics.go`). The core Docker Compose stack includes Prometheus and Grafana with configs under `deploy/observability/`: + +| File | Purpose | +|------|---------| +| `deploy/observability/prometheus/prometheus.yml` | Scrape ultid + self; loads alert rules | +| `deploy/observability/prometheus/alerts.yml` | Alert rules: IMAP sync stalled, outbox backlog, HTTP 5xx rate | +| `deploy/observability/grafana/ultid-baseline.json` | Baseline dashboard (HTTP latency/errors, IMAP sync, outbox, webhooks) | +| `deploy/observability/grafana/provisioning/` | Grafana datasource + dashboard auto-load | + +Start with the rest of the stack (`./deploy/compose-up.sh up -d`), then open: + +| Service | URL | Notes | +|---------|-----|-------| +| Prometheus | http://localhost:9090 | Targets: `ultid`, `prometheus` | +| Grafana | http://localhost:3000 | Login from `.env` (`GRAFANA_ADMIN_USER` / `GRAFANA_ADMIN_PASSWORD`, default `admin` / `admin`); dashboard **Ultid Baseline** under folder **Ultid** | + +**Alertmanager** — not included in compose; route labels `service=ultid` and `severity` (`critical`, `warning`) to your on-call channels when you add it. + ## Stack | Component | Technology | @@ -128,6 +148,7 @@ Secret rotation policy is enforced through: ├── migrations/ — SQL migrations ├── deploy/ — Docker Compose configs │ ├── docker-compose.yml — Core stack +│ ├── observability/ — Prometheus alerts + Grafana dashboard │ ├── nginx/ │ ├── nextcloud/ │ ├── jitsi/ diff --git a/cmd/ultid/main.go b/cmd/ultid/main.go index 02c5df7..6830757 100644 --- a/cmd/ultid/main.go +++ b/cmd/ultid/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/json" "fmt" "log/slog" "net/http" @@ -15,6 +16,7 @@ import ( "github.com/jackc/pgx/v5/pgxpool" "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/redis/go-redis/v9" "github.com/ultisuite/ulti-backend/internal/api/admin" @@ -34,6 +36,7 @@ import ( "github.com/ultisuite/ulti-backend/internal/mail/smtp" "github.com/ultisuite/ulti-backend/internal/meet" "github.com/ultisuite/ulti-backend/internal/nextcloud" + "github.com/ultisuite/ulti-backend/internal/observability" "github.com/ultisuite/ulti-backend/internal/photos" "github.com/ultisuite/ulti-backend/internal/realtime" "github.com/ultisuite/ulti-backend/internal/search" @@ -125,6 +128,7 @@ func main() { // WebSocket hub hub := realtime.NewHub() + healthChecker := observability.NewHealthChecker(cfg, pool, rdb) // Start background workers go imapsync.NewSyncWorker(pool, cfg.MailSyncInterval, credentialManager).Start(ctx) @@ -144,17 +148,20 @@ func main() { MaxAge: 300, })) r.Use(middleware.TraceID) + r.Use(observability.HTTPMetrics) r.Use(middleware.Logging) r.Get("/healthz", func(w http.ResponseWriter, r *http.Request) { - if err := pool.Ping(r.Context()); err != nil { - w.WriteHeader(http.StatusServiceUnavailable) - w.Write([]byte("db unhealthy")) - return + report := healthChecker.Check(r.Context()) + statusCode := http.StatusOK + if report.Status != "ok" { + statusCode = http.StatusServiceUnavailable } - w.WriteHeader(http.StatusOK) - w.Write([]byte("ok")) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + _ = json.NewEncoder(w).Encode(report) }) + r.Handle("/metrics", promhttp.Handler()) r.Get("/ws", hub.HandleWS) diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index e203759..09b13d5 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -126,6 +126,44 @@ services: keydb: condition: service_healthy + prometheus: + image: prom/prometheus:v2.54.1 + restart: unless-stopped + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-lifecycle + volumes: + - ./observability/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ./observability/prometheus/alerts.yml:/etc/prometheus/rules/alerts.yml:ro + - prometheus_data:/prometheus + ports: + - "9090:9090" + networks: + - ulti-net + depends_on: + ultid: + condition: service_started + + grafana: + image: grafana/grafana:11.3.0 + restart: unless-stopped + environment: + GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin} + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin} + GF_USERS_ALLOW_SIGN_UP: "false" + volumes: + - ./observability/grafana/provisioning:/etc/grafana/provisioning:ro + - ./observability/grafana/ultid-baseline.json:/etc/grafana/dashboards/ultid-baseline.json:ro + - grafana_data:/var/lib/grafana + ports: + - "3000:3000" + networks: + - ulti-net + depends_on: + prometheus: + condition: service_started + networks: ulti-net: driver: bridge @@ -134,3 +172,5 @@ volumes: postgres_data: keydb_data: rustfs_data: + prometheus_data: + grafana_data: diff --git a/deploy/observability/grafana/provisioning/dashboards/dashboards.yml b/deploy/observability/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..f3c8340 --- /dev/null +++ b/deploy/observability/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: ultid + orgId: 1 + folder: Ultid + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /etc/grafana/dashboards diff --git a/deploy/observability/grafana/provisioning/datasources/prometheus.yml b/deploy/observability/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..e61ecba --- /dev/null +++ b/deploy/observability/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + uid: prometheus + editable: false diff --git a/deploy/observability/grafana/ultid-baseline.json b/deploy/observability/grafana/ultid-baseline.json new file mode 100644 index 0000000..1c2eaa2 --- /dev/null +++ b/deploy/observability/grafana/ultid-baseline.json @@ -0,0 +1,537 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(ultid_http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p95", + "range": true, + "refId": "A" + } + ], + "title": "HTTP p95 latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.01 + }, + { + "color": "red", + "value": 0.05 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(ultid_http_errors_total[5m])) / sum(rate(ultid_http_requests_total[5m]))", + "legendFormat": "5xx rate", + "range": true, + "refId": "A" + } + ], + "title": "HTTP 5xx rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(ultid_imap_sync_runs_total{outcome=\"success\"}[5m]))", + "legendFormat": "success", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(ultid_imap_sync_runs_total{outcome=\"error\"}[5m]))", + "legendFormat": "error", + "range": true, + "refId": "B" + } + ], + "title": "IMAP sync success/error rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 600 + }, + { + "color": "red", + "value": 900 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "time() - ultid_imap_sync_last_success_timestamp_seconds", + "legendFormat": "age", + "range": true, + "refId": "A" + } + ], + "title": "IMAP last success age", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "ultid_outbox_queue_depth", + "legendFormat": "depth", + "range": true, + "refId": "A" + } + ], + "title": "Outbox queue depth", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.9 + }, + { + "color": "green", + "value": 0.95 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(ultid_webhook_executions_total{outcome=\"success\"}[5m])) / sum(rate(ultid_webhook_executions_total[5m]))", + "legendFormat": "success ratio", + "range": true, + "refId": "A" + } + ], + "title": "Webhook success ratio", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "ultid", + "ultimail", + "baseline" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Ultid Baseline", + "uid": "ultid-baseline", + "version": 1 +} diff --git a/deploy/observability/prometheus/alerts.yml b/deploy/observability/prometheus/alerts.yml new file mode 100644 index 0000000..5e9869f --- /dev/null +++ b/deploy/observability/prometheus/alerts.yml @@ -0,0 +1,45 @@ +# Prometheus alerting rules for ultid. +# Load via prometheus.yml rule_files or a Prometheus Operator PrometheusRule. +groups: + - name: ultid + rules: + - alert: UltidIMAPSyncBlocked + expr: | + ultid_imap_sync_last_success_timestamp_seconds > 0 + and (time() - ultid_imap_sync_last_success_timestamp_seconds) > 900 + for: 5m + labels: + severity: critical + service: ultid + annotations: + summary: IMAP mail sync has not succeeded recently + description: Mail sync has not completed successfully in over 15 minutes. Check IMAP connectivity and mail account credentials. + + - alert: UltidOutboxBacklogHigh + expr: ultid_outbox_queue_depth > 50 + for: 10m + labels: + severity: warning + service: ultid + annotations: + summary: Outbox queue backlog is high + description: >- + {{ printf "%.0f" $value }} outbox items are queued/sending/scheduled + (threshold 50 for 10m). Outbound mail may be delayed. + + - alert: UltidHTTP5xxRateHigh + expr: | + sum(rate(ultid_http_requests_total[5m])) > 0.5 + and ( + sum(rate(ultid_http_errors_total[5m])) + / sum(rate(ultid_http_requests_total[5m])) + ) > 0.05 + for: 5m + labels: + severity: warning + service: ultid + annotations: + summary: Elevated HTTP 5xx error rate + description: >- + 5xx rate is {{ printf "%.2f" $value }} (threshold 5% over 5m with + sufficient traffic). Check ultid logs and downstream dependencies. diff --git a/deploy/observability/prometheus/prometheus.yml b/deploy/observability/prometheus/prometheus.yml new file mode 100644 index 0000000..106b6f8 --- /dev/null +++ b/deploy/observability/prometheus/prometheus.yml @@ -0,0 +1,16 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +rule_files: + - /etc/prometheus/rules/alerts.yml + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: ["localhost:9090"] + + - job_name: ultid + metrics_path: /metrics + static_configs: + - targets: ["ultid:8080"] diff --git a/go.mod b/go.mod index 89070cc..21fdad4 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/ultisuite/ulti-backend -go 1.23 +go 1.23.0 require ( github.com/coder/websocket v1.8.14 @@ -13,11 +13,13 @@ require ( github.com/google/uuid v1.6.0 github.com/jackc/pgx/v5 v5.7.1 github.com/minio/minio-go/v7 v7.0.80 + github.com/prometheus/client_golang v1.23.2 github.com/redis/go-redis/v9 v9.7.0 ) require ( - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/emersion/go-message v0.18.1 // indirect @@ -27,14 +29,21 @@ require ( github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect - github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/kr/text v0.2.0 // indirect github.com/minio/md5-simd v1.1.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/rs/xid v1.6.0 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/oauth2 v0.24.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/text v0.19.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + golang.org/x/crypto v0.41.0 // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sync v0.16.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/text v0.28.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect ) diff --git a/go.sum b/go.sum index 7c24074..d76eff1 100644 --- a/go.sum +++ b/go.sum @@ -1,13 +1,16 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g= github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg= github.com/coreos/go-oidc/v3 v3.11.0 h1:Ia3MxdwpSw702YW0xgfmP1GVCMA9aEFWu12XUZ3/OtI= github.com/coreos/go-oidc/v3 v3.11.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -33,8 +36,8 @@ github.com/go-jose/go-jose/v4 v4.0.2 h1:R3l3kkBds16bO7ZFAEEcofK0MkrAJt3jlJznWZG0 github.com/go-jose/go-jose/v4 v4.0.2/go.mod h1:WVf9LFMHh/QVrmqrOfqun0C45tMe3RoiKJMPvgWwLfY= github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -45,54 +48,76 @@ github.com/jackc/pgx/v5 v5.7.1 h1:x7SYsPBYDkHDksogeSmZZ5xzThcTgRz++I5E+ePFUcs= github.com/jackc/pgx/v5 v5.7.1/go.mod h1:e7O26IywZZ+naJtWWos6i6fvWK+29etgITqrqHLfoZA= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= -github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= -github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= github.com/minio/minio-go/v7 v7.0.80 h1:2mdUHXEykRdY/BigLt3Iuu1otL0JTogT0Nmltg0wujk= github.com/minio/minio-go/v7 v7.0.80/go.mod h1:84gmIilaX4zcvAWWzJ5Z1WI5axN+hAbM5w25xf8xvC0= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/redis/go-redis/v9 v9.7.0 h1:HhLSs+B6O021gwzl+locl0zEDnyNkxMtf/Z3NNBMa9E= github.com/redis/go-redis/v9 v9.7.0/go.mod h1:f6zhXITC7JUJIlPEiBOTXxJgPLdZcA93GewI7inzyWw= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= +golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= -golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= -golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= +golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -101,14 +126,18 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/api/middleware/logging.go b/internal/api/middleware/logging.go index 9182636..19998e2 100644 --- a/internal/api/middleware/logging.go +++ b/internal/api/middleware/logging.go @@ -4,6 +4,8 @@ import ( "log/slog" "net/http" "time" + + "github.com/ultisuite/ulti-backend/internal/api/apiresponse" ) type responseWriter struct { @@ -23,11 +25,15 @@ func Logging(next http.Handler) http.Handler { next.ServeHTTP(wrapped, r) - slog.Info("request", + attrs := []any{ "method", r.Method, "path", r.URL.Path, "status", wrapped.status, "duration", time.Since(start), - ) + } + if id := apiresponse.TraceIDFromContext(r.Context()); id != "" { + attrs = append(attrs, "request_id", id) + } + slog.Info("request", attrs...) }) } diff --git a/internal/api/middleware/logging_test.go b/internal/api/middleware/logging_test.go new file mode 100644 index 0000000..6ce41b1 --- /dev/null +++ b/internal/api/middleware/logging_test.go @@ -0,0 +1,108 @@ +package middleware + +import ( + "bytes" + "encoding/json" + "log/slog" + "net/http" + "net/http/httptest" + "testing" + + "github.com/ultisuite/ulti-backend/internal/api/apiresponse" +) + +func withTestLogger(t *testing.T) *bytes.Buffer { + t.Helper() + + var buf bytes.Buffer + old := slog.Default() + slog.SetDefault(slog.New(slog.NewJSONHandler(&buf, nil))) + t.Cleanup(func() { slog.SetDefault(old) }) + return &buf +} + +func parseLogRecord(t *testing.T, buf *bytes.Buffer) map[string]any { + t.Helper() + + var record map[string]any + if err := json.Unmarshal(buf.Bytes(), &record); err != nil { + t.Fatalf("unmarshal log record: %v", err) + } + return record +} + +func TestLoggingIncludesRequestFields(t *testing.T) { + buf := withTestLogger(t) + + handler := Logging(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusCreated) + })) + + req := httptest.NewRequest(http.MethodPost, "/api/messages", nil) + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, req) + + record := parseLogRecord(t, buf) + if record["msg"] != "request" { + t.Fatalf("msg = %v, want request", record["msg"]) + } + if record["method"] != http.MethodPost { + t.Fatalf("method = %v, want POST", record["method"]) + } + if record["path"] != "/api/messages" { + t.Fatalf("path = %v, want /api/messages", record["path"]) + } + if record["status"] != float64(http.StatusCreated) { + t.Fatalf("status = %v, want %d", record["status"], http.StatusCreated) + } + if _, ok := record["duration"]; !ok { + t.Fatal("expected duration field in log record") + } +} + +func TestLoggingIncludesRequestIDFromContext(t *testing.T) { + buf := withTestLogger(t) + + handler := Logging(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})) + + req := httptest.NewRequest(http.MethodGet, "/", nil) + req = req.WithContext(apiresponse.WithTraceID(req.Context(), "trace-abc")) + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, req) + + record := parseLogRecord(t, buf) + if record["request_id"] != "trace-abc" { + t.Fatalf("request_id = %v, want trace-abc", record["request_id"]) + } +} + +func TestLoggingOmitsRequestIDWhenMissing(t *testing.T) { + buf := withTestLogger(t) + + handler := Logging(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})) + + req := httptest.NewRequest(http.MethodGet, "/", nil) + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, req) + + record := parseLogRecord(t, buf) + if _, ok := record["request_id"]; ok { + t.Fatalf("request_id = %v, want field omitted", record["request_id"]) + } +} + +func TestLoggingWithTraceMiddlewareUsesClientTraceID(t *testing.T) { + buf := withTestLogger(t) + + handler := TraceID(Logging(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}))) + + req := httptest.NewRequest(http.MethodGet, "/healthz", nil) + req.Header.Set(apiresponse.TraceIDHeader, "client-trace") + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, req) + + record := parseLogRecord(t, buf) + if record["request_id"] != "client-trace" { + t.Fatalf("request_id = %v, want client-trace", record["request_id"]) + } +} diff --git a/internal/config/config.go b/internal/config/config.go index 360162e..e82aee7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -70,6 +70,12 @@ type Config struct { SearchEngine string MeilisearchURL string MeilisearchKey string + + // Observability + HealthNextcloudURL string + HealthImmichURL string + HealthJitsiURL string + HealthHTTPTimeout time.Duration } func Load() (*Config, error) { @@ -129,6 +135,11 @@ func Load() (*Config, error) { SearchEngine: envOrDefault("SEARCH_ENGINE", "postgres"), MeilisearchURL: os.Getenv("MEILISEARCH_URL"), MeilisearchKey: secrets.Env("MEILISEARCH_API_KEY"), + + HealthNextcloudURL: envOrDefault("HEALTH_NEXTCLOUD_URL", joinURL(envOrDefault("NEXTCLOUD_URL", "http://nextcloud:80"), "/status.php")), + HealthImmichURL: envOrDefault("HEALTH_IMMICH_URL", joinURL(envOrDefault("IMMICH_API_URL", "http://immich-server:2283/api"), "/server-info/ping")), + HealthJitsiURL: envOrDefault("HEALTH_JITSI_URL", defaultHealthJitsiURL(envOrDefault("JITSI_PUBLIC_URL", "https://localhost/meet"))), + HealthHTTPTimeout: envDuration("HEALTH_HTTP_TIMEOUT", 3*time.Second), }, nil } @@ -230,3 +241,13 @@ func envTime(key string) time.Time { } return ts } + +func joinURL(base, path string) string { + return strings.TrimRight(base, "/") + path +} + +func defaultHealthJitsiURL(publicURL string) string { + trimmed := strings.TrimRight(publicURL, "/") + trimmed = strings.TrimSuffix(trimmed, "/meet") + return trimmed + "/about/health" +} diff --git a/internal/mail/imap/sync.go b/internal/mail/imap/sync.go index 86560c4..4a56efe 100644 --- a/internal/mail/imap/sync.go +++ b/internal/mail/imap/sync.go @@ -12,6 +12,7 @@ import ( "github.com/emersion/go-imap/v2/imapclient" "github.com/jackc/pgx/v5/pgxpool" "github.com/ultisuite/ulti-backend/internal/mail/credentials" + "github.com/ultisuite/ulti-backend/internal/observability" ) type SyncWorker struct { @@ -37,7 +38,7 @@ func (w *SyncWorker) Start(ctx context.Context) { w.logger.Info("imap sync worker started", "interval", w.interval) // Initial sync - w.syncAllAccounts(ctx) + w.runSyncCycle(ctx) for { select { @@ -45,12 +46,22 @@ func (w *SyncWorker) Start(ctx context.Context) { w.logger.Info("imap sync worker stopped") return case <-ticker.C: - w.syncAllAccounts(ctx) + w.runSyncCycle(ctx) } } } -func (w *SyncWorker) syncAllAccounts(ctx context.Context) { +func (w *SyncWorker) runSyncCycle(ctx context.Context) { + start := time.Now() + if err := w.syncAllAccounts(ctx); err != nil { + observability.ObserveIMAPSync("error", time.Since(start)) + w.logger.Error("sync cycle failed", "error", err) + return + } + observability.ObserveIMAPSync("success", time.Since(start)) +} + +func (w *SyncWorker) syncAllAccounts(ctx context.Context) error { rows, err := w.db.Query(ctx, ` SELECT id, imap_host, imap_port, imap_tls, credentials, sync_state FROM mail_accounts @@ -58,10 +69,11 @@ func (w *SyncWorker) syncAllAccounts(ctx context.Context) { `) if err != nil { w.logger.Error("failed to query accounts", "error", err) - return + return err } defer rows.Close() + hasSyncError := false for rows.Next() { var ( accountID string @@ -73,13 +85,22 @@ func (w *SyncWorker) syncAllAccounts(ctx context.Context) { ) if err := rows.Scan(&accountID, &host, &port, &useTLS, &creds, &syncState); err != nil { w.logger.Error("failed to scan account", "error", err) + hasSyncError = true continue } if err := w.syncAccount(ctx, accountID, host, port, useTLS, creds, syncState); err != nil { w.logger.Error("sync failed", "account_id", accountID, "error", err) + hasSyncError = true } } + if err := rows.Err(); err != nil { + return err + } + if hasSyncError { + return errors.New("one or more account sync failed") + } + return nil } func (w *SyncWorker) syncAccount(ctx context.Context, accountID, host string, port int, useTLS bool, creds, syncState []byte) error { diff --git a/internal/mail/smtp/outbox.go b/internal/mail/smtp/outbox.go index d20bbd3..8cf4538 100644 --- a/internal/mail/smtp/outbox.go +++ b/internal/mail/smtp/outbox.go @@ -7,6 +7,7 @@ import ( "time" "github.com/jackc/pgx/v5/pgxpool" + "github.com/ultisuite/ulti-backend/internal/observability" ) type OutboxProcessor struct { @@ -39,6 +40,7 @@ func (p *OutboxProcessor) Start(ctx context.Context) { case <-ticker.C: p.processQueue(ctx) p.processScheduled(ctx) + p.updateQueueDepth(ctx) } } } @@ -112,11 +114,13 @@ func (p *OutboxProcessor) processQueue(ctx context.Context) { if err := p.sender.Send(ctx, req); err != nil { p.logger.Error("send failed", "outbox_id", id, "error", err) + observability.IncOutboxProcessed("error") _, _ = p.db.Exec(ctx, ` UPDATE outbox SET status = 'queued', retry_count = retry_count + 1, error = $2, updated_at = NOW() WHERE id = $1 `, id, err.Error()) } else { + observability.IncOutboxProcessed("success") _, _ = p.db.Exec(ctx, ` UPDATE outbox SET status = 'sent', sent_at = NOW(), updated_at = NOW() WHERE id = $1 @@ -135,6 +139,19 @@ func (p *OutboxProcessor) processScheduled(ctx context.Context) { } } +func (p *OutboxProcessor) updateQueueDepth(ctx context.Context) { + var count int64 + err := p.db.QueryRow(ctx, ` + SELECT COUNT(*) FROM outbox + WHERE status IN ('queued', 'scheduled', 'sending') + `).Scan(&count) + if err != nil { + p.logger.Error("failed to compute outbox queue depth", "error", err) + return + } + observability.SetOutboxQueueDepth(count) +} + func parseJSONAddresses(data []byte) []string { var addrs []struct { Address string `json:"address"` diff --git a/internal/mail/webhooks/executor.go b/internal/mail/webhooks/executor.go index e0ee756..63970e4 100644 --- a/internal/mail/webhooks/executor.go +++ b/internal/mail/webhooks/executor.go @@ -12,6 +12,7 @@ import ( "time" "github.com/jackc/pgx/v5/pgxpool" + "github.com/ultisuite/ulti-backend/internal/observability" ) type Executor struct { @@ -77,7 +78,8 @@ func (e *Executor) Execute(ctx context.Context, templateID string, msgCtx *Messa } resp, err := e.client.Do(req) - duration := time.Since(start).Milliseconds() + requestDuration := time.Since(start) + durationMS := requestDuration.Milliseconds() var statusCode int var responseBody string @@ -95,17 +97,20 @@ func (e *Executor) Execute(ctx context.Context, templateID string, msgCtx *Messa _, logErr := e.db.Exec(ctx, ` INSERT INTO webhook_logs (template_id, message_id, status_code, response_body, error, duration_ms) VALUES ($1, $2, $3, $4, $5, $6) - `, templateID, msgCtx.MessageID, statusCode, responseBody, execError, duration) + `, templateID, msgCtx.MessageID, statusCode, responseBody, execError, durationMS) if logErr != nil { e.logger.Error("failed to log webhook", "error", logErr) } if err != nil { + observability.ObserveWebhookExecution("error", statusCode, requestDuration) return fmt.Errorf("request failed: %w", err) } if statusCode >= 400 { + observability.ObserveWebhookExecution("error", statusCode, requestDuration) return fmt.Errorf("webhook returned %d", statusCode) } + observability.ObserveWebhookExecution("success", statusCode, requestDuration) return nil } diff --git a/internal/observability/health.go b/internal/observability/health.go new file mode 100644 index 0000000..7ef63e1 --- /dev/null +++ b/internal/observability/health.go @@ -0,0 +1,127 @@ +package observability + +import ( + "context" + "fmt" + "net/http" + "time" + + "github.com/jackc/pgx/v5/pgxpool" + "github.com/redis/go-redis/v9" + "github.com/ultisuite/ulti-backend/internal/config" +) + +type DependencyHealth struct { + Name string `json:"name"` + Status string `json:"status"` + LatencyMS int64 `json:"latency_ms"` + Error string `json:"error,omitempty"` +} + +type HealthReport struct { + Status string `json:"status"` + TimestampUTC string `json:"timestamp_utc"` + Checks []DependencyHealth `json:"checks"` +} + +type HealthChecker struct { + db *pgxpool.Pool + redis *redis.Client + httpClient *http.Client + + nextcloudEnabled bool + nextcloudURL string + immichEnabled bool + immichURL string + jitsiEnabled bool + jitsiURL string +} + +func NewHealthChecker(cfg *config.Config, db *pgxpool.Pool, redisClient *redis.Client) *HealthChecker { + timeout := cfg.HealthHTTPTimeout + if timeout <= 0 { + timeout = 3 * time.Second + } + return &HealthChecker{ + db: db, + redis: redisClient, + httpClient: &http.Client{Timeout: timeout}, + nextcloudEnabled: cfg.NextcloudEnabled, + nextcloudURL: cfg.HealthNextcloudURL, + immichEnabled: cfg.ImmichEnabled, + immichURL: cfg.HealthImmichURL, + jitsiEnabled: cfg.JitsiEnabled, + jitsiURL: cfg.HealthJitsiURL, + } +} + +func (h *HealthChecker) Check(ctx context.Context) HealthReport { + checks := []DependencyHealth{ + h.checkDB(ctx), + h.checkRedis(ctx), + } + checks = append(checks, h.checkHTTP(ctx, "nextcloud", h.nextcloudEnabled, h.nextcloudURL)) + checks = append(checks, h.checkHTTP(ctx, "immich", h.immichEnabled, h.immichURL)) + checks = append(checks, h.checkHTTP(ctx, "jitsi", h.jitsiEnabled, h.jitsiURL)) + + status := "ok" + for _, check := range checks { + if check.Status == "down" { + status = "degraded" + break + } + } + + return HealthReport{ + Status: status, + TimestampUTC: time.Now().UTC().Format(time.RFC3339), + Checks: checks, + } +} + +func (h *HealthChecker) checkDB(ctx context.Context) DependencyHealth { + start := time.Now() + err := h.db.Ping(ctx) + return toDependencyHealth("postgres", start, err) +} + +func (h *HealthChecker) checkRedis(ctx context.Context) DependencyHealth { + start := time.Now() + err := h.redis.Ping(ctx).Err() + return toDependencyHealth("keydb", start, err) +} + +func (h *HealthChecker) checkHTTP(ctx context.Context, name string, enabled bool, url string) DependencyHealth { + if !enabled { + return DependencyHealth{Name: name, Status: "disabled"} + } + start := time.Now() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return toDependencyHealth(name, start, err) + } + + resp, err := h.httpClient.Do(req) + if err != nil { + return toDependencyHealth(name, start, err) + } + defer resp.Body.Close() + if resp.StatusCode >= http.StatusBadRequest { + return toDependencyHealth(name, start, fmt.Errorf("unexpected status %d", resp.StatusCode)) + } + return toDependencyHealth(name, start, nil) +} + +func toDependencyHealth(name string, start time.Time, err error) DependencyHealth { + item := DependencyHealth{ + Name: name, + LatencyMS: time.Since(start).Milliseconds(), + } + if err != nil { + item.Status = "down" + item.Error = err.Error() + return item + } + item.Status = "up" + return item +} diff --git a/internal/observability/metrics.go b/internal/observability/metrics.go new file mode 100644 index 0000000..f523d18 --- /dev/null +++ b/internal/observability/metrics.go @@ -0,0 +1,117 @@ +package observability + +import ( + "net/http" + "strconv" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + httpRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "ultid_http_requests_total", + Help: "Total number of HTTP requests.", + }, []string{"method", "path", "status"}) + + httpRequestDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "ultid_http_request_duration_seconds", + Help: "HTTP request latency in seconds.", + Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10}, + }, []string{"method", "path", "status"}) + + httpErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "ultid_http_errors_total", + Help: "Total number of HTTP requests ending with 5xx.", + }, []string{"method", "path", "status"}) + + imapSyncRunsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "ultid_imap_sync_runs_total", + Help: "Total number of IMAP sync cycles.", + }, []string{"outcome"}) + + imapSyncDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "ultid_imap_sync_duration_seconds", + Help: "Duration of IMAP sync cycles.", + Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60, 120, 300}, + }, []string{"outcome"}) + + imapLastSuccessUnix = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "ultid_imap_sync_last_success_timestamp_seconds", + Help: "Unix timestamp of last successful IMAP sync cycle.", + }) + + outboxQueueDepth = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "ultid_outbox_queue_depth", + Help: "Current number of queued/sending/scheduled outbox items.", + }) + + outboxProcessedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "ultid_outbox_processed_total", + Help: "Total number of outbox jobs processed.", + }, []string{"outcome"}) + + webhookExecutionsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "ultid_webhook_executions_total", + Help: "Total number of webhook executions.", + }, []string{"outcome", "status_class"}) + + webhookDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "ultid_webhook_duration_seconds", + Help: "Webhook execution latency in seconds.", + Buckets: []float64{0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10}, + }, []string{"outcome"}) +) + +type metricsResponseWriter struct { + http.ResponseWriter + status int +} + +func (rw *metricsResponseWriter) WriteHeader(code int) { + rw.status = code + rw.ResponseWriter.WriteHeader(code) +} + +func HTTPMetrics(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + start := time.Now() + rw := &metricsResponseWriter{ResponseWriter: w, status: http.StatusOK} + next.ServeHTTP(rw, r) + + status := strconv.Itoa(rw.status) + labels := []string{r.Method, r.URL.Path, status} + + httpRequestsTotal.WithLabelValues(labels...).Inc() + httpRequestDurationSeconds.WithLabelValues(labels...).Observe(time.Since(start).Seconds()) + if rw.status >= http.StatusInternalServerError { + httpErrorsTotal.WithLabelValues(labels...).Inc() + } + }) +} + +func ObserveIMAPSync(outcome string, duration time.Duration) { + imapSyncRunsTotal.WithLabelValues(outcome).Inc() + imapSyncDurationSeconds.WithLabelValues(outcome).Observe(duration.Seconds()) + if outcome == "success" { + imapLastSuccessUnix.SetToCurrentTime() + } +} + +func SetOutboxQueueDepth(depth int64) { + outboxQueueDepth.Set(float64(depth)) +} + +func IncOutboxProcessed(outcome string) { + outboxProcessedTotal.WithLabelValues(outcome).Inc() +} + +func ObserveWebhookExecution(outcome string, statusCode int, duration time.Duration) { + statusClass := "none" + if statusCode > 0 { + statusClass = strconv.Itoa(statusCode/100) + "xx" + } + webhookExecutionsTotal.WithLabelValues(outcome, statusClass).Inc() + webhookDurationSeconds.WithLabelValues(outcome).Observe(duration.Seconds()) +} diff --git a/project-plan/checklist-execution.md b/project-plan/checklist-execution.md index 339a6ba..726eb5d 100644 --- a/project-plan/checklist-execution.md +++ b/project-plan/checklist-execution.md @@ -50,11 +50,11 @@ Objectif: transformer état actuel (partiellement implémenté) vers produit fon ### Observabilité & exploitation -- [ ] Ajouter request-id/correlation-id dans logs. -- [ ] Ajouter métriques Prometheus (latence, erreurs, jobs sync, queue outbox, webhook success rate). -- [ ] Ajouter health checks détaillés (DB, Redis/KeyDB, Nextcloud, Immich, Jitsi). -- [ ] Ajouter dashboard Grafana baseline. -- [ ] Définir alerting (mail sync bloquée, outbox bloquée, erreurs 5xx anormales). +- [x] Ajouter request-id/correlation-id dans logs. +- [x] Ajouter métriques Prometheus (latence, erreurs, jobs sync, queue outbox, webhook success rate). +- [x] Ajouter health checks détaillés (DB, Redis/KeyDB, Nextcloud, Immich, Jitsi). +- [x] Ajouter dashboard Grafana baseline. +- [x] Définir alerting (mail sync bloquée, outbox bloquée, erreurs 5xx anormales). ### Tests & CI