ultisuite-backend/internal/observability/health.go
R3D347HR4Y 2057ccd816 Add observability features with Prometheus and Grafana integration
- Introduced health checks for Nextcloud, Immich, and Jitsi in the .env.example file.
- Implemented Prometheus metrics for HTTP requests, IMAP sync, outbox processing, and webhook executions.
- Added Grafana configuration files for dashboards and data sources.
- Updated Docker Compose to include Prometheus and Grafana services.
- Enhanced logging middleware to include request IDs and metrics tracking.
- Created health checker for monitoring database and external service statuses.
- Updated README with observability setup instructions and service URLs.
2026-05-22 16:17:10 +02:00

128 lines
3.3 KiB
Go

package observability
import (
	"context"
	"fmt"
	"io"
	"net/http"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/redis/go-redis/v9"
	"github.com/ultisuite/ulti-backend/internal/config"
)
// DependencyHealth is the outcome of probing a single dependency. It is
// serialized to JSON as one entry of HealthReport.Checks.
type DependencyHealth struct {
// Name labels the dependency; the checks in this file use "postgres",
// "keydb", "nextcloud", "immich" and "jitsi".
Name string `json:"name"`
// Status is "up", "down" or "disabled", as assigned by the checks in this file.
Status string `json:"status"`
// LatencyMS is the elapsed probe time in whole milliseconds. It stays zero
// for a "disabled" entry, for which no probe is timed.
LatencyMS int64 `json:"latency_ms"`
// Error carries the failure message of a "down" result and is omitted from
// the JSON output when empty.
Error string `json:"error,omitempty"`
}
// HealthReport is the aggregate result produced by HealthChecker.Check.
type HealthReport struct {
// Status is "ok" when no individual check is "down" and "degraded" otherwise.
Status string `json:"status"`
// TimestampUTC is the time the report was assembled, in UTC and formatted
// as RFC 3339.
TimestampUTC string `json:"timestamp_utc"`
// Checks holds one entry per probed dependency.
Checks []DependencyHealth `json:"checks"`
}
// HealthChecker bundles the handles and settings needed to probe the
// service's dependencies. Build one with NewHealthChecker.
type HealthChecker struct {
	// Core data stores, pinged on every Check.
	db    *pgxpool.Pool
	redis *redis.Client

	// httpClient is shared by all HTTP probes.
	httpClient *http.Client

	// Optional HTTP integrations: each is probed at its URL only when its
	// enabled flag is set.
	nextcloudEnabled bool
	nextcloudURL     string
	immichEnabled    bool
	immichURL        string
	jitsiEnabled     bool
	jitsiURL         string
}
// NewHealthChecker assembles a HealthChecker from the application config and
// the already-constructed database pool and Redis client.
//
// The HTTP probe timeout comes from cfg.HealthHTTPTimeout; a zero or negative
// value is replaced by a three-second default.
func NewHealthChecker(cfg *config.Config, db *pgxpool.Pool, redisClient *redis.Client) *HealthChecker {
	const fallbackTimeout = 3 * time.Second

	probeClient := &http.Client{Timeout: cfg.HealthHTTPTimeout}
	if probeClient.Timeout <= 0 {
		probeClient.Timeout = fallbackTimeout
	}

	checker := &HealthChecker{
		db:         db,
		redis:      redisClient,
		httpClient: probeClient,
	}
	checker.nextcloudEnabled, checker.nextcloudURL = cfg.NextcloudEnabled, cfg.HealthNextcloudURL
	checker.immichEnabled, checker.immichURL = cfg.ImmichEnabled, cfg.HealthImmichURL
	checker.jitsiEnabled, checker.jitsiURL = cfg.JitsiEnabled, cfg.HealthJitsiURL
	return checker
}
// Check probes every dependency in a fixed order (postgres, keydb, nextcloud,
// immich, jitsi) and folds the results into a HealthReport.
//
// The overall status is "degraded" as soon as any single check is "down" and
// "ok" otherwise; "disabled" entries do not affect it.
func (h *HealthChecker) Check(ctx context.Context) HealthReport {
	httpTargets := []struct {
		name    string
		enabled bool
		url     string
	}{
		{"nextcloud", h.nextcloudEnabled, h.nextcloudURL},
		{"immich", h.immichEnabled, h.immichURL},
		{"jitsi", h.jitsiEnabled, h.jitsiURL},
	}

	results := make([]DependencyHealth, 0, 2+len(httpTargets))
	results = append(results, h.checkDB(ctx))
	results = append(results, h.checkRedis(ctx))
	for _, target := range httpTargets {
		results = append(results, h.checkHTTP(ctx, target.name, target.enabled, target.url))
	}

	overall := "ok"
	for i := range results {
		if results[i].Status == "down" {
			overall = "degraded"
			break
		}
	}

	return HealthReport{
		Status:       overall,
		TimestampUTC: time.Now().UTC().Format(time.RFC3339),
		Checks:       results,
	}
}
// checkDB pings the database pool and reports the outcome under the name
// "postgres", including how long the ping took.
func (h *HealthChecker) checkDB(ctx context.Context) DependencyHealth {
	began := time.Now()
	// The ping runs while the arguments are evaluated, before latency is measured.
	return toDependencyHealth("postgres", began, h.db.Ping(ctx))
}
// checkRedis pings the Redis client and reports the outcome under the name
// "keydb", including how long the ping took.
func (h *HealthChecker) checkRedis(ctx context.Context) DependencyHealth {
	began := time.Now()
	// The ping runs while the arguments are evaluated, before latency is measured.
	return toDependencyHealth("keydb", began, h.redis.Ping(ctx).Err())
}
// checkHTTP probes one optional HTTP dependency with a GET request.
//
// name labels the resulting entry. When enabled is false no request is sent
// and the entry is reported as "disabled". Otherwise url is fetched with the
// checker's shared HTTP client under ctx: a request or transport error, or a
// response status of 400 and above, yields "down"; any other response yields
// "up". An enabled dependency with an empty url is reported as "down" with a
// configuration error.
func (h *HealthChecker) checkHTTP(ctx context.Context, name string, enabled bool, url string) DependencyHealth {
	// Upper bound on how much of a response body is read before it is closed.
	const maxDrainBytes = 64 << 10

	if !enabled {
		return DependencyHealth{Name: name, Status: "disabled"}
	}

	start := time.Now()
	if url == "" {
		// Without this guard the failure surfaces as an opaque transport error
		// that does not point at the missing configuration.
		return toDependencyHealth(name, start, fmt.Errorf("health check URL is not configured"))
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return toDependencyHealth(name, start, err)
	}

	resp, err := h.httpClient.Do(req)
	if err != nil {
		return toDependencyHealth(name, start, err)
	}
	defer func() {
		// Read the (bounded) remainder of the body before closing so the
		// transport can reuse the keep-alive connection for the next probe.
		// Both errors are deliberately ignored: a failure here only costs
		// connection reuse and must not change the health verdict. Being
		// deferred, this runs after the latency has been measured.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrainBytes))
		_ = resp.Body.Close()
	}()

	if resp.StatusCode >= http.StatusBadRequest {
		return toDependencyHealth(name, start, fmt.Errorf("unexpected status %d", resp.StatusCode))
	}
	return toDependencyHealth(name, start, nil)
}
// toDependencyHealth converts a probe outcome into a DependencyHealth entry.
//
// The latency is the time elapsed since start, in whole milliseconds. A nil
// err produces status "up"; a non-nil err produces status "down" with the
// error's message recorded in Error.
func toDependencyHealth(name string, start time.Time, err error) DependencyHealth {
	elapsedMS := time.Since(start).Milliseconds()

	if err == nil {
		return DependencyHealth{Name: name, Status: "up", LatencyMS: elapsedMS}
	}
	return DependencyHealth{
		Name:      name,
		Status:    "down",
		LatencyMS: elapsedMS,
		Error:     err.Error(),
	}
}