ultisuite-backend/internal/observability/metrics.go
R3D347HR4Y 7143a36c19
Some checks are pending
CI / Go tests (push) Waiting to run
CI / Integration tests (push) Waiting to run
CI / DB migrations (push) Waiting to run
feat(mail): integrate Stalwart hosted mail and migration features
- Added configuration options for Stalwart hosted mail in .env.example.
- Updated Docker Compose to include Stalwart service with health checks.
- Introduced new API endpoints for managing mail domains and migration projects.
- Enhanced Authentik blueprints for user enrollment and post-migration security.
- Updated OAuth handling for Google and Microsoft migration processes.
- Improved error handling and response structures in the mail API.
- Added integration tests for email claiming and migration workflows.
2026-06-13 12:47:08 +02:00

185 lines
5.9 KiB
Go

package observability
import (
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)
// Package-level Prometheus collectors for the service. Every collector is
// created through promauto, which registers it with the default Prometheus
// registerer when the package is initialised (and panics if registration
// fails, e.g. on a duplicate metric name). All metric names share the
// "ultid_" prefix.
var (
// --- HTTP server metrics (written by HTTPMetrics) ---

// httpRequestsTotal counts handled requests by method, path and status code.
httpRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "ultid_http_requests_total",
Help: "Total number of HTTP requests.",
}, []string{"method", "path", "status"})
// httpRequestDurationSeconds tracks request latency; buckets span 5ms to 10s.
httpRequestDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "ultid_http_request_duration_seconds",
Help: "HTTP request latency in seconds.",
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10},
}, []string{"method", "path", "status"})
// httpErrorsTotal counts only requests whose status is 5xx.
httpErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "ultid_http_errors_total",
Help: "Total number of HTTP requests ending with 5xx.",
}, []string{"method", "path", "status"})

// --- IMAP sync metrics (written by ObserveIMAPSync) ---

// imapSyncRunsTotal counts sync cycles, labelled by outcome.
imapSyncRunsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "ultid_imap_sync_runs_total",
Help: "Total number of IMAP sync cycles.",
}, []string{"outcome"})
// imapSyncDurationSeconds tracks sync cycle duration; buckets span 100ms to 5min.
imapSyncDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "ultid_imap_sync_duration_seconds",
Help: "Duration of IMAP sync cycles.",
Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60, 120, 300},
}, []string{"outcome"})
// imapLastSuccessUnix holds the time of the most recent "success" outcome.
imapLastSuccessUnix = promauto.NewGauge(prometheus.GaugeOpts{
Name: "ultid_imap_sync_last_success_timestamp_seconds",
Help: "Unix timestamp of last successful IMAP sync cycle.",
})

// --- Outbox metrics (SetOutboxQueueDepth, IncOutboxProcessed) ---

// outboxQueueDepth is the last reported outbox backlog size.
outboxQueueDepth = promauto.NewGauge(prometheus.GaugeOpts{
Name: "ultid_outbox_queue_depth",
Help: "Current number of queued/sending/scheduled outbox items.",
})
// outboxProcessedTotal counts processed outbox jobs, labelled by outcome.
outboxProcessedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "ultid_outbox_processed_total",
Help: "Total number of outbox jobs processed.",
}, []string{"outcome"})

// --- Webhook metrics ---

// webhookExecutionsTotal counts executions by outcome and status class
// (ObserveWebhookExecution produces values such as "2xx" or "none").
webhookExecutionsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "ultid_webhook_executions_total",
Help: "Total number of webhook executions.",
}, []string{"outcome", "status_class"})
// webhookDurationSeconds tracks execution latency; buckets span 10ms to 10s.
webhookDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "ultid_webhook_duration_seconds",
Help: "Webhook execution latency in seconds.",
Buckets: []float64{0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10},
}, []string{"outcome"})
// webhookRetriesTotal counts retries, labelled by the caller-supplied reason.
webhookRetriesTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "ultid_webhook_retries_total",
Help: "Total number of webhook retries.",
}, []string{"reason"})
// webhookDeadLettersTotal counts executions moved to dead-letter.
webhookDeadLettersTotal = promauto.NewCounter(prometheus.CounterOpts{
Name: "ultid_webhook_dead_letters_total",
Help: "Total number of webhook executions moved to dead-letter.",
})
// webhookPayloadTruncatedTotal counts payloads truncated in logs.
webhookPayloadTruncatedTotal = promauto.NewCounter(prometheus.CounterOpts{
Name: "ultid_webhook_payload_truncated_total",
Help: "Total number of webhook payloads truncated in logs.",
})

// --- Migration worker metrics ---

// migrationJobsProcessedTotal counts job runs by service and outcome.
migrationJobsProcessedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "ultid_migration_jobs_processed_total",
Help: "Total number of migration worker job runs.",
}, []string{"service", "outcome"})
// migrationJobDurationSeconds tracks job run duration; buckets span 100ms to 5min.
migrationJobDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "ultid_migration_job_duration_seconds",
Help: "Migration worker job run duration in seconds.",
Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60, 120, 300},
}, []string{"service", "outcome"})
// migrationPendingJobs is the last value passed to SetMigrationPendingJobs.
migrationPendingJobs = promauto.NewGauge(prometheus.GaugeOpts{
Name: "ultid_migration_pending_jobs",
Help: "Migration jobs picked up on the latest worker tick.",
})
// migrationRateLimitRetriesTotal counts provider API 429 retries.
migrationRateLimitRetriesTotal = promauto.NewCounter(prometheus.CounterOpts{
Name: "ultid_migration_rate_limit_retries_total",
Help: "Total number of migration provider API 429 retries.",
})
)
// metricsResponseWriter wraps an http.ResponseWriter and remembers the status
// code of the response that is actually sent, so the metrics middleware can
// label its observations with it.
type metricsResponseWriter struct {
	http.ResponseWriter
	// status is the final response status. The creator seeds it with the
	// default to report when the handler never writes a header.
	status int
	// wroteHeader is true once the final (non-informational) status is known.
	wroteHeader bool
}

// WriteHeader records code as the response status and forwards the call to the
// wrapped writer.
//
// Only the first final status is recorded: net/http ignores superfluous
// WriteHeader calls, so a later code never reaches the client. Informational
// 1xx codes (other than 101 Switching Protocols) may precede the final header
// and are forwarded without being recorded.
func (rw *metricsResponseWriter) WriteHeader(code int) {
	informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.status = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards p to the wrapped writer. A Write that happens before any
// WriteHeader call implicitly sends 200 OK, so that status is locked in here
// and a later WriteHeader cannot change what is reported.
func (rw *metricsResponseWriter) Write(p []byte) (int, error) {
	if !rw.wroteHeader {
		rw.status = http.StatusOK
		rw.wroteHeader = true
	}
	return rw.ResponseWriter.Write(p)
}

// Unwrap returns the wrapped writer so http.ResponseController can reach the
// optional capabilities (flush, hijack, deadlines) that this wrapper would
// otherwise hide.
func (rw *metricsResponseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}
// HTTPMetrics returns middleware that records, for every request served by
// next, a request counter, a latency observation and (for 5xx responses) an
// error counter. All three are labelled with the request method, the request
// path and the response status code.
//
// NOTE(review): the path label is the raw request path, so its cardinality
// grows with every distinct URL (IDs, probes, scanners). Consider labelling
// with the router's route pattern instead once it is available here.
func HTTPMetrics(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		start := time.Now()
		rw := &metricsResponseWriter{ResponseWriter: w, status: http.StatusOK}
		next.ServeHTTP(rw, r)
		elapsed := time.Since(start).Seconds()

		// r.URL.Path is percent-decoded and client controlled, so it can hold
		// bytes that are not valid UTF-8 (e.g. "/%ff"). The Prometheus client
		// rejects such label values and WithLabelValues panics, so sanitize
		// before use. Valid strings pass through unchanged.
		method := strings.ToValidUTF8(r.Method, "\uFFFD")
		path := strings.ToValidUTF8(r.URL.Path, "\uFFFD")
		labels := []string{method, path, strconv.Itoa(rw.status)}

		httpRequestsTotal.WithLabelValues(labels...).Inc()
		httpRequestDurationSeconds.WithLabelValues(labels...).Observe(elapsed)
		if rw.status >= http.StatusInternalServerError {
			httpErrorsTotal.WithLabelValues(labels...).Inc()
		}
	})
}
// ObserveIMAPSync records one IMAP sync cycle: it bumps the run counter and
// adds duration (in seconds) to the duration histogram, both labelled with
// outcome. When outcome is exactly "success" the last-success gauge is set to
// the current time.
func ObserveIMAPSync(outcome string, duration time.Duration) {
	seconds := duration.Seconds()

	imapSyncRunsTotal.WithLabelValues(outcome).Inc()
	imapSyncDurationSeconds.WithLabelValues(outcome).Observe(seconds)

	if outcome != "success" {
		return
	}
	imapLastSuccessUnix.SetToCurrentTime()
}
// SetOutboxQueueDepth publishes depth as the current value of the outbox
// queue depth gauge.
func SetOutboxQueueDepth(depth int64) {
	value := float64(depth)
	outboxQueueDepth.Set(value)
}
// IncOutboxProcessed adds one to the processed-outbox-jobs counter for the
// given outcome label.
func IncOutboxProcessed(outcome string) {
	counter := outboxProcessedTotal.WithLabelValues(outcome)
	counter.Inc()
}
// ObserveWebhookExecution records one webhook execution. The execution
// counter is labelled with outcome and a status class derived from
// statusCode ("2xx", "5xx", ...; "none" when statusCode is zero or negative).
// The duration histogram is labelled with outcome only and observes duration
// in seconds.
func ObserveWebhookExecution(outcome string, statusCode int, duration time.Duration) {
	var statusClass string
	switch {
	case statusCode > 0:
		// Integer division keeps only the leading digit(s): 404 -> "4xx".
		statusClass = strconv.Itoa(statusCode/100) + "xx"
	default:
		statusClass = "none"
	}

	executions := webhookExecutionsTotal.WithLabelValues(outcome, statusClass)
	executions.Inc()

	latency := webhookDurationSeconds.WithLabelValues(outcome)
	latency.Observe(duration.Seconds())
}
// IncWebhookRetry adds one to the webhook retry counter for the given reason
// label.
func IncWebhookRetry(reason string) {
	counter := webhookRetriesTotal.WithLabelValues(reason)
	counter.Inc()
}
// IncWebhookDeadLetter adds one to the counter of webhook executions moved to
// dead-letter.
func IncWebhookDeadLetter() {
	deadLetters := webhookDeadLettersTotal
	deadLetters.Inc()
}
// IncWebhookPayloadTruncated adds one to the counter of webhook payloads that
// were truncated in logs.
func IncWebhookPayloadTruncated() {
	truncated := webhookPayloadTruncatedTotal
	truncated.Inc()
}
// ObserveMigrationJob records one migration worker job run: it bumps the job
// counter and adds duration (in seconds) to the job duration histogram, both
// labelled with service and outcome. An empty service or outcome is reported
// as "unknown".
func ObserveMigrationJob(service, outcome string, duration time.Duration) {
	const fallback = "unknown"

	if len(service) == 0 {
		service = fallback
	}
	if len(outcome) == 0 {
		outcome = fallback
	}

	labels := []string{service, outcome}
	migrationJobsProcessedTotal.WithLabelValues(labels...).Inc()
	migrationJobDurationSeconds.WithLabelValues(labels...).Observe(duration.Seconds())
}
// SetMigrationPendingJobs publishes count as the current value of the
// migration pending-jobs gauge.
func SetMigrationPendingJobs(count int) {
	value := float64(count)
	migrationPendingJobs.Set(value)
}
// IncMigrationRateLimitRetry adds one to the counter of migration provider
// API 429 retries.
func IncMigrationRateLimitRetry() {
	retries := migrationRateLimitRetriesTotal
	retries.Inc()
}