- Updated .env.example to include new configuration options for the UltiAI branding and API endpoints. - Enhanced Nginx configuration to support new API routes for the MCP and WebSocket connections. - Introduced sub-filters for branding adjustments in Nginx responses. - Added new JavaScript patch for API endpoint adjustments. - Implemented tests for new API functionalities and improved error handling in the AI gateway.
191 lines
6.0 KiB
Go
191 lines
6.0 KiB
Go
package observability
|
|
|
|
import (
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
)
|
|
|
|
var (
|
|
httpRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "ultid_http_requests_total",
|
|
Help: "Total number of HTTP requests.",
|
|
}, []string{"method", "path", "status"})
|
|
|
|
httpRequestDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "ultid_http_request_duration_seconds",
|
|
Help: "HTTP request latency in seconds.",
|
|
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10},
|
|
}, []string{"method", "path", "status"})
|
|
|
|
httpErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "ultid_http_errors_total",
|
|
Help: "Total number of HTTP requests ending with 5xx.",
|
|
}, []string{"method", "path", "status"})
|
|
|
|
imapSyncRunsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "ultid_imap_sync_runs_total",
|
|
Help: "Total number of IMAP sync cycles.",
|
|
}, []string{"outcome"})
|
|
|
|
imapSyncDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "ultid_imap_sync_duration_seconds",
|
|
Help: "Duration of IMAP sync cycles.",
|
|
Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60, 120, 300},
|
|
}, []string{"outcome"})
|
|
|
|
imapLastSuccessUnix = promauto.NewGauge(prometheus.GaugeOpts{
|
|
Name: "ultid_imap_sync_last_success_timestamp_seconds",
|
|
Help: "Unix timestamp of last successful IMAP sync cycle.",
|
|
})
|
|
|
|
outboxQueueDepth = promauto.NewGauge(prometheus.GaugeOpts{
|
|
Name: "ultid_outbox_queue_depth",
|
|
Help: "Current number of queued/sending/scheduled outbox items.",
|
|
})
|
|
|
|
outboxProcessedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "ultid_outbox_processed_total",
|
|
Help: "Total number of outbox jobs processed.",
|
|
}, []string{"outcome"})
|
|
|
|
webhookExecutionsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "ultid_webhook_executions_total",
|
|
Help: "Total number of webhook executions.",
|
|
}, []string{"outcome", "status_class"})
|
|
|
|
webhookDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "ultid_webhook_duration_seconds",
|
|
Help: "Webhook execution latency in seconds.",
|
|
Buckets: []float64{0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10},
|
|
}, []string{"outcome"})
|
|
|
|
webhookRetriesTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "ultid_webhook_retries_total",
|
|
Help: "Total number of webhook retries.",
|
|
}, []string{"reason"})
|
|
|
|
webhookDeadLettersTotal = promauto.NewCounter(prometheus.CounterOpts{
|
|
Name: "ultid_webhook_dead_letters_total",
|
|
Help: "Total number of webhook executions moved to dead-letter.",
|
|
})
|
|
|
|
webhookPayloadTruncatedTotal = promauto.NewCounter(prometheus.CounterOpts{
|
|
Name: "ultid_webhook_payload_truncated_total",
|
|
Help: "Total number of webhook payloads truncated in logs.",
|
|
})
|
|
|
|
migrationJobsProcessedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "ultid_migration_jobs_processed_total",
|
|
Help: "Total number of migration worker job runs.",
|
|
}, []string{"service", "outcome"})
|
|
|
|
migrationJobDurationSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "ultid_migration_job_duration_seconds",
|
|
Help: "Migration worker job run duration in seconds.",
|
|
Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60, 120, 300},
|
|
}, []string{"service", "outcome"})
|
|
|
|
migrationPendingJobs = promauto.NewGauge(prometheus.GaugeOpts{
|
|
Name: "ultid_migration_pending_jobs",
|
|
Help: "Migration jobs picked up on the latest worker tick.",
|
|
})
|
|
|
|
migrationRateLimitRetriesTotal = promauto.NewCounter(prometheus.CounterOpts{
|
|
Name: "ultid_migration_rate_limit_retries_total",
|
|
Help: "Total number of migration provider API 429 retries.",
|
|
})
|
|
)
|
|
|
|
// metricsResponseWriter wraps an http.ResponseWriter and remembers the final
// status code of the response so it can be used as a metric label.
//
// The creator must initialise status to the code that applies when the
// handler never sets one explicitly (http.StatusOK for net/http).
type metricsResponseWriter struct {
	http.ResponseWriter
	status      int  // final status code recorded for the response
	wroteHeader bool // set once the final response header has been committed
}

// WriteHeader forwards code to the wrapped writer and records it as the
// response status.
//
// Per the http.ResponseWriter contract any number of 1xx headers may be
// written, followed by at most one final header. Only that first final code
// is recorded: informational codes are passed through without being stored,
// and later calls are still forwarded (so the wrapped writer can report them)
// but no longer change the recorded status.
func (rw *metricsResponseWriter) WriteHeader(code int) {
	// 101 Switching Protocols ends the HTTP exchange, so it counts as final.
	informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
	if !informational && !rw.wroteHeader {
		rw.status = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the wrapped writer.
//
// A Write that happens before any WriteHeader call implicitly sends
// http.StatusOK, so the header is marked as committed here and a later
// WriteHeader call cannot alter the recorded status.
func (rw *metricsResponseWriter) Write(b []byte) (int, error) {
	if !rw.wroteHeader {
		rw.status = http.StatusOK
		rw.wroteHeader = true
	}
	return rw.ResponseWriter.Write(b)
}

// Flush implements http.Flusher by delegating to the wrapped writer. It is a
// no-op when the wrapped writer does not support flushing.
func (rw *metricsResponseWriter) Flush() {
	flusher, ok := rw.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	flusher.Flush()
}

// Unwrap returns the wrapped writer so that http.ResponseController can reach
// optional interfaces of the underlying writer that this wrapper does not
// re-export itself.
func (rw *metricsResponseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}
|
|
|
|
func HTTPMetrics(next http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
start := time.Now()
|
|
rw := &metricsResponseWriter{ResponseWriter: w, status: http.StatusOK}
|
|
next.ServeHTTP(rw, r)
|
|
|
|
status := strconv.Itoa(rw.status)
|
|
labels := []string{r.Method, r.URL.Path, status}
|
|
|
|
httpRequestsTotal.WithLabelValues(labels...).Inc()
|
|
httpRequestDurationSeconds.WithLabelValues(labels...).Observe(time.Since(start).Seconds())
|
|
if rw.status >= http.StatusInternalServerError {
|
|
httpErrorsTotal.WithLabelValues(labels...).Inc()
|
|
}
|
|
})
|
|
}
|
|
|
|
func ObserveIMAPSync(outcome string, duration time.Duration) {
|
|
imapSyncRunsTotal.WithLabelValues(outcome).Inc()
|
|
imapSyncDurationSeconds.WithLabelValues(outcome).Observe(duration.Seconds())
|
|
if outcome == "success" {
|
|
imapLastSuccessUnix.SetToCurrentTime()
|
|
}
|
|
}
|
|
|
|
func SetOutboxQueueDepth(depth int64) {
|
|
outboxQueueDepth.Set(float64(depth))
|
|
}
|
|
|
|
func IncOutboxProcessed(outcome string) {
|
|
outboxProcessedTotal.WithLabelValues(outcome).Inc()
|
|
}
|
|
|
|
func ObserveWebhookExecution(outcome string, statusCode int, duration time.Duration) {
|
|
statusClass := "none"
|
|
if statusCode > 0 {
|
|
statusClass = strconv.Itoa(statusCode/100) + "xx"
|
|
}
|
|
webhookExecutionsTotal.WithLabelValues(outcome, statusClass).Inc()
|
|
webhookDurationSeconds.WithLabelValues(outcome).Observe(duration.Seconds())
|
|
}
|
|
|
|
func IncWebhookRetry(reason string) {
|
|
webhookRetriesTotal.WithLabelValues(reason).Inc()
|
|
}
|
|
|
|
func IncWebhookDeadLetter() {
|
|
webhookDeadLettersTotal.Inc()
|
|
}
|
|
|
|
func IncWebhookPayloadTruncated() {
|
|
webhookPayloadTruncatedTotal.Inc()
|
|
}
|
|
|
|
func ObserveMigrationJob(service, outcome string, duration time.Duration) {
|
|
if service == "" {
|
|
service = "unknown"
|
|
}
|
|
if outcome == "" {
|
|
outcome = "unknown"
|
|
}
|
|
migrationJobsProcessedTotal.WithLabelValues(service, outcome).Inc()
|
|
migrationJobDurationSeconds.WithLabelValues(service, outcome).Observe(duration.Seconds())
|
|
}
|
|
|
|
func SetMigrationPendingJobs(count int) {
|
|
migrationPendingJobs.Set(float64(count))
|
|
}
|
|
|
|
func IncMigrationRateLimitRetry() {
|
|
migrationRateLimitRetriesTotal.Inc()
|
|
}
|