- Refactored AI gateway to utilize new cost management structures for usage tracking.
- Replaced deprecated token extraction methods with a unified cost parsing approach.
- Enhanced usage fallback mechanisms and introduced detailed usage metrics in responses.
- Added new metering functionality to record AI usage and costs effectively.
- Updated tests to reflect changes in usage parsing and cost calculations.
- Introduced new API endpoints for retrieving AI usage summaries and pricing information.
207 lines
5.8 KiB
Go
207 lines
5.8 KiB
Go
package cost
|
|
|
|
import (
	"context"
	"errors"
	"strings"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"
)
|
|
|
|
// ModelPrice describes what one model costs. Every rate is expressed in
// micro-EUR per one million tokens.
type ModelPrice struct {
	// ModelID identifies the model the rates apply to.
	ModelID string
	// ProviderType names the provider the model belongs to.
	ProviderType string
	// InputMicroEURPerMTok is the rate for uncached input tokens.
	InputMicroEURPerMTok int64
	// CachedInputMicroEURPerMTok is the rate for cached input tokens.
	// Zero means no dedicated rate is set.
	CachedInputMicroEURPerMTok int64
	// OutputMicroEURPerMTok is the rate for completion tokens.
	OutputMicroEURPerMTok int64
	// ReasoningMicroEURPerMTok is the rate for reasoning tokens.
	// Zero means no dedicated rate is set.
	ReasoningMicroEURPerMTok int64
}
|
|
|
|
// Fallback rates, in micro-EUR per million tokens, used when the model is
// unknown (roughly the gpt-4o-mini input rate, per the original note).
const (
	fallbackInputMicroEURPerMTok  int64 = 140000
	fallbackOutputMicroEURPerMTok int64 = 560000
)
|
|
|
|
type PricingStore struct {
|
|
db *pgxpool.Pool
|
|
}
|
|
|
|
func NewPricingStore(db *pgxpool.Pool) *PricingStore {
|
|
return &PricingStore{db: db}
|
|
}
|
|
|
|
func (s *PricingStore) LookupPrice(ctx context.Context, modelID string) (ModelPrice, bool) {
|
|
modelID = strings.TrimSpace(modelID)
|
|
if modelID == "" || s.db == nil {
|
|
return ModelPrice{}, false
|
|
}
|
|
var p ModelPrice
|
|
var cached, reasoning *int64
|
|
err := s.db.QueryRow(ctx, `
|
|
SELECT model_id, provider_type,
|
|
input_micro_eur_per_mtok,
|
|
cached_input_micro_eur_per_mtok,
|
|
output_micro_eur_per_mtok,
|
|
reasoning_micro_eur_per_mtok
|
|
FROM ai_model_pricing
|
|
WHERE model_id = $1 AND effective_from <= CURRENT_DATE
|
|
ORDER BY effective_from DESC
|
|
LIMIT 1
|
|
`, modelID).Scan(
|
|
&p.ModelID, &p.ProviderType,
|
|
&p.InputMicroEURPerMTok, &cached,
|
|
&p.OutputMicroEURPerMTok, &reasoning,
|
|
)
|
|
if err != nil {
|
|
if err != pgx.ErrNoRows {
|
|
// prefix match for bedrock/azure model ids
|
|
err = s.db.QueryRow(ctx, `
|
|
SELECT model_id, provider_type,
|
|
input_micro_eur_per_mtok,
|
|
cached_input_micro_eur_per_mtok,
|
|
output_micro_eur_per_mtok,
|
|
reasoning_micro_eur_per_mtok
|
|
FROM ai_model_pricing
|
|
WHERE $1 LIKE model_id || '%' AND effective_from <= CURRENT_DATE
|
|
ORDER BY LENGTH(model_id) DESC, effective_from DESC
|
|
LIMIT 1
|
|
`, modelID).Scan(
|
|
&p.ModelID, &p.ProviderType,
|
|
&p.InputMicroEURPerMTok, &cached,
|
|
&p.OutputMicroEURPerMTok, &reasoning,
|
|
)
|
|
}
|
|
if err != nil {
|
|
return ModelPrice{}, false
|
|
}
|
|
}
|
|
if cached != nil {
|
|
p.CachedInputMicroEURPerMTok = *cached
|
|
}
|
|
if reasoning != nil {
|
|
p.ReasoningMicroEURPerMTok = *reasoning
|
|
}
|
|
return p, true
|
|
}
|
|
|
|
// ComputeCostMicroEUR calculates estimated cost from usage and model pricing.
|
|
func ComputeCostMicroEUR(usage UsageDetail, price ModelPrice, found bool) (microEUR int64, estimated bool) {
|
|
if !found {
|
|
price = ModelPrice{
|
|
InputMicroEURPerMTok: fallbackInputMicroEURPerMTok,
|
|
OutputMicroEURPerMTok: fallbackOutputMicroEURPerMTok,
|
|
}
|
|
estimated = true
|
|
if usage.PromptTokens == 0 && usage.CompletionTokens == 0 {
|
|
microEUR = int64(usage.TotalTokens) * fallbackInputMicroEURPerMTok / 1_000_000
|
|
if microEUR == 0 && usage.TotalTokens > 0 {
|
|
microEUR = 1
|
|
}
|
|
return microEUR, true
|
|
}
|
|
}
|
|
|
|
cachedRate := price.CachedInputMicroEURPerMTok
|
|
if cachedRate == 0 {
|
|
cachedRate = price.InputMicroEURPerMTok / 2
|
|
}
|
|
reasoningRate := price.ReasoningMicroEURPerMTok
|
|
if reasoningRate == 0 {
|
|
reasoningRate = price.OutputMicroEURPerMTok
|
|
}
|
|
|
|
uncached := usage.UncachedInputTokens()
|
|
microEUR += int64(uncached) * price.InputMicroEURPerMTok / 1_000_000
|
|
microEUR += int64(usage.CachedInputTokens) * cachedRate / 1_000_000
|
|
completion := usage.CompletionTokens - usage.ReasoningTokens
|
|
if completion < 0 {
|
|
completion = usage.CompletionTokens
|
|
}
|
|
microEUR += int64(completion) * price.OutputMicroEURPerMTok / 1_000_000
|
|
microEUR += int64(usage.ReasoningTokens) * reasoningRate / 1_000_000
|
|
|
|
if microEUR == 0 && usage.TotalTokens > 0 {
|
|
microEUR = 1
|
|
}
|
|
return microEUR, estimated
|
|
}
|
|
|
|
// UpsertModelPrice stores or updates pricing for a model (effective today).
|
|
func (s *PricingStore) UpsertModelPrice(ctx context.Context, p ModelPrice) error {
|
|
if s.db == nil {
|
|
return nil
|
|
}
|
|
today := time.Now().UTC().Truncate(24 * time.Hour)
|
|
var cached, reasoning *int64
|
|
if p.CachedInputMicroEURPerMTok > 0 {
|
|
v := p.CachedInputMicroEURPerMTok
|
|
cached = &v
|
|
}
|
|
if p.ReasoningMicroEURPerMTok > 0 {
|
|
v := p.ReasoningMicroEURPerMTok
|
|
reasoning = &v
|
|
}
|
|
_, err := s.db.Exec(ctx, `
|
|
INSERT INTO ai_model_pricing (
|
|
model_id, provider_type,
|
|
input_micro_eur_per_mtok, cached_input_micro_eur_per_mtok,
|
|
output_micro_eur_per_mtok, reasoning_micro_eur_per_mtok,
|
|
effective_from, source
|
|
) VALUES ($1, $2, $3, $4, $5, $6, $7, 'manual')
|
|
ON CONFLICT (model_id, effective_from) DO UPDATE SET
|
|
provider_type = EXCLUDED.provider_type,
|
|
input_micro_eur_per_mtok = EXCLUDED.input_micro_eur_per_mtok,
|
|
cached_input_micro_eur_per_mtok = EXCLUDED.cached_input_micro_eur_per_mtok,
|
|
output_micro_eur_per_mtok = EXCLUDED.output_micro_eur_per_mtok,
|
|
reasoning_micro_eur_per_mtok = EXCLUDED.reasoning_micro_eur_per_mtok,
|
|
source = EXCLUDED.source
|
|
`, p.ModelID, p.ProviderType, p.InputMicroEURPerMTok, cached,
|
|
p.OutputMicroEURPerMTok, reasoning, today)
|
|
return err
|
|
}
|
|
|
|
func (s *PricingStore) ListPrices(ctx context.Context) ([]ModelPrice, error) {
|
|
if s.db == nil {
|
|
return nil, nil
|
|
}
|
|
rows, err := s.db.Query(ctx, `
|
|
SELECT DISTINCT ON (model_id)
|
|
model_id, provider_type,
|
|
input_micro_eur_per_mtok,
|
|
cached_input_micro_eur_per_mtok,
|
|
output_micro_eur_per_mtok,
|
|
reasoning_micro_eur_per_mtok
|
|
FROM ai_model_pricing
|
|
WHERE effective_from <= CURRENT_DATE
|
|
ORDER BY model_id, effective_from DESC
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
var out []ModelPrice
|
|
for rows.Next() {
|
|
var p ModelPrice
|
|
var cached, reasoning *int64
|
|
if err := rows.Scan(&p.ModelID, &p.ProviderType,
|
|
&p.InputMicroEURPerMTok, &cached,
|
|
&p.OutputMicroEURPerMTok, &reasoning); err != nil {
|
|
return nil, err
|
|
}
|
|
if cached != nil {
|
|
p.CachedInputMicroEURPerMTok = *cached
|
|
}
|
|
if reasoning != nil {
|
|
p.ReasoningMicroEURPerMTok = *reasoning
|
|
}
|
|
out = append(out, p)
|
|
}
|
|
return out, rows.Err()
|
|
}
|
|
|
|
// MicroEURToEUR turns an amount in micro-EUR into EUR as a float64, for use in
// API responses.
func MicroEURToEUR(micro int64) float64 {
	const microPerEUR = 1_000_000
	eur := float64(micro) / microPerEUR
	return eur
}
|