- Refactored AI gateway to utilize new cost management structures for usage tracking. - Replaced deprecated token extraction methods with a unified cost parsing approach. - Enhanced usage fallback mechanisms and introduced detailed usage metrics in responses. - Added new metering functionality to record AI usage and costs effectively. - Updated tests to reflect changes in usage parsing and cost calculations. - Introduced new API endpoints for retrieving AI usage summaries and pricing information.
66 lines
1.9 KiB
Go
66 lines
1.9 KiB
Go
package cost
|
|
|
|
import "encoding/json"
|
|
|
|
// usagePayload mirrors the "usage" object of an OpenAI-compatible chat
// completion response. The two detail sub-objects are optional, so they are
// decoded through pointers that stay nil when the key is absent.
type usagePayload struct {
	// Top-level token counts as reported in the payload.
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`

	// PromptTokensDetails is the optional breakdown of the prompt tokens.
	PromptTokensDetails *struct {
		CachedTokens int `json:"cached_tokens"`
	} `json:"prompt_tokens_details,omitempty"`

	// CompletionTokensDetails is the optional breakdown of the completion tokens.
	CompletionTokensDetails *struct {
		ReasoningTokens int `json:"reasoning_tokens"`
	} `json:"completion_tokens_details,omitempty"`
}
|
|
|
|
type chatCompletionResponse struct {
|
|
Usage *usagePayload `json:"usage,omitempty"`
|
|
}
|
|
|
|
// ParseUsage extracts token details from an OpenAI-compatible chat completion payload.
|
|
func ParseUsage(payload []byte) UsageDetail {
|
|
var parsed chatCompletionResponse
|
|
if err := json.Unmarshal(payload, &parsed); err != nil || parsed.Usage == nil {
|
|
return UsageDetail{TotalTokens: 1}
|
|
}
|
|
return usageFromPayload(parsed.Usage)
|
|
}
|
|
|
|
func usageFromPayload(u *usagePayload) UsageDetail {
|
|
if u == nil {
|
|
return UsageDetail{TotalTokens: 1}
|
|
}
|
|
detail := UsageDetail{
|
|
PromptTokens: u.PromptTokens,
|
|
CompletionTokens: u.CompletionTokens,
|
|
TotalTokens: u.TotalTokens,
|
|
}
|
|
if u.PromptTokensDetails != nil {
|
|
detail.CachedInputTokens = u.PromptTokensDetails.CachedTokens
|
|
}
|
|
if u.CompletionTokensDetails != nil {
|
|
detail.ReasoningTokens = u.CompletionTokensDetails.ReasoningTokens
|
|
}
|
|
if detail.TotalTokens == 0 {
|
|
detail.TotalTokens = detail.PromptTokens + detail.CompletionTokens
|
|
}
|
|
if detail.TotalTokens == 0 {
|
|
detail.TotalTokens = 1
|
|
}
|
|
return detail
|
|
}
|
|
|
|
// MergeStreamUsage keeps the latest non-zero usage from streaming chunks.
|
|
func MergeStreamUsage(acc UsageDetail, chunk []byte) UsageDetail {
|
|
var parsed chatCompletionResponse
|
|
if err := json.Unmarshal(chunk, &parsed); err != nil || parsed.Usage == nil {
|
|
return acc
|
|
}
|
|
next := usageFromPayload(parsed.Usage)
|
|
if next.TotalTokens == 0 && next.PromptTokens == 0 && next.CompletionTokens == 0 {
|
|
return acc
|
|
}
|
|
return next
|
|
}
|