- Updated .env.example to include new configuration options for the UltiAI branding and API endpoints. - Enhanced Nginx configuration to support new API routes for the MCP and WebSocket connections. - Introduced sub-filters for branding adjustments in Nginx responses. - Added new JavaScript patch for API endpoint adjustments. - Implemented tests for new API functionalities and improved error handling in the AI gateway.
105 lines
2.4 KiB
Go
105 lines
2.4 KiB
Go
package websearch
|
|
|
|
import (
	"context"
	"fmt"
	"html"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
)
|
|
|
|
// Patterns used to scrape the DuckDuckGo HTML results page. Every pattern is
// case-insensitive and lets "." match newlines (the "(?is)" flags).
var (
	// htmlTagRe matches a single HTML tag of any kind.
	htmlTagRe = regexp.MustCompile(`(?is)<[^>]+>`)

	// duckResultSnipRe matches an anchor carrying class="result__snippet";
	// group 1 is the anchor's inner HTML.
	duckResultSnipRe = regexp.MustCompile(`(?is)<a[^>]+class="result__snippet"[^>]*>(.*?)</a>`)

	// duckResultLinkRe matches an anchor whose class="result__a" attribute
	// precedes its href; group 1 is the raw href value and group 2 is the
	// anchor's inner HTML.
	duckResultLinkRe = regexp.MustCompile(`(?is)<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)</a>`)
)
|
|
|
|
func (c *Client) searchDuckDuckGo(ctx context.Context, provider Provider, query string, count int) ([]Result, error) {
|
|
endpoint := strings.TrimSpace(provider.BaseURL)
|
|
if endpoint == "" {
|
|
endpoint = duckDuckGoHTMLURL
|
|
}
|
|
|
|
form := url.Values{}
|
|
form.Set("q", query)
|
|
form.Set("b", "")
|
|
form.Set("kl", "")
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
|
req.Header.Set("Accept", "text/html")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if resp.StatusCode >= 400 {
|
|
return nil, fmt.Errorf("duckduckgo search failed (%d)", resp.StatusCode)
|
|
}
|
|
|
|
html := string(body)
|
|
links := duckResultLinkRe.FindAllStringSubmatch(html, count)
|
|
snips := duckResultSnipRe.FindAllStringSubmatch(html, count)
|
|
if len(links) == 0 {
|
|
return []Result{}, nil
|
|
}
|
|
|
|
results := make([]Result, 0, len(links))
|
|
for i, match := range links {
|
|
if len(match) < 3 {
|
|
continue
|
|
}
|
|
title := stripHTML(match[2])
|
|
href := decodeDuckDuckGoURL(strings.TrimSpace(match[1]))
|
|
desc := ""
|
|
if i < len(snips) && len(snips[i]) > 1 {
|
|
desc = stripHTML(snips[i][1])
|
|
}
|
|
if title == "" || href == "" {
|
|
continue
|
|
}
|
|
results = append(results, Result{
|
|
Title: title,
|
|
URL: href,
|
|
Description: desc,
|
|
})
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
func stripHTML(raw string) string {
|
|
return strings.TrimSpace(htmlTagRe.ReplaceAllString(raw, ""))
|
|
}
|
|
|
|
// decodeDuckDuckGoURL normalizes a result href scraped from DuckDuckGo.
//
// Surrounding whitespace is trimmed and a blank input yields "".
// Scheme-relative hrefs ("//host/path") are given an https scheme. When the
// href is a DuckDuckGo redirect (host duckduckgo.com or one of its
// subdomains, path "/l/") carrying a non-empty "uddg" query parameter, the
// decoded value of that parameter is returned as the target URL. Any other
// parseable URL is returned in its re-serialized form, and input that cannot
// be parsed is returned as is.
func decodeDuckDuckGoURL(raw string) string {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return ""
	}
	if strings.HasPrefix(raw, "//") {
		raw = "https:" + raw
	}

	parsed, err := url.Parse(raw)
	if err != nil {
		return raw
	}

	// Match the host exactly (or as a parent domain) rather than by
	// substring, so unrelated hosts that merely contain "duckduckgo.com"
	// are not treated as redirects.
	host := strings.ToLower(parsed.Hostname())
	isDuck := host == "duckduckgo.com" || strings.HasSuffix(host, ".duckduckgo.com")
	if isDuck && parsed.Path == "/l/" {
		// Query() has already percent-decoded the value. Decoding it again
		// would corrupt escapes that belong to the target URL itself
		// ("%2B" would become a space) and would fail on a literal "%".
		if target := parsed.Query().Get("uddg"); target != "" {
			return target
		}
	}
	return parsed.String()
}
|