package websearch
import (
	"context"
	"fmt"
	"html"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
)
var (
duckResultLinkRe = regexp.MustCompile(`(?is)]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)`)
duckResultSnipRe = regexp.MustCompile(`(?is)]+class="result__snippet"[^>]*>(.*?)`)
htmlTagRe = regexp.MustCompile(`(?is)<[^>]+>`)
)
// searchDuckDuckGo runs query against DuckDuckGo's HTML search page and
// scrapes the results out of the returned markup.
//
// The request is a form POST to provider.BaseURL, or to duckDuckGoHTMLURL when
// BaseURL is blank. count caps the number of returned results: 0 yields an
// empty slice and a negative count returns every result found on the page.
//
// It returns an error when the request cannot be built or sent, when the body
// cannot be read, or when the server answers with a status of 400 or above.
// On success the returned slice is never nil.
func (c *Client) searchDuckDuckGo(ctx context.Context, provider Provider, query string, count int) ([]Result, error) {
	endpoint := strings.TrimSpace(provider.BaseURL)
	if endpoint == "" {
		endpoint = duckDuckGoHTMLURL
	}

	form := url.Values{}
	form.Set("q", query)
	form.Set("b", "")
	form.Set("kl", "")

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
	if err != nil {
		return nil, fmt.Errorf("duckduckgo: building request: %w", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Accept", "text/html")

	resp, err := c.http.Do(req)
	if err != nil {
		return nil, fmt.Errorf("duckduckgo: sending request: %w", err)
	}
	defer resp.Body.Close()

	// Cap the read at 2 MiB so an oversized page cannot exhaust memory.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
	if err != nil {
		return nil, fmt.Errorf("duckduckgo: reading response: %w", err)
	}
	if resp.StatusCode >= 400 {
		return nil, fmt.Errorf("duckduckgo search failed (%d)", resp.StatusCode)
	}

	return extractDuckDuckGoResults(string(body), count), nil
}

// extractDuckDuckGoResults scrapes results from a DuckDuckGo HTML results
// page. count == 0 yields no results, a negative count yields all of them, and
// a positive count stops after that many usable results.
func extractDuckDuckGoResults(page string, count int) []Result {
	if count == 0 {
		return []Result{}
	}

	// Match every link up front: the cap is applied to accepted results, so
	// matches that are skipped below do not eat into the caller's count.
	linkLocs := duckResultLinkRe.FindAllStringSubmatchIndex(page, -1)
	capHint := len(linkLocs)
	if count > 0 && count < capHint {
		capHint = count
	}

	results := make([]Result, 0, capHint)
	for i, loc := range linkLocs {
		if count > 0 && len(results) >= count {
			break
		}
		// loc holds [matchStart, matchEnd, hrefStart, hrefEnd, titleStart, titleEnd].
		if len(loc) < 6 || loc[2] < 0 || loc[4] < 0 {
			continue
		}

		title := stripHTML(page[loc[4]:loc[5]])
		// The href is an HTML attribute value, so entities such as "&amp;"
		// must be decoded before it is treated as a URL.
		href := decodeDuckDuckGoURL(html.UnescapeString(page[loc[2]:loc[3]]))
		if title == "" || href == "" {
			continue
		}

		// Look for the snippet only between this link and the next one. Pairing
		// by byte offset keeps descriptions attached to the right result even
		// when some results have no snippet at all.
		segmentEnd := len(page)
		if i+1 < len(linkLocs) {
			segmentEnd = linkLocs[i+1][0]
		}
		desc := ""
		if m := duckResultSnipRe.FindStringSubmatch(page[loc[1]:segmentEnd]); len(m) > 1 {
			desc = stripHTML(m[1])
		}

		results = append(results, Result{
			Title:       title,
			URL:         href,
			Description: desc,
		})
	}
	return results
}
// stripHTML reduces an HTML fragment to plain text: it removes every tag,
// decodes HTML entities (for example "&amp;" becomes "&"), and trims
// surrounding whitespace.
//
// Tags are removed before entities are decoded so that escaped markup such as
// "&lt;b&gt;" survives as the literal text "<b>" instead of being stripped.
func stripHTML(raw string) string {
	text := htmlTagRe.ReplaceAllString(raw, "")
	return strings.TrimSpace(html.UnescapeString(text))
}
// decodeDuckDuckGoURL turns a result href into the URL it ultimately points at.
//
// Blank input yields "". Protocol-relative links ("//host/...") are given an
// https scheme. When the link is a DuckDuckGo redirect (host duckduckgo.com or
// one of its subdomains, path "/l/") the target carried in the "uddg" query
// parameter is returned. Any other parseable URL is returned in normalized
// form, and input that cannot be parsed is returned as-is after trimming.
func decodeDuckDuckGoURL(raw string) string {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return ""
	}
	if strings.HasPrefix(raw, "//") {
		raw = "https:" + raw
	}

	parsed, err := url.Parse(raw)
	if err != nil {
		return raw
	}

	// Compare the exact hostname (no port, case-insensitive) so look-alike
	// hosts such as "duckduckgo.com.evil.test" are not treated as redirects.
	host := strings.ToLower(parsed.Hostname())
	isDuckDuckGo := host == "duckduckgo.com" || strings.HasSuffix(host, ".duckduckgo.com")
	if isDuckDuckGo && parsed.Path == "/l/" {
		// Query() already percent-decodes values. Decoding a second time would
		// corrupt targets that contain "+" or escaped characters of their own.
		if target := parsed.Query().Get("uddg"); target != "" {
			return target
		}
	}
	return parsed.String()
}