package websearch

import (
	"context"
	"fmt"
	"html"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
)

var (
	// duckResultLinkRe captures (1) the href and (2) the inner HTML of each
	// result title anchor, i.e. an <a> tag carrying class="result__a".
	duckResultLinkRe = regexp.MustCompile(`(?is)<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)</a>`)
	// duckResultSnipRe captures (1) the inner HTML of each result snippet
	// anchor, i.e. an <a> tag carrying class="result__snippet".
	duckResultSnipRe = regexp.MustCompile(`(?is)<a[^>]+class="result__snippet"[^>]*>(.*?)</a>`)
	// htmlTagRe matches any single HTML tag so it can be removed from text.
	htmlTagRe = regexp.MustCompile(`(?is)<[^>]+>`)
)

// searchDuckDuckGo posts query as a form to the DuckDuckGo HTML endpoint and
// scrapes result links and snippets out of the returned page.
//
// provider.BaseURL overrides the endpoint when it is non-blank; otherwise
// duckDuckGoHTMLURL is used. count is passed to the regexp matcher as the
// match limit: a negative value means "no limit" and zero yields no results.
//
// It returns a non-nil empty slice when the page contains no result links,
// and an error when the request cannot be built or sent, when the server
// answers with a status >= 400, or when the body cannot be read.
func (c *Client) searchDuckDuckGo(ctx context.Context, provider Provider, query string, count int) ([]Result, error) {
	endpoint := strings.TrimSpace(provider.BaseURL)
	if endpoint == "" {
		endpoint = duckDuckGoHTMLURL
	}

	form := url.Values{}
	form.Set("q", query)
	form.Set("b", "")
	form.Set("kl", "")

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
	if err != nil {
		return nil, fmt.Errorf("duckduckgo: building request: %w", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Accept", "text/html")

	resp, err := c.http.Do(req)
	if err != nil {
		return nil, fmt.Errorf("duckduckgo: sending request: %w", err)
	}
	defer resp.Body.Close()

	// Check the status first so an error page is not read into memory.
	if resp.StatusCode >= 400 {
		return nil, fmt.Errorf("duckduckgo search failed (%d)", resp.StatusCode)
	}

	// Cap the body at 2 MiB so an oversized response cannot exhaust memory.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
	if err != nil {
		return nil, fmt.Errorf("duckduckgo: reading response: %w", err)
	}

	page := string(body)
	links := duckResultLinkRe.FindAllStringSubmatch(page, count)
	snips := duckResultSnipRe.FindAllStringSubmatch(page, count)
	if len(links) == 0 {
		return []Result{}, nil
	}

	results := make([]Result, 0, len(links))
	for i, match := range links {
		if len(match) < 3 {
			continue
		}

		title := stripHTML(match[2])
		// The href is attribute text, so entities such as &amp; must be
		// decoded before it is treated as a URL.
		href := decodeDuckDuckGoURL(html.UnescapeString(strings.TrimSpace(match[1])))
		if title == "" || href == "" {
			continue
		}

		// Snippets are paired with links purely by position in the page.
		desc := ""
		if i < len(snips) && len(snips[i]) > 1 {
			desc = stripHTML(snips[i][1])
		}

		results = append(results, Result{
			Title:       title,
			URL:         href,
			Description: desc,
		})
	}
	return results, nil
}

// stripHTML removes all HTML tags from raw, decodes HTML entities in what
// remains, and trims surrounding whitespace.
func stripHTML(raw string) string {
	// Tags are stripped before entities are decoded so that escaped markup
	// such as "&lt;b&gt;" survives as literal text instead of being removed.
	text := htmlTagRe.ReplaceAllString(raw, "")
	return strings.TrimSpace(html.UnescapeString(text))
}

// decodeDuckDuckGoURL normalizes a result href taken from the results page.
//
// Blank input yields "". Scheme-relative links ("//host/...") are given an
// https scheme. DuckDuckGo redirect links (path "/l/" on duckduckgo.com or
// one of its subdomains) are replaced by the target carried in their "uddg"
// query parameter. Input that cannot be parsed as a URL is returned as-is;
// everything else is returned in its parsed, re-serialized form.
func decodeDuckDuckGoURL(raw string) string {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return ""
	}
	if strings.HasPrefix(raw, "//") {
		raw = "https:" + raw
	}

	parsed, err := url.Parse(raw)
	if err != nil {
		return raw
	}

	host := strings.ToLower(parsed.Hostname())
	isDuckHost := host == "duckduckgo.com" || strings.HasSuffix(host, ".duckduckgo.com")
	if isDuckHost && parsed.Path == "/l/" {
		// Query().Get already percent-decodes the value; decoding it a second
		// time would corrupt targets that contain "+" or "%xx" sequences.
		if target := parsed.Query().Get("uddg"); target != "" {
			return target
		}
	}
	return parsed.String()
}