ultisuite-backend/internal/search/service.go
R3D347HR4Y 0435e27ce6 Enhance search functionality with multi-engine support and configuration updates
- Added support for Typesense as a search engine alongside Meilisearch and PostgreSQL.
- Updated configuration structure to include Typesense parameters in `Config` and `.env.example`.
- Enhanced search handler and service to accommodate external search clients and filters.
- Implemented new tests for external search clients and search service functionalities.
- Updated project checklist to reflect completion of multi-index search features and contextual snippets.
2026-05-22 19:14:27 +02:00

695 lines
16 KiB
Go

package search
import (
"context"
"fmt"
"regexp"
"sort"
"strings"
"time"
"unicode"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/ultisuite/ulti-backend/internal/api/query"
"github.com/ultisuite/ulti-backend/internal/nextcloud"
)
// Service answers search requests, either through an external search client
// or through the federated per-type searches implemented in this file.
type Service struct {
// db is the connection pool the mail search queries.
db *pgxpool.Pool
// nc is the Nextcloud client used by the contacts, events and files
// searches; each of them returns no results when it is nil.
nc *nextcloud.Client
// engine is the engine name; NewService stores the normalizeEngine result here.
engine string
// external is the external search client; when it is nil Search uses the
// federated path.
external externalSearchClient
}
// ServiceOptions carries the settings NewService needs to build a Service.
//
// In this file only Nextcloud and Engine are read directly; the whole value is
// handed to newExternalSearchClient, which presumably consumes the
// Meilisearch*/Typesense* fields (confirm against that function).
type ServiceOptions struct {
// Nextcloud is stored on the Service and used for contacts, events and files.
Nextcloud *nextcloud.Client
// Engine is the requested engine name; it is passed through normalizeEngine.
Engine string
MeilisearchURL string
MeilisearchKey string
MeilisearchIndex string
TypesenseURL string
TypesenseKey string
TypesenseCollection string
}
// NewService builds a Service from a database pool and the given options.
//
// The engine name is normalized first, and that normalized name is what the
// external search client constructor receives.
func NewService(db *pgxpool.Pool, opts ServiceOptions) *Service {
	svc := &Service{
		db: db,
		nc: opts.Nextcloud,
	}
	svc.engine = normalizeEngine(opts.Engine)
	svc.external = newExternalSearchClient(svc.engine, opts)
	return svc
}
// Result is a single search hit as serialized in the search response.
type Result struct {
// Type is the result kind; the searches in this file emit "mail",
// "contacts", "events" and "files".
Type string `json:"type"`
// ID identifies the hit: the message id for mail, the file path for files,
// and a composite "<container>:<uid>:<email or start>" key for contacts and events.
ID string `json:"id"`
// Title and Snippet may contain <mark>…</mark> highlight tags.
Title string `json:"title"`
Snippet string `json:"snippet"`
// Date is a UTC time.Time when set by the searches in this file; dateFromAny
// additionally understands *time.Time and date strings.
Date any `json:"date,omitempty"`
// AccountID is the mail account ID, address book ID, calendar ID or files
// base path the hit came from.
AccountID string `json:"account_id,omitempty"`
// Score orders results (higher first); negative scores are dropped by
// discardFilteredResults.
Score float64 `json:"score,omitempty"`
}
// SearchResponse is the payload returned by Service.Search.
type SearchResponse struct {
// Query echoes the query text that was searched for.
Query string `json:"query"`
// Engine names the engine that produced the results; Search reports
// "postgres" whenever the federated path was used.
Engine string `json:"engine,omitempty"`
// Results is the requested page of hits and Count is its length.
Results []Result `json:"results"`
Count int `json:"count"`
// Pagination is built by params.Meta from the total number of hits before paging.
Pagination query.PaginationMeta `json:"pagination,omitempty"`
}
// SearchFilters narrows a search to a single container.
type SearchFilters struct {
// AccountID is interpreted per result type: as the mail account ID for mail,
// the address book ID for contacts, the calendar ID for events, and the base
// path for files. Empty means no restriction.
AccountID string
}
// Search runs q against the requested result types and returns one page of
// score-ordered results.
//
// typesRaw is a comma-separated type list interpreted by parseTypes. When an
// external search client is configured it is tried first. If there is no
// client, or the client returns an error, the federated search in this file is
// used instead and the response reports "postgres" as the engine. An external
// failure is never returned to the caller; only a federated failure is.
func (s *Service) Search(ctx context.Context, externalID, q, typesRaw string, params query.ListParams, filters SearchFilters) (SearchResponse, error) {
	kinds := parseTypes(typesRaw)

	var (
		hits []Result
		err  error
	)
	fallback := s.external == nil
	if !fallback {
		hits, err = s.external.Search(ctx, ExternalSearchParams{
			ExternalID: externalID,
			Query:      q,
			Types:      kinds,
			Params:     params,
			Filters:    filters,
		})
		// The external error is deliberately dropped; it only triggers the fallback.
		fallback = err != nil
	}

	engineName := s.engine
	if fallback {
		engineName = "postgres"
		hits, err = s.searchFederated(ctx, externalID, q, kinds, params, filters)
		if err != nil {
			return SearchResponse{}, err
		}
	}

	// Post-processing is applied to the hits of either path.
	finalizeExternalResults(hits, q, params)
	hits = discardFilteredResults(hits)
	sortResultsByScore(hits)

	page, total := paginateResults(hits, params.Offset(), params.Limit())
	return SearchResponse{
		Query:      q,
		Engine:     engineName,
		Results:    page,
		Count:      len(page),
		Pagination: params.Meta(&total),
	}, nil
}
// searchFederated runs the per-type searches for every entry of typeList, in
// order, and concatenates their hits.
//
// Unknown type names are skipped. The first search that fails aborts the whole
// call and its error is returned unchanged.
func (s *Service) searchFederated(ctx context.Context, externalID, q string, typeList []string, params query.ListParams, filters SearchFilters) ([]Result, error) {
	merged := make([]Result, 0, params.Offset()+params.Limit())
	for _, kind := range typeList {
		var (
			hits []Result
			err  error
		)
		switch kind {
		case "mail":
			hits, err = s.searchMail(ctx, externalID, q, params, filters)
		case "contacts":
			hits, err = s.searchContacts(ctx, externalID, q, filters)
		case "files":
			hits, err = s.searchFiles(ctx, externalID, q, params, filters)
		case "events":
			hits, err = s.searchEvents(ctx, externalID, q, params, filters)
		default:
			continue
		}
		if err != nil {
			return nil, err
		}
		merged = append(merged, hits...)
	}
	return merged, nil
}
// sortResultsByScore stably sorts results in place, highest score first.
//
// Hits with equal scores are ordered by date, newest first; a hit with a
// usable date sorts before one without, and hits that both lack a date keep
// their relative order.
func sortResultsByScore(results []Result) {
	sort.SliceStable(results, func(i, j int) bool {
		left, right := &results[i], &results[j]
		if left.Score != right.Score {
			return left.Score > right.Score
		}
		leftDate, leftOK := dateFromAny(left.Date)
		rightDate, rightOK := dateFromAny(right.Date)
		if leftOK && rightOK {
			return leftDate.After(rightDate)
		}
		return leftOK
	})
}
// dateFromAny extracts a time from a loosely typed date value.
//
// It accepts a time.Time, a non-nil *time.Time, or a string that
// parseDateString understands. The boolean is false for anything else.
func dateFromAny(raw any) (time.Time, bool) {
	if text, isString := raw.(string); isString {
		return parseDateString(text)
	}
	if value, isTime := raw.(time.Time); isTime {
		return value, true
	}
	if ptr, isPtr := raw.(*time.Time); isPtr && ptr != nil {
		return *ptr, true
	}
	return time.Time{}, false
}
// parseDateString parses raw using the first layout that fits: RFC 3339, then
// the compact forms "20060102T150405Z", "20060102T150405" and "20060102".
// It returns the zero time and false when no layout matches.
func parseDateString(raw string) (time.Time, bool) {
	for _, layout := range [...]string{
		time.RFC3339,
		"20060102T150405Z",
		"20060102T150405",
		"20060102",
	} {
		parsed, err := time.Parse(layout, raw)
		if err != nil {
			continue
		}
		return parsed, true
	}
	return time.Time{}, false
}
// normalizeEngine maps a configured engine name to its canonical form.
// Matching ignores case and surrounding whitespace; anything other than
// "meilisearch" or "typesense" yields "postgres".
func normalizeEngine(raw string) string {
	name := strings.ToLower(strings.TrimSpace(raw))
	if name == "meilisearch" || name == "typesense" {
		return name
	}
	return "postgres"
}
// defaultCalendarRange returns the UTC window used for event searches.
//
// Explicit params.From / params.To win. A missing bound defaults to six months
// before now and one year after now respectively. The bounds are returned in
// ascending order even if the caller supplied them reversed.
func defaultCalendarRange(params query.ListParams) (time.Time, time.Time) {
	start := time.Now().UTC().AddDate(0, -6, 0)
	end := time.Now().UTC().AddDate(1, 0, 0)
	if lower := params.From; lower != nil {
		start = lower.UTC()
	}
	if upper := params.To; upper != nil {
		end = upper.UTC()
	}
	if start.After(end) {
		return end, start
	}
	return start, end
}
// parseTypes turns a comma-separated list of result types into an ordered,
// de-duplicated slice.
//
// A blank input selects every supported type ("mail", "contacts", "files",
// "events"). Entries are trimmed and lowercased so that "Mail" and " MAIL "
// both select the lowercase "mail" key the search dispatcher switches on;
// empty entries are dropped. Unknown names are kept as-is (lowercased) and it
// is up to the caller to ignore them.
func parseTypes(typesRaw string) []string {
	if strings.TrimSpace(typesRaw) == "" {
		return []string{"mail", "contacts", "files", "events"}
	}
	parts := strings.Split(typesRaw, ",")
	out := make([]string, 0, len(parts))
	seen := make(map[string]struct{}, len(parts))
	for _, p := range parts {
		// Normalize case as well as whitespace, mirroring normalizeEngine.
		t := strings.ToLower(strings.TrimSpace(p))
		if t == "" {
			continue
		}
		if _, ok := seen[t]; ok {
			continue
		}
		seen[t] = struct{}{}
		out = append(out, t)
	}
	return out
}
// paginateResults returns the window of results that starts at offset and
// holds at most limit entries, together with the total number of results.
//
// Out-of-range input never panics: a negative offset is treated as 0, and a
// non-positive limit or an offset past the end yields an empty, non-nil page.
// The returned page shares its backing array with results.
func paginateResults(results []Result, offset, limit int) ([]Result, int64) {
	total := int64(len(results))
	if offset < 0 {
		offset = 0
	}
	if limit <= 0 || offset >= len(results) {
		return []Result{}, total
	}
	end := len(results)
	// Compare against the remaining length instead of computing offset+limit,
	// which could overflow for very large limits.
	if limit < end-offset {
		end = offset + limit
	}
	return results[offset:end], total
}
// searchMail runs a PostgreSQL full-text search over the messages of the user
// identified by externalID.
//
// queryText is converted to a prefix tsquery by toTSQuery; when no searchable
// term remains the function returns an empty result without touching the
// database. Optional filters restrict the search to one mail account
// (filters.AccountID) and to a date range (params.From / params.To). At most
// max(20, offset+limit) rows are fetched, ordered by rank and then by date.
//
// Each hit's score is the ts_rank value plus recencyBoost of the message date.
// Errors from the query, a row scan or row iteration are returned wrapped with
// context.
func (s *Service) searchMail(ctx context.Context, externalID, queryText string, params query.ListParams, filters SearchFilters) ([]Result, error) {
	tsQuery := toTSQuery(queryText)
	if tsQuery == "" {
		return []Result{}, nil
	}
	// Fetch enough rows to cover the requested page after merging with other types.
	limit := params.Offset() + params.Limit()
	if limit < 20 {
		limit = 20
	}

	// $1 and $2 are fixed; optional filters take the following placeholders.
	args := []any{tsQuery, externalID}
	argIdx := 3
	base := `
		FROM messages m
		JOIN mail_accounts ma ON m.account_id = ma.id
		WHERE ma.user_id = (SELECT id FROM users WHERE external_id = $2)
		AND m.search_vector @@ to_tsquery('simple', $1)
	`
	if filters.AccountID != "" {
		base += fmt.Sprintf(" AND m.account_id = $%d", argIdx)
		args = append(args, filters.AccountID)
		argIdx++
	}
	if params.From != nil {
		base += fmt.Sprintf(" AND m.date >= $%d", argIdx)
		args = append(args, params.From.UTC())
		argIdx++
	}
	if params.To != nil {
		base += fmt.Sprintf(" AND m.date <= $%d", argIdx)
		args = append(args, params.To.UTC())
		argIdx++
	}

	// subject and snippet are COALESCEd in the select list as well as inside
	// ts_headline: they are scanned into plain strings below, and a NULL would
	// otherwise make the scan fail and abort the whole search.
	querySQL := `
		SELECT m.id, m.account_id, COALESCE(m.subject, ''), COALESCE(m.snippet, ''), m.date,
		ts_rank(m.search_vector, to_tsquery('simple', $1)) as rank,
		ts_headline(
		'simple',
		COALESCE(m.subject, '') || ' ' || COALESCE(m.snippet, ''),
		to_tsquery('simple', $1),
		'StartSel=<mark>,StopSel=</mark>,MaxWords=24,MinWords=8,MaxFragments=2'
		) as highlighted
	` + base + fmt.Sprintf(`
		ORDER BY rank DESC, m.date DESC
		LIMIT $%d
	`, argIdx)
	args = append(args, limit)

	rows, err := s.db.Query(ctx, querySQL, args...)
	if err != nil {
		return nil, fmt.Errorf("searching mail: %w", err)
	}
	defer rows.Close()

	results := make([]Result, 0)
	for rows.Next() {
		var (
			id, accountID, subject, snippet string
			date                            time.Time
			rank                            float64
			highlighted                     string
		)
		if err := rows.Scan(&id, &accountID, &subject, &snippet, &date, &rank, &highlighted); err != nil {
			return nil, fmt.Errorf("scanning mail search row: %w", err)
		}
		// Prefer the database headline; fall back to a locally built snippet.
		finalSnippet := strings.TrimSpace(highlighted)
		if finalSnippet == "" {
			finalSnippet = contextualSnippet(snippet, queryText, 220)
		}
		results = append(results, Result{
			Type:      "mail",
			ID:        id,
			Title:     highlightTerms(subject, queryText),
			Snippet:   finalSnippet,
			Date:      date.UTC(),
			AccountID: accountID,
			Score:     rank + recencyBoost(date),
		})
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading mail search rows: %w", err)
	}
	return results, nil
}
// searchContacts lists contacts through the Nextcloud client and keeps those
// whose name, email, phone or organisation matches queryText.
//
// Without a Nextcloud client the result is empty. filters.AccountID, when set,
// is used as the only address book ID; otherwise every listed address book with
// a non-empty ID is scanned. Hits are de-duplicated on "book:UID:email", which
// also becomes the result ID. Any listing error aborts the search.
func (s *Service) searchContacts(ctx context.Context, userID, queryText string, filters SearchFilters) ([]Result, error) {
	if s.nc == nil {
		return []Result{}, nil
	}

	// Decide which address books to scan.
	books := make([]string, 0, 4)
	switch {
	case filters.AccountID != "":
		books = append(books, filters.AccountID)
	default:
		listed, err := s.nc.ListAddressBooks(ctx, userID)
		if err != nil {
			return nil, err
		}
		for _, entry := range listed {
			if entry.ID != "" {
				books = append(books, entry.ID)
			}
		}
	}

	hits := make([]Result, 0, 16)
	emitted := make(map[string]struct{})
	for _, book := range books {
		cards, err := s.nc.ListContacts(ctx, userID, cardBookPath(userID, book))
		if err != nil {
			return nil, err
		}
		for _, card := range cards {
			haystack := strings.TrimSpace(strings.Join([]string{card.FullName, card.Email, card.Phone, card.Org}, " "))
			if !matchesQuery(haystack, queryText) {
				continue
			}
			id := strings.TrimSpace(book + ":" + card.UID + ":" + card.Email)
			if _, dup := emitted[id]; dup {
				continue
			}
			emitted[id] = struct{}{}

			// Fall back to the email address when the contact has no name.
			name := strings.TrimSpace(card.FullName)
			if name == "" {
				name = strings.TrimSpace(card.Email)
			}
			details := joinNonEmpty(" • ", card.Email, card.Phone, card.Org)
			hits = append(hits, Result{
				Type:      "contacts",
				ID:        id,
				Title:     highlightTerms(name, queryText),
				Snippet:   contextualSnippet(details, queryText, 180),
				AccountID: book,
				Score:     textMatchScore(haystack, queryText, 1.2),
			})
		}
	}
	return hits, nil
}
// searchEvents lists calendar events through the Nextcloud client and keeps
// those whose summary, description or location matches queryText.
//
// Without a Nextcloud client the result is empty. The time window comes from
// defaultCalendarRange. filters.AccountID, when set, is used as the only
// calendar ID; otherwise every listed calendar with a non-empty ID is scanned.
// Hits are de-duplicated on "calendar:UID:start", which also becomes the result
// ID. An event whose start can be parsed gets a date and a recency boost. Any
// listing error aborts the search.
func (s *Service) searchEvents(ctx context.Context, userID, queryText string, params query.ListParams, filters SearchFilters) ([]Result, error) {
	if s.nc == nil {
		return []Result{}, nil
	}
	windowStart, windowEnd := defaultCalendarRange(params)

	// Decide which calendars to scan.
	calendars := make([]string, 0, 4)
	switch {
	case filters.AccountID != "":
		calendars = append(calendars, filters.AccountID)
	default:
		listed, err := s.nc.ListCalendars(ctx, userID)
		if err != nil {
			return nil, err
		}
		for _, entry := range listed {
			if entry.ID != "" {
				calendars = append(calendars, entry.ID)
			}
		}
	}

	hits := make([]Result, 0, 16)
	emitted := make(map[string]struct{})
	for _, calendar := range calendars {
		entries, err := s.nc.ListEvents(ctx, userID, calPath(userID, calendar), windowStart, windowEnd)
		if err != nil {
			return nil, err
		}
		for _, entry := range entries {
			haystack := strings.TrimSpace(strings.Join([]string{entry.Summary, entry.Description, entry.Location}, " "))
			if !matchesQuery(haystack, queryText) {
				continue
			}
			// An unparseable start leaves startsAt at the zero time.
			startsAt, _ := parseDateString(entry.Start)
			id := strings.TrimSpace(calendar + ":" + entry.UID + ":" + entry.Start)
			if _, dup := emitted[id]; dup {
				continue
			}
			emitted[id] = struct{}{}

			details := joinNonEmpty(" • ", entry.Description, entry.Location)
			hit := Result{
				Type:      "events",
				ID:        id,
				Title:     highlightTerms(entry.Summary, queryText),
				Snippet:   contextualSnippet(details, queryText, 200),
				AccountID: calendar,
				Score:     textMatchScore(haystack, queryText, 1.1),
			}
			if dated := !startsAt.IsZero(); dated {
				hit.Date = startsAt.UTC()
				hit.Score += recencyBoost(startsAt)
			}
			hits = append(hits, hit)
		}
	}
	return hits, nil
}
// searchFiles lists files through the Nextcloud client and keeps those whose
// name, path or MIME type matches queryText.
//
// Without a Nextcloud client the result is empty. The listing starts at
// filters.AccountID (trimmed) when set and at "/" otherwise; that base path is
// also reported as the hit's AccountID. A file whose RFC 1123 modification time
// parses is dropped when it falls outside params.From / params.To and otherwise
// gets a date and a recency boost; files without a parseable time are never
// filtered by date.
func (s *Service) searchFiles(ctx context.Context, userID, queryText string, params query.ListParams, filters SearchFilters) ([]Result, error) {
	if s.nc == nil {
		return []Result{}, nil
	}

	root := strings.TrimSpace(filters.AccountID)
	if root == "" {
		root = "/"
	}
	entries, err := s.nc.ListFiles(ctx, userID, root)
	if err != nil {
		return nil, err
	}

	hits := make([]Result, 0, len(entries))
	for _, entry := range entries {
		haystack := strings.TrimSpace(strings.Join([]string{entry.Name, entry.Path, entry.MimeType}, " "))
		if !matchesQuery(haystack, queryText) {
			continue
		}

		modified, parseErr := time.Parse(time.RFC1123, strings.TrimSpace(entry.LastModified))
		dated := parseErr == nil && !modified.IsZero()
		if dated {
			tooOld := params.From != nil && modified.Before(*params.From)
			tooNew := params.To != nil && modified.After(*params.To)
			if tooOld || tooNew {
				continue
			}
		}

		hit := Result{
			Type:      "files",
			ID:        entry.Path,
			Title:     highlightTerms(entry.Name, queryText),
			Snippet:   contextualSnippet(joinNonEmpty(" • ", entry.Path, entry.MimeType), queryText, 220),
			AccountID: root,
			Score:     textMatchScore(haystack, queryText, 1.0),
		}
		if dated {
			hit.Date = modified.UTC()
			hit.Score += recencyBoost(modified)
		}
		hits = append(hits, hit)
	}
	return hits, nil
}
// recencyBoost returns a score bonus that shrinks as ts gets further from now.
//
// The zero time earns nothing. For a timestamp in the future, half of its
// distance from now is used as the age. The bonus is 1.5 up to one day, 1.0 up
// to a week, 0.6 up to 30 days, 0.3 up to 180 days and 0 beyond that.
func recencyBoost(ts time.Time) float64 {
	if ts.IsZero() {
		return 0
	}
	age := time.Since(ts).Hours() / 24
	if age < 0 {
		age = -age / 2
	}
	tiers := [...]struct {
		maxDays float64
		bonus   float64
	}{
		{1, 1.5},
		{7, 1.0},
		{30, 0.6},
		{180, 0.3},
	}
	for _, tier := range tiers {
		if age <= tier.maxDays {
			return tier.bonus
		}
	}
	return 0
}
// textMatchScore rates how well content matches the query q, scaled by weight.
//
// Both inputs are trimmed and lowercased; if either is empty the score is 0.
// Containing the whole query earns 3*weight. Each distinct query term then
// earns weight per occurrence, plus 0.25*weight when content starts with it.
func textMatchScore(content, q string, weight float64) float64 {
	haystack := strings.ToLower(strings.TrimSpace(content))
	needle := strings.ToLower(strings.TrimSpace(q))
	if haystack == "" || needle == "" {
		return 0
	}

	total := 0.0
	if strings.Contains(haystack, needle) {
		total += 3.0 * weight
	}
	for _, term := range splitSearchTerms(needle) {
		if occurrences := strings.Count(haystack, term); occurrences > 0 {
			total += float64(occurrences) * weight
			if strings.HasPrefix(haystack, term) {
				total += 0.25 * weight
			}
		}
	}
	return total
}
// toTSQuery builds a PostgreSQL tsquery string from free text: every search
// term becomes a prefix match ("term:*") and the terms are joined with " & ".
// It returns "" when the input holds no searchable term.
func toTSQuery(input string) string {
	var b strings.Builder
	for i, term := range splitSearchTerms(input) {
		if i > 0 {
			b.WriteString(" & ")
		}
		b.WriteString(term)
		b.WriteString(":*")
	}
	return b.String()
}
// splitSearchTerms breaks input into lowercase search terms.
//
// Every rune that is neither a letter nor a digit acts as a separator. Terms
// are returned in order of first appearance, without duplicates.
func splitSearchTerms(input string) []string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	words := strings.FieldsFunc(strings.ToLower(input), isSeparator)

	unique := make([]string, 0, len(words))
	known := make(map[string]struct{}, len(words))
	for _, word := range words {
		if _, dup := known[word]; dup {
			continue
		}
		known[word] = struct{}{}
		unique = append(unique, word)
	}
	return unique
}
// wordRunPattern matches maximal runs of letters, decimal digits and
// underscores, i.e. the "words" highlightTerms compares against query terms.
var wordRunPattern = regexp.MustCompile(`[\p{L}\p{Nd}_]+`)

// highlightTerms wraps every whole word of text that equals one of the search
// terms of q (ignoring case) in <mark>…</mark>.
//
// text is trimmed first; an empty text yields "" and a query without terms
// returns the trimmed text unchanged. Words are delimited with Unicode-aware
// rules, so terms such as "über" or "café" are highlighted and a term is never
// marked inside a longer word like "naïve". The regexp `\b` assertion cannot be
// used for this because it only recognises ASCII word characters.
//
// NOTE(review): text is not HTML-escaped here, so the result mixes caller
// content with <mark> tags; make sure consumers render it safely.
func highlightTerms(text, q string) string {
	text = strings.TrimSpace(text)
	if text == "" {
		return ""
	}
	terms := splitSearchTerms(q)
	if len(terms) == 0 {
		return text
	}
	return wordRunPattern.ReplaceAllStringFunc(text, func(word string) string {
		for _, term := range terms {
			// EqualFold applies the same simple case folding as a (?i) regexp.
			if strings.EqualFold(word, term) {
				return "<mark>" + word + "</mark>"
			}
		}
		return word
	})
}
// contextualSnippet returns an excerpt of text of at most maxLen characters,
// centred on the first query term that occurs in it, with the query terms
// highlighted by highlightTerms.
//
// Whitespace runs in text are collapsed to single spaces first. maxLen <= 0
// selects 180. When a term is found the excerpt starts maxLen/3 characters
// before it and "..." marks each side that was cut; when no term is found the
// excerpt is simply the first maxLen characters. Lengths and offsets are
// counted in runes rather than bytes, so multi-byte characters are never split.
func contextualSnippet(text, q string, maxLen int) string {
	text = strings.Join(strings.Fields(text), " ")
	if text == "" {
		return ""
	}
	if maxLen <= 0 {
		maxLen = 180
	}
	runes := []rune(text)

	// Locate the first term (in query order) that occurs in the text. ToLower
	// maps rune to rune, so a rune offset in the lowered copy is also valid for
	// the original text even where the byte lengths differ.
	matchAt := -1
	lower := strings.ToLower(text)
	for _, term := range splitSearchTerms(q) {
		if idx := strings.Index(lower, term); idx >= 0 {
			matchAt = len([]rune(lower[:idx]))
			break
		}
	}

	if matchAt < 0 {
		if len(runes) > maxLen {
			runes = runes[:maxLen]
		}
		return highlightTerms(string(runes), q)
	}

	start := matchAt - maxLen/3
	if start < 0 {
		start = 0
	}
	end := start + maxLen
	if end > len(runes) {
		end = len(runes)
	}
	snippet := string(runes[start:end])
	if start > 0 {
		snippet = "..." + snippet
	}
	if end < len(runes) {
		snippet += "..."
	}
	return highlightTerms(snippet, q)
}
// matchesQuery reports whether content contains at least one search term of q
// as a case-insensitive substring. A query without any term matches everything.
func matchesQuery(content, q string) bool {
	terms := splitSearchTerms(q)
	haystack := strings.ToLower(content)

	matched := len(terms) == 0
	for i := 0; !matched && i < len(terms); i++ {
		matched = strings.Contains(haystack, terms[i])
	}
	return matched
}
// joinNonEmpty trims each part, drops the ones that end up empty, and joins
// the rest with sep.
func joinNonEmpty(sep string, parts ...string) string {
	var b strings.Builder
	wrote := false
	for _, raw := range parts {
		piece := strings.TrimSpace(raw)
		if piece == "" {
			continue
		}
		if wrote {
			b.WriteString(sep)
		}
		b.WriteString(piece)
		wrote = true
	}
	return b.String()
}
// discardFilteredResults returns a new slice holding, in order, every result
// whose score is not negative. The input slice is left untouched.
func discardFilteredResults(results []Result) []Result {
	kept := make([]Result, 0, len(results))
	for i := range results {
		if rejected := results[i].Score < 0; !rejected {
			kept = append(kept, results[i])
		}
	}
	return kept
}
// cardBookPath returns the DAV collection path of a user's address book,
// "/remote.php/dav/addressbooks/users/<userID>/<bookID>/". The IDs are inserted
// verbatim, without escaping.
func cardBookPath(userID, bookID string) string {
	const root = "/remote.php/dav/addressbooks/users/"
	// The trailing empty element produces the closing slash.
	return root + strings.Join([]string{userID, bookID, ""}, "/")
}
// calPath returns the DAV collection path of a user's calendar,
// "/remote.php/dav/calendars/<userID>/<calID>/". The IDs are inserted verbatim,
// without escaping.
func calPath(userID, calID string) string {
	const root = "/remote.php/dav/calendars/"
	// The trailing empty element produces the closing slash.
	return root + strings.Join([]string{userID, calID, ""}, "/")
}