Improve cross-entity search with weighted ranking, prefix matching, and richer results

Backend: Use ts_rank_cd with A/D weights so name matches rank above description
matches. Switch to the 'simple' text search config and append the :* prefix-match
operator to each search term for partial word matching (e.g. "toma" finds
"Tomate"). Include bilingual DE/EN fields in
search vectors. Cultivar search JOINs species so "tomato" finds tomato cultivars.
Return extra metadata: plant_layer, food_uses, species_name, is_organic, snippet.

Frontend: Show colored entity type badges (Familie/Art/Sorte). Display localized
common names, plant layer tags, food uses for species, species link + organic
badge for cultivars, and truncated description snippets. Add DE/EN i18n keys for
search result type labels.
This commit is contained in:
2026-03-17 21:26:02 +01:00
parent 00e26b3a84
commit 7546c3a9dc
6 changed files with 516 additions and 87 deletions
+156 -36
View File
@@ -12,81 +12,201 @@ pub struct SearchParams {
pub limit: Option<i64>,
}
/// Build a `to_tsquery`-compatible string with prefix matching from user input.
///
/// The input is split into terms on every non-alphanumeric character, so
/// whitespace, punctuation and tsquery operator characters (`&`, `|`, `!`,
/// `(`, `)`, `:`, `*`, `<`, `>`, quotes, backslashes) never reach the query
/// text. Each remaining term gets the `:*` prefix-match suffix and the terms
/// are AND-ed together.
///
/// "tomato red" -> "tomato:* & red:*"
///
/// Returns an empty string when the input contains no alphanumeric
/// characters, so callers can detect "nothing to search for" with
/// `is_empty()`.
fn build_prefix_tsquery(input: &str) -> String {
    input
        // Splitting on anything that is not a letter or digit both tokenizes
        // the input and discards characters that are tsquery syntax.
        .split(|c: char| !c.is_alphanumeric())
        // Consecutive separators produce empty pieces; a bare ":*" would be
        // an invalid query, so drop them.
        .filter(|term| !term.is_empty())
        .map(|term| format!("{}:*", term))
        .collect::<Vec<_>>()
        .join(" & ")
}
/// Truncate a string to at most `max_chars` characters, breaking at a word
/// boundary where possible, and append `...` when anything was cut off.
///
/// The limit is counted in Unicode scalar values (`char`s), not bytes, so the
/// cut always lands on a valid UTF-8 boundary and multi-byte text such as
/// German umlauts cannot cause a slicing panic.
///
/// * If `s` has `max_chars` characters or fewer it is returned unchanged.
/// * Otherwise the first `max_chars` characters are taken and cut back to the
///   last space within them; if there is no space, the hard cut is kept.
fn truncate_snippet(s: &str, max_chars: usize) -> String {
    // Byte offset of the first character beyond the limit. `None` means the
    // string has at most `max_chars` characters and needs no truncation.
    let cut = match s.char_indices().nth(max_chars) {
        Some((byte_idx, _)) => byte_idx,
        None => return s.to_string(),
    };

    // `cut` comes from `char_indices`, so this slice is always on a boundary.
    let head = &s[..cut];

    // Prefer ending on the last complete word inside the allowed prefix.
    // A space is a single byte, so `pos` is a valid boundary as well.
    let kept = match head.rfind(' ') {
        Some(pos) => &head[..pos],
        None => head,
    };

    format!("{}...", kept)
}
pub async fn search(
State(state): State<AppState>,
Query(params): Query<SearchParams>,
) -> Result<Json<Vec<SearchResult>>> {
let limit = params.limit.unwrap_or(20).min(100);
let tsquery = params.q.split_whitespace().collect::<Vec<_>>().join(" & ");
let tsquery = build_prefix_tsquery(&params.q);
if tsquery.is_empty() {
return Ok(Json(Vec::new()));
}
// Search across families, species, cultivars
let mut results = Vec::new();
// Families
let families: Vec<(uuid::Uuid, String, String, Option<String>, f32)> = sqlx::query_as(
"SELECT id, slug, name_scientific, description,
ts_rank(to_tsvector('english', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'')),
to_tsquery('english', $1)) AS rank
// --- Families ---
// Weighted: A = names, D = description
let families: Vec<(
uuid::Uuid, String, String, Option<String>, Option<String>, Option<String>, f32,
)> = sqlx::query_as(
"SELECT id, slug, name_scientific, name_en, name_de, description,
ts_rank_cd(
setweight(to_tsvector('simple', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'')), 'A')
|| setweight(to_tsvector('simple', coalesce(description,'')), 'D'),
to_tsquery('simple', $1)
) AS rank
FROM families
WHERE to_tsvector('english', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,''))
@@ to_tsquery('english', $1)
ORDER BY rank DESC LIMIT $2"
WHERE (
setweight(to_tsvector('simple', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'')), 'A')
|| setweight(to_tsvector('simple', coalesce(description,'')), 'D')
) @@ to_tsquery('simple', $1)
ORDER BY rank DESC LIMIT $2",
)
.bind(&tsquery).bind(limit)
.bind(&tsquery)
.bind(limit)
.fetch_all(&state.pool)
.await?;
for (id, slug, name, desc, rank) in families {
for (id, slug, name, name_en, name_de, desc, rank) in families {
let snippet = desc.as_deref().map(|d| truncate_snippet(d, 160));
results.push(SearchResult {
entity_type: "family".to_string(),
id, slug, name,
id,
slug,
name,
name_de,
name_en,
description: desc,
snippet,
plant_layer: None,
food_uses: None,
species_name: None,
species_slug: None,
is_organic: None,
rank,
});
}
// Species
let species: Vec<(uuid::Uuid, String, String, Option<String>, f32)> = sqlx::query_as(
"SELECT id, slug, name_scientific, description,
ts_rank(to_tsvector('english', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'') || ' ' || coalesce(description,'')),
to_tsquery('english', $1)) AS rank
// --- Species ---
// Weighted: A = names, D = descriptions + food_uses
let species: Vec<(
uuid::Uuid,
String,
String,
Option<String>,
Option<String>,
Option<String>,
Option<String>,
Option<String>,
Option<String>,
Option<String>,
f32,
)> = sqlx::query_as(
"SELECT id, slug, name_scientific, name_en, name_de,
description, description_de, description_en,
plant_layer, food_uses,
ts_rank_cd(
setweight(to_tsvector('simple', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'')), 'A')
|| setweight(to_tsvector('simple', coalesce(description,'') || ' ' || coalesce(description_de,'') || ' ' || coalesce(description_en,'') || ' ' || coalesce(food_uses,'') || ' ' || coalesce(food_uses_de,'') || ' ' || coalesce(food_uses_en,'')), 'D'),
to_tsquery('simple', $1)
) AS rank
FROM species
WHERE to_tsvector('english', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'') || ' ' || coalesce(description,''))
@@ to_tsquery('english', $1)
ORDER BY rank DESC LIMIT $2"
WHERE (
setweight(to_tsvector('simple', coalesce(name_scientific,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'')), 'A')
|| setweight(to_tsvector('simple', coalesce(description,'') || ' ' || coalesce(description_de,'') || ' ' || coalesce(description_en,'') || ' ' || coalesce(food_uses,'') || ' ' || coalesce(food_uses_de,'') || ' ' || coalesce(food_uses_en,'')), 'D')
) @@ to_tsquery('simple', $1)
ORDER BY rank DESC LIMIT $2",
)
.bind(&tsquery).bind(limit)
.bind(&tsquery)
.bind(limit)
.fetch_all(&state.pool)
.await?;
for (id, slug, name, desc, rank) in species {
for (id, slug, name, name_en, name_de, desc, desc_de, desc_en, plant_layer, food_uses, rank) in
species
{
// Build snippet from whichever description is available
let snippet_text = desc
.as_deref()
.or(desc_en.as_deref())
.or(desc_de.as_deref());
let snippet = snippet_text.map(|d| truncate_snippet(d, 160));
results.push(SearchResult {
entity_type: "species".to_string(),
id, slug, name,
id,
slug,
name,
name_de,
name_en,
description: desc,
snippet,
plant_layer,
food_uses,
species_name: None,
species_slug: None,
is_organic: None,
rank,
});
}
// Cultivars
let cultivars: Vec<(uuid::Uuid, String, String, Option<String>, f32)> = sqlx::query_as(
"SELECT id, slug, name, description,
ts_rank(to_tsvector('english', coalesce(name,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'') || ' ' || coalesce(name_scientific,'') || ' ' || coalesce(description,'')),
to_tsquery('english', $1)) AS rank
FROM cultivars
WHERE to_tsvector('english', coalesce(name,'') || ' ' || coalesce(name_en,'') || ' ' || coalesce(name_de,'') || ' ' || coalesce(name_scientific,'') || ' ' || coalesce(description,''))
@@ to_tsquery('english', $1)
ORDER BY rank DESC LIMIT $2"
// --- Cultivars ---
// Join species so that searching "tomato" finds tomato cultivars.
// Weighted: A = cultivar names + species names, D = descriptions
let cultivars: Vec<(
uuid::Uuid,
String,
String,
Option<String>,
Option<String>,
Option<String>,
bool,
String,
String,
f32,
)> = sqlx::query_as(
"SELECT c.id, c.slug, c.name, c.name_en, c.name_de, c.description, c.is_organic,
s.name_scientific AS species_name, s.slug AS species_slug,
ts_rank_cd(
setweight(to_tsvector('simple', coalesce(c.name,'') || ' ' || coalesce(c.name_en,'') || ' ' || coalesce(c.name_de,'') || ' ' || coalesce(c.name_scientific,'')
|| ' ' || coalesce(s.name_scientific,'') || ' ' || coalesce(s.name_en,'') || ' ' || coalesce(s.name_de,'')), 'A')
|| setweight(to_tsvector('simple', coalesce(c.description,'') || ' ' || coalesce(c.description_de,'') || ' ' || coalesce(c.description_en,'')), 'D'),
to_tsquery('simple', $1)
) AS rank
FROM cultivars c
JOIN species s ON c.species_id = s.id
WHERE (
setweight(to_tsvector('simple', coalesce(c.name,'') || ' ' || coalesce(c.name_en,'') || ' ' || coalesce(c.name_de,'') || ' ' || coalesce(c.name_scientific,'')
|| ' ' || coalesce(s.name_scientific,'') || ' ' || coalesce(s.name_en,'') || ' ' || coalesce(s.name_de,'')), 'A')
|| setweight(to_tsvector('simple', coalesce(c.description,'') || ' ' || coalesce(c.description_de,'') || ' ' || coalesce(c.description_en,'')), 'D')
) @@ to_tsquery('simple', $1)
ORDER BY rank DESC LIMIT $2",
)
.bind(&tsquery).bind(limit)
.bind(&tsquery)
.bind(limit)
.fetch_all(&state.pool)
.await?;
for (id, slug, name, desc, rank) in cultivars {
for (id, slug, name, name_en, name_de, desc, is_organic, species_name, species_slug, rank) in
cultivars
{
let snippet = desc.as_deref().map(|d| truncate_snippet(d, 160));
results.push(SearchResult {
entity_type: "cultivar".to_string(),
id, slug, name,
id,
slug,
name,
name_de,
name_en,
description: desc,
snippet,
plant_layer: None,
food_uses: None,
species_name: Some(species_name),
species_slug: Some(species_slug),
is_organic: Some(is_organic),
rank,
});
}
+8
View File
@@ -505,6 +505,14 @@ pub struct SearchResult {
pub id: Uuid,
pub slug: String,
pub name: String,
pub name_de: Option<String>,
pub name_en: Option<String>,
pub description: Option<String>,
pub snippet: Option<String>,
pub plant_layer: Option<String>,
pub food_uses: Option<String>,
pub species_name: Option<String>,
pub species_slug: Option<String>,
pub is_organic: Option<bool>,
pub rank: f32,
}