Add scraper and enrichment scripts to tools/ directory

2026-03-16 11:10:18 +01:00
parent 83ab8c4cf9
commit 0ef902cc91
13 changed files with 6031 additions and 0 deletions
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""Enrich HerbAPI species with Wikidata QID, GBIF ID, and EPPO code."""
+
+import json
+import time
+import urllib.parse
+import urllib.request
+
+import os
+
+# Base URL that herbapi_request() prefixes to every request path.
+HERBAPI_BASE = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Bearer token for HerbAPI, taken from the HERBAPI_TOKEN environment variable
+# so the credential is not stored in the repository. Any token that was
+# previously committed here should be treated as leaked and rotated.
+HERBAPI_TOKEN = os.environ.get("HERBAPI_TOKEN", "")
+# Wikidata SPARQL endpoint used by query_wikidata_batch().
+WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"
+
+# Headers sent with every Wikidata request: an identifying User-Agent and a
+# JSON Accept type.
+HEADERS_WD = {
+    "User-Agent": "HerbAPI-Enrichment/1.0 (florian.berthold@sub-net.at)",
+    "Accept": "application/json",
+}
+
+
+def herbapi_request(path, method="GET", data=None, timeout=60):
+    """Send an authenticated JSON request to HerbAPI and return the decoded body.
+
+    Args:
+        path: Path appended to HERBAPI_BASE, including any query string.
+        method: HTTP method to use.
+        data: Optional JSON-serialisable payload sent as the request body.
+        timeout: Socket timeout in seconds, so a stalled server cannot hang
+            the script indefinitely.
+
+    Returns:
+        The response body parsed as JSON.
+
+    Raises:
+        urllib.error.URLError: on HTTP or connection failure (HTTPError is a
+            subclass), propagated from urlopen.
+        json.JSONDecodeError: if the response body is not valid JSON.
+    """
+    url = f"{HERBAPI_BASE}{path}"
+    # Compare against None so an explicitly empty payload ({} or []) is still
+    # serialised and sent instead of being silently dropped.
+    body = json.dumps(data).encode() if data is not None else None
+    req = urllib.request.Request(url, data=body, method=method, headers={
+        "Authorization": f"Bearer {HERBAPI_TOKEN}",
+        "Content-Type": "application/json",
+    })
+    with urllib.request.urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read())
+
+
+def query_wikidata_batch(names):
+    """Look up a batch of scientific names on Wikidata.
+
+    Matches each name against the taxon-name property (P225) and returns the
+    item's QID plus, when present, its GBIF ID (P846) and EPPO code (P3031).
+
+    Args:
+        names: Iterable of scientific names to look up.
+
+    Returns:
+        Dict mapping each matched name to
+        {"qid": str, "gbif_id": str | None, "eppo_code": str | None}.
+        Names without a match are absent. If several result rows share a
+        name, the last row wins.
+
+    Raises:
+        urllib.error.URLError: on HTTP or connection failure.
+        json.JSONDecodeError: if the endpoint does not return valid JSON.
+    """
+    names = list(names)
+    if not names:
+        # Nothing to look up; avoid a pointless round-trip.
+        return {}
+
+    def sparql_literal(text):
+        # Escape the characters that would otherwise terminate or corrupt a
+        # double-quoted SPARQL string literal.
+        escaped = (
+            text.replace("\\", "\\\\")
+            .replace('"', '\\"')
+            .replace("\n", "\\n")
+            .replace("\r", "\\r")
+        )
+        return f'"{escaped}"'
+
+    values = " ".join(sparql_literal(n) for n in names)
+    sparql = f"""SELECT ?name ?item ?gbifId ?eppoCode WHERE {{
+  VALUES ?name {{ {values} }}
+  ?item wdt:P225 ?name .
+  OPTIONAL {{ ?item wdt:P846 ?gbifId }}
+  OPTIONAL {{ ?item wdt:P3031 ?eppoCode }}
+}}"""
+    encoded = urllib.parse.quote(sparql)
+    url = f"{WIKIDATA_SPARQL}?query={encoded}&format=json"
+    req = urllib.request.Request(url, headers=HEADERS_WD)
+    with urllib.request.urlopen(req, timeout=60) as resp:
+        data = json.loads(resp.read())
+
+    results = {}
+    for binding in data.get("results", {}).get("bindings", []):
+        name = binding["name"]["value"]
+        # The item value is an entity URL; its last path segment is the QID.
+        qid = binding["item"]["value"].rsplit("/", 1)[-1]
+        gbif = binding.get("gbifId", {}).get("value")
+        eppo = binding.get("eppoCode", {}).get("value")
+        results[name] = {"qid": qid, "gbif_id": gbif, "eppo_code": eppo}
+    return results
+
+
+def main():
+    """Fill in missing Wikidata QID, GBIF ID and EPPO code for HerbAPI species.
+
+    Fetches the species list, looks up every incompletely enriched species on
+    Wikidata in batches, then GETs each affected record by slug, fills only
+    the identifiers that are still empty and PUTs the record back by id.
+    Progress and a final tally are printed to stdout.
+    """
+    # 1. Fetch all species
+    resp = herbapi_request("/species?per_page=200")
+    species_list = resp["data"]
+    print(f"Fetched {len(species_list)} species from HerbAPI\n")
+
+    # 2. Keep only species lacking at least one of the three identifiers
+    to_enrich = [
+        sp for sp in species_list
+        if not (sp["wikidata_qid"] and sp["gbif_id"] and sp["eppo_code"])
+    ]
+    if not to_enrich:
+        print("All species already enriched.")
+        return
+
+    print(f"{len(to_enrich)} species need enrichment\n")
+
+    # 3. Query Wikidata in fixed-size batches, pausing between requests
+    batch_size = 20
+    names = [sp["name_scientific"] for sp in to_enrich]
+    wikidata_results = {}
+
+    for batch_no, start in enumerate(range(0, len(names), batch_size), start=1):
+        batch = names[start:start + batch_size]
+        print(f"Querying Wikidata batch {batch_no}: {len(batch)} species...")
+        try:
+            matches = query_wikidata_batch(batch)
+            wikidata_results.update(matches)
+            print(f"  Got {len(matches)} matches")
+        except Exception as e:
+            # A failed batch is reported and skipped; its species simply end
+            # up without a Wikidata match below.
+            print(f"  ERROR: {e}")
+        is_last_batch = start + batch_size >= len(names)
+        if not is_last_batch:
+            time.sleep(2)
+
+    print(f"\nWikidata returned data for {len(wikidata_results)} / {len(names)} species\n")
+
+    # 4. Update HerbAPI - GET full object by slug, merge, PUT by id
+    tally = {"updated": 0, "skipped": 0, "not_found": 0, "errors": 0}
+
+    for sp in to_enrich:
+        name = sp["name_scientific"]
+        wd = wikidata_results.get(name)
+        if not wd:
+            print(f"  SKIP (no Wikidata match): {name}")
+            tally["not_found"] += 1
+            continue
+
+        # A field is written only when HerbAPI has no value and Wikidata does
+        needs_qid = not sp["wikidata_qid"] and wd["qid"]
+        needs_gbif = not sp["gbif_id"] and wd["gbif_id"]
+        needs_eppo = not sp["eppo_code"] and wd["eppo_code"]
+
+        if not needs_qid and not needs_gbif and not needs_eppo:
+            print(f"  SKIP (nothing new): {name}")
+            tally["skipped"] += 1
+            continue
+
+        try:
+            # The list entry may be partial, so fetch the complete record
+            record = herbapi_request(f"/species/{sp['slug']}")
+
+            # Strip read-only fields before sending the record back
+            species_id = record.pop("id")
+            for read_only in ("slug", "created_at", "updated_at"):
+                record.pop(read_only, None)
+
+            # Merge new data (only empty fields) and note what changed
+            changed = []
+            if needs_qid:
+                record["wikidata_qid"] = wd["qid"]
+                changed.append(f"qid={wd['qid']}")
+            if needs_gbif:
+                record["gbif_id"] = str(wd["gbif_id"])  # API expects string
+                changed.append(f"gbif={wd['gbif_id']}")
+            if needs_eppo:
+                record["eppo_code"] = wd["eppo_code"]
+                changed.append(f"eppo={wd['eppo_code']}")
+
+            herbapi_request(f"/species/{species_id}", method="PUT", data=record)
+
+            print(f"  UPDATED: {name} -> {', '.join(changed)}")
+            tally["updated"] += 1
+        except Exception as e:
+            print(f"  ERROR updating {name}: {e}")
+            tally["errors"] += 1
+
+    print("\n" + "=" * 60)
+    print("RESULTS:")
+    print(f"  Updated:               {tally['updated']}")
+    print(f"  Skipped (no new data): {tally['skipped']}")
+    print(f"  Not found on Wikidata: {tally['not_found']}")
+    print(f"  Errors:                {tally['errors']}")
+    print(f"  Total species:         {len(species_list)}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,305 @@
+#!/usr/bin/env python3
+"""Expand HerbAPI species database with common permaculture/garden species."""
+
+import json
+import time
+import urllib.request
+import urllib.parse
+import urllib.error
+import ssl
+
+import os
+
+# Base URL that api_get()/api_post() prefix to every request path.
+BASE_URL = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Authorization header value. The bearer token is read from the HERBAPI_TOKEN
+# environment variable so it is not stored in the repository; any token that
+# was previously committed here should be treated as leaked and rotated.
+AUTH = f"Bearer {os.environ.get('HERBAPI_TOKEN', '')}"
+# Pause in seconds after each POST to HerbAPI.
+DELAY = 0.15
+
+# SSL context for GBIF (https)
+ssl_ctx = ssl.create_default_context()
+
+
+def api_get(path, timeout=30):
+    """GET ``path`` from HerbAPI and return the JSON-decoded response.
+
+    Args:
+        path: Path appended to BASE_URL, including any query string.
+        timeout: Socket timeout in seconds, so a stalled server cannot hang
+            the script indefinitely.
+
+    Raises:
+        urllib.error.URLError: on HTTP or connection failure.
+        json.JSONDecodeError: if the response body is not valid JSON.
+    """
+    req = urllib.request.Request(f"{BASE_URL}{path}", headers={"Authorization": AUTH})
+    with urllib.request.urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read())
+
+
+def api_post(path, data, timeout=30):
+    """POST ``data`` as JSON to HerbAPI.
+
+    Args:
+        path: Path appended to BASE_URL.
+        data: JSON-serialisable payload.
+        timeout: Socket timeout in seconds, so a stalled server cannot hang
+            the script indefinitely.
+
+    Returns:
+        ``(decoded_json, status)`` on success, or ``(None, status)`` when the
+        server answers with an HTTP error; the error body is printed.
+
+    Raises:
+        urllib.error.URLError: on connection failure (not an HTTP error).
+        json.JSONDecodeError: if a successful response is not valid JSON.
+    """
+    body = json.dumps(data).encode()
+    req = urllib.request.Request(
+        f"{BASE_URL}{path}",
+        data=body,
+        headers={"Authorization": AUTH, "Content-Type": "application/json"},
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            return json.loads(resp.read()), resp.status
+    except urllib.error.HTTPError as e:
+        # Decode leniently: an error page in an unexpected encoding must not
+        # raise a second exception from inside the error handler.
+        err_body = e.read().decode(errors="replace")
+        print(f"  ERROR {e.code}: {err_body}")
+        return None, e.code
+
+
+def gbif_get_german_name(scientific_name):
+    """Return the first German vernacular name GBIF lists for a species.
+
+    Resolves the scientific name through the GBIF match endpoint, then scans
+    up to 100 vernacular names of the matched usage for one whose language is
+    "deu". Returns None when there is no match, no German name, or any error
+    occurs (the error is printed).
+    """
+    def gbif_json(url):
+        # Fetch one GBIF endpoint and decode its JSON body.
+        request = urllib.request.Request(url)
+        with urllib.request.urlopen(request, context=ssl_ctx, timeout=10) as resp:
+            return json.loads(resp.read())
+
+    try:
+        match = gbif_json(
+            f"https://api.gbif.org/v1/species/match?name={urllib.parse.quote(scientific_name)}"
+        )
+        usage_key = match.get("usageKey")
+        if not usage_key:
+            return None
+
+        vernacular = gbif_json(
+            f"https://api.gbif.org/v1/species/{usage_key}/vernacularNames?limit=100"
+        )
+        german_names = (
+            entry["vernacularName"]
+            for entry in vernacular.get("results", [])
+            if entry.get("language") == "deu"
+        )
+        return next(german_names, None)
+    except Exception as e:
+        print(f"  GBIF lookup failed for {scientific_name}: {e}")
+        return None
+
+
+# ── Families to ensure exist (main() creates any that HerbAPI does not already list) ──
+FAMILIES_NEEDED = {
+    "Fabaceae":        {"name_en": "Legumes", "name_de": "Hülsenfrüchtler"},
+    "Solanaceae":      {"name_en": "Nightshade family", "name_de": "Nachtschattengewächse"},
+    "Cucurbitaceae":   {"name_en": "Gourd family", "name_de": "Kürbisgewächse"},
+    "Asteraceae":      {"name_en": "Daisy family", "name_de": "Korbblütler"},
+    "Chenopodiaceae":  {"name_en": "Goosefoot family", "name_de": "Gänsefußgewächse"},
+    "Brassicaceae":    {"name_en": "Cabbage family", "name_de": "Kreuzblütler"},
+    "Amaryllidaceae":  {"name_en": "Amaryllis family", "name_de": "Amaryllisgewächse"},
+    "Apiaceae":        {"name_en": "Carrot family", "name_de": "Doldenblütler"},
+    "Poaceae":         {"name_en": "Grass family", "name_de": "Süßgräser"},
+    "Lamiaceae":       {"name_en": "Mint family", "name_de": "Lippenblütler"},
+    "Caprifoliaceae":  {"name_en": "Honeysuckle family", "name_de": "Geißblattgewächse"},
+    "Rosaceae":        {"name_en": "Rose family", "name_de": "Rosengewächse"},
+    "Grossulariaceae": {"name_en": "Gooseberry family", "name_de": "Stachelbeergewächse"},
+    "Ericaceae":       {"name_en": "Heath family", "name_de": "Heidekrautgewächse"},
+    "Moraceae":        {"name_en": "Mulberry family", "name_de": "Maulbeergewächse"},
+    # New families not yet in the DB:
+    "Hypericaceae":    {"name_en": "St John's wort family", "name_de": "Johanniskrautgewächse"},
+    "Tropaeolaceae":   {"name_en": "Nasturtium family", "name_de": "Kapuzinerkressengewächse"},
+    "Elaeagnaceae":    {"name_en": "Oleaster family", "name_de": "Ölweidengewächse"},
+}
+
+# ── Species to add ───────────────────────────────────────────────────
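+# Each entry: (scientific_name, family, name_en, name_de, plant_layer, extra_fields).
+# `family` is the family's scientific name and is resolved to an id through the
+# family map built in main(); `extra_fields` is a dict copied as-is into the POST payload.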
+SPECIES = [
+    # Vegetables
+    ("Phaseolus vulgaris", "Fabaceae", "common bean", "Gartenbohne", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Beans (pods, seeds)"}),
+    ("Phaseolus coccineus", "Fabaceae", "runner bean", "Feuerbohne", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Beans (pods, seeds), flowers", "attracts_pollinators": True}),
+    ("Pisum sativum", "Fabaceae", "pea", "Erbse", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Peas, shoots"}),
+    ("Capsicum annuum", "Solanaceae", "pepper", "Paprika", "herbaceous",
+     {"food_uses": "Fruit"}),
+    ("Cucumis sativus", "Cucurbitaceae", "cucumber", "Gurke", "ground_cover",
+     {"food_uses": "Fruit"}),
+    ("Cucurbita maxima", "Cucurbitaceae", "winter squash", "Riesenkürbis", "ground_cover",
+     {"food_uses": "Fruit, seeds, flowers"}),
+    ("Cucurbita moschata", "Cucurbitaceae", "butternut squash", "Moschuskürbis", "ground_cover",
+     {"food_uses": "Fruit, seeds"}),
+    ("Lactuca sativa", "Asteraceae", "lettuce", "Salat", "herbaceous",
+     {"food_uses": "Leaves"}),
+    ("Spinacia oleracea", "Chenopodiaceae", "spinach", "Spinat", "herbaceous",
+     {"food_uses": "Leaves"}),
+    ("Brassica oleracea", "Brassicaceae", "cabbage / kale", "Kohl", "herbaceous",
+     {"food_uses": "Leaves, flower buds, stems"}),
+    ("Brassica rapa", "Brassicaceae", "turnip", "Rübe", "herbaceous",
+     {"food_uses": "Root, leaves"}),
+    ("Raphanus sativus", "Brassicaceae", "radish", "Rettich", "herbaceous",
+     {"food_uses": "Root, leaves, seed pods"}),
+    ("Allium cepa", "Amaryllidaceae", "onion", "Zwiebel", "herbaceous",
+     {"food_uses": "Bulb, leaves"}),
+    ("Allium sativum", "Amaryllidaceae", "garlic", "Knoblauch", "herbaceous",
+     {"food_uses": "Bulb, scapes", "medicinal_uses": "Antimicrobial, cardiovascular"}),
+    ("Allium schoenoprasum", "Amaryllidaceae", "chives", "Schnittlauch", "herbaceous",
+     {"food_uses": "Leaves, flowers", "attracts_pollinators": True}),
+    ("Petroselinum crispum", "Apiaceae", "parsley", "Petersilie", "herbaceous",
+     {"food_uses": "Leaves, root"}),
+    ("Apium graveolens", "Apiaceae", "celery", "Sellerie", "herbaceous",
+     {"food_uses": "Stalks, root, leaves"}),
+    ("Foeniculum vulgare", "Apiaceae", "fennel", "Fenchel", "herbaceous",
+     {"food_uses": "Bulb, fronds, seeds", "attracts_beneficial_insects": True}),
+    ("Pastinaca sativa", "Apiaceae", "parsnip", "Pastinake", "herbaceous",
+     {"food_uses": "Root"}),
+    ("Zea mays", "Poaceae", "corn", "Mais", "herbaceous",
+     {"food_uses": "Kernels, cobs"}),
+    ("Solanum melongena", "Solanaceae", "eggplant", "Melanzani", "herbaceous",
+     {"food_uses": "Fruit"}),
+
+    # Herbs
+    ("Ocimum basilicum", "Lamiaceae", "basil", "Basilikum", "herbaceous",
+     {"food_uses": "Leaves", "attracts_pollinators": True}),
+    ("Origanum vulgare", "Lamiaceae", "oregano", "Oregano", "herbaceous",
+     {"food_uses": "Leaves", "attracts_pollinators": True, "attracts_beneficial_insects": True}),
+    ("Mentha x piperita", "Lamiaceae", "peppermint", "Pfefferminze", "herbaceous",
+     {"food_uses": "Leaves (tea, culinary)", "medicinal_uses": "Digestive, headache relief", "invasiveness": "spreading"}),
+    ("Rosmarinus officinalis", "Lamiaceae", "rosemary", "Rosmarin", "herbaceous",
+     {"food_uses": "Leaves", "attracts_pollinators": True}),
+    ("Anethum graveolens", "Apiaceae", "dill", "Dill", "herbaceous",
+     {"food_uses": "Leaves, seeds", "attracts_beneficial_insects": True}),
+    ("Coriandrum sativum", "Apiaceae", "coriander", "Koriander", "herbaceous",
+     {"food_uses": "Leaves, seeds", "attracts_beneficial_insects": True}),
+    ("Artemisia absinthium", "Asteraceae", "wormwood", "Wermut", "herbaceous",
+     {"medicinal_uses": "Digestive, anti-parasitic", "other_uses": "Companion plant pest deterrent", "allelopathic": True}),
+    ("Achillea millefolium", "Asteraceae", "yarrow", "Schafgarbe", "herbaceous",
+     {"food_uses": "Young leaves (salad)", "medicinal_uses": "Wound healing, anti-inflammatory",
+      "dynamic_accumulator": True, "dynamic_accumulator_nutrients": "K, P, Cu",
+      "attracts_beneficial_insects": True, "attracts_pollinators": True}),
+    ("Hypericum perforatum", "Hypericaceae", "St John's wort", "Johanniskraut", "herbaceous",
+     {"medicinal_uses": "Antidepressant, wound healing", "attracts_pollinators": True}),
+    ("Echinacea purpurea", "Asteraceae", "echinacea", "Sonnenhut", "herbaceous",
+     {"medicinal_uses": "Immune stimulant", "attracts_pollinators": True, "wildlife_value": "Seeds for birds"}),
+    ("Valeriana officinalis", "Caprifoliaceae", "valerian", "Baldrian", "herbaceous",
+     {"medicinal_uses": "Sedative, sleep aid", "attracts_pollinators": True,
+      "other_uses": "Earthworm attractant (biodynamic)"}),
+
+    # Flowers & cover crops
+    ("Tagetes patula", "Asteraceae", "French marigold", "Studentenblume", "herbaceous",
+     {"other_uses": "Nematode suppression, companion plant", "attracts_pollinators": True}),
+    ("Helianthus annuus", "Asteraceae", "sunflower", "Sonnenblume", "herbaceous",
+     {"food_uses": "Seeds, oil", "attracts_pollinators": True, "wildlife_value": "Seeds for birds"}),
+    ("Tropaeolum majus", "Tropaeolaceae", "nasturtium", "Kapuzinerkresse", "ground_cover",
+     {"food_uses": "Leaves, flowers, seeds (capers)", "other_uses": "Trap crop for aphids"}),
+    ("Centaurea cyanus", "Asteraceae", "cornflower", "Kornblume", "herbaceous",
+     {"food_uses": "Flowers (edible garnish)", "attracts_pollinators": True, "attracts_beneficial_insects": True}),
+    ("Sinapis alba", "Brassicaceae", "white mustard", "Weißer Senf", "herbaceous",
+     {"food_uses": "Seeds, young leaves", "other_uses": "Green manure, biofumigant"}),
+    ("Trifolium repens", "Fabaceae", "white clover", "Weißklee", "ground_cover",
+     {"nitrogen_fixer": True, "food_uses": "Flowers (tea), young leaves",
+      "ground_cover_quality": "excellent", "attracts_pollinators": True}),
+    ("Medicago sativa", "Fabaceae", "alfalfa", "Luzerne", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Sprouts",
+      "dynamic_accumulator": True, "dynamic_accumulator_nutrients": "N, K, Ca, Mg, Fe",
+      "other_uses": "Green manure, deep-rooting soil improver"}),
+
+    # Fruit / Trees
+    ("Prunus avium", "Rosaceae", "sweet cherry", "Süßkirsche", "canopy",
+     {"food_uses": "Fruit", "attracts_pollinators": True, "wildlife_value": "Fruit for birds"}),
+    ("Prunus cerasus", "Rosaceae", "sour cherry", "Sauerkirsche", "understory",
+     {"food_uses": "Fruit (cooking, preserves)", "attracts_pollinators": True}),
+    ("Pyrus communis", "Rosaceae", "pear", "Birne", "canopy",
+     {"food_uses": "Fruit", "attracts_pollinators": True}),
+    ("Ribes uva-crispa", "Grossulariaceae", "gooseberry", "Stachelbeere", "shrub",
+     {"food_uses": "Berries"}),
+    ("Rubus fruticosus", "Rosaceae", "blackberry", "Brombeere", "shrub",
+     {"food_uses": "Berries, leaves (tea)", "attracts_pollinators": True,
+      "wildlife_value": "Berries for birds, nesting habitat", "invasiveness": "spreading"}),
+    ("Vaccinium myrtillus", "Ericaceae", "bilberry", "Heidelbeere", "shrub",
+     {"food_uses": "Berries", "medicinal_uses": "Antioxidant, eye health"}),
+    ("Hippophae rhamnoides", "Elaeagnaceae", "sea buckthorn", "Sanddorn", "shrub",
+     {"nitrogen_fixer": True, "food_uses": "Berries (juice, oil)",
+      "medicinal_uses": "High vitamin C, skin care",
+      "other_uses": "Erosion control, windbreak"}),
+    ("Morus alba", "Moraceae", "white mulberry", "Weiße Maulbeere", "canopy",
+     {"food_uses": "Fruit, young leaves", "wildlife_value": "Fruit for birds"}),
+]
+
+
+def main():
+    """Create any missing families and species from the tables above in HerbAPI.
+
+    Loads the families and species HerbAPI already knows, POSTs every entry of
+    FAMILIES_NEEDED and SPECIES that is not present yet, and prints progress
+    plus a summary. Species whose family could not be resolved are counted as
+    failed.
+    """
+    # 1. Load existing families
+    print("=== Loading existing families ===")
+    fam_resp = api_get("/families?per_page=100")
+    # name_scientific -> id
+    family_map = {fam["name_scientific"]: fam["id"] for fam in fam_resp["data"]}
+    print(f"  Found {len(family_map)} existing families")
+
+    # 2. Create missing families
+    print("\n=== Creating missing families ===")
+    families_created = 0
+    for fam_name, fam_info in FAMILIES_NEEDED.items():
+        if fam_name in family_map:
+            print(f"  SKIP (exists): {fam_name}")
+            continue
+        new_family = {
+            "name_scientific": fam_name,
+            "name_en": fam_info["name_en"],
+            "name_de": fam_info["name_de"],
+        }
+        print(f"  CREATE: {fam_name} ...", end=" ")
+        result, status = api_post("/families", new_family)
+        if result and "id" in result:
+            # Remember the new id so species of this family can reference it
+            family_map[fam_name] = result["id"]
+            print(f"OK ({result['id']})")
+            families_created += 1
+        else:
+            print(f"FAILED (status={status})")
+        time.sleep(DELAY)
+
+    print(f"\n  Families created: {families_created}")
+
+    # 3. Load existing species
+    print("\n=== Loading existing species ===")
+    sp_resp = api_get("/species?per_page=200")
+    existing_species = {entry["name_scientific"] for entry in sp_resp["data"]}
+    print(f"  Found {len(existing_species)} existing species")
+
+    # 4. Add new species
+    print("\n=== Adding new species ===")
+    counts = {"created": 0, "skipped": 0, "failed": 0}
+
+    for sci_name, family, name_en, name_de, plant_layer, extras in SPECIES:
+        if sci_name in existing_species:
+            print(f"  SKIP (exists): {sci_name}")
+            counts["skipped"] += 1
+            continue
+
+        # Look up family ID
+        fam_id = family_map.get(family)
+        if not fam_id:
+            print(f"  SKIP (no family '{family}'): {sci_name}")
+            counts["failed"] += 1
+            continue
+
+        # The GBIF German name is only logged for comparison; the curated
+        # name_de from the table is what gets stored.
+        gbif_de = gbif_get_german_name(sci_name)
+        if gbif_de:
+            print(f"  GBIF name for {sci_name}: {gbif_de}")
+
+        # Core fields first, then the per-species extras on top
+        payload = {
+            "name_scientific": sci_name,
+            "family_id": fam_id,
+            "name_en": name_en,
+            "name_de": name_de,
+            "plant_layer": plant_layer,
+        }
+        payload.update(extras)
+
+        print(f"  CREATE: {sci_name} ({name_de}) ...", end=" ")
+        result, status = api_post("/species", payload)
+        if result and "id" in result:
+            print(f"OK ({result['id']})")
+            counts["created"] += 1
+        else:
+            print(f"FAILED (status={status})")
+            counts["failed"] += 1
+        time.sleep(DELAY)
+
+    print("\n" + "=" * 50)
+    print("SUMMARY")
+    print(f"  Families created: {families_created}")
+    print(f"  Species created:  {counts['created']}")
+    print(f"  Species skipped:  {counts['skipped']}")
+    print(f"  Species failed:   {counts['failed']}")
+    print(f"  Total species now: {len(existing_species) + counts['created']}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,362 @@
+#!/usr/bin/env python3
+"""Import CC-licensed plant images from Wikimedia Commons via Wikidata into HerbAPI."""
+
+import json
+import os
+import re
+import subprocess
+import sys
+import time
+import urllib.parse
+import urllib.request
+
+# Switch stdout/stderr to line buffering so each progress line is flushed as soon as it is printed
+sys.stdout.reconfigure(line_buffering=True)
+sys.stderr.reconfigure(line_buffering=True)
+
+# --- Configuration ---
+# Credentials are read from the environment (the variable names the AWS CLI
+# and psql themselves use) so they are not stored in the repository. Any key
+# or password that was previously committed here should be treated as leaked
+# and rotated.
+S3_ENDPOINT = "http://garage.sub-net.at:3900"
+S3_BUCKET = "herbapi"
+S3_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID", "")
+S3_SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")
+S3_REGION = "garage"
+
+DB_HOST = "10.31.3.90"
+DB_USER = "herbapi"
+DB_PASS = os.environ.get("PGPASSWORD", "")
+DB_NAME = "herbapi"
+
+# Sent with every Wikidata / Commons request.
+USER_AGENT = "HerbAPI/1.0 (https://herbapi.naturalised.at; florian.berthold@sub-net.at)"
+# Thumbnail width in pixels requested from the Commons API.
+THUMB_WIDTH = 800
+# Pause in seconds before each outgoing Wikidata / Commons request.
+REQUEST_DELAY = 0.3
+
+# Lower-case license short names accepted outright by is_license_allowed():
+# CC0 and public-domain spellings, plus CC BY and CC BY-SA in versions
+# 1.0-4.0, each in both its spaced and its hyphenated form.
+ALLOWED_LICENSES = {
+    "cc0", "cc-zero", "cc0 1.0", "cc-zero 1.0",
+    "public domain", "pd", "pd-self", "pd-old", "pd-old-auto", "pd-old-100",
+    "pd-us", "pd-usgov", "pd-author",
+} | {
+    template.format(version)
+    for template in ("cc by {}", "cc-by-{}", "cc by-sa {}", "cc-by-sa-{}")
+    for version in ("1.0", "2.0", "2.5", "3.0", "4.0")
+}
+
+
+def slugify(name: str) -> str:
+    """Turn a scientific name into a lower-case, hyphen-separated slug.
+
+    Every run of characters other than a-z and 0-9 becomes a single hyphen,
+    and hyphens at either end are removed.
+    """
+    lowered = name.lower()
+    hyphenated = re.sub(r'[^a-z0-9]+', '-', lowered)
+    return hyphenated.strip('-')
+
+
+def psql(query: str) -> str:
+    """Execute one SQL statement through the psql CLI and return its stdout.
+
+    psql is invoked with -t -A (tuples only, unaligned) and the password is
+    handed over via PGPASSWORD. A non-zero exit status is reported on stderr,
+    but whatever was written to stdout is still returned, stripped.
+    """
+    command = ["psql", "-h", DB_HOST, "-U", DB_USER, DB_NAME, "-t", "-A", "-c", query]
+    child_env = {**os.environ, "PGPASSWORD": DB_PASS}
+    completed = subprocess.run(command, capture_output=True, text=True, env=child_env)
+    if completed.returncode != 0:
+        print(f"  psql error: {completed.stderr.strip()}", file=sys.stderr)
+    return completed.stdout.strip()
+
+
+def fetch_json(url: str) -> dict | None:
+    """GET ``url`` with the script's User-Agent and decode the body as JSON.
+
+    Any failure (network, HTTP status, invalid JSON) is printed and turned
+    into a None return value.
+    """
+    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    try:
+        with urllib.request.urlopen(request, timeout=30) as response:
+            raw = response.read()
+        return json.loads(raw)
+    except Exception as e:
+        print(f"  HTTP error fetching {url}: {e}")
+        return None
+
+
+def get_wikidata_image(qid: str) -> str | None:
+    """Return the file name of the item's P18 image, or None if it has none.
+
+    Asks the Wikidata SPARQL endpoint for one P18 value of ``qid`` and returns
+    the URL-decoded last path segment of the resulting image URL.
+    """
+    params = {
+        "query": f"SELECT ?image WHERE {{ wd:{qid} wdt:P18 ?image }} LIMIT 1",
+        "format": "json",
+    }
+    data = fetch_json("https://query.wikidata.org/sparql?" + urllib.parse.urlencode(params))
+    if not data:
+        return None
+    rows = data.get("results", {}).get("bindings", [])
+    if not rows:
+        return None
+    # URL like http://commons.wikimedia.org/wiki/Special:FilePath/Filename.jpg
+    image_url = rows[0]["image"]["value"]
+    encoded_name = image_url.rsplit("/", 1)[-1]
+    return urllib.parse.unquote(encoded_name)
+
+
+def get_commons_info(filename: str) -> dict | None:
+    """Fetch thumbnail URL, license and artist of a Commons file.
+
+    Queries the Commons API for the file's imageinfo (URL plus extmetadata,
+    with a thumbnail THUMB_WIDTH pixels wide) and inspects only the first
+    page of the response.
+
+    Returns:
+        A dict with keys "thumb_url", "description_url", "license", "artist"
+        and "filename", or None when the request fails, the page is reported
+        as missing ("-1") or it carries no imageinfo.
+    """
+    query = urllib.parse.urlencode({
+        "action": "query",
+        "titles": f"File:{filename}",
+        "prop": "imageinfo",
+        "iiprop": "url|extmetadata",
+        "iiurlwidth": str(THUMB_WIDTH),
+        "format": "json",
+    })
+    data = fetch_json("https://commons.wikimedia.org/w/api.php?" + query)
+    if not data:
+        return None
+
+    pages = data.get("query", {}).get("pages", {})
+    first_page = next(iter(pages.items()), None)
+    if first_page is None:
+        return None
+    page_id, page = first_page
+    if page_id == "-1":
+        return None
+    imageinfo = page.get("imageinfo", [])
+    if not imageinfo:
+        return None
+
+    info = imageinfo[0]
+    meta = info.get("extmetadata", {})
+
+    # The artist field is HTML: drop the tags, then collapse whitespace runs
+    artist_html = meta.get("Artist", {}).get("value", "")
+    artist_text = re.sub(r'<[^>]+>', '', artist_html).strip()
+    artist_text = re.sub(r'\s+', ' ', artist_text)
+
+    return {
+        # Fall back to the full-size URL when no thumbnail URL is present
+        "thumb_url": info.get("thumburl") or info.get("url"),
+        "description_url": info.get("descriptionurl", ""),
+        "license": meta.get("LicenseShortName", {}).get("value", ""),
+        "artist": artist_text,
+        "filename": filename,
+    }
+
+
+def is_license_allowed(license_str: str) -> bool:
+    """Return True if the license short name is one we may redistribute under.
+
+    Accepted: public-domain / PD-* names, CC0, and CC BY / CC BY-SA in any
+    version, plus every spelling listed in ALLOWED_LICENSES. Anything carrying
+    a NonCommercial (NC) or NoDerivatives (ND) element is rejected, as is any
+    name that matches none of the accepted forms.
+
+    Args:
+        license_str: License short name; case and surrounding whitespace are
+            ignored.
+    """
+    normalized = license_str.lower().strip()
+    # Reject NC / ND only when they appear as separate tokens ("by-nc-sa",
+    # "by nd"). A plain substring test also hits ordinary words such as
+    # "Finland" or "Netherlands" and would wrongly reject those PD names.
+    if re.search(r'(?<![a-z])n[cd](?![a-z])', normalized):
+        return False
+    if normalized.startswith(("public domain", "pd")):
+        return True
+    # CC BY or CC BY-SA followed directly by a version number
+    if re.match(r'^cc[- ]?by[- ]?(sa[- ]?)?\d', normalized):
+        return True
+    if re.match(r'^cc[- ]?0', normalized) or normalized == "cc zero":
+        return True
+    # Explicit list last: it covers spellings the patterns above do not,
+    # such as "cc-zero".
+    return normalized in ALLOWED_LICENSES
+
+
+def normalize_license(license_str: str) -> str:
+    """Map a license short name onto the canonical label stored in the DB.
+
+    Public-domain names become "Public domain", CC0 spellings become
+    "CC0 1.0", and CC BY / CC BY-SA names become "CC BY <version>" or
+    "CC BY-SA <version>". Anything else is returned unchanged.
+    """
+    low = license_str.lower().strip()
+
+    if low.startswith("pd") or "public domain" in low:
+        return "Public domain"
+
+    looks_like_cc0 = bool(re.match(r'^cc[- ]?0', low))
+    if looks_like_cc0 or "cc-zero" in low or "cc zero" in low:
+        return "CC0 1.0"
+
+    # BY-SA must be tried before plain BY so the more specific form wins
+    versioned = (
+        (r'^cc[- ]?by[- ]?sa[- ]?(\d+\.?\d*)', "CC BY-SA"),
+        (r'^cc[- ]?by[- ]?(\d+\.?\d*)', "CC BY"),
+    )
+    for pattern, label in versioned:
+        found = re.match(pattern, low)
+        if found:
+            return f"{label} {found.group(1)}"
+
+    return license_str
+
+
+def s3_upload(s3_key: str, data: bytes, content_type: str = "image/jpeg"):
+    """Upload ``data`` to the S3 bucket under ``s3_key`` using the AWS CLI.
+
+    The bytes are staged in a private temporary file, copied with
+    ``aws s3 cp`` against S3_ENDPOINT, and the temporary file is always
+    removed afterwards.
+
+    Args:
+        s3_key: Object key inside S3_BUCKET.
+        data: Raw object contents.
+        content_type: MIME type stored with the object.
+
+    Raises:
+        RuntimeError: if the AWS CLI exits with a non-zero status.
+        OSError: if the temporary file cannot be written or the ``aws``
+            executable cannot be started.
+    """
+    # Local import: tempfile is not among this script's top-level imports.
+    import tempfile
+
+    env = os.environ.copy()
+    env["AWS_ACCESS_KEY_ID"] = S3_ACCESS_KEY
+    env["AWS_SECRET_ACCESS_KEY"] = S3_SECRET_KEY
+    env["AWS_DEFAULT_REGION"] = S3_REGION
+
+    # mkstemp creates a uniquely named file readable only by this user, so
+    # concurrent runs cannot clobber each other and the path cannot be
+    # pre-created by someone else, unlike a fixed name in /tmp.
+    fd, tmp_path = tempfile.mkstemp(prefix="herbapi_upload_")
+    try:
+        with os.fdopen(fd, "wb") as f:
+            f.write(data)
+        result = subprocess.run(
+            [
+                "aws", "s3", "cp", tmp_path,
+                f"s3://{S3_BUCKET}/{s3_key}",
+                "--endpoint-url", S3_ENDPOINT,
+                "--content-type", content_type,
+            ],
+            capture_output=True, text=True, env=env
+        )
+    finally:
+        # Clean up even when writing or launching the CLI raised
+        os.unlink(tmp_path)
+    if result.returncode != 0:
+        raise RuntimeError(f"S3 upload failed: {result.stderr.strip()}")
+
+
+def download_image(url: str) -> bytes | None:
+    """Fetch the raw bytes behind ``url``, or None if the download fails.
+
+    The request carries the script's User-Agent and times out after 60
+    seconds; any error is printed instead of raised.
+    """
+    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    try:
+        with urllib.request.urlopen(request, timeout=60) as response:
+            payload = response.read()
+    except Exception as e:
+        print(f"  Download error: {e}")
+        return None
+    return payload
+
+
+def main():
+    """Import one Commons image per species that has a Wikidata QID.
+
+    For every species with a QID and no image row yet: look up the item's P18
+    image, fetch its Commons metadata, skip it unless the license is allowed,
+    download the thumbnail, upload it to S3 and insert a row into the images
+    table. Progress and a final tally are printed to stdout.
+    """
+    # 1. Get species
+    rows = psql(
+        "SELECT id, name_scientific, wikidata_qid FROM species "
+        "WHERE wikidata_qid IS NOT NULL AND wikidata_qid <> '' "
+        "ORDER BY name_scientific"
+    )
+    if not rows:
+        print("No species with wikidata_qid found.")
+        return
+
+    # psql -A separates columns with "|"; lines that do not split into exactly
+    # three fields are ignored.
+    species_list = []
+    for line in rows.split("\n"):
+        parts = line.split("|")
+        if len(parts) == 3:
+            species_list.append({
+                "id": parts[0],
+                "name": parts[1],
+                "qid": parts[2],
+            })
+
+    print(f"Found {len(species_list)} species with Wikidata QIDs.")
+
+    # 2. Get existing images
+    existing = set()
+    existing_rows = psql("SELECT entity_id FROM images WHERE entity_type = 'species'")
+    if existing_rows:
+        for line in existing_rows.split("\n"):
+            line = line.strip()
+            if line:
+                existing.add(line)
+
+    print(f"Found {len(existing)} species that already have images.")
+
+    imported = 0
+    skipped_existing = 0
+    skipped_no_image = 0
+    skipped_license = 0
+    skipped_download = 0
+    errors = 0
+
+    for i, sp in enumerate(species_list):
+        name = sp["name"]
+        qid = sp["qid"]
+        sp_id = sp["id"]
+        slug = slugify(name)
+
+        print(f"\n[{i+1}/{len(species_list)}] {name} ({qid})")
+
+        if sp_id in existing:
+            print("  Already has image, skipping.")
+            skipped_existing += 1
+            continue
+
+        # Query Wikidata for image
+        time.sleep(REQUEST_DELAY)
+        filename = get_wikidata_image(qid)
+        if not filename:
+            print("  No image on Wikidata.")
+            skipped_no_image += 1
+            continue
+
+        # Get Commons info
+        time.sleep(REQUEST_DELAY)
+        info = get_commons_info(filename)
+        if not info:
+            print(f"  Could not get Commons info for {filename}")
+            skipped_no_image += 1
+            continue
+
+        # Check license
+        raw_license = info["license"]
+        if not is_license_allowed(raw_license):
+            print(f"  License not allowed: {raw_license}")
+            skipped_license += 1
+            continue
+
+        norm_license = normalize_license(raw_license)
+        artist = info["artist"]
+        thumb_url = info["thumb_url"]
+        desc_url = info["description_url"]
+
+        # The Commons response may carry neither a thumbnail nor a file URL;
+        # without this guard the slicing below would raise on None and abort
+        # the whole run.
+        if not thumb_url:
+            print("  No downloadable URL in Commons info.")
+            skipped_download += 1
+            continue
+
+        print(f"  License: {raw_license} -> {norm_license}")
+        print(f"  Artist: {artist[:80]}")
+        print(f"  Thumbnail: {thumb_url[:100]}...")
+
+        # Download image
+        time.sleep(REQUEST_DELAY)
+        image_data = download_image(thumb_url)
+        if not image_data:
+            print("  Failed to download image.")
+            skipped_download += 1
+            continue
+
+        print(f"  Downloaded {len(image_data)} bytes")
+
+        # Determine file extension from URL (defaults to jpg)
+        ext = "jpg"
+        if ".png" in thumb_url.lower():
+            ext = "png"
+        elif ".svg" in thumb_url.lower():
+            ext = "svg"
+        elif ".gif" in thumb_url.lower():
+            ext = "gif"
+
+        s3_key = f"species/{slug}.{ext}"
+        content_type = {
+            "jpg": "image/jpeg",
+            "png": "image/png",
+            "svg": "image/svg+xml",
+            "gif": "image/gif",
+        }.get(ext, "image/jpeg")
+
+        # Upload to S3
+        try:
+            s3_upload(s3_key, image_data, content_type)
+            print(f"  Uploaded to s3://{S3_BUCKET}/{s3_key}")
+        except RuntimeError as e:
+            print(f"  S3 upload failed: {e}")
+            errors += 1
+            continue
+
+        # Insert into database
+        caption = f"Photo: {artist}" if artist else "Wikimedia Commons"
+        # Escape single quotes for SQL
+        caption_esc = caption.replace("'", "''")
+        desc_url_esc = desc_url.replace("'", "''")
+        norm_license_esc = norm_license.replace("'", "''")
+        s3_key_esc = s3_key.replace("'", "''")
+
+        # RETURNING id makes a successful insert print the new row's id, so an
+        # empty result means the statement failed (psql() has already written
+        # the error to stderr).
+        insert_sql = (
+            f"INSERT INTO images (id, entity_type, entity_id, s3_key, caption, source_url, license, is_primary) "
+            f"VALUES (gen_random_uuid(), 'species', '{sp_id}', '{s3_key_esc}', "
+            f"'{caption_esc}', '{desc_url_esc}', '{norm_license_esc}', true) "
+            f"RETURNING id"
+        )
+
+        inserted = psql(insert_sql)
+        if not inserted:
+            # Do not count a failed insert as an import
+            print("  Failed to insert into images table.")
+            errors += 1
+            continue
+        print("  Inserted into images table.")
+        imported += 1
+
+    print(f"\n{'='*60}")
+    print("DONE!")
+    print(f"  Imported:          {imported}")
+    print(f"  Skipped (existing):{skipped_existing}")
+    print(f"  Skipped (no image):{skipped_no_image}")
+    print(f"  Skipped (license): {skipped_license}")
+    print(f"  Skipped (download):{skipped_download}")
+    print(f"  Errors:            {errors}")
+    print(f"  Total processed:   {len(species_list)}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""Import CC-licensed plant images from Wikimedia Commons into HerbAPI."""
+
+import hashlib
+import json
+import os
+import re
+import subprocess
+import sys
+import time
+import urllib.parse
+import urllib.request
+
+# Config
+# NOTE(review): the database password and the S3 keys below are committed in
+# plain text. They should be loaded from the environment or a secret store,
+# and the committed values rotated.
+# Connection parameters handed to the psql CLI by psql().
+DB_HOST = "10.31.3.90"
+DB_USER = "herbapi"
+DB_PASS = "_6Qo_jEFhE9LZOEbwLynEWoLbc6B4Ipj"
+DB_NAME = "herbapi"
+# Target bucket and endpoint for the `aws s3 cp` upload in process_species().
+# NOTE(review): port 3900 and region "garage" suggest a Garage S3-compatible
+# store rather than AWS itself -- confirm.
+S3_BUCKET = "herbapi"
+S3_ENDPOINT = "http://10.31.3.170:3900"
+# User-Agent header sent with every request made through fetch_url().
+USER_AGENT = "HerbAPI/1.0 (https://herbapi.naturalised.at; florian.berthold@sub-net.at)"
+# Seconds to sleep after each remote request.
+REQUEST_DELAY = 0.3
+
+# AWS env for subprocess calls
+# Copy of the current environment; the three keys listed after **os.environ
+# override any value of the same name already present in the environment.
+AWS_ENV = {
+    **os.environ,
+    "AWS_ACCESS_KEY_ID": "GK1a89859373a6ac56bf11958f",
+    "AWS_SECRET_ACCESS_KEY": "bea45a333b5c7b1efdd7466bdbcac54d8642fa19f0c617ca2fd64bd07951b899",
+    "AWS_DEFAULT_REGION": "garage",
+}
+
+# Stats
+# Module-level outcome counters: incremented by process_species(), printed by main().
+stats = {"total": 0, "imported": 0, "no_p18": 0, "bad_license": 0, "download_fail": 0, "upload_fail": 0, "errors": 0}
+
+
+def fetch_url(url):
+    """Fetch URL with custom User-Agent."""
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return resp.read()
+
+
+def fetch_json(url):
+    """Fetch URL and parse JSON."""
+    return json.loads(fetch_url(url))
+
+
+def psql(sql):
+    """Run psql command and return output."""
+    result = subprocess.run(
+        ["psql", "-h", DB_HOST, "-U", DB_USER, DB_NAME, "-t", "-A", "-c", sql],
+        capture_output=True, text=True,
+        env={**os.environ, "PGPASSWORD": DB_PASS},
+    )
+    return result.stdout.strip()
+
+
+def is_license_allowed(license_str):
+    """Check if license is CC0/CC-BY/CC-BY-SA or Public Domain.
+    Wikimedia returns things like 'CC BY-SA 3.0', 'CC BY 4.0', 'CC0', 'Public domain'.
+    We allow CC0, Public Domain, CC BY (any version), CC BY-SA (any version).
+    We reject: GFDL, CC BY-NC, CC BY-ND, CC BY-NC-SA, CC BY-NC-ND, FAL, Copyrighted free use.
+    """
+    if not license_str:
+        return False
+    ls = license_str.lower().strip()
+
+    # Reject NC and ND explicitly first
+    if "nc" in ls.split() or "-nc" in ls or "nd" in ls.split() or "-nd" in ls:
+        return False
+
+    # Public domain / CC0
+    if ls in ("cc0", "cc-zero", "cc0 1.0", "cc0 1.0 universal"):
+        return True
+    if "public domain" in ls or ls.startswith("pd"):
+        return True
+
+    # CC BY-SA (any version, any jurisdiction)
+    if re.match(r"cc\s+by-sa\b", ls):
+        return True
+
+    # CC BY (any version, any jurisdiction) -- but NOT CC BY-NC or CC BY-ND
+    if re.match(r"cc\s+by\b", ls):
+        return True
+
+    return False
+
+
+def get_wikidata_image(qid):
+    """Query Wikidata SPARQL for P18 image filename."""
+    sparql = f"SELECT ?image WHERE {{ wd:{qid} wdt:P18 ?image }} LIMIT 1"
+    url = f"https://query.wikidata.org/sparql?query={urllib.parse.quote(sparql)}&format=json"
+    data = fetch_json(url)
+    bindings = data.get("results", {}).get("bindings", [])
+    if not bindings:
+        return None
+    image_url = bindings[0]["image"]["value"]
+    # Extract filename from commons URL
+    filename = urllib.parse.unquote(image_url.split("/")[-1])
+    return filename
+
+
+def get_commons_info(filename):
+    """Get image info from Commons API: license, artist, thumbnail URL."""
+    title = f"File:{filename}"
+    url = (
+        f"https://commons.wikimedia.org/w/api.php?action=query"
+        f"&titles={urllib.parse.quote(title)}"
+        f"&prop=imageinfo&iiprop=url|extmetadata"
+        f"&iiurlwidth=800&format=json"
+    )
+    data = fetch_json(url)
+    pages = data.get("query", {}).get("pages", {})
+    for page_id, page in pages.items():
+        if page_id == "-1":
+            return None
+        imageinfo = page.get("imageinfo", [{}])[0]
+        meta = imageinfo.get("extmetadata", {})
+
+        license_short = meta.get("LicenseShortName", {}).get("value", "").strip()
+        artist_html = meta.get("Artist", {}).get("value", "")
+
+        # Clean up artist: strip HTML tags
+        artist = re.sub(r"<[^>]+>", "", artist_html).strip()
+        # Collapse whitespace
+        artist = re.sub(r"\s+", " ", artist)
+        if len(artist) > 120:
+            artist = artist[:117] + "..."
+
+        # Use the API-provided thumbnail URL (iiurlwidth=800)
+        thumb_url = imageinfo.get("thumburl", "")
+        # Also get the description URL
+        desc_url = imageinfo.get("descriptionurl", "")
+
+        return {
+            "license": license_short,
+            "artist": artist,
+            "thumb_url": thumb_url,
+            "desc_url": desc_url,
+            "filename": filename,
+        }
+    return None
+
+
+def process_species(species_id, slug, name_sci, qid):
+    """Process a single species: fetch image from Wikidata/Commons, upload to S3, insert to DB."""
+    stats["total"] += 1
+
+    # Step 1: Get image filename from Wikidata
+    try:
+        filename = get_wikidata_image(qid)
+    except Exception as e:
+        print(f"  ERROR querying Wikidata for {qid}: {e}")
+        stats["errors"] += 1
+        return False
+    time.sleep(REQUEST_DELAY)
+
+    if not filename:
+        print(f"  No P18 image for {qid}")
+        stats["no_p18"] += 1
+        return False
+
+    # Step 2: Get Commons info (license, artist, thumb URL)
+    try:
+        info = get_commons_info(filename)
+    except Exception as e:
+        print(f"  ERROR querying Commons for {filename}: {e}")
+        stats["errors"] += 1
+        return False
+    time.sleep(REQUEST_DELAY)
+
+    if not info:
+        print(f"  No Commons info for {filename}")
+        stats["errors"] += 1
+        return False
+
+    # Step 3: Check license
+    if not is_license_allowed(info["license"]):
+        print(f"  Bad license: {info['license']} for {filename}")
+        stats["bad_license"] += 1
+        return False
+
+    # Step 4: Download thumbnail using API-provided URL
+    thumb_url = info["thumb_url"]
+    if not thumb_url:
+        print(f"  No thumbnail URL available for {filename}")
+        stats["download_fail"] += 1
+        return False
+
+    # Determine file extension from thumbnail URL
+    ext = "jpg"
+    if ".png" in thumb_url.lower().split("?")[0].split("/")[-1]:
+        ext = "png"
+    elif ".gif" in thumb_url.lower().split("?")[0].split("/")[-1]:
+        ext = "gif"
+
+    tmp_path = f"/tmp/herbapi_img_{slug}.{ext}"
+    try:
+        img_data = fetch_url(thumb_url)
+        with open(tmp_path, "wb") as f:
+            f.write(img_data)
+    except Exception as e:
+        print(f"  ERROR downloading {thumb_url}: {e}")
+        stats["download_fail"] += 1
+        return False
+    time.sleep(REQUEST_DELAY)
+
+    # Step 5: Upload to S3
+    s3_key = f"species/{slug}.{ext}"
+    try:
+        result = subprocess.run(
+            ["aws", "s3", "cp", tmp_path, f"s3://{S3_BUCKET}/{s3_key}", "--endpoint-url", S3_ENDPOINT],
+            capture_output=True, text=True, env=AWS_ENV, timeout=60,
+        )
+        if result.returncode != 0:
+            print(f"  S3 upload failed: {result.stderr}")
+            stats["upload_fail"] += 1
+            return False
+    except Exception as e:
+        print(f"  ERROR uploading to S3: {e}")
+        stats["upload_fail"] += 1
+        return False
+    finally:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+
+    # Step 6: Insert into DB
+    caption = f"Photo: {info['artist']}" if info["artist"] else ""
+    caption_sql = caption.replace("'", "''")
+    source_url = info["desc_url"] or f"https://commons.wikimedia.org/wiki/File:{urllib.parse.quote(filename)}"
+    source_url_sql = source_url.replace("'", "''")
+    license_sql = info["license"].replace("'", "''")
+
+    sql = (
+        f"INSERT INTO images (entity_type, entity_id, s3_key, caption, source_url, license, is_primary) "
+        f"VALUES ('species', '{species_id}', '{s3_key}', '{caption_sql}', '{source_url_sql}', '{license_sql}', true);"
+    )
+    try:
+        psql(sql)
+    except Exception as e:
+        print(f"  ERROR inserting to DB: {e}")
+        stats["errors"] += 1
+        return False
+
+    stats["imported"] += 1
+    return True
+
+
+def main():
+    # Get species without images
+    rows = psql(
+        "SELECT s.id, s.slug, s.name_scientific, s.wikidata_qid "
+        "FROM species s "
+        "LEFT JOIN images i ON i.entity_type = 'species' AND i.entity_id = s.id "
+        "WHERE s.wikidata_qid IS NOT NULL AND s.wikidata_qid != '' AND i.id IS NULL "
+        "ORDER BY s.name_scientific;"
+    )
+    if not rows:
+        print("No species need images.")
+        return
+
+    species_list = []
+    for line in rows.split("\n"):
+        parts = line.strip().split("|")
+        if len(parts) == 4:
+            species_list.append(parts)
+
+    print(f"Processing {len(species_list)} species...\n")
+
+    for i, (sid, slug, name_sci, qid) in enumerate(species_list, 1):
+        print(f"[{i}/{len(species_list)}] {name_sci} ({qid})")
+        ok = process_species(sid, slug, name_sci, qid)
+        if ok:
+            print(f"  OK - imported")
+
+    print(f"\n{'='*50}")
+    print(f"RESULTS:")
+    print(f"  Total species processed: {stats['total']}")
+    print(f"  Successfully imported:   {stats['imported']}")
+    print(f"  No P18 image:            {stats['no_p18']}")
+    print(f"  Bad license (NC/ND/GFDL):{stats['bad_license']}")
+    print(f"  Download failures:       {stats['download_fail']}")
+    print(f"  Upload failures:         {stats['upload_fail']}")
+    print(f"  Other errors:            {stats['errors']}")
+
+
+# Run the import only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Seed HerbAPI with common permaculture plant families and species via GBIF + API."""
+import json, urllib.request, urllib.parse, time, sys
+
+# Base URL of the HerbAPI endpoint that api_post() sends its requests to.
+API = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Bearer token sent in the Authorization header of every api_post() request.
+# NOTE(review): this secret is committed in plain text; it should be loaded
+# from the environment or a secret store, and the committed value rotated.
+TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+# Base URL of the GBIF API queried by gbif_de_name().
+GBIF = "https://api.gbif.org/v1"
+
+def api_post(path, data):
+    req = urllib.request.Request(f"{API}{path}", 
+        data=json.dumps(data).encode(),
+        headers={"Content-Type": "application/json", "Authorization": f"Bearer {TOKEN}"})
+    try:
+        resp = urllib.request.urlopen(req)
+        return json.loads(resp.read())
+    except urllib.error.HTTPError as e:
+        print(f"  ERR {e.code}: {e.read().decode()[:120]}", file=sys.stderr)
+        return None
+
+def gbif_de_name(name):
+    """Get German common name from GBIF."""
+    url = f"{GBIF}/species/match?name={urllib.parse.quote(name)}"
+    try:
+        match = json.loads(urllib.request.urlopen(url).read())
+        if not match.get("usageKey"): return None
+        url2 = f"{GBIF}/species/{match['usageKey']}/vernacularNames?limit=100"
+        data = json.loads(urllib.request.urlopen(url2).read())
+        for r in data.get("results", []):
+            if r.get("language") == "deu":
+                return r["vernacularName"]
+    except: pass
+    return None
+
+# Plant families to create: (scientific name, German name, English name).
+# Each tuple becomes one POST to /families; the scientific name is also the
+# key under which the created id is stored in family_map.
+FAMILIES = [
+    ("Fabaceae", "Hülsenfrüchtler", "Legumes"),
+    ("Rosaceae", "Rosengewächse", "Rose family"),
+    ("Brassicaceae", "Kreuzblütler", "Cabbage family"),
+    ("Apiaceae", "Doldenblütler", "Carrot family"),
+    ("Lamiaceae", "Lippenblütler", "Mint family"),
+    ("Asteraceae", "Korbblütler", "Daisy family"),
+    ("Solanaceae", "Nachtschattengewächse", "Nightshade family"),
+    ("Cucurbitaceae", "Kürbisgewächse", "Gourd family"),
+    ("Poaceae", "Süßgräser", "Grass family"),
+    ("Amaryllidaceae", "Amaryllisgewächse", "Amaryllis family"),
+    ("Boraginaceae", "Raublattgewächse", "Borage family"),
+    ("Adoxaceae", "Moschuskrautgewächse", "Moschatel family"),
+    ("Betulaceae", "Birkengewächse", "Birch family"),
+    ("Fagaceae", "Buchengewächse", "Beech family"),
+    ("Juglandaceae", "Walnussgewächse", "Walnut family"),
+    ("Caprifoliaceae", "Geißblattgewächse", "Honeysuckle family"),
+    ("Grossulariaceae", "Stachelbeergewächse", "Gooseberry family"),
+    ("Ericaceae", "Heidekrautgewächse", "Heath family"),
+    ("Moraceae", "Maulbeergewächse", "Mulberry family"),
+    ("Urticaceae", "Brennnesselgewächse", "Nettle family"),
+    ("Malvaceae", "Malvengewächse", "Mallow family"),
+    ("Polygonaceae", "Knöterichgewächse", "Buckwheat family"),
+    ("Chenopodiaceae", "Gänsefußgewächse", "Goosefoot family"),
+    ("Asparagaceae", "Spargelgewächse", "Asparagus family"),
+    ("Plantaginaceae", "Wegerichgewächse", "Plantain family"),
+]
+
+# Species to create: (scientific name, family scientific name, extra fields).
+# The family name is looked up in family_map, so it has to match an entry of
+# FAMILIES; the extra dict is merged into the /species POST payload as-is.
+SPECIES = [
+    ("Sambucus nigra", "Adoxaceae", {"plant_layer": "understory", "nitrogen_fixer": False, "food_uses": "Flowers (cordial, fritters), berries (cooked — syrup, wine)", "medicinal_uses": "Cold/flu remedy, immune support, diaphoretic", "succession_stage": "secondary"}),
+    ("Symphytum officinale", "Boraginaceae", {"plant_layer": "herbaceous", "dynamic_accumulator": True, "food_uses": "Young leaves (limited, contains pyrrolizidine alkaloids)", "medicinal_uses": "Wound healing, bone knitting (external only)", "other_uses": "Dynamic accumulator, mulch/compost activator, animal fodder"}),
+    ("Trifolium pratense", "Fabaceae", {"plant_layer": "ground_cover", "nitrogen_fixer": True, "food_uses": "Flowers, young leaves", "medicinal_uses": "Respiratory, menopausal symptoms", "other_uses": "Green manure, nitrogen fixer, bee forage"}),
+    ("Corylus avellana", "Betulaceae", {"plant_layer": "shrub", "food_uses": "Nuts", "other_uses": "Coppice wood, hedging, wildlife habitat", "succession_stage": "secondary"}),
+    ("Ribes nigrum", "Grossulariaceae", {"plant_layer": "shrub", "food_uses": "Berries, leaves (tea)", "medicinal_uses": "High vitamin C, anti-inflammatory"}),
+    ("Rubus idaeus", "Rosaceae", {"plant_layer": "shrub", "food_uses": "Berries, leaves (tea)", "medicinal_uses": "Leaf tea for pregnancy/digestion", "succession_stage": "pioneer"}),
+    ("Urtica dioica", "Urticaceae", {"plant_layer": "herbaceous", "dynamic_accumulator": True, "food_uses": "Young leaves, seeds", "medicinal_uses": "Anti-inflammatory, prostate, allergies", "other_uses": "Compost activator, fibre, liquid fertiliser"}),
+    ("Borago officinalis", "Boraginaceae", {"plant_layer": "herbaceous", "food_uses": "Flowers, young leaves", "other_uses": "Bee forage, companion plant", "attracts_pollinators": True}),
+    ("Lavandula angustifolia", "Lamiaceae", {"plant_layer": "herbaceous", "food_uses": "Flowers", "medicinal_uses": "Calming, antiseptic, sleep aid", "other_uses": "Bee forage, pest repellent, fragrance", "attracts_pollinators": True}),
+    ("Malus domestica", "Rosaceae", {"plant_layer": "canopy", "food_uses": "Fruit", "pollination_type": "Insect-pollinated"}),
+    ("Prunus domestica", "Rosaceae", {"plant_layer": "canopy", "food_uses": "Fruit", "pollination_type": "Insect-pollinated"}),
+    ("Juglans regia", "Juglandaceae", {"plant_layer": "canopy", "food_uses": "Nuts", "other_uses": "Timber, dye", "allelopathic": True}),
+    ("Fragaria vesca", "Rosaceae", {"plant_layer": "ground_cover", "food_uses": "Berries, leaves (tea)", "ground_cover_quality": "Good"}),
+    ("Allium ursinum", "Amaryllidaceae", {"plant_layer": "ground_cover", "food_uses": "Leaves, flowers, bulbs", "medicinal_uses": "Antimicrobial, blood pressure"}),
+    ("Phacelia tanacetifolia", "Boraginaceae", {"plant_layer": "herbaceous", "other_uses": "Green manure, bee forage, cover crop", "attracts_pollinators": True}),
+    ("Lupinus polyphyllus", "Fabaceae", {"plant_layer": "herbaceous", "nitrogen_fixer": True, "other_uses": "Nitrogen fixer, green manure, ornamental"}),
+    ("Vicia faba", "Fabaceae", {"plant_layer": "herbaceous", "nitrogen_fixer": True, "food_uses": "Beans", "other_uses": "Nitrogen fixer, green manure"}),
+    ("Solanum lycopersicum", "Solanaceae", {"plant_layer": "herbaceous", "food_uses": "Fruit"}),
+    ("Cucurbita pepo", "Cucurbitaceae", {"plant_layer": "ground_cover", "food_uses": "Fruit, seeds, flowers"}),
+    ("Beta vulgaris", "Chenopodiaceae", {"plant_layer": "herbaceous", "food_uses": "Roots, leaves"}),
+    ("Daucus carota", "Apiaceae", {"plant_layer": "herbaceous", "food_uses": "Root"}),
+    ("Calendula officinalis", "Asteraceae", {"plant_layer": "herbaceous", "food_uses": "Flowers", "medicinal_uses": "Wound healing, anti-inflammatory, skin care", "other_uses": "Companion plant, pest deterrent", "attracts_pollinators": True}),
+    ("Melissa officinalis", "Lamiaceae", {"plant_layer": "herbaceous", "food_uses": "Leaves", "medicinal_uses": "Calming, antiviral, digestive", "attracts_pollinators": True}),
+    ("Salvia officinalis", "Lamiaceae", {"plant_layer": "herbaceous", "food_uses": "Leaves", "medicinal_uses": "Sore throat, digestive, antimicrobial"}),
+    ("Thymus vulgaris", "Lamiaceae", {"plant_layer": "ground_cover", "food_uses": "Leaves", "medicinal_uses": "Respiratory, antimicrobial, cough"}),
+]
+
+# Create families
+print("=== Creating families ===")
+family_map = {}
+for sci, de, en in FAMILIES:
+    r = api_post("/families", {"name_scientific": sci, "name_de": de, "name_en": en})
+    if r:
+        family_map[sci] = r["id"]
+        print(f"  ✓ {sci}")
+    time.sleep(0.05)
+print(f"Created {len(family_map)} families\n")
+
+# Create species
+print("=== Creating species (with GBIF German names) ===")
+created = 0
+for sci_name, family_sci, extra in SPECIES:
+    fam_id = family_map.get(family_sci)
+    if not fam_id:
+        print(f"  ✗ {sci_name} — family {family_sci} missing")
+        continue
+    de_name = gbif_de_name(sci_name)
+    data = {"name_scientific": sci_name, "name_de": de_name or "", "name_en": "", "family_id": fam_id, **extra}
+    r = api_post("/species", data)
+    if r:
+        created += 1
+        print(f"  ✓ {sci_name} → {de_name or '(no DE name)'}")
+    time.sleep(0.15)
+print(f"Created {created} species\n")
+
+# Create suppliers  
+print("=== Creating suppliers ===")
+for name, url, country, organic, demeter, notes in [
+    ("Reinsaat", "https://www.reinsaat.at", "AT", True, True, "Austrian biodynamic seed producer, open-pollinated varieties"),
+    ("Magic Garden Seeds", "https://www.magicgardenseeds.com", "DE", False, False, "Specialist seed shop with rare and heritage varieties"),
+]:
+    r = api_post("/suppliers", {"name": name, "url": url, "country": country, "is_organic": organic, "is_demeter": demeter, "notes": notes})
+    if r: print(f"  ✓ {name}")
+print("\nDone!")
@@ -0,0 +1,514 @@
+#!/usr/bin/env python3
+"""
+Scrape Arche Noah seed catalog and import cultivars into HerbAPI.
+
+Uses the shop.arche-noah.at Angular SPA's backend API (ACM) to fetch
+product listings and details, then creates cultivars in HerbAPI matched
+to existing species.
+"""
+
+import json
+import re
+import time
+import urllib.request
+import urllib.error
+import urllib.parse
+import sys
+from datetime import datetime, timezone
+
+# --- Configuration -----------------------------------------------------------
+
+# Base URL and bearer token used by herbapi_request().
+# NOTE(review): the token is committed in plain text; it should be loaded from
+# the environment or a secret store, and the committed value rotated.
+HERBAPI_BASE = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+HERBAPI_TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+
+# Base URL of the shop backend; endpoint names are appended directly to it.
+SHOP_BASE = "https://shop.arche-noah.at/ACM/api/"
+# User-Agent header sent with every shop request (a desktop Chrome string).
+SHOP_UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+
+REQUEST_DELAY = 0.5  # seconds between requests
+
+# Only import products from these Arche Noah article lines (their own seeds).
+# Compared for exact equality with each product's "articleLineDesc" field.
+ARCHE_NOAH_LINES = {
+    "Bio-Saatgut von ARCHE NOAH",
+    "Kostbarkeiten aus dem ARCHE NOAH Samenarchiv",
+}
+
+# Search terms to discover all seed products across the shop.
+# Repeated entries (e.g. "Bohne", "Mangold") are harmless:
+# fetch_all_arche_noah_products() skips terms it has already searched.
+SEARCH_TERMS = [
+    "Tomate", "Paradeiser", "Paprika", "Chili", "Gurke", "Kürbis", "Zucchini",
+    "Bohne", "Erbse", "Fisole", "Salat", "Kohl", "Kraut", "Melanzani", "Aubergine",
+    "Mais", "Zwiebel", "Lauch", "Karotte", "Rübe", "Basilikum", "Kräuter",
+    "Blume", "Sonnenblume", "Dill", "Petersilie", "Spinat", "Mangold",
+    "Melone", "Fenchel", "Sellerie", "Rettich", "Radieschen",
+    "Koriander", "Oregano", "Thymian", "Salbei", "Rosmarin", "Minze",
+    "Ringelblume", "Kornblume", "Kapuzinerkresse", "Senf",
+    "Erdbeere", "Lupine", "Luzerne", "Klee", "Bohne", "Mohn",
+    "Radicchio", "Rucola", "Endivie", "Artischocke", "Pastinake",
+    "Schnittlauch", "Knoblauch", "Bärlauch", "Wermut",
+    "Baldrian", "Johanniskraut", "Sonnenhut", "Beinwell",
+    "Studentenblume", "Tagetes", "Phacelia", "Buchweizen",
+    "Rote Bete", "Rote Rübe", "Mangold", "Melde",
+    "Kohlrabi", "Brokkoli", "Blumenkohl", "Rosenkohl", "Wirsing",
+    "Pflücksalat", "Kopfsalat", "Feldsalat", "Asiasalat",
+    "Zuckermais", "Popcorn",
+]
+
+# --- Helpers -----------------------------------------------------------------
+
+def herbapi_request(method, path, data=None):
+    """Make a request to HerbAPI."""
+    url = f"{HERBAPI_BASE}/{path}"
+    body = json.dumps(data).encode() if data else None
+    req = urllib.request.Request(url, data=body, method=method, headers={
+        "Authorization": f"Bearer {HERBAPI_TOKEN}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    })
+    try:
+        resp = urllib.request.urlopen(req, timeout=30)
+        raw = resp.read().decode("utf-8")
+        return json.loads(raw) if raw.strip() else None
+    except urllib.error.HTTPError as e:
+        body = e.read().decode("utf-8", errors="replace")
+        print(f"  HerbAPI {method} {path}: HTTP {e.code} - {body[:200]}", file=sys.stderr)
+        raise
+
+
+def shop_create_session():
+    """Create an anonymous session on the Arche Noah shop."""
+    req = urllib.request.Request(
+        SHOP_BASE + "webshop/createanonymoususer",
+        data=json.dumps({}).encode(),
+        headers={
+            "User-Agent": SHOP_UA,
+            "Content-Type": "application/json",
+            "Origin": "https://shop.arche-noah.at",
+            "Referer": "https://shop.arche-noah.at/",
+        },
+    )
+    resp = urllib.request.urlopen(req, timeout=15)
+    cookie = resp.headers.get("Set-Cookie", "")
+    session = cookie.split("JSESSIONID=")[1].split(";")[0] if "JSESSIONID=" in cookie else ""
+    if not session:
+        raise RuntimeError("Failed to get shop session")
+    return session
+
+
+def shop_request(session, endpoint, payload):
+    """Make a POST request to the shop API."""
+    req = urllib.request.Request(
+        SHOP_BASE + endpoint,
+        data=json.dumps(payload).encode(),
+        headers={
+            "User-Agent": SHOP_UA,
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "Cookie": f"JSESSIONID={session}",
+            "Origin": "https://shop.arche-noah.at",
+            "Referer": "https://shop.arche-noah.at/",
+        },
+    )
+    resp = urllib.request.urlopen(req, timeout=30)
+    raw = resp.read().decode("utf-8")
+    return json.loads(raw) if raw.strip() else None
+
+
+def extract_latin_name(detail_headline3):
+    """Extract the Latin/botanical name from the product detail headline3 field."""
+    if not detail_headline3:
+        return None
+    # Remove HTML tags
+    text = re.sub(r"<[^>]+>", "", detail_headline3).strip()
+    # Remove "Hier geht es zu unseren..." trailing text
+    text = text.split("Hier geht")[0].strip()
+    # Should be something like "Solanum lycopersicum" or "Capsicum annuum"
+    if text and re.match(r"^[A-Z][a-z]+ [a-z]", text):
+        return text
+    return None
+
+
+def match_species(latin_name, species_by_scientific):
+    """
+    Match a Latin name to a species, handling subspecies/variety suffixes.
+    E.g., "Phaseolus vulgaris var. nanus" should match "Phaseolus vulgaris".
+    Also handles "subsp.", "convar.", "f." qualifiers.
+    """
+    if not latin_name:
+        return None
+
+    normalized = latin_name.strip().lower()
+
+    # Direct match
+    species = species_by_scientific.get(normalized)
+    if species:
+        return species
+
+    # Strip subspecies/variety/convar/forma qualifiers and try genus + species only
+    # Pattern: "Genus species [var.|subsp.|convar.|f.|ssp.] ..."
+    m = re.match(r"^([A-Za-z]+ [a-z]+)", normalized)
+    if m:
+        base = m.group(1).strip()
+        species = species_by_scientific.get(base)
+        if species:
+            return species
+
+    return None
+
+
+def extract_cultivar_name(product_name):
+    """
+    Extract the cultivar/variety name from the product name.
+    Format examples:
+      "Salatparadeiser 'Naama' HG026" -> "Naama"
+      "Cocktailparadeiser 'Golden Perfection' TO019" -> "Golden Perfection"
+      "Buschbohne 'Marmorierter Mond' HG055" -> "Marmorierter Mond"
+    """
+    # Try to extract name in quotes (various quote styles)
+    m = re.search(r"['\u2018\u2019`\u00b4]+([^'\u2018\u2019`\u00b4]+)['\u2018\u2019`\u00b4]+", product_name)
+    if m:
+        return m.group(1).strip()
+    # Fallback: remove the article number suffix and type prefix
+    # Remove trailing article number like HG026, TO019, etc.
+    name = re.sub(r"\s+[A-Z]{1,3}\d{2,4}\s*$", "", product_name).strip()
+    # Remove common prefixes like "Salatparadeiser", "Buschbohne", etc.
+    # Just return the full cleaned name
+    return name
+
+
+def parse_pack_info(unit_desc):
+    """
+    Parse pack size info from unitDesc like '20-30 Korn' or '2g'.
+    Returns (pack_size, pack_unit) or (None, None).
+    """
+    if not unit_desc:
+        return None, None
+    # "20-30 Korn" -> take the lower bound
+    m = re.match(r"(\d+)(?:-\d+)?\s*(\w+)", unit_desc)
+    if m:
+        return float(m.group(1)), m.group(2)
+    return None, None
+
+
+# --- Main scraping logic -----------------------------------------------------
+
+def fetch_all_arche_noah_products(session):
+    """Search the shop API to find all Arche Noah seed products."""
+    all_products = {}
+    seen_terms = set()
+
+    for term in SEARCH_TERMS:
+        if term.lower() in seen_terms:
+            continue
+        seen_terms.add(term.lower())
+
+        offset = 0
+        while True:
+            payload = {
+                "searchCriteria": term,
+                "startIndex": offset,
+                "numDataSets": 200,
+                "allowAllProducts": False,
+            }
+            try:
+                data = shop_request(session, "webshop/getproducts", payload)
+            except Exception as e:
+                print(f"  Search '{term}' offset={offset} failed: {e}", file=sys.stderr)
+                break
+
+            if not data:
+                break
+
+            new_count = 0
+            for p in data:
+                if p["sid"] not in all_products:
+                    all_products[p["sid"]] = p
+                    new_count += 1
+
+            if len(data) < 200:
+                break
+            offset += len(data)
+            time.sleep(REQUEST_DELAY)
+
+        time.sleep(REQUEST_DELAY)
+
+    # Filter to Arche Noah's own seed products only
+    an_products = {
+        sid: p for sid, p in all_products.items()
+        if (p.get("articleLineDesc") or "") in ARCHE_NOAH_LINES
+    }
+
+    print(f"Found {len(all_products)} total products, {len(an_products)} Arche Noah seed products")
+    return an_products
+
+
+def fetch_product_details(session, products):
+    """Fetch detailed info (Latin names) for each product."""
+    details = {}
+    total = len(products)
+    for i, (sid, product) in enumerate(products.items()):
+        try:
+            detail = shop_request(session, "webshop/getproductdetail", {"productSid": sid})
+            if detail:
+                details[sid] = detail
+        except Exception as e:
+            print(f"  Detail for {sid} failed: {e}", file=sys.stderr)
+
+        if (i + 1) % 20 == 0:
+            print(f"  Fetched details: {i + 1}/{total}")
+        time.sleep(REQUEST_DELAY)
+
+    print(f"Fetched {len(details)} product details")
+    return details
+
+
+def load_herbapi_species():
+    """Load all species from HerbAPI and build lookup maps (handles pagination)."""
+    page = 1
+    species_list = []
+    while True:
+        result = herbapi_request("GET", f"species?per_page=100&page={page}")
+        if isinstance(result, dict) and "data" in result:
+            data = result["data"]
+            total = result.get("total", 0)
+        elif isinstance(result, list):
+            data = result
+            total = len(data)
+        else:
+            break
+        species_list.extend(data)
+        if len(species_list) >= total or not data:
+            break
+        page += 1
+
+    # Build lookup by scientific name (normalized lowercase)
+    by_scientific = {}
+    for s in species_list:
+        key = s["name_scientific"].strip().lower()
+        by_scientific[key] = s
+    return species_list, by_scientific
+
+
+def load_herbapi_cultivars():
+    """Load all existing cultivars from HerbAPI (handles pagination, max 100/page)."""
+    page = 1
+    all_cultivars = []
+    while True:
+        result = herbapi_request("GET", f"cultivars?per_page=100&page={page}")
+        if isinstance(result, dict) and "data" in result:
+            data = result["data"]
+            total = result.get("total", 0)
+        elif isinstance(result, list):
+            data = result
+            total = len(data)
+        else:
+            break
+
+        all_cultivars.extend(data)
+        if len(all_cultivars) >= total or not data:
+            break
+        page += 1
+
+    # Build lookup by (species_id, normalized cultivar name)
+    by_key = {}
+    for c in all_cultivars:
+        key = (c["species_id"], c["name"].strip().lower())
+        by_key[key] = c
+
+    return all_cultivars, by_key
+
+
+def ensure_supplier():
+    """Create the Arche Noah supplier if it doesn't exist, return its ID."""
+    suppliers = herbapi_request("GET", "suppliers")
+    if isinstance(suppliers, dict) and "data" in suppliers:
+        suppliers = suppliers["data"]
+
+    for s in suppliers:
+        if "arche" in s["name"].lower() and "noah" in s["name"].lower():
+            print(f"Supplier 'Arche Noah' already exists: {s['id']}")
+            return s["id"]
+
+    print("Creating supplier 'Arche Noah'...")
+    result = herbapi_request("POST", "suppliers", {
+        "name": "Arche Noah",
+        "url": "https://www.arche-noah.at",
+        "country": "AT",
+        "is_organic": True,
+        "is_demeter": False,
+        "notes": "Austrian society for heritage seed preservation and biodiversity",
+    })
+    print(f"Created supplier: {result['id']}")
+    return result["id"]
+
+
+def load_existing_supplier_links(cultivar_id):
+    """Load existing supplier links for a cultivar."""
+    try:
+        result = herbapi_request("GET", f"cultivars/{cultivar_id}/suppliers")
+        if isinstance(result, list):
+            return result
+        if isinstance(result, dict) and "data" in result:
+            return result["data"]
+        return []
+    except Exception:
+        return []
+
+
+def main():
+    now_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    print(f"=== Arche Noah Seed Catalog Scraper ===")
+    print(f"Started at {now_str}\n")
+
+    # Step 1: Create Arche Noah supplier in HerbAPI
+    print("[1/6] Ensuring Arche Noah supplier exists...")
+    supplier_id = ensure_supplier()
+    print()
+
+    # Step 2: Load HerbAPI species for matching
+    print("[2/6] Loading HerbAPI species...")
+    species_list, species_by_scientific = load_herbapi_species()
+    print(f"Loaded {len(species_list)} species")
+    print()
+
+    # Step 3: Load existing cultivars for idempotency
+    print("[3/6] Loading existing cultivars...")
+    existing_cultivars, cultivars_by_key = load_herbapi_cultivars()
+    print(f"Loaded {len(existing_cultivars)} existing cultivars")
+    print()
+
+    # Step 4: Scrape Arche Noah shop
+    print("[4/6] Scraping Arche Noah shop catalog...")
+    session = shop_create_session()
+    print(f"Got shop session")
+    products = fetch_all_arche_noah_products(session)
+    print()
+
+    # Step 5: Fetch product details (to get Latin names)
+    print("[5/6] Fetching product details for Latin name matching...")
+    details = fetch_product_details(session, products)
+    print()
+
+    # Step 6: Create cultivars in HerbAPI
+    print("[6/6] Creating cultivars in HerbAPI...")
+    stats = {
+        "created": 0,
+        "skipped_existing": 0,
+        "skipped_no_species": 0,
+        "supplier_linked": 0,
+        "supplier_link_existed": 0,
+        "errors": 0,
+    }
+
+    for sid, product in sorted(products.items()):
+        detail = details.get(sid, {})
+
+        # Extract Latin name from detail
+        latin_name = extract_latin_name(detail.get("detailHeadline3", ""))
+        if not latin_name:
+            # Fallback: try from category mapping
+            latin_name = None
+
+        # Match to HerbAPI species (handles subspecies/variety suffixes)
+        species = match_species(latin_name, species_by_scientific)
+
+        if not species:
+            print(f"  SKIP (no species match): {product['name']} | latin={latin_name}")
+            stats["skipped_no_species"] += 1
+            continue
+
+        # Extract cultivar name
+        cultivar_name = extract_cultivar_name(product["name"])
+        if not cultivar_name:
+            print(f"  SKIP (no cultivar name): {product['name']}")
+            stats["skipped_no_species"] += 1
+            continue
+
+        # Check if cultivar already exists (idempotency)
+        lookup_key = (species["id"], cultivar_name.strip().lower())
+        existing = cultivars_by_key.get(lookup_key)
+
+        if existing:
+            cultivar_id = existing["id"]
+            stats["skipped_existing"] += 1
+        else:
+            # Determine if this is organic
+            is_organic = product.get("articleLineDesc") == "Bio-Saatgut von ARCHE NOAH"
+
+            # Build product URL
+            alias = product.get("alias") or detail.get("alias", "")
+            product_url = f"https://shop.arche-noah.at/produkt/{alias}" if alias else None
+
+            # Create cultivar
+            cultivar_data = {
+                "species_id": species["id"],
+                "name": cultivar_name,
+                "name_de": cultivar_name,
+                "is_organic": is_organic,
+                "source_urls": [product_url] if product_url else None,
+            }
+
+            try:
+                result = herbapi_request("POST", "cultivars", cultivar_data)
+                cultivar_id = result["id"]
+                stats["created"] += 1
+                # Add to lookup for idempotency within this run
+                cultivars_by_key[lookup_key] = result
+                print(f"  CREATED: {cultivar_name} ({species['name_scientific']})")
+            except Exception as e:
+                print(f"  ERROR creating '{cultivar_name}': {e}", file=sys.stderr)
+                stats["errors"] += 1
+                continue
+
+        # Link cultivar to supplier
+        existing_links = load_existing_supplier_links(cultivar_id)
+        already_linked = any(
+            link["supplier_id"] == supplier_id for link in existing_links
+        )
+
+        if already_linked:
+            stats["supplier_link_existed"] += 1
+        else:
+            # Parse pack info
+            unit_desc = product.get("unitDesc") or detail.get("unitDesc", "")
+            pack_size, pack_unit = parse_pack_info(unit_desc)
+
+            # Get price
+            price = None
+            price_list = product.get("priceListPos") or detail.get("priceListPos", [])
+            if price_list:
+                price = price_list[0].get("singleUnitPrice")
+
+            # Build product URL
+            alias = product.get("alias") or detail.get("alias", "")
+            product_url = f"https://shop.arche-noah.at/produkt/{alias}" if alias else None
+
+            link_data = {
+                "supplier_id": supplier_id,
+                "article_number": str(product.get("articleNr", "")),
+                "product_url": product_url,
+                "price_eur": price,
+                "pack_size": pack_size,
+                "pack_unit": pack_unit,
+            }
+
+            try:
+                herbapi_request("POST", f"cultivars/{cultivar_id}/suppliers", link_data)
+                stats["supplier_linked"] += 1
+            except Exception as e:
+                print(f"  ERROR linking supplier for '{cultivar_name}': {e}", file=sys.stderr)
+                stats["errors"] += 1
+
+        time.sleep(0.1)  # small delay between HerbAPI calls
+
+    # Summary
+    print(f"\n{'='*60}")
+    print(f"Scraping complete!")
+    print(f"  Cultivars created:          {stats['created']}")
+    print(f"  Cultivars already existed:   {stats['skipped_existing']}")
+    print(f"  Skipped (no species match):  {stats['skipped_no_species']}")
+    print(f"  Supplier links created:      {stats['supplier_linked']}")
+    print(f"  Supplier links existed:      {stats['supplier_link_existed']}")
+    print(f"  Errors:                      {stats['errors']}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,843 @@
+#!/usr/bin/env python3
+"""
+Scraper for Bingenheimer Saatgut (https://www.bingenheimersaatgut.de/)
+Extracts cultivar data and imports into HerbAPI.
+
+Categories scraped: Gemüse (vegetables), Kräuter (herbs), Gründüngung (green manure).
+"""
+
+import json
+import re
+import sys
+import time
+import urllib.request
+import urllib.error
+import urllib.parse
+from html.parser import HTMLParser
+from typing import Optional
+
+# ── Configuration ─────────────────────────────────────────────────────────
+# Base URL of the HerbAPI REST endpoint; api_get/api_post append the path to it.
+# NOTE(review): plain http — the bearer token below travels unencrypted;
+# confirm this host is only reachable over a trusted network.
+API_BASE = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Bearer token sent in the Authorization header of every HerbAPI request.
+# NOTE(review): hard-coded credential committed to source control — consider
+# loading it from the environment or a secrets store and rotating this value.
+API_TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+# Root of the scraped shop; category URLs and relative product links are
+# resolved against it.
+SITE_BASE = "https://www.bingenheimersaatgut.de"
+# Seconds to sleep between requests to the shop (used by scrape_category and
+# process_product).
+DELAY = 0.5
+# User-Agent header sent by fetch_page.
+USER_AGENT = "HerbAPI-Scraper/1.0 (+https://sub-net.at)"
+
+# ── Category URLs to scrape ───────────────────────────────────────────────
+# (url_path, default_species_scientific_name)
+#
+# url_path is the part of the listing URL between "/de/bio-saatgut/" and
+# ".html" (see scrape_category).  The default species is only a fallback: it
+# is used when no Latin name can be extracted from a product's detail page
+# (see process_product).
+
+# Vegetable listing pages ("gemuese/...").
+VEGETABLE_CATEGORIES = [
+    ("gemuese/tomaten", "Solanum lycopersicum"),
+    ("gemuese/gurken/gewuerzgurke", "Cucumis sativus"),
+    ("gemuese/gurken/salatgurken", "Cucumis sativus"),
+    ("gemuese/aubergine", "Solanum melongena"),
+    ("gemuese/bohnen/buschbohne", "Phaseolus vulgaris"),
+    ("gemuese/bohnen/stangenbohne", "Phaseolus vulgaris"),
+    ("gemuese/bohnen/dicke-bohne", "Vicia faba"),
+    ("gemuese/bohnen/feuerbohne", "Phaseolus coccineus"),
+    ("gemuese/bohnen/edamame-sojabohne", "Glycine max"),
+    ("gemuese/bohnen/spaghettibohne", "Vigna unguiculata"),
+    ("gemuese/erbsen/markerbse", "Pisum sativum"),
+    ("gemuese/erbsen/schalerbse", "Pisum sativum"),
+    ("gemuese/erbsen/zuckererbse", "Pisum sativum"),
+    ("gemuese/feldsalat", "Valerianella locusta"),
+    ("gemuese/knollenfenchel", "Foeniculum vulgare"),
+    ("gemuese/kohl/blumenkohl", "Brassica oleracea"),
+    ("gemuese/kohl/brokkoli", "Brassica oleracea"),
+    ("gemuese/kohl/chinakohlpak-choi", "Brassica rapa"),
+    ("gemuese/kohl/gruenkohl", "Brassica oleracea"),
+    ("gemuese/kohl/kohlrabi", "Brassica oleracea"),
+    ("gemuese/kohl/rotkohl", "Brassica oleracea"),
+    ("gemuese/kohl/weisskohl", "Brassica oleracea"),
+    ("gemuese/kohl/wirsing", "Brassica oleracea"),
+    ("gemuese/kohl/rosenkohl", "Brassica oleracea"),
+    ("gemuese/kresse", "Lepidium sativum"),
+    ("gemuese/kuerbis", "Cucurbita maxima"),
+    ("gemuese/zuckermais", "Zea mays"),
+    ("gemuese/mangold", "Beta vulgaris"),
+    ("gemuese/melone", "Cucumis melo"),
+    ("gemuese/moehren", "Daucus carota"),
+    ("gemuese/paprika/gemuesepaprika", "Capsicum annuum"),
+    ("gemuese/paprika/chili", "Capsicum annuum"),
+    ("gemuese/pastinaken", "Pastinaca sativa"),
+    ("gemuese/petersilienwurzel", "Petroselinum crispum"),
+    ("gemuese/physalis", "Physalis peruviana"),
+    ("gemuese/porreelauch", "Allium porrum"),
+    ("gemuese/radies", "Raphanus sativus"),
+    ("gemuese/rettich", "Raphanus sativus"),
+    ("gemuese/rote-bete", "Beta vulgaris"),
+    ("gemuese/rueben/mai-herbstruebennavets", "Brassica rapa"),
+    ("gemuese/rueben/kohlruebe", "Brassica napus"),
+    ("gemuese/rucola", "Eruca vesicaria"),
+    ("gemuese/salat/bataviasalat", "Lactuca sativa"),
+    ("gemuese/salat/eichblattsalat", "Lactuca sativa"),
+    ("gemuese/salat/eissalat", "Lactuca sativa"),
+    ("gemuese/salat/endivien", "Cichorium endivia"),
+    ("gemuese/salat/hirschhornwegerich", "Plantago coronopus"),
+    ("gemuese/salat/kopfsalat", "Lactuca sativa"),
+    ("gemuese/salat/lollosalat", "Lactuca sativa"),
+    ("gemuese/salat/romanasalat", "Lactuca sativa"),
+    ("gemuese/salat/baby-leaf", "Lactuca sativa"),
+    ("gemuese/sellerie/knollensellerie", "Apium graveolens"),
+    ("gemuese/sellerie/stangen--bleichsellerie", "Apium graveolens"),
+    ("gemuese/spinatspinat-aehnliche/spinat", "Spinacia oleracea"),
+    ("gemuese/spinatspinat-aehnliche/neuseelaender-spinat", "Tetragonia tetragonioides"),
+    ("gemuese/blattstielgemuese", "Beta vulgaris"),
+    ("gemuese/zwiebeln", "Allium cepa"),
+    ("gemuese/lauchzwiebeln", "Allium fistulosum"),
+    ("gemuese/artischocke", "Cynara cardunculus"),
+    ("gemuese/asia-salate", "Brassica juncea"),
+    ("gemuese/chicoree", "Cichorium intybus"),
+    ("gemuese/schwarz-haferwurzel", "Scorzonera hispanica"),
+    ("gemuese/winterpostelein", "Claytonia perfoliata"),
+    ("gemuese/zucchini", "Cucurbita pepo"),
+    ("gemuese/catalogna", "Cichorium intybus"),
+    ("gemuese/zichoriensalate", "Cichorium intybus"),
+]
+
+# Herb listing pages ("kraeuter/..."), as (url_path, default species) pairs.
+# The species is the fallback used when a product's detail page yields no
+# Latin name.
+HERB_CATEGORIES = [
+    ("kraeuter/basilikum", "Ocimum basilicum"),
+    ("kraeuter/bohnenkraut", "Satureja hortensis"),
+    ("kraeuter/borretsch", "Borago officinalis"),
+    ("kraeuter/dill", "Anethum graveolens"),
+    ("kraeuter/kuemmel", "Carum carvi"),
+    ("kraeuter/kerbel", "Anthriscus cerefolium"),
+    ("kraeuter/koriander", "Coriandrum sativum"),
+    ("kraeuter/gewuerzfenchel", "Foeniculum vulgare"),
+    ("kraeuter/kultursauerampfer", "Rumex acetosa"),
+    ("kraeuter/lavendel", "Lavandula angustifolia"),
+    ("kraeuter/liebstock", "Levisticum officinale"),
+    ("kraeuter/majoran", "Origanum majorana"),
+    ("kraeuter/oregano", "Origanum vulgare"),
+    ("kraeuter/pimpinelle", "Sanguisorba minor"),
+    ("kraeuter/estragon", "Artemisia dracunculus"),
+    ("kraeuter/salbei", "Salvia officinalis"),
+    ("kraeuter/schnittlauch", "Allium schoenoprasum"),
+    ("kraeuter/schnittknoblauch", "Allium tuberosum"),
+    ("kraeuter/schwarzkuemmel", "Nigella sativa"),
+    ("kraeuter/speisechrysantheme", "Glebionis coronaria"),
+    ("kraeuter/thymian", "Thymus vulgaris"),
+    ("kraeuter/ysop", "Hyssopus officinalis"),
+    ("kraeuter/winterkresse", "Barbarea vulgaris"),
+    ("kraeuter/brunnenkresse", "Nasturtium officinale"),
+    ("kraeuter/melisse", "Melissa officinalis"),
+    ("kraeuter/petersilie", "Petroselinum crispum"),
+    ("kraeuter/schnittsellerie", "Apium graveolens"),
+    ("kraeuter/beifuss", "Artemisia vulgaris"),
+]
+
+# Green-manure listing.  No default species is given (None), so a product
+# from this category is skipped unless its detail page yields a Latin name
+# (see process_product).
+GREEN_MANURE_CATEGORIES = [
+    ("gruenduengung", None),
+]
+
+# Every category, in the order vegetables, herbs, green manure.
+ALL_CATEGORIES = VEGETABLE_CATEGORIES + HERB_CATEGORIES + GREEN_MANURE_CATEGORIES
+
+# ── Stats ─────────────────────────────────────────────────────────────────
+# Module-level run counters, mutated in place by the scraping and import
+# functions.  The integer entries are incremented; the two list entries
+# collect messages.
+stats = {
+    "categories_scraped": 0,      # listing pages fetched and parsed
+    "products_found": 0,          # product links found on listing pages
+    "detail_pages_fetched": 0,    # product detail pages fetched
+    "cultivars_created": 0,       # cultivars newly created via the API
+    "cultivars_existed": 0,       # cultivars already present in the caches
+    "supplier_links_created": 0,  # presumably cultivar→supplier links created — TODO confirm
+    "supplier_links_existed": 0,  # presumably links that were already present — TODO confirm
+    "species_created": 0,         # species created by find_or_create_species
+    "families_created": 0,        # families created by find_or_create_family
+    "species_not_matched": [],    # Latin/product names that could not be resolved to a species
+    "errors": [],                 # human-readable error messages
+}
+
+
+# ── HTTP helpers ──────────────────────────────────────────────────────────
+def fetch_page(url: str) -> str:
+    """Fetch a web page with User-Agent header."""
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            return resp.read().decode("utf-8", errors="replace")
+    except urllib.error.HTTPError as e:
+        if e.code == 404:
+            return ""
+        raise
+
+
+def api_get(path: str, params: dict = None) -> dict:
+    """GET from HerbAPI."""
+    url = f"{API_BASE}{path}"
+    if params:
+        url += "?" + urllib.parse.urlencode(params)
+    req = urllib.request.Request(url, headers={
+        "Authorization": f"Bearer {API_TOKEN}",
+        "Accept": "application/json",
+    })
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return json.loads(resp.read())
+
+
+def api_post(path: str, data: dict) -> tuple:
+    """POST to HerbAPI. Returns (response_dict, status_code)."""
+    url = f"{API_BASE}{path}"
+    body = json.dumps(data).encode("utf-8")
+    req = urllib.request.Request(url, data=body, method="POST", headers={
+        "Authorization": f"Bearer {API_TOKEN}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    })
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            return json.loads(resp.read()), resp.status
+    except urllib.error.HTTPError as e:
+        err_body = e.read().decode("utf-8", errors="replace")
+        return {"error": err_body, "_status": e.code}, e.code
+
+
+# ── HTML parsing helpers ──────────────────────────────────────────────────
+def parse_product_links(html: str) -> list:
+    """Parse product links from listing page using regex."""
+    links = []
+    # Magento product-item-link pattern
+    pattern = re.compile(
+        r'<a[^>]+href="([^"]*?/de/bio-saatgut/[^"]+?)"[^>]*class="[^"]*product-item-link[^"]*"[^>]*>\s*(.*?)\s*</a>',
+        re.DOTALL | re.IGNORECASE
+    )
+    for match in pattern.finditer(html):
+        url = match.group(1)
+        name = re.sub(r'<[^>]+>', '', match.group(2)).strip()
+        if name:
+            if not url.startswith("http"):
+                url = SITE_BASE + url
+            links.append((url, name))
+
+    if not links:
+        # Broader pattern for product detail links
+        pattern2 = re.compile(
+            r'href="([^"]*?/de/bio-saatgut/(?:gemuese|kraeuter|gruenduengung)/[^"]+?/[^"/.]+)"[^>]*>\s*([^<]{3,})',
+            re.IGNORECASE
+        )
+        seen = set()
+        for match in pattern2.finditer(html):
+            url = match.group(1).strip()
+            name = match.group(2).strip()
+            if name and url not in seen and not url.endswith(".html"):
+                seen.add(url)
+                if not url.startswith("http"):
+                    url = SITE_BASE + url
+                links.append((url, name))
+
+    # Deduplicate by URL
+    seen_urls = set()
+    unique = []
+    for url, name in links:
+        if url not in seen_urls:
+            seen_urls.add(url)
+            unique.append((url, name))
+    return unique
+
+
+def extract_latin_from_detail(html: str) -> Optional[str]:
+    """Extract Latin/botanical name from product detail page."""
+    patterns = [
+        r'<(?:em|i)[^>]*>\s*([A-Z][a-z]+\s+[a-z]{2,}(?:\s+(?:var\.|subsp\.)\s+[a-z]+)?)\s*</(?:em|i)>',
+        r'class="[^"]*(?:botanical|latin|species)[^"]*"[^>]*>\s*([A-Z][a-z]+\s+[a-z]{2,})',
+        r'(?:Botanischer?\s+Name|Lateinischer?\s+Name|Art)\s*:?\s*(?:<[^>]+>)*\s*([A-Z][a-z]+\s+[a-z]{2,})',
+    ]
+    for pat in patterns:
+        m = re.search(pat, html, re.IGNORECASE)
+        if m:
+            name = m.group(1).strip()
+            parts = name.split()
+            if len(parts) >= 2 and parts[0][0].isupper() and parts[1][0].islower():
+                return name
+    return None
+
+
+def extract_description_from_detail(html: str) -> str:
+    """Extract product description from detail page."""
+    desc_patterns = [
+        r'<div[^>]*class="[^"]*product[- ]description[^"]*"[^>]*>(.*?)</div>',
+        r'<div[^>]*class="[^"]*beschreibung[^"]*"[^>]*>(.*?)</div>',
+        r'data-content-type="description"[^>]*>(.*?)</div>',
+    ]
+    for pat in desc_patterns:
+        m = re.search(pat, html, re.DOTALL | re.IGNORECASE)
+        if m:
+            raw = m.group(1)
+            text = re.sub(r'<[^>]+>', ' ', raw)
+            text = re.sub(r'\s+', ' ', text).strip()
+            if len(text) > 20:
+                return text[:2000]
+    return ""
+
+
+def extract_article_number(product_name: str, url: str) -> Optional[str]:
+    """Extract article number from product name or URL."""
+    m = re.search(r'\(([A-Z]\s*\d+[A-Z]?)\)', product_name)
+    if m:
+        return m.group(1).replace(" ", "")
+    slug = url.rstrip("/").split("/")[-1]
+    m = re.search(r'-([a-z]\d+[a-z]?)$', slug, re.IGNORECASE)
+    if m:
+        return m.group(1).upper()
+    return None
+
+
+def extract_variety_name(product_name: str) -> str:
+    """Extract the variety/cultivar name from the full product name."""
+    name = product_name.strip()
+
+    # Remove article number suffix like (G802)
+    name = re.sub(r'\s*\([A-Z]\s*\d+[A-Z]?\)\s*$', '', name)
+
+    # Common German vegetable/herb type prefixes to strip
+    prefixes = [
+        # Tomatoes
+        r'(?:Normal(?:früchtige)?|Fleisch|Cherry|Balkon|Wild|Freiland|Roma|Ochsenherz|'
+        r'Cocktail|Dattel|Mini|Snack|Stab|Busch|Salat|Zwerg)[\s-]*[Tt]omate\s+',
+        # Beans
+        r'(?:Busch|Stangen|Dicke|Feuer|Spaghetti)[\s-]*[Bb]ohne\s+',
+        r'Edamame(?:-Sojabohne)?\s+',
+        # Peas
+        r'(?:Mark|Schal|Zucker|Pal)[\s-]*[Ee]rbse\s+',
+        # Cucurbits
+        r'(?:Salat|Einlege|Gewürz|Freiland|Schlangen)[\s-]*[Gg]urke\s+',
+        r'Zucchini\s+',
+        r'Kürbis\s+',
+        r'(?:Wasser)?[Mm]elone\s+',
+        # Brassicas
+        r'(?:Blumen|Grün|Rot|Weiß|Rosen)[\s-]*[Kk]ohl\s+',
+        r'Kohlrabi\s+',
+        r'Wirsing\s+',
+        r'Brokkoli\s+',
+        r'Chinakohl\s+',
+        r'Pak\s+Choi\s+',
+        r'Kohlrübe\s+',
+        r'Mai-/Herbstrüben?(?:/Navets)?\s+',
+        # Root vegetables
+        r'Möhre\s+',
+        r'Karotten?(?:\s*-?\s*Mix)?\s+',
+        r'Pastinake\s+',
+        r'Radies(?:chen)?\s+',
+        r'Rettich\s+',
+        r'Schwarzwurzel\s+',
+        r'Haferwurzel\s+',
+        r'Petersilienwurzel\s+',
+        # Beets
+        r'(?:Rote|Gelbe|Weiße)\s+Bete?\s+',
+        r'Mangold\s+',
+        # Lettuce & leafy
+        r'(?:Kopf|Eichblatt|Batavia|Eis|Lollo|Romana|Baby-Leaf)[\s-]*[Ss]alat\s+',
+        r'Feldsalat\s+',
+        r'Endivie\s+',
+        r'Asia[\s-]*Salat\s+',
+        r'Spinat\s+',
+        # Alliums
+        r'Zwiebel\s+',
+        r'Lauchzwiebel\s+',
+        r'Porree(?:/Lauch)?\s+',
+        r'Schnittlauch\s+',
+        r'Schnittknoblauch\s+',
+        # Peppers
+        r'(?:Gemüse|Block|Spitz|Papier)[\s-]*[Pp]aprika\s+',
+        r'Chili\s+',
+        # Celery
+        r'(?:Knollen|Stangen|Bleich|Schnitt)[\s-]*[Ss]ellerie\s+',
+        # Herbs
+        r'Basilikum\s+',
+        r'Koriander\s+',
+        r'Dill\s+',
+        r'Petersilie\s+',
+        r'(?:Knollen|Gewürz)[\s-]*[Ff]enchel\s+',
+        r'Salbei\s+',
+        r'Thymian\s+',
+        r'Oregano\s+',
+        r'Lavendel\s+',
+        r'Melisse\s+',
+        r'Majoran\s+',
+        r'Estragon\s+',
+        r'Kresse\s+',
+        r'Bohnenkraut\s+',
+        r'Borretsch\s+',
+        r'Kümmel\s+',
+        r'Kerbel\s+',
+        r'Liebstock\s+',
+        r'Ysop\s+',
+        r'Pimpinelle\s+',
+        r'Beifuß\s+',
+        r'Schwarzkümmel\s+',
+        # Other
+        r'Zuckermais\s+',
+        r'Artischocke\s+',
+        r'Physalis\s+',
+        r'Aubergine\s+',
+        r'Catalogna\s+',
+    ]
+    for prefix in prefixes:
+        name = re.sub(r'^' + prefix, '', name, flags=re.IGNORECASE)
+
+    name = name.strip().strip("'\"")
+    return name
+
+
+# ── API data caches ───────────────────────────────────────────────────────
+# Module-level lookup tables.  load_api_data() fills them from HerbAPI, and
+# the find_or_create_* / process_product functions add entries as they
+# create new records, so repeated lookups within one run need no API call.
+species_cache = {}      # scientific_name_lower -> {id, name_scientific, ...}
+family_cache = {}       # name_scientific_lower -> {id, name_scientific}
+cultivar_cache = {}     # slug -> {id, name, species_id, ...}
+# HerbAPI id of the Bingenheimer supplier; None until load_api_data() sets it.
+supplier_id = None
+
+
+def load_api_data():
+    """Load all existing data from HerbAPI for matching."""
+    global supplier_id
+
+    print("Loading existing HerbAPI data...")
+
+    # Load families
+    page = 1
+    while True:
+        resp = api_get("/families", {"per_page": 100, "page": page})
+        for f in resp["data"]:
+            family_cache[f["name_scientific"].lower()] = f
+        if len(resp["data"]) < 100:
+            break
+        page += 1
+    print(f"  Loaded {len(family_cache)} families")
+
+    # Load species
+    page = 1
+    while True:
+        resp = api_get("/species", {"per_page": 100, "page": page})
+        for s in resp["data"]:
+            species_cache[s["name_scientific"].lower()] = s
+        if len(resp["data"]) < 100:
+            break
+        page += 1
+    print(f"  Loaded {len(species_cache)} species")
+
+    # Load ALL cultivars (slug + id + name + species_id)
+    page = 1
+    while True:
+        resp = api_get("/cultivars", {"per_page": 100, "page": page})
+        for c in resp["data"]:
+            cultivar_cache[c["slug"]] = {
+                "id": c["id"],
+                "name": c["name"],
+                "species_id": c["species_id"],
+            }
+        if len(resp["data"]) < 100:
+            break
+        page += 1
+    print(f"  Loaded {len(cultivar_cache)} cultivars")
+
+    # Create or find Bingenheimer supplier
+    resp = api_get("/suppliers")
+    for s in resp:
+        if "bingenheimer" in s["name"].lower():
+            supplier_id = s["id"]
+            print(f"  Found existing supplier: {s['name']} ({s['id']})")
+            break
+
+    if not supplier_id:
+        print("  Creating Bingenheimer Saatgut supplier...")
+        s, code = api_post("/suppliers", {
+            "name": "Bingenheimer Saatgut",
+            "url": "https://www.bingenheimersaatgut.de",
+            "country": "DE",
+            "is_organic": True,
+            "is_demeter": True,
+            "notes": "German biodynamic seed company, Demeter certified, open-pollinated varieties"
+        })
+        if "id" in s:
+            supplier_id = s["id"]
+            print(f"  Created supplier: {s['id']}")
+        else:
+            print(f"  ERROR creating supplier: {s}")
+            sys.exit(1)
+
+
+def find_or_create_species(latin_name: str) -> Optional[str]:
+    """Find species by Latin name or create it. Returns species ID."""
+    if not latin_name:
+        return None
+
+    key = latin_name.lower().strip()
+
+    # Direct match
+    if key in species_cache:
+        return species_cache[key]["id"]
+
+    # Try without subspecies/variety
+    base = " ".join(key.split()[:2])
+    if base in species_cache:
+        return species_cache[base]["id"]
+
+    # Handle synonyms
+    synonyms = {
+        "lycopersicon esculentum": "solanum lycopersicum",
+        "capsicum annuum var. annuum": "capsicum annuum",
+        "brassica oleracea var. botrytis": "brassica oleracea",
+        "brassica oleracea var. italica": "brassica oleracea",
+        "brassica oleracea var. gemmifera": "brassica oleracea",
+        "brassica oleracea var. gongylodes": "brassica oleracea",
+        "brassica oleracea var. capitata": "brassica oleracea",
+        "brassica oleracea var. sabauda": "brassica oleracea",
+        "brassica oleracea var. sabellica": "brassica oleracea",
+        "brassica rapa var. rapa": "brassica rapa",
+        "brassica rapa subsp. pekinensis": "brassica rapa",
+        "brassica rapa subsp. chinensis": "brassica rapa",
+        "beta vulgaris var. conditiva": "beta vulgaris",
+        "beta vulgaris subsp. vulgaris": "beta vulgaris",
+        "beta vulgaris var. vulgaris": "beta vulgaris",
+        "allium porrum": "allium cepa",
+        "allium ampeloprasum": "allium cepa",
+        "origanum majorana": "origanum vulgare",
+        "cichorium intybus var. foliosum": "cichorium intybus",
+        "petroselinum crispum var. tuberosum": "petroselinum crispum",
+        "apium graveolens var. rapaceum": "apium graveolens",
+        "apium graveolens var. dulce": "apium graveolens",
+        "lactuca sativa var. capitata": "lactuca sativa",
+        "lactuca sativa var. crispa": "lactuca sativa",
+        "lactuca sativa var. longifolia": "lactuca sativa",
+    }
+    if key in synonyms:
+        syn_key = synonyms[key]
+        if syn_key in species_cache:
+            return species_cache[syn_key]["id"]
+
+    # Try to create the species
+    genus = latin_name.split()[0]
+    family_map = {
+        "Solanum": "Solanaceae", "Capsicum": "Solanaceae", "Physalis": "Solanaceae",
+        "Nicandra": "Solanaceae",
+        "Cucumis": "Cucurbitaceae", "Cucurbita": "Cucurbitaceae", "Citrullus": "Cucurbitaceae",
+        "Phaseolus": "Fabaceae", "Pisum": "Fabaceae", "Vicia": "Fabaceae",
+        "Glycine": "Fabaceae", "Lens": "Fabaceae", "Lupinus": "Fabaceae",
+        "Trifolium": "Fabaceae", "Medicago": "Fabaceae", "Vigna": "Fabaceae",
+        "Brassica": "Brassicaceae", "Raphanus": "Brassicaceae", "Eruca": "Brassicaceae",
+        "Lepidium": "Brassicaceae", "Nasturtium": "Brassicaceae", "Barbarea": "Brassicaceae",
+        "Sinapis": "Brassicaceae", "Crambe": "Brassicaceae", "Diplotaxis": "Brassicaceae",
+        "Allium": "Amaryllidaceae",
+        "Daucus": "Apiaceae", "Petroselinum": "Apiaceae", "Apium": "Apiaceae",
+        "Foeniculum": "Apiaceae", "Pastinaca": "Apiaceae", "Coriandrum": "Apiaceae",
+        "Anethum": "Apiaceae", "Levisticum": "Apiaceae", "Anthriscus": "Apiaceae",
+        "Carum": "Apiaceae", "Myrrhis": "Apiaceae", "Pimpinella": "Apiaceae",
+        "Sanguisorba": "Rosaceae",
+        "Lactuca": "Asteraceae", "Cichorium": "Asteraceae", "Cynara": "Asteraceae",
+        "Helianthus": "Asteraceae", "Calendula": "Asteraceae", "Tagetes": "Asteraceae",
+        "Scorzonera": "Asteraceae", "Tragopogon": "Asteraceae", "Glebionis": "Asteraceae",
+        "Artemisia": "Asteraceae",
+        "Beta": "Chenopodiaceae", "Spinacia": "Chenopodiaceae",
+        "Atriplex": "Chenopodiaceae", "Chenopodium": "Chenopodiaceae",
+        "Ocimum": "Lamiaceae", "Origanum": "Lamiaceae", "Thymus": "Lamiaceae",
+        "Salvia": "Lamiaceae", "Melissa": "Lamiaceae", "Lavandula": "Lamiaceae",
+        "Satureja": "Lamiaceae", "Hyssopus": "Lamiaceae", "Rosmarinus": "Lamiaceae",
+        "Mentha": "Lamiaceae",
+        "Zea": "Poaceae",
+        "Borago": "Boraginaceae", "Phacelia": "Boraginaceae",
+        "Valerianella": "Caprifoliaceae",
+        "Tropaeolum": "Tropaeolaceae",
+        "Rumex": "Polygonaceae",
+        "Nigella": "Ranunculaceae",
+        "Claytonia": "Montiaceae",
+        "Tetragonia": "Aizoaceae",
+        "Basella": "Basellaceae",
+        "Plantago": "Plantaginaceae",
+    }
+
+    family_name = family_map.get(genus)
+    if not family_name:
+        print(f"    WARNING: Unknown genus '{genus}' for species '{latin_name}'")
+        stats["species_not_matched"].append(latin_name)
+        return None
+
+    family_id = find_or_create_family(family_name)
+    if not family_id:
+        return None
+
+    print(f"    Creating species: {latin_name}")
+    resp, code = api_post("/species", {
+        "name_scientific": latin_name,
+        "family_id": family_id,
+    })
+    if "id" in resp:
+        species_cache[latin_name.lower()] = resp
+        stats["species_created"] += 1
+        return resp["id"]
+    else:
+        # Might already exist, reload
+        print(f"    Species creation returned {code}: {resp.get('error','')[:100]}")
+        page = 1
+        while True:
+            r = api_get("/species", {"per_page": 100, "page": page})
+            for s in r["data"]:
+                species_cache[s["name_scientific"].lower()] = s
+            if len(r["data"]) < 100:
+                break
+            page += 1
+        if latin_name.lower() in species_cache:
+            return species_cache[latin_name.lower()]["id"]
+        stats["errors"].append(f"Species creation failed: {latin_name}")
+        return None
+
+
+def find_or_create_family(family_name: str) -> Optional[str]:
+    """Find or create a plant family. Returns family ID."""
+    key = family_name.lower()
+    if key in family_cache:
+        return family_cache[key]["id"]
+
+    print(f"    Creating family: {family_name}")
+    resp, code = api_post("/families", {"name_scientific": family_name})
+    if "id" in resp:
+        family_cache[key] = resp
+        stats["families_created"] += 1
+        return resp["id"]
+    else:
+        # Reload
+        r = api_get("/families", {"per_page": 200})
+        for ff in r["data"]:
+            family_cache[ff["name_scientific"].lower()] = ff
+        if key in family_cache:
+            return family_cache[key]["id"]
+        stats["errors"].append(f"Family creation failed: {family_name}")
+        return None
+
+
+def slugify(text: str) -> str:
+    """Generate a URL-safe slug."""
+    text = text.lower()
+    replacements = {
+        "ä": "a", "ö": "o", "ü": "u", "ß": "ss",
+        "é": "e", "è": "e", "ê": "e", "ë": "e",
+        "à": "a", "â": "a", "á": "a",
+        "ô": "o", "ù": "u", "û": "u", "ú": "u",
+        "ï": "i", "î": "i", "í": "i",
+        "ç": "c", "ñ": "n", "ó": "o",
+        "œ": "oe", "æ": "ae",
+    }
+    for old, new in replacements.items():
+        text = text.replace(old, new)
+    text = re.sub(r'[^a-z0-9\s-]', '', text)
+    text = re.sub(r'[\s]+', '-', text.strip())
+    text = re.sub(r'-+', '-', text)
+    return text.strip('-')
+
+
+def find_existing_cultivar(species_name: str, variety_name: str, species_id: str) -> Optional[str]:
+    """Check if cultivar already exists. Returns cultivar ID or None."""
+    expected_slug = slugify(f"{species_name} {variety_name}")
+
+    # Direct slug match
+    if expected_slug in cultivar_cache:
+        return cultivar_cache[expected_slug]["id"]
+
+    # Check for name match in same species
+    variety_lower = variety_name.lower()
+    for slug, data in cultivar_cache.items():
+        if data["species_id"] == species_id and data["name"].lower() == variety_lower:
+            return data["id"]
+
+    return None
+
+
+def scrape_category(cat_path: str, default_species: Optional[str]):
+    """Scrape a single category page and all its products."""
+    url = f"{SITE_BASE}/de/bio-saatgut/{cat_path}.html"
+    print(f"\n{'='*60}")
+    print(f"Category: {cat_path}")
+
+    html = fetch_page(url)
+    if not html:
+        print("  SKIP: Page not found (404)")
+        return
+
+    time.sleep(DELAY)
+
+    products = parse_product_links(html)
+    print(f"  Found {len(products)} products")
+    stats["products_found"] += len(products)
+    stats["categories_scraped"] += 1
+
+    for prod_url, prod_name in products:
+        process_product(prod_url, prod_name, default_species)
+
+
+def process_product(prod_url: str, prod_name: str, default_species: Optional[str]):
+    """Process a single product: fetch detail, extract data, create cultivar."""
+    article_number = extract_article_number(prod_name, prod_url)
+    variety_name = extract_variety_name(prod_name)
+
+    if not variety_name:
+        print(f"  SKIP (no variety): {prod_name}")
+        return
+
+    # Skip mixes, sets, bundles
+    skip_keywords = ["mischung", "saatscheibe", "saatband", "saatplatte",
+                     "saat-set", " mix ", "trio ", "quartett", "gutschein",
+                     "buch ", "düngung", "erde ", "-garten"]
+    name_lower = prod_name.lower()
+    # Exception: if the variety name itself is the whole thing, keep it
+    if any(kw in name_lower for kw in skip_keywords) and variety_name.lower() != prod_name.lower():
+        # Only skip if it really seems like a mix
+        if "mischung" in name_lower or "mix" in name_lower or "trio" in name_lower:
+            print(f"  SKIP (mix/set): {prod_name}")
+            return
+
+    print(f"\n  Product: {prod_name}")
+    print(f"    Variety: {variety_name}, SKU: {article_number}")
+
+    # Fetch detail page
+    latin_name = None
+    description = ""
+    time.sleep(DELAY)
+    try:
+        detail_html = fetch_page(prod_url)
+        stats["detail_pages_fetched"] += 1
+        if detail_html:
+            latin_name = extract_latin_from_detail(detail_html)
+            description = extract_description_from_detail(detail_html)
+    except Exception as e:
+        print(f"    WARNING: Detail page error: {e}")
+
+    species_name = latin_name or default_species
+    if not species_name:
+        print(f"    SKIP: No species for '{prod_name}'")
+        stats["species_not_matched"].append(prod_name)
+        return
+
+    print(f"    Species: {species_name}")
+
+    species_id = find_or_create_species(species_name)
+    if not species_id:
+        print(f"    SKIP: Could not resolve species '{species_name}'")
+        return
+
+    # Check if cultivar already exists
+    existing_id = find_existing_cultivar(species_name, variety_name, species_id)
+
+    cultivar_id = None
+
+    if existing_id:
+        cultivar_id = existing_id
+        print(f"    EXISTS: cultivar already in DB")
+        stats["cultivars_existed"] += 1
+    else:
+        # Create cultivar
+        data = {
+            "species_id": species_id,
+            "name": variety_name,
+            "name_de": variety_name,
+            "is_organic": True,
+        }
+        if description:
+            data["description"] = description
+
+        resp, code = api_post("/cultivars", data)
+
+        if "id" in resp:
+            cultivar_id = resp["id"]
+            cultivar_cache[resp["slug"]] = {
+                "id": resp["id"],
+                "name": variety_name,
+                "species_id": species_id,
+            }
+            stats["cultivars_created"] += 1
+            print(f"    CREATED: {resp['slug']}")
+        elif code == 500 and "Database error" in str(resp.get("error", "")):
+            # Likely slug conflict - try to find existing
+            print(f"    DB conflict - searching for existing cultivar...")
+            # Reload cultivars for this species
+            page = 1
+            while True:
+                r = api_get("/cultivars", {"per_page": 100, "page": page})
+                for c in r["data"]:
+                    cultivar_cache[c["slug"]] = {
+                        "id": c["id"],
+                        "name": c["name"],
+                        "species_id": c["species_id"],
+                    }
+                    if c["species_id"] == species_id and c["name"].lower() == variety_name.lower():
+                        cultivar_id = c["id"]
+                if cultivar_id or len(r["data"]) < 100:
+                    break
+                page += 1
+
+            if cultivar_id:
+                print(f"    Found existing after conflict: {cultivar_id}")
+                stats["cultivars_existed"] += 1
+            else:
+                print(f"    ERROR: DB error and could not find existing cultivar")
+                stats["errors"].append(f"DB error + not found: {species_name} / {variety_name}")
+                return
+        else:
+            print(f"    ERROR ({code}): {str(resp.get('error',''))[:100]}")
+            stats["errors"].append(f"Create failed: {variety_name}: {resp.get('error','')[:80]}")
+            return
+
+    # Link to supplier
+    if cultivar_id and supplier_id:
+        link_data = {
+            "supplier_id": supplier_id,
+            "product_url": prod_url,
+        }
+        if article_number:
+            link_data["article_number"] = article_number
+
+        resp, code = api_post(f"/cultivars/{cultivar_id}/suppliers", link_data)
+
+        if "id" in resp:
+            stats["supplier_links_created"] += 1
+            print(f"    LINKED (SKU: {article_number})")
+        elif code == 500 or "already" in str(resp.get("error", "")).lower():
+            stats["supplier_links_existed"] += 1
+            print(f"    LINK EXISTS")
+        else:
+            print(f"    LINK ERROR ({code}): {str(resp.get('error',''))[:80]}")
+            stats["errors"].append(f"Link failed: {variety_name}: {resp.get('error','')[:60]}")
+
+
+def main():
+    print("=" * 60)
+    print("Bingenheimer Saatgut Scraper for HerbAPI")
+    print("=" * 60)
+
+    load_api_data()
+
+    print(f"\nScraping {len(ALL_CATEGORIES)} categories...")
+
+    for cat_path, default_species in ALL_CATEGORIES:
+        try:
+            scrape_category(cat_path, default_species)
+        except Exception as e:
+            print(f"  ERROR in category {cat_path}: {e}")
+            stats["errors"].append(f"Category error: {cat_path}: {e}")
+
+    # Summary
+    print("\n" + "=" * 60)
+    print("SCRAPING COMPLETE - SUMMARY")
+    print("=" * 60)
+    print(f"Categories scraped:      {stats['categories_scraped']}")
+    print(f"Products found:          {stats['products_found']}")
+    print(f"Detail pages fetched:    {stats['detail_pages_fetched']}")
+    print(f"Cultivars created:       {stats['cultivars_created']}")
+    print(f"Cultivars existed:       {stats['cultivars_existed']}")
+    print(f"Supplier links created:  {stats['supplier_links_created']}")
+    print(f"Supplier links existed:  {stats['supplier_links_existed']}")
+    print(f"Species created:         {stats['species_created']}")
+    print(f"Families created:        {stats['families_created']}")
+    print(f"Errors:                  {len(stats['errors'])}")
+
+    if stats["species_not_matched"]:
+        print(f"\nUnmatched species ({len(stats['species_not_matched'])}):")
+        for s in stats["species_not_matched"][:30]:
+            print(f"  - {s}")
+
+    if stats["errors"]:
+        print(f"\nErrors ({len(stats['errors'])}):")
+        for e in stats["errors"][:30]:
+            print(f"  - {e}")
+
+    return 0 if not stats["errors"] else 1
+
+
+# Script entry point: main()'s return value (0 or 1) becomes the exit status.
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,760 @@
+#!/usr/bin/env python3
+"""
+Scraper for Dreschflegel organic seed catalog (dreschflegel-saatgut.de).
+Extracts cultivar data and imports into HerbAPI.
+
+Run 2 - fixes pagination (API caps at 100/page), better species matching,
+caches scraped products, handles duplicates gracefully.
+"""
+
+import urllib.request
+import urllib.parse
+import urllib.error
+import gzip
+import json
+import re
+import time
+import sys
+import os
+import html as html_mod
+from collections import defaultdict
+
+# --- Configuration ---
+# Base URL of the HerbAPI REST endpoint; api_request() appends the path to it.
+API_BASE = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Bearer token sent in the Authorization header of every HerbAPI request.
+# NOTE(review): this is a credential committed in plain text - consider
+# loading it from the environment and rotating the token.
+API_TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+# Root of the scraped shop; used to build the sitemap URL.
+SITE_BASE = "https://www.dreschflegel-saatgut.de"
+# Seconds slept before each product-page fetch in scrape_all_products().
+DELAY = 0.5
+# User-Agent header sent with every request to the shop.
+USER_AGENT = "Mozilla/5.0 (compatible; HerbAPI-Scraper/1.0)"
+# JSON file mapping product URL -> parsed product (or null), reused across runs.
+CACHE_FILE = "/tmp/dreschflegel_products_cache.json"
+
+# Unbuffered output
+# (line buffering makes progress lines appear immediately when piped)
+sys.stdout.reconfigure(line_buffering=True)
+sys.stderr.reconfigure(line_buffering=True)
+
+# Run counters; a missing key reads as 0, so callers can simply use "+= 1".
+stats = defaultdict(int)
+
+
+def api_request(method, path, data=None):
+    """Make an API request to HerbAPI."""
+    url = f"{API_BASE}{path}"
+    body = json.dumps(data).encode("utf-8") if data else None
+    req = urllib.request.Request(url, data=body, method=method)
+    req.add_header("Authorization", f"Bearer {API_TOKEN}")
+    req.add_header("Content-Type", "application/json")
+    req.add_header("Accept", "application/json")
+    try:
+        resp = urllib.request.urlopen(req)
+        return json.loads(resp.read().decode("utf-8"))
+    except urllib.error.HTTPError as e:
+        body_text = e.read().decode("utf-8", errors="replace")
+        if e.code == 409 or "already exists" in body_text.lower() or "duplicate" in body_text.lower():
+            return None  # Duplicate, handled silently
+        if e.code == 500 and "database error" in body_text.lower():
+            # Likely a unique constraint violation = duplicate
+            return None
+        print(f"  API error {e.code} {method} {path}: {body_text[:200]}")
+        return None
+
+
+def fetch_page(url):
+    """Fetch a web page with delay and user-agent."""
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    try:
+        resp = urllib.request.urlopen(req, timeout=30)
+        return resp.read().decode("utf-8", errors="replace")
+    except Exception as e:
+        print(f"  Fetch error {url}: {e}")
+        return None
+
+
+def get_sitemap_urls():
+    """Download sitemap and extract all URLs."""
+    print("Fetching sitemap index...")
+    html = fetch_page(f"{SITE_BASE}/sitemap.xml")
+    if not html:
+        return []
+
+    sitemap_urls = re.findall(r"<loc>(.*?)</loc>", html)
+    all_urls = []
+
+    for smap_url in sitemap_urls:
+        if smap_url.endswith(".xml.gz"):
+            print(f"  Fetching compressed sitemap...")
+            req = urllib.request.Request(smap_url, headers={"User-Agent": USER_AGENT})
+            try:
+                resp = urllib.request.urlopen(req, timeout=30)
+                data = gzip.decompress(resp.read()).decode("utf-8")
+                urls = re.findall(r"<loc>(.*?)</loc>", data)
+                all_urls.extend(urls)
+                print(f"    Found {len(urls)} URLs")
+            except Exception as e:
+                print(f"    Error: {e}")
+
+    return all_urls
+
+
+def classify_urls(urls):
+    """Filter URLs to likely product pages (single-segment paths)."""
+    skip_prefixes = [
+        "impressum", "agb", "datenschutz", "kontakt", "widerrufs",
+        "versand", "abkuerz", "zertifikat", "wichtige-hinweise",
+        "muster-", "gutscheine", "kalender", "flyer", "katalog",
+        "sommer-herbst", "unsere-hoefe", "bestellschein",
+        "dreschflegel-news", "termine", "rezepte", "anbautipps",
+        "tipps-zur", "gartentelefon", "gartenfreude", "buecher",
+        "navigation", "vielfalt", "sut20", "saatgut",
+        "neuheiten", "kennenlernangebote", "sut25", "vielfalt25",
+        "saatgut-vielfalt", "saat",
+    ]
+    candidates = []
+    for url in urls:
+        url = url.rstrip("/")
+        path = url.replace("https://dreschflegel-saatgut.de/", "").replace(
+            "https://www.dreschflegel-saatgut.de/", ""
+        )
+        if not path or "/" in path:
+            continue
+        if any(path == p or path.startswith(p) for p in skip_prefixes):
+            continue
+        candidates.append(url)
+    return candidates
+
+
+def parse_product_page(html_content):
+    """Extract product data from a Dreschflegel product page."""
+    if not html_content or 'class="botname"' not in html_content:
+        return None
+
+    result = {}
+
+    m = re.search(r"<h1>(.*?)</h1>", html_content)
+    if m:
+        result["name"] = html_mod.unescape(m.group(1).strip())
+
+    m = re.search(r'<div class="botname">\s*(.*?)\s*</div>', html_content, re.DOTALL)
+    if m:
+        result["botanical_name"] = html_mod.unescape(m.group(1).strip())
+
+    m = re.search(
+        r'class="product-detail-ordernumber"[^>]*>\s*(\d+)',
+        html_content,
+        re.DOTALL,
+    )
+    if m:
+        result["article_number"] = m.group(1)
+
+    m = re.search(r'itemprop="price"[^>]*content="([^"]+)"', html_content)
+    if m:
+        try:
+            result["price"] = float(m.group(1))
+        except ValueError:
+            pass
+
+    m = re.search(
+        r"product-detail-description-text.*?<p>(.*?)</p>",
+        html_content,
+        re.DOTALL,
+    )
+    if m:
+        desc = re.sub(r"<[^>]+>", "", m.group(1).strip())
+        desc = html_mod.unescape(desc).strip()
+        if desc:
+            result["description"] = desc
+
+    m = re.search(r"Inhalt reicht f[üu]r:</th><td>\s*(.*?)\s*</td>", html_content)
+    if m:
+        result["pack_info"] = html_mod.unescape(m.group(1).strip())
+
+    return result if "name" in result and "botanical_name" in result else None
+
+
+def scrape_all_products(candidate_urls):
+    """Scrape product pages, using cache for already-scraped URLs."""
+    # Load cache
+    cache = {}
+    if os.path.exists(CACHE_FILE):
+        with open(CACHE_FILE, "r") as f:
+            cache = json.load(f)
+        print(f"  Loaded {len(cache)} cached products")
+
+    products = []
+    to_fetch = [u for u in candidate_urls if u not in cache]
+    already_cached = [u for u in candidate_urls if u in cache]
+
+    # Add cached products
+    for u in already_cached:
+        if cache[u]:  # None means "not a product page"
+            products.append(cache[u])
+
+    cached_products = len(products)
+    cached_non_products = len(already_cached) - cached_products
+    print(f"  {cached_products} products from cache, "
+          f"{cached_non_products} non-products cached, "
+          f"{len(to_fetch)} to fetch")
+
+    for i, url in enumerate(to_fetch):
+        if (i + 1) % 50 == 0 or i == 0:
+            print(f"  Fetching {i + 1}/{len(to_fetch)}...")
+
+        time.sleep(DELAY)
+        html_content = fetch_page(url)
+        if not html_content:
+            stats["fetch_errors"] += 1
+            cache[url] = None
+            continue
+
+        product = parse_product_page(html_content)
+        if product:
+            product["url"] = url
+            products.append(product)
+            cache[url] = product
+            stats["products_scraped"] += 1
+        else:
+            cache[url] = None
+            stats["not_product_pages"] += 1
+
+        # Save cache periodically
+        if (i + 1) % 100 == 0:
+            with open(CACHE_FILE, "w") as f:
+                json.dump(cache, f)
+
+    # Final cache save
+    with open(CACHE_FILE, "w") as f:
+        json.dump(cache, f)
+
+    print(f"  Total: {len(products)} products ({stats['products_scraped']} newly scraped)")
+    return products
+
+
+def paginated_get(path):
+    """Fetch all pages from a paginated API endpoint."""
+    all_items = []
+    page = 1
+    while True:
+        resp = api_request("GET", f"{path}{'&' if '?' in path else '?'}per_page=100&page={page}")
+        if not resp or "data" not in resp or not resp["data"]:
+            break
+        all_items.extend(resp["data"])
+        if len(resp["data"]) < 100:
+            break
+        page += 1
+    return all_items
+
+
+def load_api_data():
+    """Load all species, families, cultivars from HerbAPI."""
+    print("Loading HerbAPI data...")
+
+    families = {}
+    for f in paginated_get("/families"):
+        families[f["name_scientific"].lower()] = f
+    print(f"  {len(families)} families")
+
+    species = {}
+    for s in paginated_get("/species"):
+        species[s["name_scientific"].lower().strip()] = s
+    print(f"  {len(species)} species")
+
+    cultivars = {}
+    for c in paginated_get("/cultivars"):
+        key = (c["species_id"], c["name"].lower().strip())
+        cultivars[key] = c
+    print(f"  {len(cultivars)} cultivars")
+
+    return families, species, cultivars
+
+
+def ensure_supplier():
+    """Create or find the Dreschflegel supplier."""
+    resp = api_request("GET", "/suppliers")
+    if resp:
+        for s in resp:
+            if "dreschflegel" in s["name"].lower():
+                print(f"  Supplier exists: {s['name']} ({s['id']})")
+                return s
+    data = {
+        "name": "Dreschflegel",
+        "url": "https://www.dreschflegel-saatgut.de",
+        "country": "DE",
+        "is_organic": True,
+        "is_demeter": False,
+        "notes": "German organic seed cooperative, open-pollinated heritage varieties",
+    }
+    resp = api_request("POST", "/suppliers", data)
+    if resp:
+        print(f"  Created supplier: {resp['name']} ({resp['id']})")
+    return resp
+
+
+# Genus → family mapping for species creation.
+# find_or_create_species() looks the genus up here (case-sensitive, exact
+# spelling) to decide which family a newly created species belongs to;
+# a genus missing from this table causes the product to be skipped.
+# NOTE(review): "Sambucus", "Nigella" and "Tanacetum" each appear twice in
+# this literal with identical values; the later entry silently wins.
+GENUS_TO_FAMILY = {
+    # Asteraceae
+    "Achillea": "Asteraceae", "Artemisia": "Asteraceae", "Aster": "Asteraceae",
+    "Calendula": "Asteraceae", "Carthamus": "Asteraceae", "Centaurea": "Asteraceae",
+    "Chamomilla": "Asteraceae", "Chrysanthemum": "Asteraceae", "Cichorium": "Asteraceae",
+    "Cnicus": "Asteraceae", "Cosmos": "Asteraceae", "Cynara": "Asteraceae",
+    "Dahlia": "Asteraceae", "Dimorphotheca": "Asteraceae", "Echinacea": "Asteraceae",
+    "Echinops": "Asteraceae", "Erigeron": "Asteraceae", "Eupatorium": "Asteraceae",
+    "Gaillardia": "Asteraceae", "Helenium": "Asteraceae", "Helianthus": "Asteraceae",
+    "Helichrysum": "Asteraceae", "Inula": "Asteraceae", "Lactuca": "Asteraceae",
+    "Leontodon": "Asteraceae", "Matricaria": "Asteraceae", "Onopordum": "Asteraceae",
+    "Petasites": "Asteraceae", "Rudbeckia": "Asteraceae", "Scorzonera": "Asteraceae",
+    "Silphium": "Asteraceae", "Solidago": "Asteraceae", "Tagetes": "Asteraceae",
+    "Tanacetum": "Asteraceae", "Taraxacum": "Asteraceae", "Telekia": "Asteraceae",
+    "Tragopogon": "Asteraceae", "Tussilago": "Asteraceae", "Zinnia": "Asteraceae",
+    "Xerochrysum": "Asteraceae", "Coreopsis": "Asteraceae",
+    # Solanaceae
+    "Capsicum": "Solanaceae", "Lycium": "Solanaceae", "Nicotiana": "Solanaceae",
+    "Physalis": "Solanaceae", "Solanum": "Solanaceae", "Atropa": "Solanaceae",
+    # Cucurbitaceae
+    "Citrullus": "Cucurbitaceae", "Cucumis": "Cucurbitaceae", "Cucurbita": "Cucurbitaceae",
+    "Luffa": "Cucurbitaceae", "Momordica": "Cucurbitaceae",
+    # Fabaceae
+    "Cicer": "Fabaceae", "Glycine": "Fabaceae", "Lathyrus": "Fabaceae",
+    "Lens": "Fabaceae", "Lupinus": "Fabaceae", "Medicago": "Fabaceae",
+    "Phaseolus": "Fabaceae", "Pisum": "Fabaceae", "Trifolium": "Fabaceae",
+    "Trigonella": "Fabaceae", "Vicia": "Fabaceae", "Vigna": "Fabaceae",
+    "Caragana": "Fabaceae", "Cytisus": "Fabaceae", "Robinia": "Fabaceae",
+    # Brassicaceae
+    "Armoracia": "Brassicaceae", "Barbarea": "Brassicaceae", "Brassica": "Brassicaceae",
+    "Crambe": "Brassicaceae", "Eruca": "Brassicaceae", "Hesperis": "Brassicaceae",
+    "Iberis": "Brassicaceae", "Isatis": "Brassicaceae", "Lepidium": "Brassicaceae",
+    "Lunaria": "Brassicaceae", "Raphanus": "Brassicaceae", "Sinapis": "Brassicaceae",
+    "Nasturtium": "Brassicaceae", "Diplotaxis": "Brassicaceae",
+    # Apiaceae
+    "Anethum": "Apiaceae", "Anthriscus": "Apiaceae", "Apium": "Apiaceae",
+    "Carum": "Apiaceae", "Chaerophyllum": "Apiaceae", "Coriandrum": "Apiaceae",
+    "Daucus": "Apiaceae", "Foeniculum": "Apiaceae", "Levisticum": "Apiaceae",
+    "Myrrhis": "Apiaceae", "Pastinaca": "Apiaceae", "Petroselinum": "Apiaceae",
+    "Pimpinella": "Apiaceae", "Angelica": "Apiaceae", "Aegopodium": "Apiaceae",
+    # Lamiaceae
+    "Agastache": "Lamiaceae", "Ajuga": "Lamiaceae", "Dracocephalum": "Lamiaceae",
+    "Elsholtzia": "Lamiaceae", "Hyssopus": "Lamiaceae", "Lavandula": "Lamiaceae",
+    "Melissa": "Lamiaceae", "Mentha": "Lamiaceae", "Monarda": "Lamiaceae",
+    "Nepeta": "Lamiaceae", "Ocimum": "Lamiaceae", "Origanum": "Lamiaceae",
+    "Perilla": "Lamiaceae", "Rosmarinus": "Lamiaceae", "Salvia": "Lamiaceae",
+    "Satureja": "Lamiaceae", "Stachys": "Lamiaceae", "Thymus": "Lamiaceae",
+    # Amaryllidaceae / Alliaceae
+    "Allium": "Amaryllidaceae",
+    # Poaceae
+    "Avena": "Poaceae", "Hordeum": "Poaceae", "Panicum": "Poaceae",
+    "Secale": "Poaceae", "Sorghum": "Poaceae", "Triticum": "Poaceae",
+    "Zea": "Poaceae", "Setaria": "Poaceae",
+    # Chenopodiaceae
+    "Atriplex": "Chenopodiaceae", "Beta": "Chenopodiaceae",
+    "Chenopodium": "Chenopodiaceae", "Spinacia": "Chenopodiaceae",
+    # Rosaceae
+    "Filipendula": "Rosaceae", "Fragaria": "Rosaceae", "Malus": "Rosaceae",
+    "Prunus": "Rosaceae", "Pyrus": "Rosaceae", "Rosa": "Rosaceae",
+    "Rubus": "Rosaceae", "Sanguisorba": "Rosaceae", "Sorbus": "Rosaceae",
+    "Waldsteinia": "Rosaceae",
+    # Boraginaceae
+    "Borago": "Boraginaceae", "Phacelia": "Boraginaceae", "Symphytum": "Boraginaceae",
+    "Pulmonaria": "Boraginaceae", "Myosotis": "Boraginaceae",
+    # Malvaceae
+    "Alcea": "Malvaceae", "Althaea": "Malvaceae", "Malva": "Malvaceae",
+    "Hibiscus": "Malvaceae", "Lavatera": "Malvaceae", "Abelmoschus": "Malvaceae",
+    # Polygonaceae
+    "Fagopyrum": "Polygonaceae", "Rheum": "Polygonaceae", "Rumex": "Polygonaceae",
+    # Caryophyllaceae
+    "Agrostemma": "Caryophyllaceae", "Dianthus": "Caryophyllaceae",
+    "Gypsophila": "Caryophyllaceae", "Lychnis": "Caryophyllaceae",
+    "Saponaria": "Caryophyllaceae", "Silene": "Caryophyllaceae",
+    # Tropaeolaceae
+    "Tropaeolum": "Tropaeolaceae",
+    # Papaveraceae
+    "Eschscholzia": "Papaveraceae", "Papaver": "Papaveraceae",
+    "Meconopsis": "Papaveraceae",
+    # Caprifoliaceae (the Sambucus entry on the last line maps to Adoxaceae
+    # and is repeated in the Adoxaceae section below)
+    "Valerianella": "Caprifoliaceae", "Valeriana": "Caprifoliaceae",
+    "Lonicera": "Caprifoliaceae", "Sambucus": "Adoxaceae",
+    # Plantaginaceae
+    "Digitalis": "Plantaginaceae", "Plantago": "Plantaginaceae",
+    "Antirrhinum": "Plantaginaceae", "Linaria": "Plantaginaceae",
+    # Violaceae
+    "Viola": "Violaceae",
+    # Ranunculaceae
+    "Aquilegia": "Ranunculaceae", "Consolida": "Ranunculaceae",
+    "Delphinium": "Ranunculaceae", "Nigella": "Ranunculaceae",
+    # Linaceae
+    "Linum": "Linaceae",
+    # Convolvulaceae
+    "Ipomoea": "Convolvulaceae", "Convolvulus": "Convolvulaceae",
+    # Portulacaceae / Montiaceae
+    "Claytonia": "Montiaceae", "Portulaca": "Portulacaceae",
+    # Amaranthaceae
+    "Amaranthus": "Amaranthaceae", "Celosia": "Amaranthaceae",
+    "Gomphrena": "Amaranthaceae",
+    # Asparagaceae
+    "Asparagus": "Asparagaceae",
+    # Resedaceae
+    "Reseda": "Resedaceae",
+    # Balsaminaceae
+    "Impatiens": "Balsaminaceae",
+    # Hydrangeaceae
+    "Hydrangea": "Hydrangeaceae",
+    # Campanulaceae
+    "Campanula": "Campanulaceae", "Phyteuma": "Campanulaceae",
+    # Scrophulariaceae
+    "Verbascum": "Scrophulariaceae",
+    # Verbenaceae
+    "Verbena": "Verbenaceae",
+    # Onagraceae
+    "Oenothera": "Onagraceae", "Clarkia": "Onagraceae",
+    # Cucurbitaceae extras
+    "Benincasa": "Cucurbitaceae", "Lagenaria": "Cucurbitaceae",
+    # Hypericaceae
+    "Hypericum": "Hypericaceae",
+    # Adoxaceae (duplicate of the Sambucus entry above, same value)
+    "Sambucus": "Adoxaceae",
+    # Others ("Nigella" and "Tanacetum" below repeat earlier entries)
+    "Nigella": "Ranunculaceae",
+    "Dipsacus": "Caprifoliaceae",
+    "Knautia": "Caprifoliaceae",
+    "Scabiosa": "Caprifoliaceae",
+    "Succisa": "Caprifoliaceae",
+    "Asclepias": "Apocynaceae",
+    "Cynoglossum": "Boraginaceae",
+    "Echium": "Boraginaceae",
+    "Anchusa": "Boraginaceae",
+    "Lithospermum": "Boraginaceae",
+    "Tanacetum": "Asteraceae",
+    "Onobrychis": "Fabaceae",
+    "Ornithopus": "Fabaceae",
+    "Lotus": "Fabaceae",
+    "Anthyllis": "Fabaceae",
+    "Melilotus": "Fabaceae",
+    "Galega": "Fabaceae",
+    "Lespedeza": "Fabaceae",
+    "Arachis": "Fabaceae",
+    "Senna": "Fabaceae",
+    # Additional genera found in Dreschflegel catalog
+    # NOTE(review): "Nicotinia" below looks like a misspelling of "Nicotiana";
+    # presumably kept to match the catalog's own spelling - confirm.
+    "Acmella": "Asteraceae", "Adonis": "Ranunculaceae", "Ageratum": "Asteraceae",
+    "Amethystia": "Lamiaceae", "Anacyclus": "Asteraceae", "Anthemis": "Asteraceae",
+    "Asphodeline": "Asphodelaceae", "Brachyscome": "Asteraceae", "Bupleurum": "Apiaceae",
+    "Callistephus": "Asteraceae", "Camelina": "Brassicaceae", "Cardaria": "Brassicaceae",
+    "Cardiospermum": "Sapindaceae", "Cerinthe": "Boraginaceae",
+    "Chamaemelum": "Asteraceae", "Cistanthe": "Montiaceae", "Cleome": "Cleomaceae",
+    "Cochlearia": "Brassicaceae", "Codonopsis": "Campanulaceae", "Coix": "Poaceae",
+    "Cyperus": "Cyperaceae", "Digitaria": "Poaceae", "Dorotheanthus": "Aizoaceae",
+    "Emilia": "Asteraceae", "Eragrostis": "Poaceae", "Erysimum": "Brassicaceae",
+    "Euphorbia": "Euphorbiaceae", "Gentiana": "Gentianaceae", "Geum": "Rosaceae",
+    "Gilia": "Polemoniaceae", "Godetia": "Onagraceae", "Helipterum": "Asteraceae",
+    "Lallemantia": "Lamiaceae", "Leonurus": "Lamiaceae", "Leuzea": "Asteraceae",
+    "Liatris": "Asteraceae", "Malope": "Malvaceae", "Marrubium": "Lamiaceae",
+    "Matthiola": "Brassicaceae", "Maurandya": "Plantaginaceae",
+    "Melothria": "Cucurbitaceae", "Meum": "Apiaceae", "Nemesia": "Scrophulariaceae",
+    "Nicandra": "Solanaceae", "Nicotinia": "Solanaceae", "Oenanthe": "Apiaceae",
+    "Oxalis": "Oxalidaceae", "Pennisetum": "Poaceae", "Penstemon": "Plantaginaceae",
+    "Phlox": "Polemoniaceae", "Polemonium": "Polemoniaceae",
+    "Porophyllum": "Asteraceae", "Primula": "Primulaceae", "Psyllium": "Plantaginaceae",
+    "Quamoclit": "Convolvulaceae", "Ruta": "Rutaceae", "Salpiglossis": "Solanaceae",
+    "Sanvitalia": "Asteraceae", "Sideritis": "Lamiaceae", "Silybum": "Asteraceae",
+    "Talinum": "Talinaceae", "Thelesperma": "Asteraceae", "Vaccaria": "Caryophyllaceae",
+    "Veronica": "Plantaginaceae", "Xeranthemum": "Asteraceae",
+}
+
+
+def normalize_species_name(botanical_name):
+    """Normalize botanical name to 'Genus species' for matching.
+    Handles var., subsp., ssp., hybrids etc.
+    """
+    name = botanical_name.strip()
+    parts = name.split()
+    if len(parts) < 2:
+        return None, None
+
+    genus = parts[0]
+    # Handle 'Genus x species' (hybrid notation)
+    if parts[1] == "x" and len(parts) >= 3:
+        species = f"x {parts[2]}"
+    elif parts[1] in ("var.", "subsp.", "ssp.", "spec.", "sp."):
+        # Only genus level - can't match to species
+        return genus, None
+    else:
+        species = parts[1]
+
+    return genus, species
+
+
+def find_species(botanical_name, species_cache):
+    """Find existing species matching a botanical name.
+    Tries exact match, then genus+species without var/subsp.
+    """
+    genus, sp = normalize_species_name(botanical_name)
+    if not genus:
+        return None
+
+    if sp:
+        # Try exact genus+species
+        search_key = f"{genus} {sp}".lower()
+        if search_key in species_cache:
+            return species_cache[search_key]
+
+    # Try all species with same genus
+    genus_lower = genus.lower()
+    matches = {k: v for k, v in species_cache.items() if k.startswith(genus_lower + " ")}
+    if len(matches) == 1:
+        # Only one species in this genus - use it
+        return list(matches.values())[0]
+
+    return None
+
+
+def find_or_create_species(botanical_name, families, species_cache):
+    """Find or create a species from a botanical name."""
+    # Try to find existing
+    sp = find_species(botanical_name, species_cache)
+    if sp:
+        return sp
+
+    genus, species_epithet = normalize_species_name(botanical_name)
+    if not genus or not species_epithet:
+        stats["species_no_epithet"] += 1
+        return None
+
+    sci_name = f"{genus} {species_epithet}"
+
+    # Check cache again with normalized name
+    if sci_name.lower() in species_cache:
+        return species_cache[sci_name.lower()]
+
+    # Need to create - find the family
+    family_name = GENUS_TO_FAMILY.get(genus)
+    if not family_name:
+        stats["species_no_family"] += 1
+        print(f"    [SKIP] No family mapping for genus: {genus} ({botanical_name})")
+        return None
+
+    # Find or create the family
+    family = families.get(family_name.lower())
+    if not family:
+        print(f"    Creating family: {family_name}")
+        resp = api_request("POST", "/families", {"name_scientific": family_name})
+        if resp:
+            families[family_name.lower()] = resp
+            family = resp
+            stats["families_created"] += 1
+        else:
+            # May already exist (duplicate from previous run) - reload
+            for f in paginated_get("/families"):
+                if f["name_scientific"].lower() == family_name.lower():
+                    families[family_name.lower()] = f
+                    family = f
+                    break
+            if not family:
+                print(f"    [SKIP] Cannot create family: {family_name}")
+                return None
+
+    # Create species
+    print(f"    Creating species: {sci_name} (family: {family_name})")
+    resp = api_request("POST", "/species", {
+        "name_scientific": sci_name,
+        "family_id": family["id"],
+    })
+    if resp:
+        species_cache[sci_name.lower()] = resp
+        stats["species_created"] += 1
+        return resp
+    else:
+        # May already exist - try to find it
+        time.sleep(0.1)
+        for s in paginated_get("/species"):
+            if s["name_scientific"].lower() == sci_name.lower():
+                species_cache[sci_name.lower()] = s
+                return s
+        return None
+
+
+def extract_cultivar_name(product_name):
+    """Extract the cultivar/variety name from the full product name."""
+    name = product_name.strip()
+
+    # Common German crop type prefixes to strip (longest first)
+    prefixes = [
+        # Tomatoes
+        "Salattomate", "Stabtomate", "Buschtomate", "Cocktailtomate",
+        "Cherrytomate", "Fleischtomate", "Wildtomate", "Balkontomate",
+        "Flaschentomate", "Eitomate", "Datteltomate", "Tomate",
+        # Lettuce
+        "Winterkopfsalat", "Kopfsalat", "Bataviasalat", "Eissalat",
+        "Blattsalat", "Schnittsalat", "Pflücksalat", "Römersalat",
+        "Spargelsalat", "Romanasalat",
+        # Beans
+        "Buschbohne", "Stangenbohne", "Feuerbohne", "Puffbohne",
+        "Prunkbohne",
+        # Peas
+        "Markerbse", "Zuckererbse", "Palerbse", "Schalerbse",
+        "Knackerbse", "Kapuzinererbse",
+        # Cucumbers
+        "Einlegegurke", "Salatgurke", "Schälgurke", "Landgurke",
+        "Freilandgurke",
+        # Squash
+        "Hokkaidokürbis", "Butternutkürbis", "Speisekürbis",
+        "Riesenkürbis", "Zierkürbis", "Muskatkürbis", "Ölkürbis",
+        # Melon
+        "Wassermelone", "Zuckermelone",
+        # Peppers
+        "Gemüsepaprika", "Blockpaprika", "Spitzpaprika", "Tomatenpaprika",
+        "Snackpaprika", "Peperoni", "Chili",
+        # Brassicas
+        "Kohlrabi", "Brokkoli", "Blumenkohl", "Grünkohl", "Rosenkohl",
+        "Wirsing", "Rotkohl", "Weißkohl", "Spitzkohl", "Palmkohl",
+        "Chinakohl", "Pak Choi", "Markstammkohl",
+        # Root veg
+        "Möhre", "Karotte", "Pastinake", "Rote Bete", "Rote Beete",
+        "Herbstrübe", "Mairübe", "Stoppelrübe", "Schwarzer Rettich",
+        "Steckrübe", "Knollensellerie", "Petersilienwurzel",
+        "Rettich", "Radieschen",
+        # Onions
+        "Winterheckenzwiebel", "Lauchzwiebel", "Speisezwiebel",
+        "Schalotte", "Wintersteckzwiebel", "Zwiebel",
+        # Herbs
+        "Rotes Basilikum", "Buschbasilikum", "Zitronen-Basilikum",
+        "Thai-Basilikum", "Wildes Basilikum", "Zimtbasilikum",
+        "Basilikum", "Schnittknoblauch",
+        # Grains
+        "Sommerweizen", "Winterweizen", "Sommerroggen", "Winterroggen",
+        "Nackthafer", "Nacktgerste", "Dinkel", "Emmer", "Einkorn",
+        # Misc
+        "Zuckermais", "Popcornmais",
+        "Salattomate", "Zucchini",
+    ]
+
+    for prefix in sorted(prefixes, key=len, reverse=True):
+        if name.startswith(prefix + " "):
+            return name[len(prefix):].strip()
+
+    return name
+
+
+def get_existing_supplier_links(cultivar_id, supplier_id):
+    """Check if a cultivar-supplier link already exists."""
+    resp = api_request("GET", f"/cultivars/{cultivar_id}/suppliers")
+    if resp:
+        for link in resp:
+            if link["supplier_id"] == supplier_id:
+                return True
+    return False
+
+
+def main():
+    """Run the full Dreschflegel import: supplier, sitemap, scrape, import.
+
+    Exits the process with status 1 when the supplier cannot be set up or
+    the sitemap yields no URLs. Progress and the final counters from the
+    module-level ``stats`` are printed; nothing is returned.
+    """
+    print("=" * 60)
+    print("Dreschflegel Seed Catalog Scraper for HerbAPI (v2)")
+    print("=" * 60)
+
+    # Step 1: Supplier
+    print("\n[1] Setting up supplier...")
+    supplier = ensure_supplier()
+    if not supplier:
+        print("FATAL: Could not create/find supplier")
+        sys.exit(1)
+    supplier_id = supplier["id"]
+
+    # Step 2: Load API data
+    print("\n[2] Loading existing HerbAPI data...")
+    families, species_cache, cultivar_cache = load_api_data()
+
+    # Step 3: Get product URLs
+    print("\n[3] Fetching sitemap...")
+    all_urls = get_sitemap_urls()
+    if not all_urls:
+        print("FATAL: Could not fetch sitemap")
+        sys.exit(1)
+    candidate_urls = classify_urls(all_urls)
+    print(f"  {len(all_urls)} total URLs, {len(candidate_urls)} product candidates")
+
+    # Step 4: Scrape
+    print(f"\n[4] Scraping product pages...")
+    products = scrape_all_products(candidate_urls)
+
+    # Step 5: Import
+    print(f"\n[5] Importing {len(products)} products into HerbAPI...")
+
+    for i, product in enumerate(products):
+        if (i + 1) % 50 == 0:
+            print(f"  Processing {i + 1}/{len(products)}...")
+
+        botanical = product.get("botanical_name", "")
+        if not botanical:
+            stats["no_botanical"] += 1
+            continue
+
+        # Find or create species
+        sp = find_or_create_species(botanical, families, species_cache)
+        if not sp:
+            stats["species_not_matched"] += 1
+            continue
+
+        species_id = sp["id"]
+        cultivar_name = extract_cultivar_name(product["name"])
+
+        # Check if cultivar already exists
+        # (same key shape as built by load_api_data: species id + lower-cased name)
+        cv_key = (species_id, cultivar_name.lower().strip())
+        if cv_key in cultivar_cache:
+            cv = cultivar_cache[cv_key]
+            stats["cultivars_existing"] += 1
+        else:
+            cv_data = {
+                "species_id": species_id,
+                "name": cultivar_name,
+                "is_organic": True,
+            }
+            if product.get("description"):
+                cv_data["description"] = product["description"]
+
+            cv = api_request("POST", "/cultivars", cv_data)
+            if cv:
+                cultivar_cache[cv_key] = cv
+                stats["cultivars_created"] += 1
+            else:
+                # Might already exist from previous run - try to find it
+                # (api_request returns None for duplicates as well as errors)
+                found = False
+                for c in paginated_get(f"/cultivars?species_id={species_id}"):
+                    if c["name"].lower().strip() == cultivar_name.lower().strip():
+                        cultivar_cache[cv_key] = c
+                        cv = c
+                        stats["cultivars_existing"] += 1
+                        found = True
+                        break
+                if not found:
+                    stats["cultivar_create_errors"] += 1
+                    continue
+
+        # Link to supplier (check first for idempotency)
+        if get_existing_supplier_links(cv["id"], supplier_id):
+            stats["supplier_links_existing"] += 1
+            continue
+
+        link_data = {
+            "supplier_id": supplier_id,
+            "article_number": product.get("article_number", ""),
+            "product_url": product.get("url", ""),
+            "price_eur": product.get("price"),
+        }
+        # Pack size/unit are parsed from text of the form "ca. <number> <unit>".
+        pack_info = product.get("pack_info", "")
+        if pack_info:
+            m = re.search(r"ca\.?\s*(\d+)\s*(Pfl|Korn|Samen|g|kg|ml)", pack_info)
+            if m:
+                link_data["pack_size"] = float(m.group(1))
+                # Units without an entry here (g, kg, ml) are passed through unchanged.
+                unit_map = {"Pfl": "Pflanzen", "Korn": "Korn", "Samen": "Korn"}
+                link_data["pack_unit"] = unit_map.get(m.group(2), m.group(2))
+
+        resp = api_request("POST", f"/cultivars/{cv['id']}/suppliers", link_data)
+        if resp:
+            stats["supplier_links_created"] += 1
+        else:
+            stats["supplier_link_errors"] += 1
+
+    # Summary
+    print("\n" + "=" * 60)
+    print("RESULTS")
+    print("=" * 60)
+    for key, val in sorted(stats.items()):
+        print(f"  {key}: {val}")
+    print(f"\n  Total species in DB: {len(species_cache)}")
+    print(f"  Total cultivars tracked: {len(cultivar_cache)}")
+
+
+# Script entry point; main() calls sys.exit(1) itself on fatal errors.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,380 @@
+#!/usr/bin/env python3
+"""Scrape Magic Garden Seeds product pages and update herbapi database."""
+
+import subprocess
+import re
+import time
+import os
+import sys
+
+# psql invocation shared by every query. -t (tuples only) and -A (unaligned)
+# drop headers and padding; -F| makes '|' the column separator that the row
+# parsers in this script split on.
+DB_CMD = [
+    'psql', '-h', '10.31.3.90', '-U', 'herbapi', 'herbapi',
+    '-t', '-A', '-F|'
+]
+# Environment handed to psql: the current environment plus PGPASSWORD.
+# NOTE(review): the database password is committed in source. It can be
+# overridden through HERBAPI_DB_PASSWORD; the literal fallback only keeps
+# existing invocations working and should be removed once the credential has
+# been rotated and moved to the environment or a secret store.
+DB_ENV = {
+    **os.environ,
+    'PGPASSWORD': os.environ.get(
+        'HERBAPI_DB_PASSWORD', '_6Qo_jEFhE9LZOEbwLynEWoLbc6B4Ipj'
+    ),
+}
+
+# Lowercase English month name -> month number (1-12), used by parse_months().
+MONTH_MAP = {
+    'january': 1, 'february': 2, 'march': 3, 'april': 4,
+    'may': 5, 'june': 6, 'july': 7, 'august': 8,
+    'september': 9, 'october': 10, 'november': 11, 'december': 12,
+}
+
+
+def run_sql(sql):
+    """Run one SQL statement through psql and return its stripped stdout.
+
+    Args:
+        sql: SQL text handed to psql via ``-c``.
+
+    Returns:
+        psql's standard output with surrounding whitespace removed. With the
+        flags in DB_CMD that is one '|'-separated line per result row.
+
+    Raises:
+        RuntimeError: if psql exits with a non-zero status (for example when
+            the connection fails or the statement is rejected). Without this
+            check a failed statement looked exactly like an empty result, so
+            a failed UPDATE was reported as a success by the caller.
+    """
+    result = subprocess.run(
+        DB_CMD + ['-c', sql],
+        capture_output=True, text=True, env=DB_ENV
+    )
+    if result.returncode != 0:
+        raise RuntimeError(
+            f"psql exited with status {result.returncode}: {result.stderr.strip()}"
+        )
+    return result.stdout.strip()
+
+
+def fetch_page(url):
+    """Download *url* with curl and return whatever curl wrote to stdout.
+
+    curl runs silently (-s), follows redirects (-L) and gives up after
+    15 seconds (--max-time). Its exit status is not inspected, so a failed
+    download simply yields the (possibly empty) captured output.
+    """
+    curl_cmd = ['curl', '-sL', '--max-time', '15', url]
+    completed = subprocess.run(curl_cmd, capture_output=True, text=True)
+    return completed.stdout
+
+
+def parse_months(text):
+    """Return the sorted month numbers whose English names occur in *text*.
+
+    Matching is case-insensitive and substring based. Names are tried longest
+    first and every hit is removed from the working copy before the next name
+    is tried. Returns None for empty input or when no month name is found.
+    """
+    if not text:
+        return None
+    remaining = text.lower().strip()
+    found = set()
+    # reverse=True keeps the sort stable, so equally long names stay in
+    # MONTH_MAP order.
+    for name in sorted(MONTH_MAP, key=len, reverse=True):
+        if name not in remaining:
+            continue
+        found.add(MONTH_MAP[name])
+        remaining = remaining.replace(name, '')
+    return sorted(found) or None
+
+
+def parse_depth(text):
+    """Extract a sowing depth in centimetres from free text.
+
+    A range such as "1-2 cm" yields the midpoint rounded to one decimal, a
+    single value such as "0,5 cm" yields that value. Both '.' and ',' are
+    accepted as decimal separators. Returns None when *text* is empty or
+    contains no "<number> cm" expression.
+    """
+    if not text:
+        return None
+
+    def as_number(raw):
+        return float(raw.replace(',', '.'))
+
+    span = re.search(r'(\d+(?:[.,]\d+)?)\s*-\s*(\d+(?:[.,]\d+)?)\s*cm', text)
+    if span is not None:
+        shallow, deep = (as_number(part) for part in span.groups())
+        return round((shallow + deep) / 2, 1)
+
+    single = re.search(r'(\d+(?:[.,]\d+)?)\s*cm', text)
+    return as_number(single.group(1)) if single is not None else None
+
+
+def parse_spacing(text):
+    """Parse a planting distance. Returns (row_spacing, plant_spacing) in cm.
+
+    Recognised forms, tried in this order:
+      * "X x Y cm" (also with "×"): X becomes the plant spacing and Y the
+        row spacing, i.e. the result is (Y, X);
+      * "X - Y cm" range: no row spacing, plant spacing is the midpoint
+        rounded to one decimal;
+      * "X cm": no row spacing, plant spacing is X.
+
+    Numbers may use '.' or ',' as decimal separator and ranges may use a
+    hyphen or an en dash, matching what parse_depth() accepts for decimals.
+    Previously "2,5 cm" was read as 5.0 and "20–30 cm" as 30.0.
+
+    Returns (None, None) for empty input or when nothing matches.
+    """
+    if not text:
+        return None, None
+    text = text.lower().strip()
+
+    def to_float(raw):
+        return float(raw.replace(',', '.'))
+
+    # "X x Y cm"
+    match = re.search(r'(\d+(?:[.,]\d+)?)\s*(?:x|×)\s*(\d+(?:[.,]\d+)?)\s*cm', text)
+    if match:
+        return to_float(match.group(2)), to_float(match.group(1))
+    # "X - Y cm" range -> average as plant spacing
+    match = re.search(r'(\d+(?:[.,]\d+)?)\s*[-–]\s*(\d+(?:[.,]\d+)?)\s*cm', text)
+    if match:
+        low, high = to_float(match.group(1)), to_float(match.group(2))
+        return None, round((low + high) / 2, 1)
+    # Single value
+    match = re.search(r'(\d+(?:[.,]\d+)?)\s*cm', text)
+    if match:
+        return None, to_float(match.group(1))
+    return None, None
+
+
+def parse_germination_days(text):
+    """Convert a germination-time phrase into a whole number of days.
+
+    Week expressions are tried before day expressions and ranges before
+    single values. A range yields its midpoint (converted to days for weeks)
+    passed through round(); a single value is converted directly. Returns
+    None for empty input or when no week/day expression is present.
+    """
+    if not text:
+        return None
+    lowered = text.lower()
+    # (pattern, days per matched unit), in priority order.
+    attempts = (
+        (r'(\d+)\s*-\s*(\d+)\s*weeks?', 7),
+        (r'(\d+)\s*weeks?', 7),
+        (r'(\d+)\s*-\s*(\d+)\s*days?', 1),
+        (r'(\d+)\s*days?', 1),
+    )
+    for pattern, days_per_unit in attempts:
+        hit = re.search(pattern, lowered)
+        if hit is None:
+            continue
+        numbers = [int(group) for group in hit.groups()]
+        if len(numbers) == 2:
+            return int(round((numbers[0] + numbers[1]) / 2 * days_per_unit))
+        return numbers[0] * days_per_unit
+    return None
+
+
+def parse_germ_temp(text):
+    """Extract a germination temperature from text containing a '°' sign.
+
+    "18-22 °C" yields the midpoint rounded to one decimal, "20°C" yields
+    20.0. Only whole numbers directly followed (optionally after spaces) by
+    '°' are recognised. Returns None for empty input or when nothing matches.
+    """
+    if not text:
+        return None
+    span = re.search(r'(\d+)\s*-\s*(\d+)\s*°', text)
+    if span is not None:
+        coolest, warmest = map(float, span.groups())
+        return round((coolest + warmest) / 2, 1)
+    single = re.search(r'(\d+)\s*°', text)
+    return float(single.group(1)) if single is not None else None
+
+
+def parse_lifecycle(text):
+    """Map a lifecycle label to the boolean stored in the 'perennial' field.
+
+    Returns True when the text mentions "perennial", False when it mentions
+    "annual" or "biennial" (checked only if "perennial" is absent), and None
+    for empty or unrecognised text.
+    """
+    if not text:
+        return None
+    label = text.lower().strip()
+    # Order matters: "perennial" wins over "annual"/"biennial".
+    rules = (
+        (('perennial',), True),
+        (('annual', 'biennial'), False),
+    )
+    for keywords, is_perennial in rules:
+        if any(keyword in label for keyword in keywords):
+            return is_perennial
+    return None
+
+
+def parse_light(text):
+    """Normalise a sunlight description to one of a few canonical labels.
+
+    Possible results: 'full sun to partial shade', 'full sun',
+    'partial shade', 'shade', or, when no keyword is recognised, the
+    lowercased and stripped input itself. Returns None for empty input.
+    """
+    if not text:
+        return None
+    normalized = text.lower().strip()
+    if 'full sun' in normalized:
+        if 'partial' in normalized:
+            return 'full sun to partial shade'
+        return 'full sun'
+    if any(word in normalized for word in ('partial', 'semi', 'half')):
+        return 'partial shade'
+    if 'shade' in normalized:
+        return 'shade'
+    if 'sun' in normalized:
+        return 'full sun'
+    return normalized
+
+
+def extract_data(html):
+    """Extract cultivar attributes from a product page's HTML.
+
+    Consecutive <td> cells are read as (label, value) pairs and the known
+    labels are run through the parse_* helpers. A description is taken from
+    the itemprop="description" block, falling back to the meta description.
+    Returns a dict holding only the fields that could be determined.
+    """
+    data = {}
+
+    # Spec table: strip tags and collapse whitespace in every <td> cell.
+    cell_texts = []
+    for raw_cell in re.findall(r'<td[^>]*>(.*?)</td>', html, re.DOTALL):
+        without_tags = re.sub(r'<[^>]+>', ' ', raw_cell).strip()
+        cell_texts.append(re.sub(r'\s+', ' ', without_tags))
+
+    # Cells 0/1, 2/3, ... form label/value pairs; a trailing odd cell is dropped.
+    specs = {}
+    for label_cell, value_cell in zip(cell_texts[0::2], cell_texts[1::2]):
+        label = label_cell.rstrip(':').strip()
+        value = value_cell.strip()
+        # Labels consisting only of digits/price characters are not spec names.
+        looks_like_price = re.match(r'^[\d,.\s€*]+$', label)
+        if label and value and not looks_like_price:
+            specs[label.lower()] = value
+
+    # Description from itemprop="description"
+    block = re.search(r'itemprop="description">(.*?)</div>\s*</div>\s*</div>', html, re.DOTALL)
+    if block:
+        body = block.group(1)
+        body = re.sub(r'<style[^>]*>.*?</style>', '', body, flags=re.DOTALL)
+        body = re.sub(r'<script[^>]*>.*?</script>', '', body, flags=re.DOTALL)
+        body = re.sub(r'<[^>]+>', ' ', body)
+        body = re.sub(r'\s+', ' ', body).strip()
+        # Cut the text at the first trailing boilerplate section, if any.
+        for marker in ['Other names', 'Additional contact mail', 'Question about']:
+            cut = body.find(marker)
+            if cut > 0:
+                body = body[:cut].strip()
+        if len(body) > 20:
+            data['description'] = body
+
+    if 'description' not in data:
+        meta = re.search(r'<meta[^>]*name="description"[^>]*content="([^"]*)"', html)
+        if meta and len(meta.group(1)) > 20:
+            data['description'] = meta.group(1)
+
+    # Spacing: an explicit "row spacing" entry overrides the row value derived
+    # from "planting distance".
+    if 'planting distance' in specs:
+        row_gap, plant_gap = parse_spacing(specs['planting distance'])
+        if plant_gap:
+            data['plant_spacing_cm'] = plant_gap
+        if row_gap:
+            data['row_spacing_cm'] = row_gap
+
+    if 'row spacing' in specs:
+        row_match = re.search(r'(\d+(?:\.\d+)?)\s*cm', specs['row spacing'])
+        if row_match:
+            data['row_spacing_cm'] = float(row_match.group(1))
+
+    if 'sowing depth' in specs:
+        depth = parse_depth(specs['sowing depth'])
+        if depth is not None:
+            data['planting_depth_cm'] = depth
+
+    # Harvesting months: only the first label that is present is consulted,
+    # even if its text yields no months.
+    harvest_labels = ('harvest time', 'harvesting months', 'flowering months')
+    harvest_label = next((lbl for lbl in harvest_labels if lbl in specs), None)
+    if harvest_label is not None:
+        harvest = parse_months(specs[harvest_label])
+        if harvest:
+            data['harvesting_months'] = harvest
+
+    if 'when to sow outdoors' in specs:
+        outdoor = parse_months(specs['when to sow outdoors'])
+        if outdoor:
+            data['direct_sowing_months'] = outdoor
+
+    # Indoor sowing: the first label that actually yields months wins.
+    for indoor_key in ['when to sow indoors', 'pre-cultivation indoors']:
+        if indoor_key not in specs:
+            continue
+        indoor = parse_months(specs[indoor_key])
+        if indoor:
+            data['indoor_sowing_months'] = indoor
+            break
+
+    if 'lifecycle' in specs:
+        perennial = parse_lifecycle(specs['lifecycle'])
+        if perennial is not None:
+            data['perennial'] = perennial
+
+    if 'sunlight' in specs:
+        light = parse_light(specs['sunlight'])
+        if light:
+            data['light_requirement'] = light
+
+    if 'germination time' in specs:
+        days = parse_germination_days(specs['germination time'])
+        if days:
+            data['days_to_germination'] = days
+
+    if 'germination temperature' in specs:
+        temp = parse_germ_temp(specs['germination temperature'])
+        if temp:
+            data['germination_temp_c'] = temp
+
+    return data
+
+
+def get_current_values(cultivar_id):
+    """Return the non-empty enrichment columns currently stored for a cultivar.
+
+    The row is fetched via run_sql() and split on '|'. The result maps column
+    name to the raw text value for every column whose value is non-empty
+    after stripping. Returns {} when the query yields no output.
+    """
+    columns = (
+        'description', 'row_spacing_cm', 'plant_spacing_cm', 'planting_depth_cm',
+        'perennial', 'harvesting_months', 'direct_sowing_months', 'light_requirement',
+        'days_to_germination', 'germination_temp_c', 'indoor_sowing_months',
+    )
+    sql = f"""SELECT description, row_spacing_cm, plant_spacing_cm, planting_depth_cm,
+                     perennial, harvesting_months, direct_sowing_months, light_requirement,
+                     days_to_germination, germination_temp_c, indoor_sowing_months
+              FROM cultivars WHERE id = '{cultivar_id}'"""
+    row = run_sql(sql)
+    if not row:
+        return {}
+    # zip() stops at the shorter side, so missing trailing columns are skipped.
+    stripped_values = (part.strip() for part in row.split('|'))
+    return {
+        column: value
+        for column, value in zip(columns, stripped_values)
+        if value
+    }
+
+
+def build_update_sql(cultivar_id, data, current):
+    """Build an UPDATE statement that fills only columns that are still empty.
+
+    Args:
+        cultivar_id: value compared against ``cultivars.id``.
+        data: mapping of column name to scraped value (str, bool, list,
+            int or float).
+        current: mapping of column name to the value already stored; columns
+            with a truthy entry here are left untouched.
+
+    Returns:
+        A tuple ``(sql, updated_fields)``. ``sql`` is the UPDATE statement and
+        ``updated_fields`` lists the columns it sets, in ``data`` order. When
+        nothing needs updating the result is ``(None, [])``.
+
+    Values of an unsupported type produce no SET clause and are no longer
+    listed in ``updated_fields``. Single quotes in string values and in the
+    id are doubled so they cannot terminate the SQL literal.
+    """
+    sets = []
+    updated_fields = []
+    for field, value in data.items():
+        if current.get(field):
+            continue  # never overwrite an existing value
+
+        if isinstance(value, str):
+            escaped = value.replace("'", "''")
+            clause = f"{field} = '{escaped}'"
+        elif isinstance(value, bool):
+            # Checked before int/float because bool is a subclass of int.
+            clause = f"{field} = {'true' if value else 'false'}"
+        elif isinstance(value, list):
+            arr_str = '{' + ','.join(str(x) for x in value) + '}'
+            clause = f"{field} = '{arr_str}'"
+        elif isinstance(value, (int, float)):
+            clause = f"{field} = {value}"
+        else:
+            continue  # unsupported type: nothing is written, so report nothing
+        sets.append(clause)
+        updated_fields.append(field)
+
+    if not sets:
+        return None, []
+
+    safe_id = str(cultivar_id).replace("'", "''")
+    return f"UPDATE cultivars SET {', '.join(sets)} WHERE id = '{safe_id}';", updated_fields
+
+
+def main():
+    """Fill empty cultivar columns from Magic Garden Seeds product pages.
+
+    Selects cultivars linked to the 'Magic Garden Seeds' supplier that have a
+    product URL and lack row spacing or a description, scrapes each page and
+    issues one UPDATE per cultivar for the fields that are still empty.
+    Progress is printed per cultivar, followed by a summary.
+    """
+    sql = """
+    SELECT c.id, c.name, cs.product_url
+    FROM cultivars c
+    JOIN cultivar_suppliers cs ON c.id = cs.cultivar_id
+    JOIN suppliers s ON cs.supplier_id = s.id
+    WHERE s.name = 'Magic Garden Seeds'
+    AND cs.product_url IS NOT NULL AND cs.product_url <> ''
+    AND (c.row_spacing_cm IS NULL OR c.description IS NULL OR c.description = '')
+    ORDER BY c.name;
+    """
+    rows = run_sql(sql)
+    if not rows:
+        print("No cultivars to process")
+        return
+
+    # One '|'-separated line per cultivar; lines with fewer than three
+    # columns are ignored.
+    split_rows = [line.split('|') for line in rows.strip().split('\n')]
+    cultivars = [
+        {'id': cols[0], 'name': cols[1], 'url': cols[2]}
+        for cols in split_rows
+        if len(cols) >= 3
+    ]
+    total = len(cultivars)
+
+    print(f"Processing {total} MGS cultivars...")
+    sys.stdout.flush()
+
+    updated = skipped = failed = 0
+    fields_updated = {}
+
+    for position, cv in enumerate(cultivars, start=1):
+        print(f"[{position}/{total}] {cv['name']}...", end=' ', flush=True)
+
+        try:
+            html = fetch_page(cv['url'])
+            if not html or len(html) < 1000:
+                print("FAILED (empty page)")
+                failed += 1
+                time.sleep(0.5)
+                continue
+
+            data = extract_data(html)
+            if not data:
+                print("NO DATA")
+                skipped += 1
+                time.sleep(0.5)
+                continue
+
+            current = get_current_values(cv['id'])
+            statement, changed = build_update_sql(cv['id'], data, current)
+
+            if statement:
+                run_sql(statement)
+                for column in changed:
+                    fields_updated[column] = fields_updated.get(column, 0) + 1
+                print(f"OK ({len(changed)} fields: {', '.join(changed)})")
+                updated += 1
+            else:
+                print("SKIP (all fields populated)")
+                skipped += 1
+
+        except Exception as e:
+            print(f"ERROR: {e}")
+            failed += 1
+
+        # Pause between cultivars; the early-exit paths above sleep themselves.
+        time.sleep(0.5)
+
+    print("\n=== MGS Summary ===")
+    print(f"Total processed: {total}")
+    print(f"Updated: {updated}")
+    print(f"Skipped (all fields already populated): {skipped}")
+    print(f"Failed: {failed}")
+    print("\nFields updated:")
+    # Most frequently updated columns first.
+    for column, count in sorted(fields_updated.items(), key=lambda item: -item[1]):
+        print(f"  {column}: {count}")
+
+
+# Script entry point: call main() only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,330 @@
+#!/usr/bin/env python3
+"""
+Scrape NaturaDB wildlife interaction data and enrich HerbAPI species.
+"""
+
+import json
+import re
+import time
+import urllib.request
+import urllib.error
+import sys
+
+# Base URL of the HerbAPI REST service used by api_get() and api_put().
+HERBAPI_BASE = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Bearer token sent with every HerbAPI request.
+# NOTE(review): this credential is committed in source; it presumably should
+# be rotated and loaded from the environment or a secret store. Confirm.
+HERBAPI_TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+# Root of the NaturaDB plant pages; fetch_naturadb() appends "/<slug>/".
+NATURADB_BASE = "https://www.naturadb.de/pflanzen"
+# User-Agent header sent with NaturaDB requests.
+USER_AGENT = "Mozilla/5.0 (compatible; HerbAPI-Enrichment/1.0; +https://sub-net.at)"
+# Seconds slept after each NaturaDB fetch in main().
+DELAY = 0.5
+
+
+def api_get(path):
+    """GET *path* from HerbAPI and return the decoded JSON response.
+
+    Args:
+        path: path (and query string) appended to HERBAPI_BASE.
+
+    Raises:
+        urllib.error.URLError: on connection problems or HTTP error statuses
+            (HTTPError is a subclass); a timeout may also surface as
+            TimeoutError.
+    """
+    url = f"{HERBAPI_BASE}{path}"
+    req = urllib.request.Request(url)
+    req.add_header("Authorization", f"Bearer {HERBAPI_TOKEN}")
+    req.add_header("Accept", "application/json")
+    # Bound the wait: without a timeout a stalled connection would block the
+    # whole enrichment run indefinitely.
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return json.loads(resp.read().decode())
+
+
+def api_put(path, data):
+    """PUT *data* as JSON to HerbAPI and return the decoded JSON response.
+
+    Args:
+        path: path appended to HERBAPI_BASE.
+        data: JSON-serialisable object sent as the request body.
+
+    Raises:
+        urllib.error.URLError: on connection problems or HTTP error statuses
+            (HTTPError is a subclass); a timeout may also surface as
+            TimeoutError.
+    """
+    url = f"{HERBAPI_BASE}{path}"
+    body = json.dumps(data).encode()
+    req = urllib.request.Request(url, data=body, method="PUT")
+    req.add_header("Authorization", f"Bearer {HERBAPI_TOKEN}")
+    req.add_header("Content-Type", "application/json")
+    req.add_header("Accept", "application/json")
+    # Bound the wait: without a timeout a stalled connection would block the
+    # whole enrichment run indefinitely.
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return json.loads(resp.read().decode())
+
+
+def fetch_naturadb(latin_name):
+    """Download the NaturaDB page for *latin_name*.
+
+    The page slug is the lowercased name with spaces replaced by hyphens.
+    Returns the HTML as a string, or None on any failure. A 404 is silent;
+    other HTTP errors and all other exceptions are printed first.
+    """
+    slug = "-".join(latin_name.lower().split(" "))
+    url = f"{NATURADB_BASE}/{slug}/"
+    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    try:
+        with urllib.request.urlopen(request, timeout=15) as response:
+            return response.read().decode("utf-8", errors="replace")
+    except urllib.error.HTTPError as http_err:
+        # A missing page is the expected "not listed" case; stay quiet.
+        if http_err.code != 404:
+            print(f"  HTTP {http_err.code} for {url}")
+    except Exception as err:
+        print(f"  Error fetching {url}: {err}")
+    return None
+
+
+def extract_td_value(html, label):
+    """Return the text of the table cell that follows the cell named *label*.
+
+    Looks for ``<td>label</td>`` (an optional trailing colon is allowed)
+    directly followed by another ``<td ...>`` and returns that cell's content
+    with all HTML tags removed and surrounding whitespace stripped. Returns
+    None when no such cell pair exists.
+    """
+    cell_pair = re.search(
+        rf"<td>{re.escape(label)}:?</td>\s*<td[^>]*>(.*?)</td>",
+        html,
+        re.DOTALL,
+    )
+    if cell_pair is None:
+        return None
+    return re.sub(r"<[^>]+>", "", cell_pair.group(1)).strip()
+
+
+def extract_native_status(html):
+    """Return the native-status labels found in the page's status chips.
+
+    Scans the large, borderless, white-on-colour chips and keeps, in page
+    order, the texts that are one of the recognised status labels.
+    """
+    recognised = (
+        "heimische Wildform",
+        "Archäophyt",
+        "Neophyt",
+        "nicht heimisch (Neophyt)",
+    )
+    chip_texts = (
+        chip.group(1).strip()
+        for chip in re.finditer(
+            r'chip--large\s+chip--no-border\s+clr-white\s+bg-\w+"[^>]*>([^<]+)', html
+        )
+    )
+    return [text for text in chip_texts if text in recognised]
+
+
+def extract_badge_tags(html):
+    """Return the texts of the large plain-text badge chips on the page.
+
+    Whitespace is stripped from each chip text; empty texts and the
+    "winterhart" badge are left out. Page order is preserved.
+    """
+    chip_texts = (
+        chip.group(1).strip()
+        for chip in re.finditer(r'chip--large\s+clr-text"[^>]*>([^<]+)', html)
+    )
+    return [text for text in chip_texts if text and text != "winterhart"]
+
+
+def parse_count(text):
+    """Return the integer that *text* starts with, e.g. '82 (Nektar ...)' -> 82.
+
+    Leading whitespace is ignored. Returns None for empty input or when the
+    text does not begin with a digit.
+    """
+    if not text:
+        return None
+    leading_digits = re.match(r"(\d+)", text.strip())
+    if leading_digits is None:
+        return None
+    return int(leading_digits.group(1))
+
+
+def parse_specialist_count(text):
+    """Return N from a 'davon N spezialisiert' phrase, e.g. '39 (davon 5 spezialisiert)' -> 5.
+
+    Returns None for empty input or when the phrase is absent.
+    """
+    if not text:
+        return None
+    phrase = re.search(r"davon\s+(\d+)\s+spezialisiert", text)
+    if phrase is None:
+        return None
+    return int(phrase.group(1))
+
+
+def parse_nectar_pollen(text):
+    """Return the score from an 'N/4' rating, e.g. '2/4 - mäßig' -> 2.
+
+    The rating must be at the start of the text (leading whitespace is
+    ignored). Returns None for empty input or when no 'N/4' prefix is found.
+    """
+    if not text:
+        return None
+    rating = re.match(r"(\d+)/4", text.strip())
+    if rating is None:
+        return None
+    return int(rating.group(1))
+
+
+def build_wildlife_value(data):
+    """Render scraped wildlife data as one human-readable summary string.
+
+    Sentences are emitted in a fixed order: nectar/pollen ratings, wild bees,
+    butterflies (with caterpillar-host details), hoverflies, beetles, birds,
+    mammals, native status, and selected ecological badges. Entries whose
+    value is missing or None are left out. Returns None when nothing at all
+    can be said.
+    """
+    def known(key):
+        return data.get(key) is not None
+
+    sentences = []
+
+    # Nectar and pollen ratings share one sentence.
+    ratings = [
+        f"{label}: {data[key]}/4"
+        for label, key in (("Nectar", "nectar"), ("Pollen", "pollen"))
+        if known(key)
+    ]
+    if ratings:
+        sentences.append(", ".join(ratings) + ".")
+
+    # Wild bees, optionally with the number of specialists.
+    if known("wildbienen_count"):
+        bees = f"Supports {data['wildbienen_count']} wild bee species"
+        if known("wildbienen_specialists"):
+            bees += f" ({data['wildbienen_specialists']} specialists)"
+        sentences.append(bees + ".")
+
+    # Butterflies / moths; caterpillar details only appear alongside them.
+    if known("schmetterlinge_count"):
+        moths = f"{data['schmetterlinge_count']} butterfly/moth species"
+        if known("raupen_count"):
+            specialised = ""
+            if known("raupen_specialists"):
+                specialised = f" ({data['raupen_specialists']} specialized)"
+            moths += f", {data['raupen_count']} as caterpillar host{specialised}"
+        sentences.append(moths + ".")
+
+    # Plain "<count> <group> species." sentences.
+    simple_counts = (
+        ("schwebfliegen_count", "hoverfly"),
+        ("kaefer_count", "beetle"),
+        ("vogelarten_count", "bird"),
+        ("saeugetier_count", "mammal"),
+    )
+    for key, group in simple_counts:
+        if known(key):
+            sentences.append(f"{data[key]} {group} species.")
+
+    # Native status labels, space separated.
+    if data.get("native_status"):
+        sentences.append(" ".join(data["native_status"]) + ".")
+
+    # Only badges containing one of these keywords are worth listing.
+    keywords = [
+        "insektenpflanze",
+        "raupenfutter",
+        "vogelschutz",
+        "vogelnähr",
+        "bienenweide",
+    ]
+    notable = [
+        badge
+        for badge in data.get("badges", [])
+        if any(keyword in badge.lower() for keyword in keywords)
+    ]
+    if notable:
+        sentences.append("Tags: " + ", ".join(notable) + ".")
+
+    if not sentences:
+        return None
+    return " ".join(sentences)
+
+
+def scrape_species(html):
+    """Parse a NaturaDB plant page into a dict of wildlife figures.
+
+    Numeric entries are None when the corresponding table row is missing or
+    cannot be parsed; "native_status" and "badges" are (possibly empty) lists.
+    """
+    def cell(label):
+        return extract_td_value(html, label)
+
+    # These two cells each feed a total and a specialist count.
+    bees_raw = cell("Wildbienen")
+    caterpillars_raw = cell("Raupen")
+
+    return {
+        # Nectar and pollen ratings (N out of 4)
+        "nectar": parse_nectar_pollen(cell("Nektarwert")),
+        "pollen": parse_nectar_pollen(cell("Pollenwert")),
+        # Wild bees
+        "wildbienen_count": parse_count(bees_raw),
+        "wildbienen_specialists": parse_specialist_count(bees_raw),
+        # Butterflies/moths and their caterpillars
+        "schmetterlinge_count": parse_count(cell("Schmetterlinge")),
+        "raupen_count": parse_count(caterpillars_raw),
+        "raupen_specialists": parse_specialist_count(caterpillars_raw),
+        # Hoverflies, beetles, birds, mammals
+        "schwebfliegen_count": parse_count(cell("Schwebfliegen")),
+        "kaefer_count": parse_count(cell("Käfer")),
+        "vogelarten_count": parse_count(cell("fressende Vogelarten")),
+        "saeugetier_count": parse_count(cell("fressende Säugetierarten")),
+        # Chip-based information
+        "native_status": extract_native_status(html),
+        "badges": extract_badge_tags(html),
+    }
+
+
+def has_any_data(data):
+    """Tell whether a scraped data dict contains at least one real value.
+
+    The list-valued entries ("native_status", "badges") count only when
+    non-empty; every other entry counts as soon as it is not None.
+    """
+    list_fields = ("native_status", "badges")
+    return any(
+        bool(value) if key in list_fields else value is not None
+        for key, value in data.items()
+    )
+
+
+def main():
+    """Add a wildlife_value summary to every HerbAPI species that lacks one.
+
+    For each species without an existing wildlife_value, the NaturaDB page is
+    fetched and parsed; if it yields data, the full species record is loaded,
+    updated and written back with PUT. Progress and a final tally are printed.
+    """
+    print("Fetching species list from HerbAPI...")
+    species_list = api_get("/species?per_page=200")["data"]
+    total = len(species_list)
+    print(f"Found {total} species.\n")
+
+    enriched = skipped_has_data = skipped_not_found = skipped_no_data = errors = 0
+
+    for position, sp in enumerate(species_list, start=1):
+        slug = sp["slug"]
+        name = sp["name_scientific"]
+        progress = f"[{position:3d}/{total}] {slug:40s} "
+
+        # Only enrich if wildlife_value is empty/null
+        if sp.get("wildlife_value"):
+            print(progress + "SKIP (already has data)")
+            skipped_has_data += 1
+            continue
+
+        print(progress, end="", flush=True)
+
+        # Fetch the NaturaDB page, then pause regardless of the outcome.
+        html = fetch_naturadb(name)
+        time.sleep(DELAY)
+
+        if html is None:
+            print("NOT FOUND on NaturaDB")
+            skipped_not_found += 1
+            continue
+
+        scraped = scrape_species(html)
+        if not has_any_data(scraped):
+            print("no wildlife data on page")
+            skipped_no_data += 1
+            continue
+
+        wildlife_value = build_wildlife_value(scraped)
+        if not wildlife_value:
+            print("no wildlife data extracted")
+            skipped_no_data += 1
+            continue
+
+        # GET the full record, merge the new value, PUT it back.
+        try:
+            record = api_get(f"/species/{slug}")
+            record["wildlife_value"] = wildlife_value
+
+            # Remove read-only / computed fields that the PUT endpoint might reject
+            for read_only in ("created_at", "updated_at", "family"):
+                record.pop(read_only, None)
+
+            api_put(f"/species/{record['id']}", record)
+            print(f"ENRICHED -> {wildlife_value[:80]}...")
+            enriched += 1
+        except Exception as e:
+            print(f"API ERROR: {e}")
+            errors += 1
+
+    print("\n" + "=" * 70)
+    print("DONE. Results:")
+    print(f"  Enriched:           {enriched}")
+    print(f"  Already had data:   {skipped_has_data}")
+    print(f"  Not on NaturaDB:    {skipped_not_found}")
+    print(f"  No wildlife data:   {skipped_no_data}")
+    print(f"  Errors:             {errors}")
+    print(f"  Total:              {total}")
+
+
+# Script entry point: call main() only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,560 @@
+#!/usr/bin/env python3
+"""
+Scrape cultivar data from Reinsaat (reinsaat.at) and push into HerbAPI.
+
+Strategy:
+1. Fetch category pages, recursively discover product pages via JSON-LD detection
+2. Extract structured data from JSON-LD Product schema + HTML text for growing data
+3. Match Latin names to existing species in the API
+4. Create cultivar records and link them to Reinsaat supplier
+"""
+
+import json
+import re
+import ssl
+import time
+import urllib.request
+import urllib.error
+import urllib.parse
+from html.parser import HTMLParser
+from dataclasses import dataclass
+from typing import Optional
+
+# ── Config ──────────────────────────────────────────────────────────────────
+# Base URL of the HerbAPI REST service used by api_get() and api_post().
+API_BASE = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Bearer token sent with every HerbAPI request.
+# NOTE(review): this credential is committed in source; it presumably should
+# be rotated and loaded from the environment or a secret store. Confirm.
+AUTH_TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+# Identifier of the Reinsaat supplier record that new cultivars are linked to
+# (see the module docstring, step 4).
+REINSAAT_SUPPLIER_ID = "019ced24-1702-72d1-9acc-90435441a5c4"
+DELAY = 0.5  # seconds between requests
+# User-Agent header sent by fetch_url().
+USER_AGENT = "HerbAPI-Scraper/1.0 (florian.berthold@sub-net.at)"
+
+# ── Categories to scrape ────────────────────────────────────────────────────
+# (category_url, default_species_hint for leaf pages in this category)
+# A hint of None means there is no default species for that category.
+CATEGORIES = [
+    ("https://www.reinsaat.at/shop/DE/tomaten_paradeiser/", "Solanum lycopersicum"),
+    ("https://www.reinsaat.at/shop/DE/kuechen-_und_gewuerzkraeuter/", None),
+    ("https://www.reinsaat.at/shop/DE/kuerbis/", None),
+    ("https://www.reinsaat.at/shop/DE/zucchini/", "Cucurbita pepo"),
+    ("https://www.reinsaat.at/shop/DE/bohnen/", None),
+    ("https://www.reinsaat.at/shop/DE/karotten_moehren_1/", "Daucus carota"),
+    ("https://www.reinsaat.at/shop/DE/rote_ruebe/", "Beta vulgaris"),
+    ("https://www.reinsaat.at/shop/DE/blumen_und_heilkraeuter/", None),
+]
+
+# ── Known Latin name genera we can match ────────────────────────────────────
+# '|'-separated genus names, spliced into LATIN_PATTERN as a regex alternation.
+KNOWN_GENERA = (
+    "Solanum|Cucurbita|Vicia|Phaseolus|Glycine|Daucus|Beta|Borago|Lavandula|"
+    "Salvia|Melissa|Thymus|Calendula|Allium|Ocimum|Satureja|Origanum|Anethum|"
+    "Foeniculum|Carum|Nigella|Levisticum|Rumex|Majorana|Hyssopus|Coriandrum|"
+    "Petroselinum|Eruca|Tropaeolum|Lupinus|Helianthus|Tagetes|Zinnia|Cosmos|"
+    "Papaver|Centaurea|Matricaria|Chrysanthemum|Antirrhinum|Lathyrus|Ipomoea|"
+    "Phacelia|Trifolium|Symphytum|Urtica|Fragaria|Sambucus"
+)
+
+# Matches "<Genus> <epithet>" for a known genus, optionally followed by the
+# author abbreviation "L" / "L." and by an "ssp." / "var." / "subsp." rank with
+# its lowercase epithet. The whole name is captured as group 1.
+LATIN_PATTERN = re.compile(
+    rf'((?:{KNOWN_GENERA})\s+[a-z]+(?:\s+L\.?)?(?:\s+(?:ssp|var|subsp)\.\s+[a-z]+)?)'
+)
+
+
+# ── HTML helpers ────────────────────────────────────────────────────────────
+class TextExtractor(HTMLParser):
+    """Collect the visible text fragments of an HTML document.
+
+    After feed(), ``parts`` holds every non-empty, stripped text run that was
+    not inside a <script>, <style> or <noscript> element.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.parts = []
+        # Nesting depth of currently open script/style/noscript elements.
+        self._skip = 0
+
+    def handle_starttag(self, tag, attrs):
+        hidden = tag in ("script", "style", "noscript")
+        if hidden:
+            self._skip += 1
+
+    def handle_endtag(self, tag):
+        # Never let the depth go negative on a stray closing tag.
+        if self._skip > 0 and tag in ("script", "style", "noscript"):
+            self._skip -= 1
+
+    def handle_data(self, data):
+        if self._skip != 0:
+            return
+        fragment = data.strip()
+        if fragment:
+            self.parts.append(fragment)
+
+
+def extract_links(html: str, base_url: str) -> list[str]:
+    """Return the distinct absolute URLs of all double-quoted <a href="..."> links.
+
+    Relative links are resolved against *base_url*. Empty hrefs, fragment-only
+    links ("#...") and "javascript:" links are ignored. Each URL appears once,
+    in order of first occurrence.
+    """
+    # A dict doubles as an insertion-ordered set.
+    ordered_unique: dict[str, None] = {}
+    for anchor in re.finditer(r'<a\s[^>]*href="([^"]*)"', html, re.IGNORECASE):
+        target = anchor.group(1)
+        if not target or target.startswith(("#", "javascript:")):
+            continue
+        ordered_unique.setdefault(urllib.parse.urljoin(base_url, target), None)
+    return list(ordered_unique)
+
+
+def extract_jsonld_product(html: str) -> Optional[dict]:
+    """Return the first JSON-LD object on the page whose "@type" is "Product".
+
+    Every <script type="application/ld+json"> block is parsed in page order.
+    Blocks that are not valid JSON, are not a JSON object, or have another
+    "@type" are skipped. Returns None when no Product object is found.
+    """
+    script_blocks = re.finditer(
+        r'<script[^>]*type="application/ld\+json"[^>]*>(.*?)</script>',
+        html, re.DOTALL | re.IGNORECASE
+    )
+    for block in script_blocks:
+        try:
+            payload = json.loads(block.group(1))
+        except (json.JSONDecodeError, ValueError):
+            continue
+        if isinstance(payload, dict) and payload.get("@type") == "Product":
+            return payload
+    return None
+
+
+# ── HTTP helpers ────────────────────────────────────────────────────────────
+# Shared TLS context (default certificate verification) for all page fetches.
+_ssl_ctx = ssl.create_default_context()
+
+def fetch_url(url: str, retries: int = 2) -> str:
+    """Fetch *url* and return the response body as text.
+
+    Args:
+        url: address to download.
+        retries: number of additional attempts after the first failure, with
+            a 2 second pause before each retry.
+
+    Returns:
+        The decoded body. The charset announced by the server is used
+        (UTF-8 when none is given); undecodable bytes are replaced instead of
+        aborting the fetch, and an unknown charset label falls back to UTF-8.
+        An empty string is returned only when *retries* is negative, i.e. no
+        attempt is made at all.
+
+    Raises:
+        urllib.error.URLError / TimeoutError: re-raised from the last attempt
+            once all retries are used up (HTTPError is a URLError subclass).
+    """
+    req = urllib.request.Request(url, headers={
+        "User-Agent": USER_AGENT,
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        "Accept-Language": "de-AT,de;q=0.9,en;q=0.5",
+    })
+    for attempt in range(retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=30, context=_ssl_ctx) as resp:
+                charset = resp.headers.get_content_charset() or "utf-8"
+                body = resp.read()
+        except (urllib.error.URLError, TimeoutError):
+            if attempt < retries:
+                time.sleep(2)
+                continue
+            raise
+        try:
+            return body.decode(charset, errors="replace")
+        except LookupError:
+            # The server announced a charset Python does not know.
+            return body.decode("utf-8", errors="replace")
+    return ""
+
+
+def api_get(path: str):
+    """Send an authenticated GET for *path* to HerbAPI and return the parsed JSON.
+
+    The request times out after 15 seconds; errors raised by urlopen
+    propagate to the caller.
+    """
+    auth_headers = {
+        "Authorization": f"Bearer {AUTH_TOKEN}",
+        "Accept": "application/json",
+    }
+    request = urllib.request.Request(f"{API_BASE}{path}", headers=auth_headers)
+    with urllib.request.urlopen(request, timeout=15) as response:
+        payload = response.read()
+    return json.loads(payload)
+
+
+def api_post(path: str, data: dict):
+    """Send *data* as a JSON POST to HerbAPI and return the parsed JSON reply.
+
+    On an HTTP error status the first 500 characters of the error body are
+    printed and the HTTPError is re-raised. The request times out after
+    15 seconds.
+    """
+    request = urllib.request.Request(
+        f"{API_BASE}{path}",
+        data=json.dumps(data).encode("utf-8"),
+        headers={
+            "Authorization": f"Bearer {AUTH_TOKEN}",
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        },
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=15) as response:
+            return json.loads(response.read())
+    except urllib.error.HTTPError as http_err:
+        # Show what the API complained about before passing the error on.
+        details = http_err.read().decode("utf-8", errors="replace")
+        print(f"    API ERROR {http_err.code}: {details[:500]}")
+        raise
+
+
+# ── Species matching ────────────────────────────────────────────────────────
+def load_species() -> dict:
+    """Load all species from the API, keyed by lowercase scientific name.
+
+    Pages of 100 are requested until the response's "pagination.total_pages"
+    is reached. A response without pagination info (or one that is not a
+    dict) is treated as the only page.
+    """
+    by_name = {}
+    page = 1
+    more_pages = True
+    while more_pages:
+        payload = api_get(f"/species?per_page=100&page={page}")
+        is_envelope = isinstance(payload, dict)
+        # An envelope carries the list under "data"; otherwise use it as is.
+        entries = payload.get("data", payload) if is_envelope else payload
+        by_name.update(
+            (entry["name_scientific"].lower().strip(), entry) for entry in entries
+        )
+        more_pages = (
+            is_envelope
+            and "pagination" in payload
+            and page < payload["pagination"].get("total_pages", 1)
+        )
+        page += 1
+    return by_name
+
+
+def match_species(latin_name: str, species_map: dict) -> Optional[dict]:
+    """Find the species entry that best matches *latin_name*.
+
+    The name is cleaned of a trailing author abbreviation and of
+    ssp./subsp./var. ranks, lowercased, and then looked up in three steps:
+    the cleaned name, its first two words, and finally the first entry of
+    *species_map* whose key starts with the same genus. Returns None when
+    the name is empty or nothing matches.
+    """
+    if not latin_name:
+        return None
+
+    # Strip author citations ("L.", "Mill.") and infraspecific ranks.
+    cleaned = latin_name.strip()
+    for noise in (
+        r'\s+L\.\s*$',
+        r'\s+[A-Z][a-z]*\.\s*$',
+        r'\s+(?:ssp|subsp|var)\.\s+\S+',
+    ):
+        cleaned = re.sub(noise, '', cleaned)
+
+    key = cleaned.lower().strip()
+    words = key.split()
+
+    # Exact name first, then genus + species (first two words).
+    candidates = [key]
+    if len(words) >= 2:
+        candidates.append(f"{words[0]} {words[1]}")
+    for candidate in candidates:
+        if candidate in species_map:
+            return species_map[candidate]
+
+    if not words:
+        return None
+
+    # Genus-only fallback (less reliable, but useful for Borago, etc.)
+    genus_prefix = words[0] + " "
+    return next(
+        (entry for name, entry in species_map.items() if name.startswith(genus_prefix)),
+        None,
+    )
+
+
+# ── Product data extraction ─────────────────────────────────────────────────
+@dataclass
+class ProductData:
+    """Data gathered for a single Reinsaat product page.
+
+    Every field has a default, so parse_product() can create an instance with
+    only the URL and fill in the remaining values as it finds them.
+    """
+    # Product name, taken from the JSON-LD "name" property.
+    name: str = ""
+    # Scientific name matched by LATIN_PATTERN, or the category's default
+    # species when no match is found on the page.
+    latin_name: str = ""
+    # Text of the JSON-LD "description" property.
+    description: str = ""
+    # Article number, taken from the JSON-LD "model" property.
+    sku: str = ""
+    # URL of the product page this record was parsed from.
+    url: str = ""
+    # Defaults to True; presumably because the supplier's range is organic.
+    # TODO confirm.
+    is_organic: bool = True
+    # Sowing depth in cm (the average when a range is given); None if not found.
+    sowing_depth_cm: Optional[float] = None
+    # Distance between rows in cm; None if not found.
+    row_spacing_cm: Optional[float] = None
+    # Distance between plants within a row in cm; None if not found.
+    plant_spacing_cm: Optional[float] = None
+    # Germination temperature in degrees Celsius; None if not found.
+    germination_temp_c: Optional[float] = None
+    # Whether the plant is perennial; defaults to False.
+    perennial: bool = False
+
+
+def parse_product(html: str, url: str, default_species: Optional[str] = None) -> Optional[ProductData]:
+    """Parse a product page into a ProductData record.
+
+    Args:
+        html: Raw HTML of the fetched page.
+        url: URL the page was fetched from; stored on the result.
+        default_species: Latin name used when the page itself shows none.
+
+    Returns:
+        A populated ProductData, or None when the page has no JSON-LD
+        Product block (i.e. it is not a product page).
+    """
+    jsonld = extract_jsonld_product(html)
+    if not jsonld:
+        return None  # Not a product page
+
+    def _text(key: str) -> str:
+        # JSON-LD values can be null or numeric; calling .strip() on those
+        # would raise and abort the whole crawl.
+        value = jsonld.get(key)
+        if isinstance(value, str):
+            return value.strip()
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            return str(value)
+        return ""
+
+    product = ProductData(url=url)
+
+    # ── From JSON-LD ──
+    product.name = _text("name")
+    product.description = _text("description")
+    product.sku = _text("model")
+
+    # ── Extract full text for pattern matching ──
+    extractor = TextExtractor()
+    extractor.feed(html)
+    full_text = " ".join(extractor.parts)
+
+    # ── Latin name ──
+    m = LATIN_PATTERN.search(full_text)
+    if m:
+        product.latin_name = m.group(1).strip()
+    # Also check <i>/<em> tags in HTML
+    if not product.latin_name:
+        for italic in re.finditer(r'<(?:i|em)[^>]*>(.*?)</(?:i|em)>', html, re.IGNORECASE | re.DOTALL):
+            clean = re.sub(r'<[^>]+>', '', italic.group(1)).strip()
+            im = LATIN_PATTERN.search(clean)
+            if im:
+                product.latin_name = im.group(1).strip()
+                break
+    if not product.latin_name and default_species:
+        product.latin_name = default_species
+
+    # ── Sowing depth ──
+    # Ranges are averaged; a single value is taken as-is.
+    depth_pats = [
+        r'(?:Saattiefe|Aussaattiefe|Ablagetiefe)[:\s]*(?:ca\.?\s*)?(\d+(?:[.,]\d+)?)\s*[-–]\s*(\d+(?:[.,]\d+)?)\s*cm',
+        r'(?:Saattiefe|Aussaattiefe|Ablagetiefe)[:\s]*(?:ca\.?\s*)?(\d+(?:[.,]\d+)?)\s*cm',
+        r'(\d+(?:[.,]\d+)?)\s*[-–]\s*(\d+(?:[.,]\d+)?)\s*cm\s+(?:tief|Tiefe)',
+    ]
+    for pat in depth_pats:
+        dm = re.search(pat, full_text, re.IGNORECASE)
+        if dm:
+            vals = [float(dm.group(i).replace(",", ".")) for i in range(1, dm.lastindex + 1)]
+            product.sowing_depth_cm = sum(vals) / len(vals)
+            break
+
+    # Fallback: look in raw HTML for common depth patterns like "0,5–1 cm" near depth keywords
+    if product.sowing_depth_cm is None:
+        dm = re.search(
+            r'(?:Saattiefe|Ablagetiefe|Aussaattiefe|Saatgutablage)\D{0,30}?(\d+(?:[.,]\d+)?)\s*[-–]\s*(\d+(?:[.,]\d+)?)\s*cm',
+            html, re.IGNORECASE
+        )
+        if dm:
+            d1 = float(dm.group(1).replace(",", "."))
+            d2 = float(dm.group(2).replace(",", "."))
+            product.sowing_depth_cm = (d1 + d2) / 2
+
+    # ── Spacing ──
+    # Look for "ROW x PLANT cm" patterns
+    spacing_pats = [
+        # "30–40 x 2–4 cm" (range x range)
+        r'(\d+)\s*[-–]\s*(\d+)\s*[x×]\s*(\d+)\s*[-–]\s*(\d+)\s*cm',
+        # "100 x 50 cm" (simple)
+        r'(\d+(?:[.,]\d+)?)\s*[x×]\s*(\d+(?:[.,]\d+)?)\s*cm',
+    ]
+    for pat in spacing_pats:
+        matches = re.findall(pat, full_text, re.IGNORECASE)
+        if matches:
+            # Prefer the last match (often the more relevant outdoor spacing)
+            m = matches[-1]
+            if len(m) == 4:
+                product.row_spacing_cm = (float(m[0]) + float(m[1])) / 2
+                product.plant_spacing_cm = (float(m[2]) + float(m[3])) / 2
+            elif len(m) == 2:
+                v1 = float(m[0].replace(",", "."))
+                v2 = float(m[1].replace(",", "."))
+                product.row_spacing_cm = v1
+                product.plant_spacing_cm = v2
+            break
+
+    # ── Germination temperature ──
+    temp_pats = [
+        r'(?:Keimtemperatur|Keimtemp)[.:\s]*(?:ca\.?\s*)?(\d+)\s*[-–]\s*(\d+)\s*°?\s*C',
+        # Single value after the keyword, e.g. "Keimtemperatur 28 °C"
+        r'(?:Keimtemperatur|Keimtemp)[.:\s]*(?:ca\.?\s*)?(\d+)\s*°\s*C',
+        # Generic range. The separator is mandatory: with an optional one,
+        # "28 °C" was split into the "range" 2–8 and stored as 5.0.
+        r'(\d+)\s*(?:[-–]|und|bis)\s*(\d+)\s*°\s*C',
+        r'(?:mindestens|mind\.)\s*(\d+)\s*°\s*C',
+    ]
+    for pat in temp_pats:
+        tm = re.search(pat, full_text, re.IGNORECASE)
+        if tm:
+            vals = [float(tm.group(i)) for i in range(1, tm.lastindex + 1)]
+            # Sanity check: germination temps are typically 5-35°C
+            avg = sum(vals) / len(vals)
+            if 5 <= avg <= 40:
+                product.germination_temp_c = avg
+                break
+
+    # ── Perennial ──
+    perennial_pats = [r'mehrj[aä]hrig', r'winterhart', r'ausdauernd', r'Halbstrauch', r'Staude']
+    for pat in perennial_pats:
+        if re.search(pat, full_text, re.IGNORECASE):
+            product.perennial = True
+            break
+
+    return product
+
+
+# ── Recursive product discovery ─────────────────────────────────────────────
+def discover_products(
+    category_url: str,
+    default_species: Optional[str],
+    max_depth: int = 3,
+    _depth: int = 0,
+    _visited: set = None,
+) -> list[ProductData]:
+    """Walk a category tree depth-first and collect every product page found.
+
+    Args:
+        category_url: Page to start from (category or product).
+        default_species: Latin name handed to parse_product as a fallback.
+        max_depth: Pages deeper than this recursion level are not fetched.
+        _depth: Current recursion level (internal).
+        _visited: URLs already fetched; shared across the recursion (internal).
+
+    Returns:
+        The parsed products; an empty list when the page was already
+        visited, lies too deep, or could not be fetched.
+    """
+    _visited = set() if _visited is None else _visited
+    if _depth > max_depth or category_url in _visited:
+        return []
+    _visited.add(category_url)
+
+    pad = "  " * (_depth + 1)
+    print(f"{pad}Fetching: {category_url}")
+
+    try:
+        page = fetch_url(category_url)
+        time.sleep(DELAY)
+    except Exception as exc:
+        print(f"{pad}  ERROR: {exc}")
+        return []
+
+    # A page that parses as a product is a leaf of the tree.
+    leaf = parse_product(page, category_url, default_species)
+    if leaf:
+        return [leaf]
+
+    # Otherwise keep only links exactly one path segment below this page.
+    base_path = urllib.parse.urlparse(category_url).path.rstrip("/")
+    prefix = base_path + "/"
+    children = {}  # dict keys give ordered de-duplication
+    for href in extract_links(page, category_url):
+        target = urllib.parse.urlparse(href)
+        if target.netloc and target.netloc != "www.reinsaat.at":
+            continue
+        path = target.path.rstrip("/")
+        if not path.startswith(prefix):
+            continue
+        tail = path[len(prefix):]
+        if not tail or "/" in tail:
+            continue
+        candidate = f"https://www.reinsaat.at{path}/"
+        if candidate not in _visited:
+            children.setdefault(candidate, None)
+
+    child_links = list(children)
+    print(f"{pad}  Found {len(child_links)} child links")
+
+    found = []
+    for child in child_links:
+        found.extend(discover_products(child, default_species, max_depth, _depth + 1, _visited))
+    return found
+
+
+# ── Main ────────────────────────────────────────────────────────────────────
+def main():
+    """Scrape Reinsaat and push the results into HerbAPI.
+
+    Steps: load species and existing cultivars from the API, crawl every
+    entry in CATEGORIES, then create a cultivar (or reuse the existing one)
+    for each product and link it to the Reinsaat supplier. Progress and a
+    final summary are printed to stdout.
+    """
+    print("=" * 70)
+    print("Reinsaat Scraper -> HerbAPI")
+    print("=" * 70)
+
+    # Load species
+    print("\n[1] Loading species from API...")
+    species_map = load_species()
+    sci_names = [k for k in species_map if " " in k]
+    print(f"    {len(sci_names)} species loaded:")
+    for k in sorted(sci_names):
+        s = species_map[k]
+        print(f"      {s['name_scientific']:40s} {s['id'][:12]}...")
+
+    # Load existing cultivars
+    print("\n[2] Loading existing cultivars...")
+    existing_cultivars = {}  # (species_id, name_lower) -> cultivar_id
+    page = 1
+    while True:
+        data = api_get(f"/cultivars?per_page=100&page={page}")
+        clist = data.get("data", data) if isinstance(data, dict) else data
+        if not clist:
+            break
+        for c in clist:
+            existing_cultivars[(c["species_id"], c["name"].lower())] = c["id"]
+        # Check pagination - API uses {data, total, page, per_page} format
+        if isinstance(data, dict):
+            total = data.get("total", len(clist))
+            per_page = data.get("per_page", 100)
+            if page * per_page >= total:
+                break
+        else:
+            break
+        page += 1
+    print(f"    {len(existing_cultivars)} existing cultivars")
+
+    # Discover products from all categories
+    print("\n[3] Discovering products from Reinsaat categories...")
+    all_products: list[ProductData] = []
+    visited: set[str] = set()
+
+    for cat_url, species_hint in CATEGORIES:
+        print(f"\n  Category: {cat_url}")
+        products = discover_products(cat_url, species_hint, max_depth=3, _visited=visited)
+        all_products.extend(products)
+        print(f"  -> {len(products)} products from this category")
+
+    print(f"\n  Total products discovered: {len(all_products)}")
+
+    # Deduplicate by URL
+    seen_urls = set()
+    unique_products = []
+    for p in all_products:
+        if p.url not in seen_urls:
+            seen_urls.add(p.url)
+            unique_products.append(p)
+    all_products = unique_products
+    print(f"  Unique products: {len(all_products)}")
+
+    # Process products
+    print("\n[4] Creating cultivars in API...")
+    stats = {"created": 0, "skipped_no_species": 0, "skipped_exists": 0, "errors": 0, "linked": 0}
+
+    for i, product in enumerate(all_products):
+        pct = (i + 1) / len(all_products) * 100
+        print(f"\n  [{i+1}/{len(all_products)}] ({pct:.0f}%) {product.name}")
+
+        # Match species
+        species = match_species(product.latin_name, species_map)
+        if not species:
+            print(f"    Skip: no species match for '{product.latin_name}'")
+            stats["skipped_no_species"] += 1
+            continue
+
+        species_id = species["id"]
+        print(f"    Species: {species['name_scientific']}")
+        print(f"    SKU: {product.sku}, Depth: {product.sowing_depth_cm}, "
+              f"Spacing: {product.row_spacing_cm}x{product.plant_spacing_cm}, "
+              f"Temp: {product.germination_temp_c}, Perennial: {product.perennial}")
+
+        # Check duplicates
+        key = (species_id, product.name.lower())
+        if key in existing_cultivars:
+            # Still try to link supplier if cultivar exists
+            cultivar_id = existing_cultivars[key]
+            print(f"    Exists: {cultivar_id[:12]}... - checking supplier link")
+            try:
+                api_post(f"/cultivars/{cultivar_id}/suppliers", {
+                    "supplier_id": REINSAAT_SUPPLIER_ID,
+                    "product_url": product.url,
+                    "article_number": product.sku,
+                })
+                print(f"    Linked to Reinsaat (SKU: {product.sku})")
+                stats["linked"] += 1
+            except Exception as e:
+                # Best effort: the link may already exist. Report the reason
+                # instead of hiding it so genuine API failures stay visible.
+                print(f"    Supplier link not created: {e}")
+            stats["skipped_exists"] += 1
+            continue
+
+        # Build payload
+        payload = {
+            "species_id": species_id,
+            "name": product.name,
+            "name_de": product.name,
+            "name_en": "",
+            "description": product.description,
+            "is_organic": product.is_organic,
+            "perennial": product.perennial,
+        }
+        # Optional measurements are only sent when the page provided them.
+        if product.sowing_depth_cm is not None:
+            payload["planting_depth_cm"] = round(product.sowing_depth_cm, 2)
+        if product.row_spacing_cm is not None:
+            payload["row_spacing_cm"] = round(product.row_spacing_cm, 1)
+        if product.plant_spacing_cm is not None:
+            payload["plant_spacing_cm"] = round(product.plant_spacing_cm, 1)
+        if product.germination_temp_c is not None:
+            payload["germination_temp_c"] = round(product.germination_temp_c, 1)
+
+        # Create cultivar
+        try:
+            result = api_post("/cultivars", payload)
+            cultivar_id = result["id"]
+            print(f"    Created: {cultivar_id}")
+            stats["created"] += 1
+            # Remember it so a later product with the same name is not created twice.
+            existing_cultivars[key] = cultivar_id
+        except Exception as e:
+            print(f"    FAILED to create: {e}")
+            stats["errors"] += 1
+            continue
+
+        # Link to supplier
+        try:
+            api_post(f"/cultivars/{cultivar_id}/suppliers", {
+                "supplier_id": REINSAAT_SUPPLIER_ID,
+                "product_url": product.url,
+                "article_number": product.sku,
+            })
+            print(f"    Linked to Reinsaat (SKU: {product.sku})")
+            stats["linked"] += 1
+        except Exception as e:
+            print(f"    FAILED to link supplier: {e}")
+
+    # Summary
+    print("\n" + "=" * 70)
+    print("SUMMARY")
+    print("=" * 70)
+    print(f"  Created:              {stats['created']}")
+    print(f"  Linked to supplier:   {stats['linked']}")
+    print(f"  Skipped (no species): {stats['skipped_no_species']}")
+    print(f"  Skipped (exists):     {stats['skipped_exists']}")
+    print(f"  Errors:               {stats['errors']}")
+    print("=" * 70)
+
+
+# Run the scraper only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,770 @@
+#!/usr/bin/env python3
+"""
+Reinsaat Scraper v2 — scrape ALL Reinsaat categories, match species by extracting
+genus+species from extended botanical names, create/enrich cultivars, link supplier.
+
+Uses direct PostgreSQL access (psycopg2) for speed and reliability.
+"""
+
+import html as html_mod
+import json
+import os
+import re
+import ssl
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+import uuid
+from dataclasses import dataclass, field
+from typing import Optional
+
+# Unbuffered output
+sys.stdout.reconfigure(line_buffering=True)
+sys.stderr.reconfigure(line_buffering=True)
+
+import psycopg2
+import psycopg2.extras
+
+# ── Config ──────────────────────────────────────────────────────────────────
+# Database settings can be overridden through the environment
+# (HERBAPI_DB_HOST, HERBAPI_DB_NAME, HERBAPI_DB_USER, HERBAPI_DB_PASS).
+# NOTE(review): the fallback password is still committed here so existing
+# invocations keep working; rotate it and drop the default.
+DB_HOST = os.environ.get("HERBAPI_DB_HOST", "10.31.3.90")
+DB_NAME = os.environ.get("HERBAPI_DB_NAME", "herbapi")
+DB_USER = os.environ.get("HERBAPI_DB_USER", "herbapi")
+DB_PASS = os.environ.get("HERBAPI_DB_PASS", "_6Qo_jEFhE9LZOEbwLynEWoLbc6B4Ipj")
+
+# Supplier row that every scraped product gets linked to
+REINSAAT_SUPPLIER_ID = "019ced24-1702-72d1-9acc-90435441a5c4"
+# Seconds to sleep after each page fetch
+DELAY = 0.3
+USER_AGENT = "HerbAPI-Scraper/2.0 (florian.berthold@sub-net.at)"
+
+# ── All Reinsaat categories ────────────────────────────────────────────────
+_SHOP_BASE = "https://www.reinsaat.at/shop/DE/"
+
+# Top-level shop categories to crawl, in crawl order; each entry is the
+# category's path segment below _SHOP_BASE.
+CATEGORIES = [
+    f"{_SHOP_BASE}{slug}/"
+    for slug in (
+        "bohnen",
+        "erbsen",
+        "gurken",
+        "karotten_moehren_1",
+        "knollenfenchel",
+        "kohlgewaechse",
+        "kuerbis",
+        "mais",
+        "mangold",
+        "melanzani_1",
+        "melone",
+        "paprika",
+        "pastinaken_1",
+        "petersilie",
+        "pfefferoni_chili",
+        "porree",
+        "radies_rettich",
+        "rote_ruebe",
+        "salate",
+        "schwarzwurzeln",
+        "sellerie",
+        "spinat",
+        "tomaten_paradeiser",
+        "wurzelpetersilie_1",
+        "zucchini",
+        "zwiebel_knoblauch",
+        "kuechen-_und_gewuerzkraeuter",
+        "blumen_und_heilkraeuter",
+        "gruenduengung",
+    )
+]
+
+# ── HTTP ────────────────────────────────────────────────────────────────────
+_ssl_ctx = ssl.create_default_context()
+
+
+def fetch_url(url: str, retries: int = 2) -> str:
+    """Download ``url`` and return the response body as text.
+
+    Args:
+        url: Address to fetch.
+        retries: Number of additional attempts after a failed one; two
+            seconds are slept between attempts.
+
+    Returns:
+        The decoded body. Bytes that are invalid in the declared charset
+        are replaced with U+FFFD rather than failing the whole page.
+
+    Raises:
+        urllib.error.HTTPError, urllib.error.URLError, TimeoutError: when
+            the last attempt fails. Client errors (4xx other than 408/429)
+            are raised immediately because retrying cannot change them.
+    """
+    req = urllib.request.Request(url, headers={
+        "User-Agent": USER_AGENT,
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        "Accept-Language": "de-AT,de;q=0.9,en;q=0.5",
+    })
+    for attempt in range(retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=30, context=_ssl_ctx) as resp:
+                charset = resp.headers.get_content_charset() or "utf-8"
+                body = resp.read()
+        except urllib.error.HTTPError as err:
+            # HTTPError is a URLError subclass, so it must be handled first.
+            permanent = 400 <= err.code < 500 and err.code not in (408, 429)
+            if attempt < retries and not permanent:
+                time.sleep(2)
+                continue
+            raise
+        except (urllib.error.URLError, TimeoutError):
+            if attempt < retries:
+                time.sleep(2)
+                continue
+            raise
+        try:
+            return body.decode(charset, errors="replace")
+        except LookupError:
+            # Server announced a charset Python does not know.
+            return body.decode("utf-8", errors="replace")
+    # Only reachable when retries is negative and no attempt was made.
+    return ""
+
+
+# ── HTML parsing helpers ────────────────────────────────────────────────────
+def extract_links(html_text: str, base_url: str) -> list[str]:
+    """Collect the absolute targets of all ``<a href=...>`` tags.
+
+    Both double- and single-quoted attribute values are recognised, and
+    character references inside them (``&amp;`` etc.) are decoded before
+    the URL is resolved.
+
+    Args:
+        html_text: HTML source to scan.
+        base_url: URL that relative links are resolved against.
+
+    Returns:
+        Absolute URLs in document order without duplicates. Empty hrefs,
+        fragment-only links and ``javascript:`` links are left out.
+    """
+    links = {}  # dict keys give ordered de-duplication
+    anchor_href = r'<a\s[^>]*href\s*=\s*(?:"([^"]*)"|\'([^\']*)\')'
+    for m in re.finditer(anchor_href, html_text, re.IGNORECASE):
+        raw = m.group(1) if m.group(1) is not None else m.group(2)
+        href = html_mod.unescape(raw).strip()
+        if not href or href.startswith("#") or href.lower().startswith("javascript:"):
+            continue
+        links.setdefault(urllib.parse.urljoin(base_url, href), None)
+    return list(links)
+
+
+def extract_jsonld_product(html_text: str) -> Optional[dict]:
+    """Return the first JSON-LD object on the page whose ``@type`` is "Product".
+
+    Script blocks that do not contain valid JSON, or whose top-level value
+    is not such an object, are skipped. Returns None when none qualifies.
+    """
+    script_re = r'<script[^>]*type="application/ld\+json"[^>]*>(.*?)</script>'
+    for payload in re.findall(script_re, html_text, re.DOTALL | re.IGNORECASE):
+        try:
+            parsed = json.loads(payload)
+        except (json.JSONDecodeError, ValueError):
+            continue
+        if isinstance(parsed, dict) and parsed.get("@type") == "Product":
+            return parsed
+    return None
+
+
+def html_to_text(html_text: str) -> str:
+    """Turn an HTML fragment into plain text.
+
+    Tags become spaces, character references are decoded, and every run of
+    whitespace is collapsed to a single space with the ends trimmed.
+    """
+    without_tags = re.sub(r'<[^>]+>', ' ', html_text)
+    decoded = html_mod.unescape(without_tags)
+    return re.sub(r'\s+', ' ', decoded).strip()
+
+
+def extract_botanical_name(html_text: str) -> str:
+    """
+    Pull the botanical/Latin name out of a product page.
+
+    Sources are tried in this order:
+      1. the content of the element with class "fce_shop_kurztext",
+      2. the first Latin-looking <em> inside the "growingInfos" element,
+      3. any Latin-looking <em>/<i> on the page that is shorter than 100 chars.
+
+    Returns the text as found (authority names, infraspecific ranks etc. are
+    not removed), or "" when no candidate qualifies.
+    """
+    flags = re.DOTALL | re.IGNORECASE
+
+    # 1. Short-text div
+    kurztext = re.search(r'class="fce_shop_kurztext"[^>]*>(.*?)</div>', html_text, flags)
+    if kurztext:
+        candidate = html_to_text(kurztext.group(1)).strip()
+        if candidate and re.search(r'[A-Z][a-z]+\s+[a-z]', candidate):
+            return candidate
+
+    # 2. Emphasised text inside the growing information
+    growing = re.search(r'class="growingInfos"[^>]*>(.*?)</div>', html_text, flags)
+    if growing:
+        for fragment in re.findall(r'<em>(.*?)</em>', growing.group(1), re.DOTALL):
+            candidate = html_to_text(fragment).strip()
+            if candidate and re.search(r'[A-Z][a-z]+\s+[a-z]', candidate):
+                return candidate
+
+    # 3. Any emphasised/italic text; here the name must start the text
+    for fragment in re.findall(r'<(?:em|i)>(.*?)</(?:em|i)>', html_text, flags):
+        candidate = html_to_text(fragment).strip()
+        if candidate and len(candidate) < 100 and re.search(r'^[A-Z][a-z]+\s+[a-z]+', candidate):
+            return candidate
+
+    return ""
+
+
+def normalize_latin_name(raw: str) -> str:
+    """
+    Reduce an extended botanical name to "Genus species".
+
+    Examples:
+      "Pisum sativum L. convar. sat." -> "Pisum sativum"
+      "Capsicum annuum L." -> "Capsicum annuum"
+      "Brassica oleracea L. convar. botrytis" -> "Brassica oleracea"
+      "Cucumis sativus" -> "Cucumis sativus"
+      "Allium cepa, L." -> "Allium cepa"
+      "Mentha x piperita" -> "Mentha x piperita"
+      "Mentha ×piperita" -> "Mentha x piperita"
+
+    Returns "" for empty input. Input that does not look like
+    "Genus species" (single word, lower-case genus, upper-case second word)
+    is returned with only surrounding whitespace/punctuation removed.
+    """
+    if not raw:
+        return ""
+
+    punctuation = ".,;:"
+    name = raw.strip().strip(punctuation)
+
+    words = name.split()
+    if len(words) < 2:
+        return name
+
+    genus = words[0]
+
+    # Hybrid written with a separate sign: "Mentha x piperita" / "Mentha × piperita"
+    if len(words) >= 3 and words[1] in ("x", "×"):
+        epithet = words[2].rstrip(punctuation)
+        return f"{genus} x {epithet}" if epithet else name
+
+    # Hybrid sign attached to the epithet: "Mentha ×piperita"
+    if words[1].startswith("×"):
+        epithet = words[1][1:].rstrip(punctuation)
+        if epithet:
+            return f"{genus} x {epithet}"
+
+    # Punctuation stuck to the epithet ("cepa,") would defeat an exact lookup.
+    species = words[1].rstrip(punctuation)
+
+    # Validate: genus should start uppercase, species lowercase
+    if not species or not genus[0].isupper() or not species[0].islower():
+        return name  # Can't parse, return as-is
+
+    return f"{genus} {species}"
+
+
+# ── Calendar parsing ────────────────────────────────────────────────────────
+# Maps a lower-case fragment of a calendar row label to the cultivar field
+# that row feeds. Fragments are looked for as substrings of the label, so for
+# a first-match scan the specific glasshouse labels must come before the bare
+# "pflanzung" key; otherwise "aussaat/ pflanzung gewächshaus" would be filed
+# under transplanting_months.
+CALENDAR_ROW_TYPES = {
+    "voranzucht": "indoor_sowing_months",
+    "vorzucht": "indoor_sowing_months",
+    "vorkultur": "indoor_sowing_months",
+    "aussaat/ pflanzung freiland": "direct_sowing_months",
+    "aussaat/pflanzung freiland": "direct_sowing_months",
+    "aussaat freiland": "direct_sowing_months",
+    "direktsaat": "direct_sowing_months",
+    "aussaat/ pflanzung gewächshaus": "glasshouse_months",
+    "aussaat/pflanzung gewächshaus": "glasshouse_months",
+    "gewächshaus": "glasshouse_months",
+    "pflanzung freiland": "transplanting_months",
+    "pflanzung": "transplanting_months",
+    "ernte": "harvesting_months",
+}
+
+
+def parse_calendar(html_text: str) -> dict:
+    """
+    Parse the Reinsaat growing calendar table.
+
+    Returns a dict with keys like 'direct_sowing_months', 'harvesting_months'
+    etc. Each value is a sorted list of month integers (1-12). Rows whose
+    label is not listed in CALENDAR_ROW_TYPES are ignored; an empty dict is
+    returned when the page has no calendar table.
+    """
+    result = {}
+
+    cal_match = re.search(r'class="rs-growing-time[^"]*"(.*?)</table>', html_text, re.DOTALL)
+    if not cal_match:
+        return result
+
+    # Try the longest label fragment first: "pflanzung" is a substring of
+    # "aussaat/ pflanzung gewächshaus", so a plain first-match scan could file
+    # glasshouse rows under transplanting_months depending on dict order.
+    label_patterns = sorted(
+        CALENDAR_ROW_TYPES.items(), key=lambda item: len(item[0]), reverse=True
+    )
+
+    cal = cal_match.group(1)
+    rows = re.findall(r'<tr>(.*?)</tr>', cal, re.DOTALL)
+
+    for row in rows:
+        # Get label
+        label_m = re.search(r'class="type-lable"[^>]*>(.*?)</td>', row, re.DOTALL)
+        if not label_m:
+            continue
+        label = html_to_text(label_m.group(1)).strip().lower()
+
+        # Map label to our field
+        field_name = next(
+            (fname for pattern, fname in label_patterns if pattern in label), None
+        )
+        if not field_name:
+            continue
+
+        # One background colour per styled cell; the index-to-month mapping
+        # below assumes 24 such cells per row (12 months x 2 halves) —
+        # TODO confirm that inactive cells also carry a background-color.
+        colors = re.findall(r'background-color:\s*([^;"]+)', row)
+
+        # Convert to months: cell i maps to month (i // 2) + 1
+        active_months = set()
+        for i, color in enumerate(colors):
+            if color.strip().lower() in ("none", "transparent", ""):
+                continue
+            month = (i // 2) + 1
+            if 1 <= month <= 12:
+                active_months.add(month)
+
+        if active_months:
+            # Merge if same field already found (e.g. two sowing rows)
+            merged = set(result.get(field_name, ())) | active_months
+            result[field_name] = sorted(merged)
+
+    return result
+
+
+# ── Growing data extraction ─────────────────────────────────────────────────
+def extract_growing_data(html_text: str) -> dict:
+    """Extract depth, spacing, germination temperature and perennial flag.
+
+    Only the text of the element with class "growingInfos" is searched.
+
+    Returns:
+        A dict holding any of the keys "planting_depth_cm", "row_spacing_cm",
+        "plant_spacing_cm", "germination_temp_c" (rounded floats) and
+        "perennial" (True). Keys without a match are absent; the dict is
+        empty when the page has no growingInfos element.
+    """
+    data = {}
+
+    # Get the growingInfos block
+    gi = re.search(r'class="growingInfos"[^>]*>(.*?)</div>', html_text, re.DOTALL | re.IGNORECASE)
+    if not gi:
+        return data
+
+    # Strip tags, decode entities and collapse whitespace for pattern matching
+    raw_text = html_mod.unescape(re.sub(r'<[^>]+>', ' ', gi.group(1)))
+    raw_text = re.sub(r'\s+', ' ', raw_text)
+
+    # ── Sowing depth ── (a range is averaged)
+    depth_pats = [
+        r'(?:Saattiefe|Aussaattiefe|Ablagetiefe|Saatgutablage)[:\s]*(?:ca\.?\s*)?(\d+(?:[.,]\d+)?)\s*[-–]\s*(\d+(?:[.,]\d+)?)\s*cm',
+        r'(?:Saattiefe|Aussaattiefe|Ablagetiefe|Saatgutablage)[:\s]*(?:ca\.?\s*)?(\d+(?:[.,]\d+)?)\s*cm',
+    ]
+    for pat in depth_pats:
+        dm = re.search(pat, raw_text, re.IGNORECASE)
+        if dm:
+            vals = [float(dm.group(i).replace(",", ".")) for i in range(1, dm.lastindex + 1)]
+            data["planting_depth_cm"] = round(sum(vals) / len(vals), 2)
+            break
+
+    # ── Spacing: "ROW x PLANT cm" ──
+    spacing_pats = [
+        # "30–45 x 3–5 cm" (range x range)
+        r'(\d+)\s*[-–]\s*(\d+)\s*[x×]\s*(\d+)\s*[-–]\s*(\d+)\s*cm',
+        # "100 x 50 cm" (simple)
+        r'(\d+(?:[.,]\d+)?)\s*[x×]\s*(\d+(?:[.,]\d+)?)\s*cm',
+    ]
+    for pat in spacing_pats:
+        matches = re.findall(pat, raw_text, re.IGNORECASE)
+        if matches:
+            m = matches[-1]  # prefer last match
+            if len(m) == 4:
+                data["row_spacing_cm"] = round((float(m[0]) + float(m[1])) / 2, 1)
+                data["plant_spacing_cm"] = round((float(m[2]) + float(m[3])) / 2, 1)
+            elif len(m) == 2:
+                v1 = float(m[0].replace(",", "."))
+                v2 = float(m[1].replace(",", "."))
+                data["row_spacing_cm"] = round(v1, 1)
+                data["plant_spacing_cm"] = round(v2, 1)
+            break
+
+    # ── Germination temperature ──
+    temp_pats = [
+        r'(?:Keimtemperatur|Keimtemp)[.:\s]*(?:ca\.?\s*)?(\d+)\s*[-–]\s*(\d+)\s*[°]?\s*C',
+        r'(?:mindestens|mind\.)\s*(\d+)\s*°\s*C',
+    ]
+    for pat in temp_pats:
+        tm = re.search(pat, raw_text, re.IGNORECASE)
+        if tm:
+            vals = [float(tm.group(i)) for i in range(1, tm.lastindex + 1)]
+            avg = sum(vals) / len(vals)
+            # Values outside 5-40 °C are rejected; the next pattern is then tried.
+            if 5 <= avg <= 40:
+                data["germination_temp_c"] = round(avg, 1)
+                break
+
+    # ── Perennial ──
+    perennial_pats = [r'mehrj[aä]hrig', r'winterhart', r'ausdauernd', r'Halbstrauch', r'Staude']
+    if any(re.search(pat, raw_text, re.IGNORECASE) for pat in perennial_pats):
+        data["perennial"] = True
+
+    return data
+
+
+# ── Product data ────────────────────────────────────────────────────────────
+@dataclass
+class ProductData:
+    """Everything scraped from one product page."""
+
+    # Descriptive fields
+    name: str = ""
+    raw_latin_name: str = ""      # botanical name as found on the page
+    normalized_latin: str = ""    # raw_latin_name reduced by normalize_latin_name
+    description: str = ""
+    sku: str = ""
+    url: str = ""
+    is_organic: bool = True
+
+    # Results of extract_growing_data / parse_calendar; a fresh dict per instance
+    growing_data: dict = field(default_factory=dict)
+    calendar: dict = field(default_factory=dict)
+
+
+def parse_product(html_text: str, url: str) -> Optional[ProductData]:
+    """Parse a product page into a ProductData record.
+
+    Args:
+        html_text: Raw HTML of the fetched page.
+        url: URL the page was fetched from; stored on the result.
+
+    Returns:
+        A populated ProductData, or None when the page has no JSON-LD
+        Product block (i.e. it is not a product page).
+    """
+    jsonld = extract_jsonld_product(html_text)
+    if not jsonld:
+        return None
+
+    def _text(key: str) -> str:
+        # JSON-LD values can be null or numeric; calling .strip() on those
+        # would raise and abort the whole crawl.
+        value = jsonld.get(key)
+        if isinstance(value, str):
+            return value.strip()
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            return str(value)
+        return ""
+
+    product = ProductData(url=url)
+    product.name = _text("name")
+    product.description = _text("description")
+    product.sku = _text("model")
+
+    # Extract and normalize botanical name
+    product.raw_latin_name = extract_botanical_name(html_text)
+    product.normalized_latin = normalize_latin_name(product.raw_latin_name)
+
+    # Extract growing data
+    product.growing_data = extract_growing_data(html_text)
+
+    # Parse calendar
+    product.calendar = parse_calendar(html_text)
+
+    # Reinsaat is all organic, so the flag is set unconditionally; the page
+    # is not inspected for certification labels.
+    product.is_organic = True
+
+    return product
+
+
+# ── Recursive discovery ─────────────────────────────────────────────────────
+def discover_products(
+    category_url: str,
+    max_depth: int = 4,
+    _depth: int = 0,
+    _visited: set = None,
+) -> list[ProductData]:
+    """Walk a category tree depth-first and collect every product page found.
+
+    Args:
+        category_url: Page to start from (category or product).
+        max_depth: Pages deeper than this recursion level are not fetched.
+        _depth: Current recursion level (internal).
+        _visited: URLs already fetched; shared across the recursion (internal).
+
+    Returns:
+        The parsed products; an empty list when the page was already
+        visited, lies too deep, or could not be fetched.
+    """
+    _visited = set() if _visited is None else _visited
+    if _depth > max_depth or category_url in _visited:
+        return []
+    _visited.add(category_url)
+
+    pad = "  " * (_depth + 1)
+
+    try:
+        page = fetch_url(category_url)
+        time.sleep(DELAY)
+    except Exception as exc:
+        print(f"{pad}ERROR fetching {category_url}: {exc}")
+        return []
+
+    # A page that parses as a product is a leaf of the tree.
+    leaf = parse_product(page, category_url)
+    if leaf:
+        return [leaf]
+
+    # Otherwise keep only links exactly one path segment below this page.
+    base_path = urllib.parse.urlparse(category_url).path.rstrip("/")
+    prefix = base_path + "/"
+    children = {}  # dict keys give ordered de-duplication
+    for href in extract_links(page, category_url):
+        target = urllib.parse.urlparse(href)
+        if target.netloc and target.netloc != "www.reinsaat.at":
+            continue
+        path = target.path.rstrip("/")
+        if not path.startswith(prefix):
+            continue
+        tail = path[len(prefix):]
+        if not tail or "/" in tail:
+            continue
+        candidate = f"https://www.reinsaat.at{path}/"
+        if candidate not in _visited:
+            children.setdefault(candidate, None)
+
+    child_links = list(children)
+    print(f"{pad}Category {category_url} -> {len(child_links)} children")
+
+    found = []
+    for child in child_links:
+        found.extend(discover_products(child, max_depth, _depth + 1, _visited))
+    return found
+
+
+# ── Slug generation ─────────────────────────────────────────────────────────
+def make_slug(species_name: str, cultivar_name: str) -> str:
+    """Build a lower-case, hyphen-separated slug from species and cultivar name.
+
+    German umlauts/ß and a fixed set of accented letters are transliterated;
+    every other run of non-alphanumeric characters becomes a single hyphen,
+    and hyphens at either end are removed.
+    """
+    transliteration = str.maketrans({
+        'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
+        'é': 'e', 'è': 'e', 'ê': 'e', 'ë': 'e',
+        'á': 'a', 'à': 'a', 'â': 'a',
+        'í': 'i', 'ì': 'i', 'î': 'i',
+        'ó': 'o', 'ò': 'o', 'ô': 'o',
+        'ú': 'u', 'ù': 'u', 'û': 'u',
+        'ñ': 'n', 'ç': 'c',
+    })
+    combined = f"{species_name}-{cultivar_name}".lower().translate(transliteration)
+    # The substitution already merges adjacent separators into one hyphen,
+    # so no separate collapsing pass is needed.
+    return re.sub(r'[^a-z0-9]+', '-', combined).strip('-')
+
+
+# ── Main ────────────────────────────────────────────────────────────────────
+def db_connect():
+    """Open a new PostgreSQL connection with autocommit switched off."""
+    settings = {
+        "host": DB_HOST,
+        "dbname": DB_NAME,
+        "user": DB_USER,
+        "password": DB_PASS,
+    }
+    connection = psycopg2.connect(**settings)
+    connection.autocommit = False
+    return connection
+
+
+def main():
+    print("=" * 70)
+    print("Reinsaat Scraper v2")
+    print("=" * 70)
+
+    # ── Phase 1: Discover all products (no DB needed) ──
+    print("\n[1] Discovering products from Reinsaat categories...")
+    all_products: list[ProductData] = []
+    visited: set[str] = set()
+
+    for cat_url in CATEGORIES:
+        print(f"\n  Category: {cat_url}")
+        products = discover_products(cat_url, max_depth=4, _visited=visited)
+        all_products.extend(products)
+        print(f"  -> {len(products)} products")
+
+    # Deduplicate by URL
+    seen_urls = set()
+    unique_products = []
+    for p in all_products:
+        if p.url not in seen_urls:
+            seen_urls.add(p.url)
+            unique_products.append(p)
+    all_products = unique_products
+    print(f"\n  Total unique products: {len(all_products)}")
+
+    # ── Phase 2: Connect to DB and load existing data ──
+    print("\n[2] Connecting to DB and loading existing data...")
+    conn = db_connect()
+    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+
+    # Load species
+    cur.execute("SELECT id, name_scientific FROM species ORDER BY name_scientific")
+    species_rows = cur.fetchall()
+    species_map = {}
+    for row in species_rows:
+        key = row["name_scientific"].lower().strip()
+        species_map[key] = row
+    print(f"    {len(species_map)} species loaded")
+
+    # Load existing cultivars
+    cur.execute("""
+        SELECT id, species_id, name, slug, description,
+               row_spacing_cm, plant_spacing_cm, planting_depth_cm,
+               germination_temp_c, perennial,
+               indoor_sowing_months, direct_sowing_months,
+               transplanting_months, glasshouse_months, harvesting_months
+        FROM cultivars
+    """)
+    cultivar_rows = cur.fetchall()
+    existing_cultivars = {}
+    existing_slugs = set()
+    for row in cultivar_rows:
+        sid = str(row["species_id"])
+        name_lower = row["name"].lower()
+        existing_cultivars[(sid, name_lower)] = dict(row)
+        existing_slugs.add(row["slug"])
+    print(f"    {len(existing_cultivars)} cultivars loaded")
+
+    # Load existing Reinsaat supplier links
+    cur.execute("""
+        SELECT cultivar_id, product_url, article_number
+        FROM cultivar_suppliers
+        WHERE supplier_id = %s
+    """, (REINSAAT_SUPPLIER_ID,))
+    existing_links = {}
+    for row in cur.fetchall():
+        cid = str(row["cultivar_id"])
+        url = row["product_url"] or ""
+        sku = row["article_number"] or ""
+        existing_links.setdefault(cid, []).append((url, sku))
+    print(f"    {sum(len(v) for v in existing_links.values())} existing links for {len(existing_links)} cultivars")
+
+    # ── Phase 3: Process products ──
+    print("\n[3] Processing products...")
+    stats = {
+        "created": 0,
+        "linked": 0,
+        "enriched": 0,
+        "skipped_no_species": 0,
+        "skipped_no_name": 0,
+        "link_exists": 0,
+        "errors": 0,
+    }
+    unmatched = []
+
+    for i, product in enumerate(all_products):
+        pct = (i + 1) / len(all_products) * 100
+        prefix = f"  [{i+1}/{len(all_products)}] ({pct:.0f}%)"
+
+        if not product.name:
+            stats["skipped_no_name"] += 1
+            continue
+
+        # Match species
+        normalized = product.normalized_latin.lower().strip()
+        species = species_map.get(normalized)
+
+        if not species:
+            # Try exact match on raw name (first two words)
+            raw_words = product.raw_latin_name.split()
+            if len(raw_words) >= 2:
+                attempt = f"{raw_words[0].lower()} {raw_words[1].lower()}"
+                species = species_map.get(attempt)
+
+        if not species:
+            stats["skipped_no_species"] += 1
+            unmatched.append((product.name, product.raw_latin_name, product.normalized_latin, product.url))
+            continue
+
+        species_id = str(species["id"])
+        species_name = species["name_scientific"]
+
+        # Check if cultivar exists
+        ckey = (species_id, product.name.lower())
+        existing = existing_cultivars.get(ckey)
+
+        if existing:
+            cultivar_id = str(existing["id"])
+
+            # ── Enrich existing cultivar with missing data ──
+            updates = {}
+
+            # Growing data from page
+            gd = product.growing_data
+            if gd.get("planting_depth_cm") and not existing.get("planting_depth_cm"):
+                updates["planting_depth_cm"] = gd["planting_depth_cm"]
+            if gd.get("row_spacing_cm") and not existing.get("row_spacing_cm"):
+                updates["row_spacing_cm"] = gd["row_spacing_cm"]
+            if gd.get("plant_spacing_cm") and not existing.get("plant_spacing_cm"):
+                updates["plant_spacing_cm"] = gd["plant_spacing_cm"]
+            if gd.get("germination_temp_c") and not existing.get("germination_temp_c"):
+                updates["germination_temp_c"] = gd["germination_temp_c"]
+            if gd.get("perennial") and not existing.get("perennial"):
+                updates["perennial"] = True
+
+            # Calendar data
+            cal = product.calendar
+            if cal.get("indoor_sowing_months") and not existing.get("indoor_sowing_months"):
+                updates["indoor_sowing_months"] = cal["indoor_sowing_months"]
+            if cal.get("direct_sowing_months") and not existing.get("direct_sowing_months"):
+                updates["direct_sowing_months"] = cal["direct_sowing_months"]
+            if cal.get("transplanting_months") and not existing.get("transplanting_months"):
+                updates["transplanting_months"] = cal["transplanting_months"]
+            if cal.get("glasshouse_months") and not existing.get("glasshouse_months"):
+                updates["glasshouse_months"] = cal["glasshouse_months"]
+            if cal.get("harvesting_months") and not existing.get("harvesting_months"):
+                updates["harvesting_months"] = cal["harvesting_months"]
+
+            # Description
+            if product.description and not existing.get("description"):
+                updates["description"] = product.description
+
+            if updates:
+                set_clauses = []
+                values = []
+                for col, val in updates.items():
+                    set_clauses.append(f"{col} = %s")
+                    values.append(val)
+                set_clauses.append("updated_at = NOW()")
+                values.append(cultivar_id)
+                cur.execute(
+                    f"UPDATE cultivars SET {', '.join(set_clauses)} WHERE id = %s::uuid",
+                    values
+                )
+                stats["enriched"] += 1
+                print(f"{prefix} {product.name} -> ENRICHED ({', '.join(updates.keys())})")
+
+            # ── Add supplier link if missing ──
+            link_exists = False
+            if cultivar_id in existing_links:
+                for lurl, lsku in existing_links[cultivar_id]:
+                    if lurl == product.url or (lsku and lsku == product.sku):
+                        link_exists = True
+                        break
+
+            if link_exists:
+                stats["link_exists"] += 1
+            else:
+                try:
+                    cur.execute("SAVEPOINT link_sp")
+                    cur.execute("""
+                        INSERT INTO cultivar_suppliers (cultivar_id, supplier_id, product_url, article_number, last_checked_at)
+                        VALUES (%s::uuid, %s::uuid, %s, %s, NOW())
+                        ON CONFLICT (cultivar_id, supplier_id, article_number) DO UPDATE
+                        SET product_url = EXCLUDED.product_url, last_checked_at = NOW()
+                    """, (cultivar_id, REINSAAT_SUPPLIER_ID, product.url, product.sku))
+                    cur.execute("RELEASE SAVEPOINT link_sp")
+                    stats["linked"] += 1
+                    existing_links.setdefault(cultivar_id, []).append((product.url, product.sku))
+                    print(f"{prefix} {product.name} -> LINKED ({product.sku})")
+                except Exception as e:
+                    print(f"{prefix} {product.name} -> LINK ERROR: {e}")
+                    cur.execute("ROLLBACK TO SAVEPOINT link_sp")
+                    stats["errors"] += 1
+        else:
+            # ── Create new cultivar ──
+            slug = make_slug(species_name, product.name)
+            # Ensure unique slug
+            base_slug = slug
+            counter = 2
+            while slug in existing_slugs:
+                slug = f"{base_slug}-{counter}"
+                counter += 1
+
+            gd = product.growing_data
+            cal = product.calendar
+
+            try:
+                cur.execute("SAVEPOINT create_sp")
+                cur.execute("""
+                    INSERT INTO cultivars (
+                        species_id, name, name_de, slug, description,
+                        is_organic, perennial,
+                        planting_depth_cm, row_spacing_cm, plant_spacing_cm,
+                        germination_temp_c,
+                        indoor_sowing_months, direct_sowing_months,
+                        transplanting_months, glasshouse_months, harvesting_months
+                    ) VALUES (
+                        %s::uuid, %s, %s, %s, %s,
+                        %s, %s,
+                        %s, %s, %s,
+                        %s,
+                        %s, %s,
+                        %s, %s, %s
+                    )
+                    RETURNING id
+                """, (
+                    species_id,
+                    product.name,
+                    product.name,
+                    slug,
+                    product.description,
+                    product.is_organic,
+                    gd.get("perennial", False),
+                    gd.get("planting_depth_cm"),
+                    gd.get("row_spacing_cm"),
+                    gd.get("plant_spacing_cm"),
+                    gd.get("germination_temp_c"),
+                    cal.get("indoor_sowing_months"),
+                    cal.get("direct_sowing_months"),
+                    cal.get("transplanting_months"),
+                    cal.get("glasshouse_months"),
+                    cal.get("harvesting_months"),
+                ))
+                new_id = str(cur.fetchone()["id"])
+                existing_slugs.add(slug)
+                existing_cultivars[ckey] = {"id": new_id}
+                stats["created"] += 1
+
+                # Link to supplier
+                cur.execute("""
+                    INSERT INTO cultivar_suppliers (cultivar_id, supplier_id, product_url, article_number, last_checked_at)
+                    VALUES (%s::uuid, %s::uuid, %s, %s, NOW())
+                """, (new_id, REINSAAT_SUPPLIER_ID, product.url, product.sku))
+                stats["linked"] += 1
+                existing_links.setdefault(new_id, []).append((product.url, product.sku))
+
+                print(f"{prefix} {product.name} -> CREATED ({species_name}, {slug})")
+                cur.execute("RELEASE SAVEPOINT create_sp")
+            except Exception as e:
+                print(f"{prefix} {product.name} -> CREATE ERROR: {e}")
+                cur.execute("ROLLBACK TO SAVEPOINT create_sp")
+                stats["errors"] += 1
+
+    # ── Commit ──
+    conn.commit()
+
+    # ── Summary ──
+    print("\n" + "=" * 70)
+    print("SUMMARY")
+    print("=" * 70)
+    print(f"  Total products discovered: {len(all_products)}")
+    print(f"  New cultivars created:     {stats['created']}")
+    print(f"  New supplier links added:  {stats['linked']}")
+    print(f"  Cultivars enriched:        {stats['enriched']}")
+    print(f"  Links already existed:     {stats['link_exists']}")
+    print(f"  Skipped (no species):      {stats['skipped_no_species']}")
+    print(f"  Skipped (no name):         {stats['skipped_no_name']}")
+    print(f"  Errors:                    {stats['errors']}")
+    print("=" * 70)
+
+    if unmatched:
+        print(f"\n  UNMATCHED PRODUCTS ({len(unmatched)}):")
+        for name, raw_latin, normalized, url in sorted(unmatched, key=lambda x: x[2]):
+            print(f"    {normalized:30s} (raw: {raw_latin:40s}) {name:30s} {url}")
+
+    cur.close()
+    conn.close()
+
+
+# Invoke main() only when the file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,635 @@
+#!/usr/bin/env python3
+"""Reinsaat v3 scraper - uses HerbAPI REST API, robust botanical name matching."""
+
+import json
+import os
+import re
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from html import unescape
+
+# --- Config ---
+# Endpoint and credentials can be overridden through the environment so the
+# token does not have to live in the source tree; the literals below remain
+# as fallbacks so existing invocations keep working unchanged.
+# NOTE(review): the fallback token is a credential committed to version
+# control -- rotate it and drop the literal once HERBAPI_TOKEN is deployed.
+API_BASE = os.environ.get("HERBAPI_BASE", "http://herbapi01.corp.sub-net.at:8080/api/v1")
+API_TOKEN = os.environ.get("HERBAPI_TOKEN", "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk")
+REINSAAT_BASE = "https://www.reinsaat.at"
+DELAY = 0.3  # seconds slept between requests to the shop
+
+# Categories to scrape (seed products only, skip books/bulbs/peonies/potatoes/gift/seed_tapes)
+CATEGORIES = [
+    "beans", "peas", "florence_fennel", "cucumbers", "brassica", "garden_cress",
+    "pumpkins_squash", "corn", "swiss_chard", "aubergine_eggplants", "melons",
+    "carrots", "sweet_pepper", "chilli_peppers_chill", "parsnips", "parsley",
+    "parsley_root", "leeks", "radish", "beetroot", "lettuce", "black_salsify",
+    "celery", "spinach", "tomatoes", "zucchini_courgette", "onion_garlic",
+    "culinary_and_aromatic_herbs", "conservation_varieties", "flowers_and_herbs",
+    "wild_flowers_seeds", "green_manure",
+]
+
+# Suffixes to strip from botanical names (authority names, infraspecific ranks)
+# NOTE(review): no use of this set is visible in the functions nearby --
+# confirm whether it is still needed.
+STRIP_SUFFIXES = {
+    "l.", "mill.", "dc.", "l", "convar.", "convar", "var.", "var",
+    "subsp.", "subsp", "ssp.", "ssp", "f.", "em.", "auct.",
+    "hort.", "medik.", "moench", "pers.", "salisb.", "thunb.",
+    "crantz", "gaertn.", "lam.", "link", "siebold", "zucc.",
+    "sat.", "sat", "axillare", "medikus",
+}
+
+
+def api_get(path, params=None, timeout=60):
+    """GET a JSON document from HerbAPI.
+
+    Args:
+        path: API path appended to API_BASE (e.g. "/species").
+        params: optional mapping encoded as the query string.
+        timeout: seconds to wait on the socket before giving up.
+
+    Returns:
+        The decoded JSON response body.
+
+    Raises:
+        urllib.error.HTTPError / urllib.error.URLError on transport or
+        HTTP failures (propagated from urlopen).
+    """
+    url = f"{API_BASE}{path}"
+    if params:
+        url += "?" + urllib.parse.urlencode(params)
+    req = urllib.request.Request(url)
+    req.add_header("Authorization", f"Bearer {API_TOKEN}")
+    # Without an explicit timeout urlopen can block forever on a stalled
+    # connection and hang the whole scrape.
+    with urllib.request.urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read())
+
+
+def api_post(path, data, timeout=60):
+    """POST *data* as JSON to HerbAPI and return the decoded JSON response.
+
+    Args:
+        path: API path appended to API_BASE (e.g. "/cultivars").
+        data: JSON-serialisable request payload.
+        timeout: seconds to wait on the socket before giving up.
+
+    Raises:
+        urllib.error.HTTPError / urllib.error.URLError on transport or
+        HTTP failures (propagated from urlopen).
+    """
+    url = f"{API_BASE}{path}"
+    body = json.dumps(data).encode()
+    req = urllib.request.Request(url, data=body, method="POST")
+    req.add_header("Authorization", f"Bearer {API_TOKEN}")
+    req.add_header("Content-Type", "application/json")
+    # Without an explicit timeout urlopen can block forever on a stalled
+    # connection and hang the whole scrape.
+    with urllib.request.urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read())
+
+
+def fetch_page(url):
+    """Download *url* and return its body as text.
+
+    The request carries a browser-like User-Agent and a 15 second timeout;
+    the body is decoded as UTF-8 with undecodable bytes replaced.
+    """
+    request = urllib.request.Request(
+        url, headers={"User-Agent": "Mozilla/5.0 (HerbAPI Scraper)"}
+    )
+    with urllib.request.urlopen(request, timeout=15) as response:
+        payload = response.read()
+        return payload.decode("utf-8", errors="replace")
+
+
+# Known misspellings in the shop's botanical names.  Keys are either a bare
+# genus token or a full "genus species" pair, both in lower case.
+BOTANICAL_TYPOS = {
+    "capscicum": "capsicum",
+    "capsicum frutenscens": "capsicum frutescens",
+    "tropaelum": "tropaeolum",
+    "lact.": "lactuca",
+}
+
+# Abbreviated binomials and their expansion, matched in lower case against
+# the start of the cleaned name.
+ABBREVIATED_NAMES = {
+    "origanum vulg.": "origanum vulgare",
+    "helichrysum bract.": "helichrysum bracteatum",
+    "campanula lat.": "campanula latifolia",
+    "cosmos bip.": "cosmos bipinnatus",
+    "papaver somnif.": "papaver somniferum",
+}
+
+
+def normalise_botanical(raw):
+    """Strip botanical name to genus + species only.
+
+    'Pisum sativum L. convar. sat.' -> 'pisum sativum'
+    'Solanum lycopersicum L.'       -> 'solanum lycopersicum'
+    'Beta vulgaris L. ssp. vulgaris' -> 'beta vulgaris'
+    'Origanum vulg.'                -> 'origanum vulgare'
+
+    Returns the lower-case "genus species" string, or None when *raw* is
+    empty, has fewer than two tokens, or its second token looks like an
+    authority (capitalised) rather than a species epithet.
+    """
+    if not raw:
+        return None
+    # Clean HTML entities
+    raw = unescape(raw).replace("\xa0", " ").strip()
+    # Remove trailing commas/periods
+    raw = raw.rstrip(",. ")
+    # Remove content in parentheses
+    raw = re.sub(r"\([^)]*\)", "", raw)
+    # Check abbreviated names first (before splitting)
+    raw_lower = raw.lower().strip()
+    for abbrev, full in ABBREVIATED_NAMES.items():
+        # The rstrip above removes the abbreviation's own dot when it ends
+        # the string ("Origanum vulg."), so the dot-less form must match too.
+        if raw_lower.startswith(abbrev) or raw_lower == abbrev.rstrip("."):
+            return full
+
+    parts = raw.split()
+    if len(parts) < 2:
+        return None
+    # Genus (capitalised) + species (lowercase)
+    genus = parts[0].lower().rstrip(",")
+    species = parts[1].lower().rstrip(",")
+    # A token consisting only of commas collapses to "" and cannot be a name.
+    if not genus or not species:
+        return None
+
+    # Fix known typos
+    if genus in BOTANICAL_TYPOS:
+        genus = BOTANICAL_TYPOS[genus]
+    full_name = f"{genus} {species}"
+    if full_name in BOTANICAL_TYPOS:
+        full_name = BOTANICAL_TYPOS[full_name]
+        genus, species = full_name.split()
+
+    # Validate: genus should start with letter, species should be all lowercase
+    if not genus[0].isalpha() or not species[0].isalpha():
+        return None
+    # Skip if species looks like an authority (starts with uppercase in original)
+    if parts[1][0].isupper():
+        return None
+    return f"{genus} {species}"
+
+
+def extract_product_data(html, url):
+    """Extract product info from a Reinsaat product page.
+
+    Args:
+        html: the product page markup.
+        url: the page URL; stored verbatim under "url".
+
+    Returns:
+        A dict holding whichever of these keys could be extracted: name,
+        botanical_raw, botanical_norm, article_number, price_eur,
+        port_price, pack_sizes, planting_depth_cm, row_spacing_cm,
+        plant_spacing_cm, germination_temp_c, pack_size, pack_unit -- plus
+        "url", which is always present.
+    """
+    result = {}
+
+    # H1 = variety name
+    m = re.search(r'<h1[^>]*>([^<]+)</h1>', html)
+    if m:
+        name = unescape(m.group(1)).strip()
+        # Clean up names like "RS-To-01.26 (Alda)" -> "Alda"
+        paren = re.search(r"\(([^)]+)\)", name)
+        if paren and re.match(r"RS-", name):
+            name = paren.group(1).strip()
+        result["name"] = name
+
+    # Botanical name from fce_shop_kurztext
+    m = re.search(
+        r'fce_shop_kurztext[^>]*>\s*(?:<em[^>]*>)?\s*([^<]+?)\s*(?:</em>)?\s*</div>',
+        html,
+    )
+    if m:
+        result["botanical_raw"] = unescape(m.group(1)).replace("\xa0", " ").strip()
+        result["botanical_norm"] = normalise_botanical(result["botanical_raw"])
+
+    # Article number from JSON-LD
+    for jm in re.finditer(
+        r'<script type="application/ld\+json">(.*?)</script>', html, re.S
+    ):
+        try:
+            jd = json.loads(jm.group(1))
+        except json.JSONDecodeError:
+            continue
+        # A JSON-LD script may hold one object or an array of objects; calling
+        # .get() on anything but a dict would raise and abort the scrape.
+        candidates = jd if isinstance(jd, list) else [jd]
+        product = next(
+            (
+                c for c in candidates
+                if isinstance(c, dict) and c.get("@type") == "Product"
+            ),
+            None,
+        )
+        if product is None:
+            continue
+        if "model" in product:
+            result["article_number"] = str(product["model"])
+        # Get smallest pack price (usually the Portion)
+        offers = product.get("offers", {})
+        if isinstance(offers, dict):
+            offer_list = offers.get("offers", [])
+        elif isinstance(offers, list):
+            offer_list = offers
+        else:
+            offer_list = []
+        if offer_list:
+            prices = [
+                o["price"]
+                for o in offer_list
+                if isinstance(o, dict)
+                and isinstance(o.get("price"), (int, float))
+                and o["price"] > 0
+            ]
+            if prices:
+                result["price_eur"] = min(prices)
+        # Only the first Product entry is used.
+        break
+
+    # Price table - get pack sizes
+    tables = re.findall(r"<table[^>]*>(.*?)</table>", html, re.S)
+    for tbl in tables:
+        if "€" not in tbl:
+            continue
+        rows = re.findall(r"<tr[^>]*>(.*?)</tr>", tbl, re.S)
+        if len(rows) >= 2:
+            # First row carries the pack sizes, second row the matching prices.
+            size_cells = re.findall(r"<td[^>]*>(.*?)</td>", rows[0], re.S)
+            size_texts = [re.sub(r"<[^>]+>", "", c).strip() for c in size_cells]
+            price_cells = re.findall(r"<td[^>]*>(.*?)</td>", rows[1], re.S)
+            price_texts = [re.sub(r"<[^>]+>", "", c).strip() for c in price_cells]
+            # Find the "Port." entry
+            for i, st in enumerate(size_texts):
+                if "Port" in st:
+                    if i < len(price_texts):
+                        pm = re.search(r"[\d,\.]+", price_texts[i].replace(",", "."))
+                        if pm:
+                            try:
+                                result["port_price"] = float(pm.group())
+                            except ValueError:
+                                # e.g. "1.234,50" -> "1.234.50": not a
+                                # parseable number, leave the price unset.
+                                pass
+                    break
+            # Get portion content info
+            result["pack_sizes"] = size_texts
+            break
+
+    # Sowing depth
+    m = re.search(r"(?:sowing|seed)\s*depth[:\s]*(?:approx\.?\s*)?(\d+[\.,]?\d*)\s*(?:-\s*(\d+[\.,]?\d*)\s*)?cm", html, re.I)
+    if m:
+        d1 = float(m.group(1).replace(",", "."))
+        d2 = float(m.group(2).replace(",", ".")) if m.group(2) else d1
+        # A range is stored as its midpoint.
+        result["planting_depth_cm"] = round((d1 + d2) / 2, 2)
+
+    # Spacing: "row spacing NNxNN cm" or "NN x NN cm"
+    # Try outdoor spacing first
+    m = re.search(r"(?:outdoors?|field)[^.]*?(\d+)\s*(?:x|×)\s*(\d+)\s*cm", html, re.I)
+    if not m:
+        m = re.search(r"row\s*spacing\s*(\d+)\s*(?:x|×)\s*(\d+)\s*cm", html, re.I)
+    if not m:
+        m = re.search(r"(\d+)\s*(?:x|×)\s*(\d+)\s*cm", html, re.I)
+    if m:
+        result["row_spacing_cm"] = float(m.group(1))
+        result["plant_spacing_cm"] = float(m.group(2))
+
+    # Row spacing without plant spacing (e.g. "row spacing 30-45 cm")
+    if "row_spacing_cm" not in result:
+        m = re.search(r"row\s*spacing\s*(\d+)(?:\s*-\s*(\d+))?\s*cm", html, re.I)
+        if m:
+            r1 = int(m.group(1))
+            r2 = int(m.group(2)) if m.group(2) else r1
+            # Midpoint of the range, rounded down to a whole centimetre.
+            result["row_spacing_cm"] = float((r1 + r2) // 2)
+
+    # Germination temperature
+    m = re.search(r"germination\s*temp[^:]*:\s*(\d+)\s*(?:-\s*(\d+))?\s*°?\s*C", html, re.I)
+    if m:
+        t1 = int(m.group(1))
+        t2 = int(m.group(2)) if m.group(2) else t1
+        # Midpoint of the range, rounded down to a whole degree.
+        result["germination_temp_c"] = float((t1 + t2) // 2)
+
+    # Pack unit from portion info - "20 seeds" or "25 g" etc
+    portion_m = re.search(r"[Pp]ortion\s*(?:contents?)?[:\s]*(\d+[\.,]?\d*)\s*(seeds?|Korn|g|kg)", html)
+    if not portion_m:
+        # Try "Port. (20 seeds)" format
+        portion_m = re.search(r"Port[.\w]*\s*\(?\s*(\d+[\.,]?\d*)\s*(seeds?|Korn|g|kg)", html)
+    if portion_m:
+        result["pack_size"] = float(portion_m.group(1).replace(",", "."))
+        unit = portion_m.group(2).lower()
+        # Seed counts are normalised to the unit name "Korn".
+        if unit in ("seed", "seeds", "korn"):
+            result["pack_unit"] = "Korn"
+        else:
+            result["pack_unit"] = unit
+
+    result["url"] = url
+    return result
+
+
+def get_all_species():
+    """Page through /species and index the records by normalised name.
+
+    Returns a dict mapping the normalised "genus species" string to a small
+    record with the species' id, slug and original scientific name.  Species
+    whose name cannot be normalised are left out.
+    """
+    lookup = {}
+    page_no = 0
+    more_pages = True
+    while more_pages:
+        page_no += 1
+        payload = api_get("/species", {"per_page": 100, "page": page_no})
+        rows = payload.get("data", [])
+        for entry in rows:
+            key = normalise_botanical(entry["name_scientific"])
+            if not key:
+                continue
+            lookup[key] = {"id": entry["id"], "slug": entry["slug"], "name": entry["name_scientific"]}
+        print(f"    page {page_no}: {len(rows)} species (total so far: {len(lookup)})")
+        # A short page means the listing is exhausted.
+        more_pages = len(rows) >= 100
+    return lookup
+
+
+def get_all_cultivars():
+    """Page through /cultivars and index the records.
+
+    Returns a dict keyed by (species_id, lower-cased stripped name) whose
+    values are the cultivar records as returned by the API.
+    """
+    index = {}
+    page_no = 0
+    more_pages = True
+    while more_pages:
+        page_no += 1
+        payload = api_get("/cultivars", {"per_page": 100, "page": page_no})
+        rows = payload.get("data", [])
+        for record in rows:
+            index[(record["species_id"], record["name"].lower().strip())] = record
+        print(f"    page {page_no}: {len(rows)} cultivars (total so far: {len(index)})")
+        # A short page means the listing is exhausted.
+        more_pages = len(rows) >= 100
+    return index
+
+
+def get_reinsaat_supplier():
+    """Return the supplier record whose slug is "reinsaat".
+
+    Raises:
+        RuntimeError: if /suppliers lists no such record.
+    """
+    match = next(
+        (entry for entry in api_get("/suppliers") if entry["slug"] == "reinsaat"),
+        None,
+    )
+    if match is None:
+        raise RuntimeError("Reinsaat supplier not found in API")
+    return match
+
+
+def get_cultivar_suppliers(cultivar_id):
+    """Fetch the supplier links recorded for the cultivar *cultivar_id*."""
+    endpoint = "/cultivars/{}/suppliers".format(cultivar_id)
+    return api_get(endpoint)
+
+
+def get_product_urls_from_category(cat_slug):
+    """Collect candidate product URLs from a category page.
+
+    Every link on the page that points below /shop/EN/<cat_slug>/ is
+    returned as an absolute URL, sorted and de-duplicated.  No distinction
+    is made here between product and subcategory pages; the caller decides
+    after fetching each URL.
+
+    Returns an empty list (after printing a warning) when the category page
+    cannot be fetched.
+    """
+    cat_url = f"{REINSAAT_BASE}/shop/EN/{cat_slug}/"
+    try:
+        html = fetch_page(cat_url)
+    except Exception as e:
+        print(f"  WARN: Failed to fetch category {cat_slug}: {e}")
+        return []
+
+    time.sleep(DELAY)
+
+    # Site-relative links of any depth below this category.
+    raw_links = re.findall(rf'href="(/shop/EN/{re.escape(cat_slug)}/[^"]+/)"', html)
+    return [REINSAAT_BASE + link for link in sorted(set(raw_links))]
+
+
+def is_product_page(html):
+    """Tell whether *html* is a product page.
+
+    A page counts as a product page when it mentions the botanical-name
+    container ("fce_shop_kurztext") or embeds a JSON-LD "Product" type.
+    """
+    if "fce_shop_kurztext" in html:
+        return True
+    return re.search(r'"@type":\s*"Product"', html) is not None
+
+
+def main():
+    """Scrape all configured Reinsaat categories and sync them to HerbAPI.
+
+    Loads species, cultivars and the Reinsaat supplier record from the API,
+    then fetches every URL found under each category.  Product pages go to
+    process_product(); pages that are not product pages are treated as
+    possible subcategories and their deeper links are fetched as well.
+    A summary of the collected statistics is printed at the end.
+    """
+    print("=" * 60)
+    print("Reinsaat v3 Scraper")
+    print("=" * 60)
+
+    # Step 1: Load all species
+    print("\n[1/4] Loading species from API...")
+    species_map = get_all_species()
+    print(f"  Loaded {len(species_map)} species")
+
+    # Step 2: Load all cultivars
+    print("\n[2/4] Loading cultivars from API...")
+    cultivar_map = get_all_cultivars()
+    print(f"  Loaded {len(cultivar_map)} cultivars")
+
+    # Step 3: Get Reinsaat supplier
+    print("\n[3/4] Getting Reinsaat supplier...")
+    supplier = get_reinsaat_supplier()
+    supplier_id = supplier["id"]
+    print(f"  Reinsaat ID: {supplier_id}")
+
+    # Step 4: Scrape categories
+    print(f"\n[4/4] Scraping {len(CATEGORIES)} categories...")
+
+    stats = {
+        "products_found": 0,
+        "botanical_extracted": 0,
+        "species_matched": 0,
+        "species_not_matched": 0,
+        "cultivar_existed": 0,
+        "cultivar_created": 0,
+        "link_existed": 0,
+        "link_created": 0,
+        "errors": 0,
+    }
+    unmatched_species = {}  # botanical_norm -> count
+    new_cultivars = []
+    new_links = []
+
+    for cat_i, cat in enumerate(CATEGORIES):
+        print(f"\n--- [{cat_i+1}/{len(CATEGORIES)}] {cat} ---")
+        urls = get_product_urls_from_category(cat)
+        print(f"  Found {len(urls)} URLs")
+        # URLs already scheduled or fetched for this category, so that a
+        # page reachable from several subcategories is processed only once.
+        seen = set(urls)
+
+        for url in urls:
+            time.sleep(DELAY)
+            try:
+                html = fetch_page(url)
+            except Exception as e:
+                print(f"  ERROR fetching {url}: {e}")
+                stats["errors"] += 1
+                continue
+
+            # Check if this is actually a product page
+            if not is_product_page(html):
+                # Might be a subcategory - get links from it
+                # Depth is measured on absolute URLs without trailing slash
+                # on both sides; comparing a site-relative path with an
+                # absolute URL would reject links exactly one level deeper.
+                url_depth = url.rstrip("/").count("/")
+                sub_links = re.findall(r'href="(/shop/EN/[^"]+/)"', html)
+                sub_urls = [
+                    REINSAAT_BASE + link
+                    for link in sorted(set(sub_links))
+                    if link.startswith(f"/shop/EN/{cat}/")
+                    and (REINSAAT_BASE + link).rstrip("/").count("/") > url_depth
+                ]
+                # It's a subcategory if it has deeper links: process them
+                for sub_url in sub_urls:
+                    if sub_url in seen:
+                        continue  # already in list
+                    seen.add(sub_url)
+                    time.sleep(DELAY)
+                    try:
+                        sub_html = fetch_page(sub_url)
+                    except Exception as e:
+                        print(f"  ERROR fetching {sub_url}: {e}")
+                        stats["errors"] += 1
+                        continue
+                    if not is_product_page(sub_html):
+                        continue
+                    process_product(
+                        sub_html, sub_url, species_map, cultivar_map,
+                        supplier_id, stats, unmatched_species,
+                        new_cultivars, new_links,
+                    )
+                continue
+
+            process_product(
+                html, url, species_map, cultivar_map,
+                supplier_id, stats, unmatched_species,
+                new_cultivars, new_links,
+            )
+
+    # Report
+    print("\n" + "=" * 60)
+    print("RESULTS")
+    print("=" * 60)
+    print(f"Products found:        {stats['products_found']}")
+    print(f"Botanical extracted:   {stats['botanical_extracted']}")
+    print(f"Species matched:       {stats['species_matched']}")
+    print(f"Species NOT matched:   {stats['species_not_matched']}")
+    print(f"Cultivars existed:     {stats['cultivar_existed']}")
+    print(f"Cultivars created:     {stats['cultivar_created']}")
+    print(f"Links existed:         {stats['link_existed']}")
+    print(f"Links created:         {stats['link_created']}")
+    print(f"Errors:                {stats['errors']}")
+
+    if new_cultivars:
+        print(f"\n--- New cultivars ({len(new_cultivars)}) ---")
+        for cv in new_cultivars:
+            print(f"  + {cv['name']} ({cv.get('species', '?')})")
+
+    if new_links:
+        print(f"\n--- New supplier links ({len(new_links)}) ---")
+        for lk in new_links:
+            print(f"  + {lk['cultivar']} -> {lk.get('article', '?')}")
+
+    if unmatched_species:
+        print(f"\n--- Unmatched species ({len(unmatched_species)}) ---")
+        # Most frequent unmatched names first.
+        for name, count in sorted(unmatched_species.items(), key=lambda x: -x[1]):
+            print(f"  ? {name} (x{count})")
+
+    print("\nDone.")
+
+
+def process_product(html, url, species_map, cultivar_map, supplier_id,
+                    stats, unmatched_species, new_cultivars, new_links):
+    """Process a single product page."""
+    stats["products_found"] += 1
+    prod = extract_product_data(html, url)
+
+    if not prod.get("name"):
+        return
+
+    bot_norm = prod.get("botanical_norm")
+    if not bot_norm:
+        # No botanical name found on page
+        stats["species_not_matched"] += 1
+        unmatched_species["(no botanical name)"] = unmatched_species.get("(no botanical name)", 0) + 1
+        return
+
+    stats["botanical_extracted"] += 1
+
+    # Match species
+    species = species_map.get(bot_norm)
+    if not species:
+        stats["species_not_matched"] += 1
+        unmatched_species[bot_norm] = unmatched_species.get(bot_norm, 0) + 1
+        return
+
+    stats["species_matched"] += 1
+    species_id = species["id"]
+    cultivar_name = prod["name"]
+
+    # Check if cultivar exists
+    cv_key = (species_id, cultivar_name.lower().strip())
+    existing_cv = cultivar_map.get(cv_key)
+
+    if existing_cv:
+        stats["cultivar_existed"] += 1
+        cultivar_id = existing_cv["id"]
+    else:
+        # Create cultivar
+        create_data = {
+            "species_id": species_id,
+            "name": cultivar_name,
+            "is_organic": True,
+            "source_urls": [url],
+        }
+        # Add growing data if we extracted any
+        if "planting_depth_cm" in prod:
+            create_data["planting_depth_cm"] = prod["planting_depth_cm"]
+        if "row_spacing_cm" in prod:
+            create_data["row_spacing_cm"] = prod["row_spacing_cm"]
+        if "plant_spacing_cm" in prod:
+            create_data["plant_spacing_cm"] = prod["plant_spacing_cm"]
+        if "germination_temp_c" in prod:
+            create_data["germination_temp_c"] = prod["germination_temp_c"]
+
+        try:
+            new_cv = api_post("/cultivars", create_data)
+            cultivar_id = new_cv["id"]
+            stats["cultivar_created"] += 1
+            new_cultivars.append({
+                "name": cultivar_name,
+                "species": species["name"],
+                "id": cultivar_id,
+            })
+            # Add to local cache
+            cultivar_map[cv_key] = new_cv
+            print(f"  + Created cultivar: {cultivar_name} ({species['name']})")
+        except urllib.error.HTTPError as e:
+            body = e.read().decode() if hasattr(e, 'read') else str(e)
+            if e.code == 500 and "Database error" in body:
+                # Likely slug collision - search for existing cultivar
+                try:
+                    # Try multiple search strategies
+                    found = None
+                    cn_lower = cultivar_name.lower().strip()
+
+                    # Strategy 1: search by full name
+                    search_data = api_get("/cultivars", {"search": cultivar_name, "per_page": 50})
+                    for cv in search_data.get("data", []):
+                        if cv["name"].lower().strip() == cn_lower:
+                            found = cv
+                            break
+                    # Strategy 2: match by species_id + partial name
+                    if not found:
+                        for cv in search_data.get("data", []):
+                            if cv["species_id"] == species_id:
+                                # Match if names are similar (ignoring punctuation)
+                                cv_clean = re.sub(r'[^\w\s]', '', cv["name"].lower())
+                                cn_clean = re.sub(r'[^\w\s]', '', cn_lower)
+                                if cv_clean == cn_clean or cv_clean in cn_clean or cn_clean in cv_clean:
+                                    found = cv
+                                    break
+                    # Strategy 3: search by last significant word
+                    if not found:
+                        words = [w for w in cultivar_name.split() if len(w) > 2]
+                        if words:
+                            search2 = api_get("/cultivars", {"search": words[-1], "per_page": 50})
+                            for cv in search2.get("data", []):
+                                if cv["species_id"] == species_id:
+                                    cv_clean = re.sub(r'[^\w\s]', '', cv["name"].lower())
+                                    cn_clean = re.sub(r'[^\w\s]', '', cn_lower)
+                                    if cv_clean == cn_clean:
+                                        found = cv
+                                        break
+
+                    if found:
+                        cultivar_id = found["id"]
+                        cultivar_map[cv_key] = found
+                        stats["cultivar_existed"] += 1
+                    else:
+                        print(f"  WARN: could not create or find cultivar '{cultivar_name}' (DB error + no search match)")
+                        stats["errors"] += 1
+                        return
+                except Exception as e2:
+                    print(f"  ERROR searching for '{cultivar_name}' after collision: {e2}")
+                    stats["errors"] += 1
+                    return
+            else:
+                print(f"  ERROR creating cultivar '{cultivar_name}': {e.code} {body}")
+                stats["errors"] += 1
+                return
+
+    # Check if Reinsaat supplier link exists
+    try:
+        existing_links = get_cultivar_suppliers(cultivar_id)
+    except Exception:
+        existing_links = []
+
+    has_reinsaat = any(l["supplier_id"] == supplier_id for l in existing_links)
+
+    if has_reinsaat:
+        stats["link_existed"] += 1
+    else:
+        # Create supplier link
+        link_data = {
+            "supplier_id": supplier_id,
+            "product_url": url,
+        }
+        if "article_number" in prod:
+            link_data["article_number"] = prod["article_number"]
+        if "port_price" in prod:
+            link_data["price_eur"] = prod["port_price"]
+        elif "price_eur" in prod:
+            link_data["price_eur"] = prod["price_eur"]
+        if "pack_size" in prod:
+            link_data["pack_size"] = prod["pack_size"]
+        if "pack_unit" in prod:
+            link_data["pack_unit"] = prod["pack_unit"]
+
+        try:
+            api_post(f"/cultivars/{cultivar_id}/suppliers", link_data)
+            stats["link_created"] += 1
+            new_links.append({
+                "cultivar": cultivar_name,
+                "article": prod.get("article_number", "?"),
+                "url": url,
+            })
+        except urllib.error.HTTPError as e:
+            body = e.read().decode() if hasattr(e, 'read') else str(e)
+            print(f"  ERROR linking '{cultivar_name}': {e.code} {body}")
+            stats["errors"] += 1
+
+
+if __name__ == "__main__":  # script entry point: call main() only when run directly, not when imported
+    main()