Add scraper and enrichment scripts to tools/ directory

2026-03-16 11:10:18 +01:00
parent 83ab8c4cf9
commit 0ef902cc91
13 changed files with 6031 additions and 0 deletions
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""Enrich HerbAPI species with Wikidata QID, GBIF ID, and EPPO code."""
+
+import json
+import time
+import urllib.parse
+import urllib.request
+
+# Base URL of the HerbAPI REST API; request paths are appended to it.
+HERBAPI_BASE = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Bearer token sent in the Authorization header of every HerbAPI request.
+# SECURITY NOTE(review): this credential is committed in plain text; rotate it
+# and load it from the environment or a secrets store instead.
+HERBAPI_TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+# Endpoint that receives the SPARQL queries built in query_wikidata_batch().
+WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"
+
+# Headers for Wikidata requests: an identifying User-Agent and a JSON Accept.
+HEADERS_WD = {
+    "User-Agent": "HerbAPI-Enrichment/1.0 (florian.berthold@sub-net.at)",
+    "Accept": "application/json",
+}
+
+
+def herbapi_request(path, method="GET", data=None):
+    url = f"{HERBAPI_BASE}{path}"
+    body = json.dumps(data).encode() if data else None
+    req = urllib.request.Request(url, data=body, method=method, headers={
+        "Authorization": f"Bearer {HERBAPI_TOKEN}",
+        "Content-Type": "application/json",
+    })
+    with urllib.request.urlopen(req) as resp:
+        return json.loads(resp.read())
+
+
+def query_wikidata_batch(names):
+    """Query Wikidata for a batch of scientific names."""
+    values = " ".join(f'"{n}"' for n in names)
+    sparql = f"""SELECT ?name ?item ?gbifId ?eppoCode WHERE {{
+  VALUES ?name {{ {values} }}
+  ?item wdt:P225 ?name .
+  OPTIONAL {{ ?item wdt:P846 ?gbifId }}
+  OPTIONAL {{ ?item wdt:P3031 ?eppoCode }}
+}}"""
+    encoded = urllib.parse.quote(sparql)
+    url = f"{WIKIDATA_SPARQL}?query={encoded}&format=json"
+    req = urllib.request.Request(url, headers=HEADERS_WD)
+    with urllib.request.urlopen(req, timeout=60) as resp:
+        data = json.loads(resp.read())
+
+    results = {}
+    for binding in data.get("results", {}).get("bindings", []):
+        name = binding["name"]["value"]
+        qid_url = binding["item"]["value"]
+        qid = qid_url.rsplit("/", 1)[-1]
+        gbif = binding.get("gbifId", {}).get("value")
+        eppo = binding.get("eppoCode", {}).get("value")
+        results[name] = {"qid": qid, "gbif_id": gbif, "eppo_code": eppo}
+    return results
+
+
+def main():
+    # 1. Fetch all species
+    resp = herbapi_request("/species?per_page=200")
+    species_list = resp["data"]
+    print(f"Fetched {len(species_list)} species from HerbAPI\n")
+
+    # 2. Collect species needing enrichment
+    to_enrich = [sp for sp in species_list
+                 if not sp["wikidata_qid"] or not sp["gbif_id"] or not sp["eppo_code"]]
+
+    if not to_enrich:
+        print("All species already enriched.")
+        return
+
+    print(f"{len(to_enrich)} species need enrichment\n")
+
+    # 3. Batch query Wikidata
+    BATCH_SIZE = 20
+    wikidata_results = {}
+    names = [sp["name_scientific"] for sp in to_enrich]
+
+    for i in range(0, len(names), BATCH_SIZE):
+        batch = names[i:i + BATCH_SIZE]
+        print(f"Querying Wikidata batch {i // BATCH_SIZE + 1}: {len(batch)} species...")
+        try:
+            results = query_wikidata_batch(batch)
+            wikidata_results.update(results)
+            print(f"  Got {len(results)} matches")
+        except Exception as e:
+            print(f"  ERROR: {e}")
+        if i + BATCH_SIZE < len(names):
+            time.sleep(2)
+
+    print(f"\nWikidata returned data for {len(wikidata_results)} / {len(names)} species\n")
+
+    # 4. Update HerbAPI - GET full object by slug, merge, PUT by UUID
+    updated = 0
+    skipped = 0
+    not_found = 0
+    errors = 0
+
+    for sp in to_enrich:
+        name = sp["name_scientific"]
+        wd = wikidata_results.get(name)
+        if not wd:
+            print(f"  SKIP (no Wikidata match): {name}")
+            not_found += 1
+            continue
+
+        # Check what needs updating
+        needs_qid = not sp["wikidata_qid"] and wd["qid"]
+        needs_gbif = not sp["gbif_id"] and wd["gbif_id"]
+        needs_eppo = not sp["eppo_code"] and wd["eppo_code"]
+
+        if not (needs_qid or needs_gbif or needs_eppo):
+            print(f"  SKIP (nothing new): {name}")
+            skipped += 1
+            continue
+
+        try:
+            # GET full species by slug for the complete object
+            full_sp = herbapi_request(f"/species/{sp['slug']}")
+
+            # Remove read-only fields
+            species_id = full_sp.pop("id")
+            full_sp.pop("slug", None)
+            full_sp.pop("created_at", None)
+            full_sp.pop("updated_at", None)
+
+            # Merge new data (only null fields)
+            if needs_qid:
+                full_sp["wikidata_qid"] = wd["qid"]
+            if needs_gbif:
+                full_sp["gbif_id"] = str(wd["gbif_id"])  # API expects string
+            if needs_eppo:
+                full_sp["eppo_code"] = wd["eppo_code"]
+
+            # PUT by UUID
+            herbapi_request(f"/species/{species_id}", method="PUT", data=full_sp)
+
+            fields = []
+            if needs_qid: fields.append(f"qid={wd['qid']}")
+            if needs_gbif: fields.append(f"gbif={wd['gbif_id']}")
+            if needs_eppo: fields.append(f"eppo={wd['eppo_code']}")
+            print(f"  UPDATED: {name} -> {', '.join(fields)}")
+            updated += 1
+        except Exception as e:
+            print(f"  ERROR updating {name}: {e}")
+            errors += 1
+
+    print(f"\n{'=' * 60}")
+    print(f"RESULTS:")
+    print(f"  Updated:               {updated}")
+    print(f"  Skipped (no new data): {skipped}")
+    print(f"  Not found on Wikidata: {not_found}")
+    print(f"  Errors:                {errors}")
+    print(f"  Total species:         {len(species_list)}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,305 @@
+#!/usr/bin/env python3
+"""Expand HerbAPI species database with common permaculture/garden species."""
+
+import json
+import time
+import urllib.request
+import urllib.parse
+import urllib.error
+import ssl
+
+# Base URL of the HerbAPI REST API; request paths are appended to it.
+BASE_URL = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+# Complete Authorization header value sent with every HerbAPI request.
+# SECURITY NOTE(review): this bearer token is committed in plain text; rotate
+# it and load it from the environment or a secrets store instead.
+AUTH = "Bearer km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+# Seconds slept after each HerbAPI POST in main().
+DELAY = 0.15
+
+# SSL context for GBIF (https)
+ssl_ctx = ssl.create_default_context()
+
+
+def api_get(path):
+    req = urllib.request.Request(f"{BASE_URL}{path}", headers={"Authorization": AUTH})
+    with urllib.request.urlopen(req) as resp:
+        return json.loads(resp.read())
+
+
+def api_post(path, data):
+    body = json.dumps(data).encode()
+    req = urllib.request.Request(
+        f"{BASE_URL}{path}",
+        data=body,
+        headers={"Authorization": AUTH, "Content-Type": "application/json"},
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(req) as resp:
+            return json.loads(resp.read()), resp.status
+    except urllib.error.HTTPError as e:
+        err_body = e.read().decode()
+        print(f"  ERROR {e.code}: {err_body}")
+        return None, e.code
+
+
+def gbif_get_german_name(scientific_name):
+    """Query GBIF for the German vernacular name."""
+    try:
+        url = f"https://api.gbif.org/v1/species/match?name={urllib.parse.quote(scientific_name)}"
+        req = urllib.request.Request(url)
+        with urllib.request.urlopen(req, context=ssl_ctx, timeout=10) as resp:
+            match = json.loads(resp.read())
+
+        usage_key = match.get("usageKey")
+        if not usage_key:
+            return None
+
+        url2 = f"https://api.gbif.org/v1/species/{usage_key}/vernacularNames?limit=100"
+        req2 = urllib.request.Request(url2)
+        with urllib.request.urlopen(req2, context=ssl_ctx, timeout=10) as resp:
+            vn = json.loads(resp.read())
+
+        for r in vn.get("results", []):
+            if r.get("language") == "deu":
+                return r["vernacularName"]
+        return None
+    except Exception as e:
+        print(f"  GBIF lookup failed for {scientific_name}: {e}")
+        return None
+
+
+# ── Families to ensure exist ─────────────────────────────────────────
+# Maps a family's scientific name to its English and German names. main()
+# POSTs every family listed here that the API does not already return.
+FAMILIES_NEEDED = {
+    "Fabaceae":        {"name_en": "Legumes", "name_de": "Hülsenfrüchtler"},
+    "Solanaceae":      {"name_en": "Nightshade family", "name_de": "Nachtschattengewächse"},
+    "Cucurbitaceae":   {"name_en": "Gourd family", "name_de": "Kürbisgewächse"},
+    "Asteraceae":      {"name_en": "Daisy family", "name_de": "Korbblütler"},
+    "Chenopodiaceae":  {"name_en": "Goosefoot family", "name_de": "Gänsefußgewächse"},
+    "Brassicaceae":    {"name_en": "Cabbage family", "name_de": "Kreuzblütler"},
+    "Amaryllidaceae":  {"name_en": "Amaryllis family", "name_de": "Amaryllisgewächse"},
+    "Apiaceae":        {"name_en": "Carrot family", "name_de": "Doldenblütler"},
+    "Poaceae":         {"name_en": "Grass family", "name_de": "Süßgräser"},
+    "Lamiaceae":       {"name_en": "Mint family", "name_de": "Lippenblütler"},
+    "Caprifoliaceae":  {"name_en": "Honeysuckle family", "name_de": "Geißblattgewächse"},
+    "Rosaceae":        {"name_en": "Rose family", "name_de": "Rosengewächse"},
+    "Grossulariaceae": {"name_en": "Gooseberry family", "name_de": "Stachelbeergewächse"},
+    "Ericaceae":       {"name_en": "Heath family", "name_de": "Heidekrautgewächse"},
+    "Moraceae":        {"name_en": "Mulberry family", "name_de": "Maulbeergewächse"},
+    # New families not yet in the DB:
+    "Hypericaceae":    {"name_en": "St John's wort family", "name_de": "Johanniskrautgewächse"},
+    "Tropaeolaceae":   {"name_en": "Nasturtium family", "name_de": "Kapuzinerkressengewächse"},
+    "Elaeagnaceae":    {"name_en": "Oleaster family", "name_de": "Ölweidengewächse"},
+}
+
+# ── Species to add ───────────────────────────────────────────────────
+# Each entry is a 6-tuple:
+#   (scientific_name, family, name_en, name_de, plant_layer, extra_fields)
+# main() skips entries whose scientific name already exists, resolves
+# `family` to an id through the family map, and merges `extra_fields` into
+# the POST payload on top of the five base fields.
+SPECIES = [
+    # Vegetables
+    ("Phaseolus vulgaris", "Fabaceae", "common bean", "Gartenbohne", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Beans (pods, seeds)"}),
+    ("Phaseolus coccineus", "Fabaceae", "runner bean", "Feuerbohne", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Beans (pods, seeds), flowers", "attracts_pollinators": True}),
+    ("Pisum sativum", "Fabaceae", "pea", "Erbse", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Peas, shoots"}),
+    ("Capsicum annuum", "Solanaceae", "pepper", "Paprika", "herbaceous",
+     {"food_uses": "Fruit"}),
+    ("Cucumis sativus", "Cucurbitaceae", "cucumber", "Gurke", "ground_cover",
+     {"food_uses": "Fruit"}),
+    ("Cucurbita maxima", "Cucurbitaceae", "winter squash", "Riesenkürbis", "ground_cover",
+     {"food_uses": "Fruit, seeds, flowers"}),
+    ("Cucurbita moschata", "Cucurbitaceae", "butternut squash", "Moschuskürbis", "ground_cover",
+     {"food_uses": "Fruit, seeds"}),
+    ("Lactuca sativa", "Asteraceae", "lettuce", "Salat", "herbaceous",
+     {"food_uses": "Leaves"}),
+    ("Spinacia oleracea", "Chenopodiaceae", "spinach", "Spinat", "herbaceous",
+     {"food_uses": "Leaves"}),
+    ("Brassica oleracea", "Brassicaceae", "cabbage / kale", "Kohl", "herbaceous",
+     {"food_uses": "Leaves, flower buds, stems"}),
+    ("Brassica rapa", "Brassicaceae", "turnip", "Rübe", "herbaceous",
+     {"food_uses": "Root, leaves"}),
+    ("Raphanus sativus", "Brassicaceae", "radish", "Rettich", "herbaceous",
+     {"food_uses": "Root, leaves, seed pods"}),
+    ("Allium cepa", "Amaryllidaceae", "onion", "Zwiebel", "herbaceous",
+     {"food_uses": "Bulb, leaves"}),
+    ("Allium sativum", "Amaryllidaceae", "garlic", "Knoblauch", "herbaceous",
+     {"food_uses": "Bulb, scapes", "medicinal_uses": "Antimicrobial, cardiovascular"}),
+    ("Allium schoenoprasum", "Amaryllidaceae", "chives", "Schnittlauch", "herbaceous",
+     {"food_uses": "Leaves, flowers", "attracts_pollinators": True}),
+    ("Petroselinum crispum", "Apiaceae", "parsley", "Petersilie", "herbaceous",
+     {"food_uses": "Leaves, root"}),
+    ("Apium graveolens", "Apiaceae", "celery", "Sellerie", "herbaceous",
+     {"food_uses": "Stalks, root, leaves"}),
+    ("Foeniculum vulgare", "Apiaceae", "fennel", "Fenchel", "herbaceous",
+     {"food_uses": "Bulb, fronds, seeds", "attracts_beneficial_insects": True}),
+    ("Pastinaca sativa", "Apiaceae", "parsnip", "Pastinake", "herbaceous",
+     {"food_uses": "Root"}),
+    ("Zea mays", "Poaceae", "corn", "Mais", "herbaceous",
+     {"food_uses": "Kernels, cobs"}),
+    ("Solanum melongena", "Solanaceae", "eggplant", "Melanzani", "herbaceous",
+     {"food_uses": "Fruit"}),
+
+    # Herbs
+    ("Ocimum basilicum", "Lamiaceae", "basil", "Basilikum", "herbaceous",
+     {"food_uses": "Leaves", "attracts_pollinators": True}),
+    ("Origanum vulgare", "Lamiaceae", "oregano", "Oregano", "herbaceous",
+     {"food_uses": "Leaves", "attracts_pollinators": True, "attracts_beneficial_insects": True}),
+    ("Mentha x piperita", "Lamiaceae", "peppermint", "Pfefferminze", "herbaceous",
+     {"food_uses": "Leaves (tea, culinary)", "medicinal_uses": "Digestive, headache relief", "invasiveness": "spreading"}),
+    ("Rosmarinus officinalis", "Lamiaceae", "rosemary", "Rosmarin", "herbaceous",
+     {"food_uses": "Leaves", "attracts_pollinators": True}),
+    ("Anethum graveolens", "Apiaceae", "dill", "Dill", "herbaceous",
+     {"food_uses": "Leaves, seeds", "attracts_beneficial_insects": True}),
+    ("Coriandrum sativum", "Apiaceae", "coriander", "Koriander", "herbaceous",
+     {"food_uses": "Leaves, seeds", "attracts_beneficial_insects": True}),
+    ("Artemisia absinthium", "Asteraceae", "wormwood", "Wermut", "herbaceous",
+     {"medicinal_uses": "Digestive, anti-parasitic", "other_uses": "Companion plant pest deterrent", "allelopathic": True}),
+    ("Achillea millefolium", "Asteraceae", "yarrow", "Schafgarbe", "herbaceous",
+     {"food_uses": "Young leaves (salad)", "medicinal_uses": "Wound healing, anti-inflammatory",
+      "dynamic_accumulator": True, "dynamic_accumulator_nutrients": "K, P, Cu",
+      "attracts_beneficial_insects": True, "attracts_pollinators": True}),
+    ("Hypericum perforatum", "Hypericaceae", "St John's wort", "Johanniskraut", "herbaceous",
+     {"medicinal_uses": "Antidepressant, wound healing", "attracts_pollinators": True}),
+    ("Echinacea purpurea", "Asteraceae", "echinacea", "Sonnenhut", "herbaceous",
+     {"medicinal_uses": "Immune stimulant", "attracts_pollinators": True, "wildlife_value": "Seeds for birds"}),
+    ("Valeriana officinalis", "Caprifoliaceae", "valerian", "Baldrian", "herbaceous",
+     {"medicinal_uses": "Sedative, sleep aid", "attracts_pollinators": True,
+      "other_uses": "Earthworm attractant (biodynamic)"}),
+
+    # Flowers & cover crops
+    ("Tagetes patula", "Asteraceae", "French marigold", "Studentenblume", "herbaceous",
+     {"other_uses": "Nematode suppression, companion plant", "attracts_pollinators": True}),
+    ("Helianthus annuus", "Asteraceae", "sunflower", "Sonnenblume", "herbaceous",
+     {"food_uses": "Seeds, oil", "attracts_pollinators": True, "wildlife_value": "Seeds for birds"}),
+    ("Tropaeolum majus", "Tropaeolaceae", "nasturtium", "Kapuzinerkresse", "ground_cover",
+     {"food_uses": "Leaves, flowers, seeds (capers)", "other_uses": "Trap crop for aphids"}),
+    ("Centaurea cyanus", "Asteraceae", "cornflower", "Kornblume", "herbaceous",
+     {"food_uses": "Flowers (edible garnish)", "attracts_pollinators": True, "attracts_beneficial_insects": True}),
+    ("Sinapis alba", "Brassicaceae", "white mustard", "Weißer Senf", "herbaceous",
+     {"food_uses": "Seeds, young leaves", "other_uses": "Green manure, biofumigant"}),
+    ("Trifolium repens", "Fabaceae", "white clover", "Weißklee", "ground_cover",
+     {"nitrogen_fixer": True, "food_uses": "Flowers (tea), young leaves",
+      "ground_cover_quality": "excellent", "attracts_pollinators": True}),
+    ("Medicago sativa", "Fabaceae", "alfalfa", "Luzerne", "herbaceous",
+     {"nitrogen_fixer": True, "food_uses": "Sprouts",
+      "dynamic_accumulator": True, "dynamic_accumulator_nutrients": "N, K, Ca, Mg, Fe",
+      "other_uses": "Green manure, deep-rooting soil improver"}),
+
+    # Fruit / Trees
+    ("Prunus avium", "Rosaceae", "sweet cherry", "Süßkirsche", "canopy",
+     {"food_uses": "Fruit", "attracts_pollinators": True, "wildlife_value": "Fruit for birds"}),
+    ("Prunus cerasus", "Rosaceae", "sour cherry", "Sauerkirsche", "understory",
+     {"food_uses": "Fruit (cooking, preserves)", "attracts_pollinators": True}),
+    ("Pyrus communis", "Rosaceae", "pear", "Birne", "canopy",
+     {"food_uses": "Fruit", "attracts_pollinators": True}),
+    ("Ribes uva-crispa", "Grossulariaceae", "gooseberry", "Stachelbeere", "shrub",
+     {"food_uses": "Berries"}),
+    ("Rubus fruticosus", "Rosaceae", "blackberry", "Brombeere", "shrub",
+     {"food_uses": "Berries, leaves (tea)", "attracts_pollinators": True,
+      "wildlife_value": "Berries for birds, nesting habitat", "invasiveness": "spreading"}),
+    ("Vaccinium myrtillus", "Ericaceae", "bilberry", "Heidelbeere", "shrub",
+     {"food_uses": "Berries", "medicinal_uses": "Antioxidant, eye health"}),
+    ("Hippophae rhamnoides", "Elaeagnaceae", "sea buckthorn", "Sanddorn", "shrub",
+     {"nitrogen_fixer": True, "food_uses": "Berries (juice, oil)",
+      "medicinal_uses": "High vitamin C, skin care",
+      "other_uses": "Erosion control, windbreak"}),
+    ("Morus alba", "Moraceae", "white mulberry", "Weiße Maulbeere", "canopy",
+     {"food_uses": "Fruit, young leaves", "wildlife_value": "Fruit for birds"}),
+]
+
+
+def main():
+    # 1. Load existing families
+    print("=== Loading existing families ===")
+    fam_resp = api_get("/families?per_page=100")
+    family_map = {}  # name_scientific -> id
+    for f in fam_resp["data"]:
+        family_map[f["name_scientific"]] = f["id"]
+    print(f"  Found {len(family_map)} existing families")
+
+    # 2. Create missing families
+    print("\n=== Creating missing families ===")
+    families_created = 0
+    for fam_name, fam_info in FAMILIES_NEEDED.items():
+        if fam_name in family_map:
+            print(f"  SKIP (exists): {fam_name}")
+            continue
+        payload = {
+            "name_scientific": fam_name,
+            "name_en": fam_info["name_en"],
+            "name_de": fam_info["name_de"],
+        }
+        print(f"  CREATE: {fam_name} ...", end=" ")
+        result, status = api_post("/families", payload)
+        if result and "id" in result:
+            family_map[fam_name] = result["id"]
+            print(f"OK ({result['id']})")
+            families_created += 1
+        else:
+            print(f"FAILED (status={status})")
+        time.sleep(DELAY)
+
+    print(f"\n  Families created: {families_created}")
+
+    # 3. Load existing species
+    print("\n=== Loading existing species ===")
+    sp_resp = api_get("/species?per_page=200")
+    existing_species = set()
+    for s in sp_resp["data"]:
+        existing_species.add(s["name_scientific"])
+    print(f"  Found {len(existing_species)} existing species")
+
+    # 4. Add new species
+    print("\n=== Adding new species ===")
+    created = 0
+    skipped = 0
+    failed = 0
+
+    for sci_name, family, name_en, name_de, plant_layer, extras in SPECIES:
+        if sci_name in existing_species:
+            print(f"  SKIP (exists): {sci_name}")
+            skipped += 1
+            continue
+
+        # Look up family ID
+        fam_id = family_map.get(family)
+        if not fam_id:
+            print(f"  SKIP (no family '{family}'): {sci_name}")
+            failed += 1
+            continue
+
+        # Try GBIF for German name
+        gbif_de = gbif_get_german_name(sci_name)
+        if gbif_de:
+            print(f"  GBIF name for {sci_name}: {gbif_de}")
+            # Use GBIF name if it differs (prefer catalog name as primary, GBIF as validation)
+            # Keep our curated name_de but log the GBIF one
+
+        payload = {
+            "name_scientific": sci_name,
+            "family_id": fam_id,
+            "name_en": name_en,
+            "name_de": name_de,
+            "plant_layer": plant_layer,
+        }
+        # Add extra fields
+        for k, v in extras.items():
+            payload[k] = v
+
+        print(f"  CREATE: {sci_name} ({name_de}) ...", end=" ")
+        result, status = api_post("/species", payload)
+        if result and "id" in result:
+            print(f"OK ({result['id']})")
+            created += 1
+        else:
+            print(f"FAILED (status={status})")
+            failed += 1
+        time.sleep(DELAY)
+
+    print(f"\n{'='*50}")
+    print(f"SUMMARY")
+    print(f"  Families created: {families_created}")
+    print(f"  Species created:  {created}")
+    print(f"  Species skipped:  {skipped}")
+    print(f"  Species failed:   {failed}")
+    print(f"  Total species now: {len(existing_species) + created}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,362 @@
+#!/usr/bin/env python3
+"""Import CC-licensed plant images from Wikimedia Commons via Wikidata into HerbAPI."""
+
+import json
+import os
+import re
+import subprocess
+import sys
+import time
+import urllib.parse
+import urllib.request
+
+# Switch stdout/stderr to line buffering so each printed line is flushed
+# as soon as it is written.
+sys.stdout.reconfigure(line_buffering=True)
+sys.stderr.reconfigure(line_buffering=True)
+
+# --- Configuration ---
+# S3 target used by s3_upload() through the AWS CLI.
+# SECURITY NOTE(review): the S3 key pair and the database password below are
+# committed in plain text; rotate them and load them from the environment or
+# a secrets store instead.
+S3_ENDPOINT = "http://garage.sub-net.at:3900"
+S3_BUCKET = "herbapi"
+S3_ACCESS_KEY = "GK1a89859373a6ac56bf11958f"
+S3_SECRET_KEY = "bea45a333b5c7b1efdd7466bdbcac54d8642fa19f0c617ca2fd64bd07951b899"
+S3_REGION = "garage"
+
+# PostgreSQL connection settings used by psql().
+DB_HOST = "10.31.3.90"
+DB_USER = "herbapi"
+DB_PASS = "_6Qo_jEFhE9LZOEbwLynEWoLbc6B4Ipj"
+DB_NAME = "herbapi"
+
+# User-Agent header sent with every Wikidata/Commons request.
+USER_AGENT = "HerbAPI/1.0 (https://herbapi.naturalised.at; florian.berthold@sub-net.at)"
+# Thumbnail width requested from the Commons API (iiurlwidth).
+THUMB_WIDTH = 800
+# Seconds slept before each Wikidata, Commons and download request.
+REQUEST_DELAY = 0.3
+
+# Lower-cased license names accepted by exact match in is_license_allowed().
+ALLOWED_LICENSES = {
+    "cc0", "cc-zero", "cc0 1.0", "cc-zero 1.0",
+    "public domain", "pd", "pd-self", "pd-old", "pd-old-auto", "pd-old-100",
+    "pd-us", "pd-usgov", "pd-author",
+    "cc by 1.0", "cc by 2.0", "cc by 2.5", "cc by 3.0", "cc by 4.0",
+    "cc-by-1.0", "cc-by-2.0", "cc-by-2.5", "cc-by-3.0", "cc-by-4.0",
+    "cc by-sa 1.0", "cc by-sa 2.0", "cc by-sa 2.5", "cc by-sa 3.0", "cc by-sa 4.0",
+    "cc-by-sa-1.0", "cc-by-sa-2.0", "cc-by-sa-2.5", "cc-by-sa-3.0", "cc-by-sa-4.0",
+}
+
+
+def slugify(name: str) -> str:
+    """Convert scientific name to a URL-safe slug."""
+    return re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-')
+
+
+def psql(query: str) -> str:
+    """Run a psql query and return output."""
+    env = os.environ.copy()
+    env["PGPASSWORD"] = DB_PASS
+    result = subprocess.run(
+        ["psql", "-h", DB_HOST, "-U", DB_USER, DB_NAME, "-t", "-A", "-c", query],
+        capture_output=True, text=True, env=env
+    )
+    if result.returncode != 0:
+        print(f"  psql error: {result.stderr.strip()}", file=sys.stderr)
+    return result.stdout.strip()
+
+
+def fetch_json(url: str) -> dict | None:
+    """Fetch JSON from a URL with proper User-Agent."""
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            return json.loads(resp.read())
+    except Exception as e:
+        print(f"  HTTP error fetching {url}: {e}")
+        return None
+
+
+def get_wikidata_image(qid: str) -> str | None:
+    """Query Wikidata SPARQL for P18 image filename."""
+    sparql = f"SELECT ?image WHERE {{ wd:{qid} wdt:P18 ?image }} LIMIT 1"
+    url = "https://query.wikidata.org/sparql?" + urllib.parse.urlencode({
+        "query": sparql, "format": "json"
+    })
+    data = fetch_json(url)
+    if not data:
+        return None
+    bindings = data.get("results", {}).get("bindings", [])
+    if not bindings:
+        return None
+    image_url = bindings[0]["image"]["value"]
+    # URL like http://commons.wikimedia.org/wiki/Special:FilePath/Filename.jpg
+    filename = urllib.parse.unquote(image_url.rsplit("/", 1)[-1])
+    return filename
+
+
+def get_commons_info(filename: str) -> dict | None:
+    """Get image info from Wikimedia Commons API."""
+    url = "https://commons.wikimedia.org/w/api.php?" + urllib.parse.urlencode({
+        "action": "query",
+        "titles": f"File:{filename}",
+        "prop": "imageinfo",
+        "iiprop": "url|extmetadata",
+        "iiurlwidth": str(THUMB_WIDTH),
+        "format": "json",
+    })
+    data = fetch_json(url)
+    if not data:
+        return None
+    pages = data.get("query", {}).get("pages", {})
+    for page_id, page in pages.items():
+        if page_id == "-1":
+            return None
+        imageinfo = page.get("imageinfo", [])
+        if not imageinfo:
+            return None
+        info = imageinfo[0]
+        meta = info.get("extmetadata", {})
+
+        thumb_url = info.get("thumburl") or info.get("url")
+        desc_url = info.get("descriptionurl", "")
+
+        license_short = meta.get("LicenseShortName", {}).get("value", "")
+        artist_html = meta.get("Artist", {}).get("value", "")
+        # Strip HTML tags from artist
+        artist = re.sub(r'<[^>]+>', '', artist_html).strip()
+        # Clean up whitespace
+        artist = re.sub(r'\s+', ' ', artist)
+
+        return {
+            "thumb_url": thumb_url,
+            "description_url": desc_url,
+            "license": license_short,
+            "artist": artist,
+            "filename": filename,
+        }
+    return None
+
+
+def is_license_allowed(license_str: str) -> bool:
+    """Check if a license is in our allowed list."""
+    normalized = license_str.lower().strip()
+    # Direct match
+    if normalized in ALLOWED_LICENSES:
+        return True
+    # Check for NC or ND
+    if "nc" in normalized or "nd" in normalized:
+        return False
+    # Check patterns
+    if normalized.startswith("public domain") or normalized.startswith("pd"):
+        return True
+    if re.match(r'^cc[- ]?by[- ]?sa[- ]?\d', normalized):
+        return True
+    if re.match(r'^cc[- ]?by[- ]?\d', normalized):
+        return True
+    if re.match(r'^cc[- ]?0', normalized) or normalized == "cc zero":
+        return True
+    return False
+
+
+def normalize_license(license_str: str) -> str:
+    """Normalize license string for storage."""
+    low = license_str.lower().strip()
+    if "public domain" in low or low.startswith("pd"):
+        return "Public domain"
+    if re.match(r'^cc[- ]?0', low) or "cc-zero" in low or "cc zero" in low:
+        return "CC0 1.0"
+    # CC BY-SA X.0
+    m = re.match(r'^cc[- ]?by[- ]?sa[- ]?(\d+\.?\d*)', low)
+    if m:
+        return f"CC BY-SA {m.group(1)}"
+    # CC BY X.0
+    m = re.match(r'^cc[- ]?by[- ]?(\d+\.?\d*)', low)
+    if m:
+        return f"CC BY {m.group(1)}"
+    return license_str
+
+
+def s3_upload(s3_key: str, data: bytes, content_type: str = "image/jpeg"):
+    """Upload to S3 Garage using AWS CLI."""
+    tmp_path = "/tmp/_herbapi_upload_tmp_file_file"
+    with open(tmp_path, "wb") as f:
+        f.write(data)
+
+    env = os.environ.copy()
+    env["AWS_ACCESS_KEY_ID"] = S3_ACCESS_KEY
+    env["AWS_SECRET_ACCESS_KEY"] = S3_SECRET_KEY
+    env["AWS_DEFAULT_REGION"] = S3_REGION
+
+    result = subprocess.run(
+        [
+            "aws", "s3", "cp", tmp_path,
+            f"s3://{S3_BUCKET}/{s3_key}",
+            "--endpoint-url", S3_ENDPOINT,
+            "--content-type", content_type,
+        ],
+        capture_output=True, text=True, env=env
+    )
+    os.unlink(tmp_path)
+    if result.returncode != 0:
+        raise RuntimeError(f"S3 upload failed: {result.stderr.strip()}")
+
+
+def download_image(url: str) -> bytes | None:
+    """Download image data from URL."""
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    try:
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            return resp.read()
+    except Exception as e:
+        print(f"  Download error: {e}")
+        return None
+
+
+def main():
+    # 1. Get species
+    rows = psql(
+        "SELECT id, name_scientific, wikidata_qid FROM species "
+        "WHERE wikidata_qid IS NOT NULL AND wikidata_qid <> '' "
+        "ORDER BY name_scientific"
+    )
+    if not rows:
+        print("No species with wikidata_qid found.")
+        return
+
+    species_list = []
+    for line in rows.split("\n"):
+        parts = line.split("|")
+        if len(parts) == 3:
+            species_list.append({
+                "id": parts[0],
+                "name": parts[1],
+                "qid": parts[2],
+            })
+
+    print(f"Found {len(species_list)} species with Wikidata QIDs.")
+
+    # 2. Get existing images
+    existing = set()
+    existing_rows = psql("SELECT entity_id FROM images WHERE entity_type = 'species'")
+    if existing_rows:
+        for line in existing_rows.split("\n"):
+            line = line.strip()
+            if line:
+                existing.add(line)
+
+    print(f"Found {len(existing)} species that already have images.")
+
+    imported = 0
+    skipped_existing = 0
+    skipped_no_image = 0
+    skipped_license = 0
+    skipped_download = 0
+    errors = 0
+
+    for i, sp in enumerate(species_list):
+        name = sp["name"]
+        qid = sp["qid"]
+        sp_id = sp["id"]
+        slug = slugify(name)
+
+        print(f"\n[{i+1}/{len(species_list)}] {name} ({qid})")
+
+        if sp_id in existing:
+            print("  Already has image, skipping.")
+            skipped_existing += 1
+            continue
+
+        # Query Wikidata for image
+        time.sleep(REQUEST_DELAY)
+        filename = get_wikidata_image(qid)
+        if not filename:
+            print("  No image on Wikidata.")
+            skipped_no_image += 1
+            continue
+
+        # Get Commons info
+        time.sleep(REQUEST_DELAY)
+        info = get_commons_info(filename)
+        if not info:
+            print(f"  Could not get Commons info for {filename}")
+            skipped_no_image += 1
+            continue
+
+        # Check license
+        raw_license = info["license"]
+        if not is_license_allowed(raw_license):
+            print(f"  License not allowed: {raw_license}")
+            skipped_license += 1
+            continue
+
+        norm_license = normalize_license(raw_license)
+        artist = info["artist"]
+        thumb_url = info["thumb_url"]
+        desc_url = info["description_url"]
+
+        print(f"  License: {raw_license} -> {norm_license}")
+        print(f"  Artist: {artist[:80]}")
+        print(f"  Thumbnail: {thumb_url[:100]}...")
+
+        # Download image
+        time.sleep(REQUEST_DELAY)
+        image_data = download_image(thumb_url)
+        if not image_data:
+            print("  Failed to download image.")
+            skipped_download += 1
+            continue
+
+        print(f"  Downloaded {len(image_data)} bytes")
+
+        # Determine file extension from URL
+        ext = "jpg"
+        if ".png" in thumb_url.lower():
+            ext = "png"
+        elif ".svg" in thumb_url.lower():
+            ext = "svg"
+        elif ".gif" in thumb_url.lower():
+            ext = "gif"
+
+        s3_key = f"species/{slug}.{ext}"
+        content_type = {
+            "jpg": "image/jpeg",
+            "png": "image/png",
+            "svg": "image/svg+xml",
+            "gif": "image/gif",
+        }.get(ext, "image/jpeg")
+
+        # Upload to S3
+        try:
+            s3_upload(s3_key, image_data, content_type)
+            print(f"  Uploaded to s3://{S3_BUCKET}/{s3_key}")
+        except RuntimeError as e:
+            print(f"  S3 upload failed: {e}")
+            errors += 1
+            continue
+
+        # Insert into database
+        caption = f"Photo: {artist}" if artist else "Wikimedia Commons"
+        # Escape single quotes for SQL
+        caption_esc = caption.replace("'", "''")
+        desc_url_esc = desc_url.replace("'", "''")
+        norm_license_esc = norm_license.replace("'", "''")
+        s3_key_esc = s3_key.replace("'", "''")
+
+        insert_sql = (
+            f"INSERT INTO images (id, entity_type, entity_id, s3_key, caption, source_url, license, is_primary) "
+            f"VALUES (gen_random_uuid(), 'species', '{sp_id}', '{s3_key_esc}', "
+            f"'{caption_esc}', '{desc_url_esc}', '{norm_license_esc}', true)"
+        )
+
+        result = psql(insert_sql)
+        # psql returns empty on success for INSERT
+        print(f"  Inserted into images table.")
+        imported += 1
+
+    print(f"\n{'='*60}")
+    print(f"DONE!")
+    print(f"  Imported:          {imported}")
+    print(f"  Skipped (existing):{skipped_existing}")
+    print(f"  Skipped (no image):{skipped_no_image}")
+    print(f"  Skipped (license): {skipped_license}")
+    print(f"  Skipped (download):{skipped_download}")
+    print(f"  Errors:            {errors}")
+    print(f"  Total processed:   {len(species_list)}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""Import CC-licensed plant images from Wikimedia Commons into HerbAPI."""
+
+import hashlib
+import json
+import os
+import re
+import subprocess
+import sys
+import time
+import urllib.parse
+import urllib.request
+
+# Config
+# NOTE(review): real credentials are committed below in plain text. They are kept
+# only as fallbacks so existing invocations keep working; rotate them and supply
+# the values through the HERBAPI_* environment variables instead.
+DB_HOST = os.environ.get("HERBAPI_DB_HOST", "10.31.3.90")
+DB_USER = os.environ.get("HERBAPI_DB_USER", "herbapi")
+DB_PASS = os.environ.get("HERBAPI_DB_PASS", "_6Qo_jEFhE9LZOEbwLynEWoLbc6B4Ipj")
+DB_NAME = os.environ.get("HERBAPI_DB_NAME", "herbapi")
+S3_BUCKET = os.environ.get("HERBAPI_S3_BUCKET", "herbapi")
+S3_ENDPOINT = os.environ.get("HERBAPI_S3_ENDPOINT", "http://10.31.3.170:3900")
+# User-Agent header sent by fetch_url() with every outbound HTTP request.
+USER_AGENT = "HerbAPI/1.0 (https://herbapi.naturalised.at; florian.berthold@sub-net.at)"
+# Pause in seconds inserted after each remote request.
+REQUEST_DELAY = 0.3
+
+# AWS env for subprocess calls: the caller's environment plus the S3 credentials
+# and region. These three entries take precedence over any AWS_* values that are
+# already present in the environment.
+AWS_ENV = {
+    **os.environ,
+    "AWS_ACCESS_KEY_ID": os.environ.get(
+        "HERBAPI_S3_ACCESS_KEY", "GK1a89859373a6ac56bf11958f"
+    ),
+    "AWS_SECRET_ACCESS_KEY": os.environ.get(
+        "HERBAPI_S3_SECRET_KEY",
+        "bea45a333b5c7b1efdd7466bdbcac54d8642fa19f0c617ca2fd64bd07951b899",
+    ),
+    "AWS_DEFAULT_REGION": os.environ.get("HERBAPI_S3_REGION", "garage"),
+}
+
+# Stats: per-outcome counters, incremented by process_species() and printed by main().
+stats = {
+    "total": 0,
+    "imported": 0,
+    "no_p18": 0,
+    "bad_license": 0,
+    "download_fail": 0,
+    "upload_fail": 0,
+    "errors": 0,
+}
+
+
+def fetch_url(url):
+    """Download *url* and return the raw response body as bytes.
+
+    The request carries the module's USER_AGENT header and gives up after 30 seconds.
+    """
+    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+    response = urllib.request.urlopen(request, timeout=30)
+    with response:
+        body = response.read()
+    return body
+
+
+def fetch_json(url):
+    """Download *url* with fetch_url() and return the body decoded as JSON."""
+    raw_body = fetch_url(url)
+    return json.loads(raw_body)
+
+
+def psql(sql):
+    """Run one SQL command through the ``psql`` CLI and return its output.
+
+    Args:
+        sql: SQL text handed to ``psql -c``.
+
+    Returns:
+        The stripped stdout in unaligned, tuples-only form (``-t -A``).
+
+    Raises:
+        RuntimeError: if psql exits with a non-zero status. Without this check a
+            failed statement was indistinguishable from a successful one that
+            printed nothing, so callers' ``try/except`` blocks could never fire.
+    """
+    result = subprocess.run(
+        ["psql", "-h", DB_HOST, "-U", DB_USER, DB_NAME, "-t", "-A", "-c", sql],
+        capture_output=True, text=True,
+        # The password travels via the environment so it never appears in argv.
+        env={**os.environ, "PGPASSWORD": DB_PASS},
+    )
+    if result.returncode != 0:
+        detail = result.stderr.strip() or "no error output"
+        raise RuntimeError(f"psql exited with status {result.returncode}: {detail}")
+    return result.stdout.strip()
+
+
+def is_license_allowed(license_str):
+    """Check if license is CC0/CC-BY/CC-BY-SA or Public Domain.
+
+    Wikimedia returns things like 'CC BY-SA 3.0', 'CC BY 4.0', 'CC0', 'Public domain'.
+    The separator between 'CC' and 'BY' may be spaces or hyphens, so hyphenated
+    identifiers such as 'CC-BY-SA-4.0' are accepted as well as the spaced names.
+
+    Allowed: CC0, Public Domain, CC BY (any version), CC BY-SA (any version).
+    Rejected: GFDL, CC BY-NC, CC BY-ND, CC BY-NC-SA, CC BY-NC-ND, FAL,
+    Copyrighted free use, and empty/None input.
+
+    Returns:
+        True if the license is allowed, False otherwise.
+    """
+    if not license_str:
+        return False
+    ls = license_str.lower().strip()
+
+    # Reject NC and ND explicitly first, so the permissive "cc by" match below
+    # can never let a restricted variant through.
+    tokens = ls.split()
+    if "nc" in tokens or "-nc" in ls or "nd" in tokens or "-nd" in ls:
+        return False
+
+    # CC0 in any spelling: 'cc0', 'cc0 1.0', 'cc0 1.0 universal', 'cc-zero', ...
+    if re.match(r"cc[\s-]*(0|zero)\b", ls):
+        return True
+    if "public domain" in ls or ls.startswith("pd"):
+        return True
+
+    # CC BY and CC BY-SA, any version or jurisdiction. One pattern covers both,
+    # because "by-sa" also starts with "by" followed by a word boundary.
+    return re.match(r"cc[\s-]+by\b", ls) is not None
+
+
+def get_wikidata_image(qid):
+    """Look up the P18 (image) value of Wikidata item *qid* via SPARQL.
+
+    Returns the URL-decoded last path segment of the image URL (the Commons file
+    name), or None when the query yields no binding.
+    """
+    sparql = f"SELECT ?image WHERE {{ wd:{qid} wdt:P18 ?image }} LIMIT 1"
+    endpoint = f"https://query.wikidata.org/sparql?query={urllib.parse.quote(sparql)}&format=json"
+    rows = fetch_json(endpoint).get("results", {}).get("bindings", [])
+    if rows:
+        # The binding holds a full URL; only the part after the last slash is the file name.
+        last_segment = rows[0]["image"]["value"].rsplit("/", 1)[-1]
+        return urllib.parse.unquote(last_segment)
+    return None
+
+
+def get_commons_info(filename):
+    """Query the Commons API for one file's license, artist and URLs.
+
+    Returns a dict with the keys "license", "artist", "thumb_url", "desc_url" and
+    "filename", or None when the response contains no page or the first page has
+    the id "-1".
+    """
+    page_title = "File:" + filename
+    api_url = (
+        f"https://commons.wikimedia.org/w/api.php?action=query"
+        f"&titles={urllib.parse.quote(page_title)}"
+        f"&prop=imageinfo&iiprop=url|extmetadata"
+        f"&iiurlwidth=800&format=json"
+    )
+    pages = fetch_json(api_url).get("query", {}).get("pages", {})
+
+    # Only the first page of the response is ever inspected.
+    first_entry = next(iter(pages.items()), None)
+    if first_entry is None:
+        return None
+    page_id, page = first_entry
+    if page_id == "-1":
+        return None
+
+    imageinfo = page.get("imageinfo", [{}])[0]
+    meta = imageinfo.get("extmetadata", {})
+
+    def meta_value(key):
+        """Return the "value" entry of one extmetadata field, or "" if absent."""
+        return meta.get(key, {}).get("value", "")
+
+    # The artist field is HTML: drop the tags, then squeeze runs of whitespace.
+    artist = re.sub(r"<[^>]+>", "", meta_value("Artist")).strip()
+    artist = re.sub(r"\s+", " ", artist)
+    if len(artist) > 120:
+        # Cap at 120 characters including the trailing ellipsis.
+        artist = artist[:117] + "..."
+
+    return {
+        "license": meta_value("LicenseShortName").strip(),
+        "artist": artist,
+        # Thumbnail rendered by the API at the width requested via iiurlwidth=800.
+        "thumb_url": imageinfo.get("thumburl", ""),
+        "desc_url": imageinfo.get("descriptionurl", ""),
+        "filename": filename,
+    }
+
+
+def _sql_literal(value):
+    """Return *value* as a single-quoted SQL string literal, doubling embedded quotes."""
+    return "'" + str(value).replace("'", "''") + "'"
+
+
+def _thumb_extension(thumb_url):
+    """Return "png" or "gif" if the URL's file name ends with that suffix, else "jpg"."""
+    file_name = thumb_url.lower().split("?")[0].split("/")[-1]
+    for candidate in ("png", "gif"):
+        if file_name.endswith(f".{candidate}"):
+            return candidate
+    return "jpg"
+
+
+def process_species(species_id, slug, name_sci, qid):
+    """Process a single species: fetch image from Wikidata/Commons, upload to S3, insert to DB.
+
+    Every outcome is tallied in the module-level ``stats`` counters.
+
+    Args:
+        species_id: Id of the species row; stored as ``images.entity_id``.
+        slug: Species slug; used to build the S3 key ``species/<slug>.<ext>``.
+        name_sci: Scientific name. Not used here; kept for the caller's signature.
+        qid: Wikidata item id whose P18 image is imported.
+
+    Returns:
+        True when the image was uploaded and the INSERT was issued, False otherwise.
+    """
+    import tempfile  # local import: only this function needs it
+
+    stats["total"] += 1
+
+    # Step 1: Get image filename from Wikidata
+    try:
+        filename = get_wikidata_image(qid)
+    except Exception as e:
+        print(f"  ERROR querying Wikidata for {qid}: {e}")
+        stats["errors"] += 1
+        return False
+    time.sleep(REQUEST_DELAY)
+
+    if not filename:
+        print(f"  No P18 image for {qid}")
+        stats["no_p18"] += 1
+        return False
+
+    # Step 2: Get Commons info (license, artist, thumb URL)
+    try:
+        info = get_commons_info(filename)
+    except Exception as e:
+        print(f"  ERROR querying Commons for {filename}: {e}")
+        stats["errors"] += 1
+        return False
+    time.sleep(REQUEST_DELAY)
+
+    if not info:
+        print(f"  No Commons info for {filename}")
+        stats["errors"] += 1
+        return False
+
+    # Step 3: Check license
+    if not is_license_allowed(info["license"]):
+        print(f"  Bad license: {info['license']} for {filename}")
+        stats["bad_license"] += 1
+        return False
+
+    # Step 4: Download thumbnail using API-provided URL
+    thumb_url = info["thumb_url"]
+    if not thumb_url:
+        print(f"  No thumbnail URL available for {filename}")
+        stats["download_fail"] += 1
+        return False
+
+    ext = _thumb_extension(thumb_url)
+    try:
+        img_data = fetch_url(thumb_url)
+    except Exception as e:
+        print(f"  ERROR downloading {thumb_url}: {e}")
+        stats["download_fail"] += 1
+        return False
+    time.sleep(REQUEST_DELAY)
+
+    # Step 5: Upload to S3
+    # A uniquely named temp file replaces the old predictable /tmp path built from
+    # the slug. It keeps the file extension and is removed when the `with` exits.
+    s3_key = f"species/{slug}.{ext}"
+    try:
+        with tempfile.NamedTemporaryFile(prefix="herbapi_img_", suffix=f".{ext}") as tmp:
+            tmp.write(img_data)
+            tmp.flush()  # make sure the bytes are on disk before aws reads the file
+            result = subprocess.run(
+                ["aws", "s3", "cp", tmp.name, f"s3://{S3_BUCKET}/{s3_key}", "--endpoint-url", S3_ENDPOINT],
+                capture_output=True, text=True, env=AWS_ENV, timeout=60,
+            )
+        if result.returncode != 0:
+            print(f"  S3 upload failed: {result.stderr}")
+            stats["upload_fail"] += 1
+            return False
+    except Exception as e:
+        print(f"  ERROR uploading to S3: {e}")
+        stats["upload_fail"] += 1
+        return False
+
+    # Step 6: Insert into DB
+    # The statement goes through `psql -c`, so it is built as text; every
+    # interpolated value is quoted by _sql_literal, including id and S3 key.
+    caption = f"Photo: {info['artist']}" if info["artist"] else ""
+    source_url = info["desc_url"] or f"https://commons.wikimedia.org/wiki/File:{urllib.parse.quote(filename)}"
+
+    sql = (
+        "INSERT INTO images (entity_type, entity_id, s3_key, caption, source_url, license, is_primary) "
+        f"VALUES ('species', {_sql_literal(species_id)}, {_sql_literal(s3_key)}, "
+        f"{_sql_literal(caption)}, {_sql_literal(source_url)}, {_sql_literal(info['license'])}, true);"
+    )
+    try:
+        psql(sql)
+    except Exception as e:
+        print(f"  ERROR inserting to DB: {e}")
+        stats["errors"] += 1
+        return False
+
+    stats["imported"] += 1
+    return True
+
+
+def main():
+    """Import an image for every species that has a Wikidata QID but no image row yet."""
+    # Species with a QID and no matching row in `images`, ordered by scientific name.
+    pending_query = (
+        "SELECT s.id, s.slug, s.name_scientific, s.wikidata_qid "
+        "FROM species s "
+        "LEFT JOIN images i ON i.entity_type = 'species' AND i.entity_id = s.id "
+        "WHERE s.wikidata_qid IS NOT NULL AND s.wikidata_qid != '' AND i.id IS NULL "
+        "ORDER BY s.name_scientific;"
+    )
+    rows = psql(pending_query)
+    if not rows:
+        print("No species need images.")
+        return
+
+    # One row per output line, columns separated by "|"; lines that do not split
+    # into exactly four fields are dropped.
+    split_lines = (line.strip().split("|") for line in rows.split("\n"))
+    species_list = [fields for fields in split_lines if len(fields) == 4]
+
+    print(f"Processing {len(species_list)} species...\n")
+
+    for i, (sid, slug, name_sci, qid) in enumerate(species_list, 1):
+        print(f"[{i}/{len(species_list)}] {name_sci} ({qid})")
+        if process_species(sid, slug, name_sci, qid):
+            print(f"  OK - imported")
+
+    # Final summary of the counters collected in `stats`.
+    report = [
+        f"\n{'='*50}",
+        f"RESULTS:",
+        f"  Total species processed: {stats['total']}",
+        f"  Successfully imported:   {stats['imported']}",
+        f"  No P18 image:            {stats['no_p18']}",
+        f"  Bad license (NC/ND/GFDL):{stats['bad_license']}",
+        f"  Download failures:       {stats['download_fail']}",
+        f"  Upload failures:         {stats['upload_fail']}",
+        f"  Other errors:            {stats['errors']}",
+    ]
+    for report_line in report:
+        print(report_line)
+
+
+# Script entry point: call main() only when this file is executed directly.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Seed HerbAPI with common permaculture plant families and species via GBIF + API."""
+import json, urllib.request, urllib.parse, time, sys
+
+API = "http://herbapi01.corp.sub-net.at:8080/api/v1"
+TOKEN = "km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
+GBIF = "https://api.gbif.org/v1"
+
+def api_post(path, data):
+    """POST *data* as JSON to the HerbAPI endpoint *path*.
+
+    Args:
+        path: Path appended to the API base URL, e.g. "/families".
+        data: JSON-serialisable request payload.
+
+    Returns:
+        The decoded JSON response, or None when the server answers with an HTTP
+        error status (the status and the first 120 characters of the body are
+        written to stderr). Other failures, such as an unreachable host, propagate.
+    """
+    req = urllib.request.Request(
+        f"{API}{path}",
+        data=json.dumps(data).encode(),
+        headers={"Content-Type": "application/json", "Authorization": f"Bearer {TOKEN}"},
+    )
+    try:
+        # The context manager closes the response even if JSON decoding fails.
+        with urllib.request.urlopen(req) as resp:
+            return json.loads(resp.read())
+    except urllib.error.HTTPError as e:
+        print(f"  ERR {e.code}: {e.read().decode()[:120]}", file=sys.stderr)
+        return None
+
+def gbif_de_name(name):
+    """Get German common name from GBIF.
+
+    Resolves *name* through the GBIF species match endpoint, then returns the
+    first vernacular name whose language is "deu".
+
+    Returns:
+        The German name, or None when there is no match, no German name, or the
+        lookup fails. The lookup is best effort: failures are reported on stderr
+        and never abort the caller.
+    """
+    def get_json(url):
+        """Fetch *url* and decode the body as JSON, closing the response."""
+        with urllib.request.urlopen(url, timeout=30) as resp:
+            return json.loads(resp.read())
+
+    try:
+        match = get_json(f"{GBIF}/species/match?name={urllib.parse.quote(name)}")
+        if not match.get("usageKey"):
+            return None
+        data = get_json(f"{GBIF}/species/{match['usageKey']}/vernacularNames?limit=100")
+        for r in data.get("results", []):
+            if r.get("language") == "deu":
+                return r["vernacularName"]
+    except Exception as exc:
+        # `except Exception` instead of a bare `except`, so Ctrl-C and SystemExit
+        # still stop the script.
+        print(f"  WARN GBIF lookup failed for {name}: {exc}", file=sys.stderr)
+    return None
+
+# Plant families to create, one tuple each:
+# (scientific name, German name, English name).
+FAMILIES = [
+    ("Fabaceae", "Hülsenfrüchtler", "Legumes"),
+    ("Rosaceae", "Rosengewächse", "Rose family"),
+    ("Brassicaceae", "Kreuzblütler", "Cabbage family"),
+    ("Apiaceae", "Doldenblütler", "Carrot family"),
+    ("Lamiaceae", "Lippenblütler", "Mint family"),
+    ("Asteraceae", "Korbblütler", "Daisy family"),
+    ("Solanaceae", "Nachtschattengewächse", "Nightshade family"),
+    ("Cucurbitaceae", "Kürbisgewächse", "Gourd family"),
+    ("Poaceae", "Süßgräser", "Grass family"),
+    ("Amaryllidaceae", "Amaryllisgewächse", "Amaryllis family"),
+    ("Boraginaceae", "Raublattgewächse", "Borage family"),
+    ("Adoxaceae", "Moschuskrautgewächse", "Moschatel family"),
+    ("Betulaceae", "Birkengewächse", "Birch family"),
+    ("Fagaceae", "Buchengewächse", "Beech family"),
+    ("Juglandaceae", "Walnussgewächse", "Walnut family"),
+    ("Caprifoliaceae", "Geißblattgewächse", "Honeysuckle family"),
+    ("Grossulariaceae", "Stachelbeergewächse", "Gooseberry family"),
+    ("Ericaceae", "Heidekrautgewächse", "Heath family"),
+    ("Moraceae", "Maulbeergewächse", "Mulberry family"),
+    ("Urticaceae", "Brennnesselgewächse", "Nettle family"),
+    ("Malvaceae", "Malvengewächse", "Mallow family"),
+    ("Polygonaceae", "Knöterichgewächse", "Buckwheat family"),
+    ("Chenopodiaceae", "Gänsefußgewächse", "Goosefoot family"),
+    ("Asparagaceae", "Spargelgewächse", "Asparagus family"),
+    ("Plantaginaceae", "Wegerichgewächse", "Plantain family"),
+]
+
+# Species to create, one tuple each:
+# (scientific name, scientific name of its family, extra fields).
+# The family name is looked up among the families created from FAMILIES, and the
+# extra dict is merged as-is into the payload posted to /species.
+SPECIES = [
+    ("Sambucus nigra", "Adoxaceae", {"plant_layer": "understory", "nitrogen_fixer": False, "food_uses": "Flowers (cordial, fritters), berries (cooked — syrup, wine)", "medicinal_uses": "Cold/flu remedy, immune support, diaphoretic", "succession_stage": "secondary"}),
+    ("Symphytum officinale", "Boraginaceae", {"plant_layer": "herbaceous", "dynamic_accumulator": True, "food_uses": "Young leaves (limited, contains pyrrolizidine alkaloids)", "medicinal_uses": "Wound healing, bone knitting (external only)", "other_uses": "Dynamic accumulator, mulch/compost activator, animal fodder"}),
+    ("Trifolium pratense", "Fabaceae", {"plant_layer": "ground_cover", "nitrogen_fixer": True, "food_uses": "Flowers, young leaves", "medicinal_uses": "Respiratory, menopausal symptoms", "other_uses": "Green manure, nitrogen fixer, bee forage"}),
+    ("Corylus avellana", "Betulaceae", {"plant_layer": "shrub", "food_uses": "Nuts", "other_uses": "Coppice wood, hedging, wildlife habitat", "succession_stage": "secondary"}),
+    ("Ribes nigrum", "Grossulariaceae", {"plant_layer": "shrub", "food_uses": "Berries, leaves (tea)", "medicinal_uses": "High vitamin C, anti-inflammatory"}),
+    ("Rubus idaeus", "Rosaceae", {"plant_layer": "shrub", "food_uses": "Berries, leaves (tea)", "medicinal_uses": "Leaf tea for pregnancy/digestion", "succession_stage": "pioneer"}),
+    ("Urtica dioica", "Urticaceae", {"plant_layer": "herbaceous", "dynamic_accumulator": True, "food_uses": "Young leaves, seeds", "medicinal_uses": "Anti-inflammatory, prostate, allergies", "other_uses": "Compost activator, fibre, liquid fertiliser"}),
+    ("Borago officinalis", "Boraginaceae", {"plant_layer": "herbaceous", "food_uses": "Flowers, young leaves", "other_uses": "Bee forage, companion plant", "attracts_pollinators": True}),
+    ("Lavandula angustifolia", "Lamiaceae", {"plant_layer": "herbaceous", "food_uses": "Flowers", "medicinal_uses": "Calming, antiseptic, sleep aid", "other_uses": "Bee forage, pest repellent, fragrance", "attracts_pollinators": True}),
+    ("Malus domestica", "Rosaceae", {"plant_layer": "canopy", "food_uses": "Fruit", "pollination_type": "Insect-pollinated"}),
+    ("Prunus domestica", "Rosaceae", {"plant_layer": "canopy", "food_uses": "Fruit", "pollination_type": "Insect-pollinated"}),
+    ("Juglans regia", "Juglandaceae", {"plant_layer": "canopy", "food_uses": "Nuts", "other_uses": "Timber, dye", "allelopathic": True}),
+    ("Fragaria vesca", "Rosaceae", {"plant_layer": "ground_cover", "food_uses": "Berries, leaves (tea)", "ground_cover_quality": "Good"}),
+    ("Allium ursinum", "Amaryllidaceae", {"plant_layer": "ground_cover", "food_uses": "Leaves, flowers, bulbs", "medicinal_uses": "Antimicrobial, blood pressure"}),
+    ("Phacelia tanacetifolia", "Boraginaceae", {"plant_layer": "herbaceous", "other_uses": "Green manure, bee forage, cover crop", "attracts_pollinators": True}),
+    ("Lupinus polyphyllus", "Fabaceae", {"plant_layer": "herbaceous", "nitrogen_fixer": True, "other_uses": "Nitrogen fixer, green manure, ornamental"}),
+    ("Vicia faba", "Fabaceae", {"plant_layer": "herbaceous", "nitrogen_fixer": True, "food_uses": "Beans", "other_uses": "Nitrogen fixer, green manure"}),
+    ("Solanum lycopersicum", "Solanaceae", {"plant_layer": "herbaceous", "food_uses": "Fruit"}),
+    ("Cucurbita pepo", "Cucurbitaceae", {"plant_layer": "ground_cover", "food_uses": "Fruit, seeds, flowers"}),
+    ("Beta vulgaris", "Chenopodiaceae", {"plant_layer": "herbaceous", "food_uses": "Roots, leaves"}),
+    ("Daucus carota", "Apiaceae", {"plant_layer": "herbaceous", "food_uses": "Root"}),
+    ("Calendula officinalis", "Asteraceae", {"plant_layer": "herbaceous", "food_uses": "Flowers", "medicinal_uses": "Wound healing, anti-inflammatory, skin care", "other_uses": "Companion plant, pest deterrent", "attracts_pollinators": True}),
+    ("Melissa officinalis", "Lamiaceae", {"plant_layer": "herbaceous", "food_uses": "Leaves", "medicinal_uses": "Calming, antiviral, digestive", "attracts_pollinators": True}),
+    ("Salvia officinalis", "Lamiaceae", {"plant_layer": "herbaceous", "food_uses": "Leaves", "medicinal_uses": "Sore throat, digestive, antimicrobial"}),
+    ("Thymus vulgaris", "Lamiaceae", {"plant_layer": "ground_cover", "food_uses": "Leaves", "medicinal_uses": "Respiratory, antimicrobial, cough"}),
+]
+
+# Register every family and remember the id the API assigned to it, keyed by
+# scientific name, so the species step can resolve its family reference.
+print("=== Creating families ===")
+family_map = {}
+for sci, de, en in FAMILIES:
+    family_payload = {"name_scientific": sci, "name_de": de, "name_en": en}
+    reply = api_post("/families", family_payload)
+    if reply:
+        family_map[sci] = reply["id"]
+        print(f"  ✓ {sci}")
+    # Short pause between requests.
+    time.sleep(0.05)
+print(f"Created {len(family_map)} families\n")
+
+# Post each species with its family id and, when GBIF has one, a German name.
+print("=== Creating species (with GBIF German names) ===")
+created = 0
+for sci_name, family_sci, extra in SPECIES:
+    family_id = family_map.get(family_sci)
+    if not family_id:
+        # The family was not created above, so there is no id to reference.
+        print(f"  ✗ {sci_name} — family {family_sci} missing")
+        continue
+    de_name = gbif_de_name(sci_name)
+    species_payload = {
+        "name_scientific": sci_name,
+        "name_de": de_name or "",
+        "name_en": "",
+        "family_id": family_id,
+        **extra,
+    }
+    reply = api_post("/species", species_payload)
+    if reply:
+        created += 1
+        print(f"  ✓ {sci_name} → {de_name or '(no DE name)'}")
+    time.sleep(0.15)
+print(f"Created {created} species\n")
+
+# Post the seed suppliers: (name, url, country, is_organic, is_demeter, notes).
+print("=== Creating suppliers ===")
+supplier_rows = [
+    ("Reinsaat", "https://www.reinsaat.at", "AT", True, True, "Austrian biodynamic seed producer, open-pollinated varieties"),
+    ("Magic Garden Seeds", "https://www.magicgardenseeds.com", "DE", False, False, "Specialist seed shop with rare and heritage varieties"),
+]
+for name, url, country, organic, demeter, notes in supplier_rows:
+    supplier_payload = {
+        "name": name,
+        "url": url,
+        "country": country,
+        "is_organic": organic,
+        "is_demeter": demeter,
+        "notes": notes,
+    }
+    reply = api_post("/suppliers", supplier_payload)
+    if reply:
+        print(f"  ✓ {name}")
+print("\nDone!")