Files
herbapi/tools/enrichment/expand_species.py
T

306 lines
15 KiB
Python

#!/usr/bin/env python3
"""Expand HerbAPI species database with common permaculture/garden species."""
import json
import os
import ssl
import time
import urllib.error
import urllib.parse
import urllib.request
# Root of the HerbAPI REST endpoint. Override with HERBAPI_BASE_URL to target
# another instance; a trailing slash is dropped because request paths are
# appended as "/families", "/species", ...
BASE_URL = os.environ.get(
    "HERBAPI_BASE_URL", "http://herbapi01.corp.sub-net.at:8080/api/v1"
).rstrip("/")

# Full value of the Authorization header. Override with HERBAPI_AUTH.
# NOTE(review): the fallback below is a credential committed to source control;
# it is kept only so existing invocations keep working. Rotate it and supply
# the token through the environment instead.
AUTH = os.environ.get(
    "HERBAPI_AUTH", "Bearer km2WjhgyMTHlltwgch5TZADHQ-4uIg0NxBeowD-DHGk"
)

# Pause, in seconds, passed to time.sleep() after each create request.
DELAY = 0.15

# SSL context used for the HTTPS requests to the GBIF API.
ssl_ctx = ssl.create_default_context()
def api_get(path, timeout=30):
    """Send an authenticated GET to the HerbAPI and return the decoded JSON.

    Args:
        path: Request path (including any query string) appended verbatim to
            BASE_URL, e.g. "/families?per_page=100".
        timeout: Socket timeout in seconds. Without one, urlopen() can block
            indefinitely on an unresponsive server.

    Returns:
        The parsed JSON response body.

    Raises:
        urllib.error.HTTPError: on a non-2xx response.
        urllib.error.URLError: on connection failures.
        json.JSONDecodeError: if the body is not valid JSON.
    """
    req = urllib.request.Request(
        f"{BASE_URL}{path}", headers={"Authorization": AUTH}
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())
def api_post(path, data, timeout=30):
    """Send an authenticated JSON POST to the HerbAPI.

    Args:
        path: Request path appended verbatim to BASE_URL, e.g. "/species".
        data: JSON-serialisable payload.
        timeout: Socket timeout in seconds. Without one, urlopen() can block
            indefinitely on an unresponsive server.

    Returns:
        A ``(result, status)`` tuple. ``result`` is the parsed JSON body, or
        ``None`` when the server answered with an HTTP error or with an empty
        body; ``status`` is the HTTP status code in both cases.

    Raises:
        urllib.error.URLError: on connection failures (only HTTP error
            responses are handled here).
        json.JSONDecodeError: if a non-empty success body is not valid JSON.
    """
    body = json.dumps(data).encode()
    req = urllib.request.Request(
        f"{BASE_URL}{path}",
        data=body,
        headers={"Authorization": AUTH, "Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
            # A success response with no body (e.g. 204) must not crash the
            # JSON parser; callers already treat a None result as "no id".
            result = json.loads(raw) if raw.strip() else None
            return result, resp.status
    except urllib.error.HTTPError as e:
        # errors="replace": an undecodable error page must not raise a second
        # exception from inside the handler.
        err_body = e.read().decode(errors="replace")
        print(f" ERROR {e.code}: {err_body}")
        return None, e.code
def gbif_get_german_name(scientific_name):
    """Look up the German vernacular name of a species via the GBIF API.

    The name is first matched to a GBIF usage key, then that key's vernacular
    names are scanned for the first entry whose language is "deu".

    Returns the vernacular name, or None when there is no match, no German
    entry, or the lookup fails for any reason (the failure is printed).
    """

    def fetch_json(url):
        # Both GBIF calls share the same context and 10-second timeout.
        request = urllib.request.Request(url)
        with urllib.request.urlopen(request, context=ssl_ctx, timeout=10) as resp:
            return json.loads(resp.read())

    try:
        matched = fetch_json(
            f"https://api.gbif.org/v1/species/match?name={urllib.parse.quote(scientific_name)}"
        )
        usage_key = matched.get("usageKey")
        if not usage_key:
            return None
        listing = fetch_json(
            f"https://api.gbif.org/v1/species/{usage_key}/vernacularNames?limit=100"
        )
        german_names = (
            entry["vernacularName"]
            for entry in listing.get("results", [])
            if entry.get("language") == "deu"
        )
        return next(german_names, None)
    except Exception as e:
        # Best effort only: the caller treats None as "no GBIF name".
        print(f" GBIF lookup failed for {scientific_name}: {e}")
        return None
# ── Families to ensure exist ─────────────────────────────────────────
# One row per family: (scientific name, English name, German name).
# Row order is the order in which missing families are created.
_FAMILY_ROWS = (
    ("Fabaceae", "Legumes", "Hülsenfrüchtler"),
    ("Solanaceae", "Nightshade family", "Nachtschattengewächse"),
    ("Cucurbitaceae", "Gourd family", "Kürbisgewächse"),
    ("Asteraceae", "Daisy family", "Korbblütler"),
    ("Chenopodiaceae", "Goosefoot family", "Gänsefußgewächse"),
    ("Brassicaceae", "Cabbage family", "Kreuzblütler"),
    ("Amaryllidaceae", "Amaryllis family", "Amaryllisgewächse"),
    ("Apiaceae", "Carrot family", "Doldenblütler"),
    ("Poaceae", "Grass family", "Süßgräser"),
    ("Lamiaceae", "Mint family", "Lippenblütler"),
    ("Caprifoliaceae", "Honeysuckle family", "Geißblattgewächse"),
    ("Rosaceae", "Rose family", "Rosengewächse"),
    ("Grossulariaceae", "Gooseberry family", "Stachelbeergewächse"),
    ("Ericaceae", "Heath family", "Heidekrautgewächse"),
    ("Moraceae", "Mulberry family", "Maulbeergewächse"),
    # New families not yet in the DB:
    ("Hypericaceae", "St John's wort family", "Johanniskrautgewächse"),
    ("Tropaeolaceae", "Nasturtium family", "Kapuzinerkressengewächse"),
    ("Elaeagnaceae", "Oleaster family", "Ölweidengewächse"),
)

# Scientific family name -> {"name_en": ..., "name_de": ...}.
FAMILIES_NEEDED = {
    scientific: {"name_en": english, "name_de": german}
    for scientific, english, german in _FAMILY_ROWS
}
# ── Species to add ───────────────────────────────────────────────────
# Catalog of species that main() tries to create, in list order.
#
# Each entry is a 6-tuple:
#   (scientific_name, family, name_en, name_de, plant_layer, extra_fields)
#
#   scientific_name  compared against the names already returned by the API;
#                    entries that already exist are skipped
#   family           scientific family name, resolved to a family id by main();
#                    entries whose family cannot be resolved are not created
#   name_en/name_de  curated English / German common names
#   plant_layer      one of the layer labels used below ("herbaceous",
#                    "ground_cover", "shrub", "understory", "canopy")
#   extra_fields     dict of additional attributes copied key-by-key into the
#                    POST payload after the core fields
SPECIES = [
    # Vegetables
    ("Phaseolus vulgaris", "Fabaceae", "common bean", "Gartenbohne", "herbaceous",
        {"nitrogen_fixer": True, "food_uses": "Beans (pods, seeds)"}),
    ("Phaseolus coccineus", "Fabaceae", "runner bean", "Feuerbohne", "herbaceous",
        {"nitrogen_fixer": True, "food_uses": "Beans (pods, seeds), flowers", "attracts_pollinators": True}),
    ("Pisum sativum", "Fabaceae", "pea", "Erbse", "herbaceous",
        {"nitrogen_fixer": True, "food_uses": "Peas, shoots"}),
    ("Capsicum annuum", "Solanaceae", "pepper", "Paprika", "herbaceous",
        {"food_uses": "Fruit"}),
    ("Cucumis sativus", "Cucurbitaceae", "cucumber", "Gurke", "ground_cover",
        {"food_uses": "Fruit"}),
    ("Cucurbita maxima", "Cucurbitaceae", "winter squash", "Riesenkürbis", "ground_cover",
        {"food_uses": "Fruit, seeds, flowers"}),
    ("Cucurbita moschata", "Cucurbitaceae", "butternut squash", "Moschuskürbis", "ground_cover",
        {"food_uses": "Fruit, seeds"}),
    ("Lactuca sativa", "Asteraceae", "lettuce", "Salat", "herbaceous",
        {"food_uses": "Leaves"}),
    ("Spinacia oleracea", "Chenopodiaceae", "spinach", "Spinat", "herbaceous",
        {"food_uses": "Leaves"}),
    ("Brassica oleracea", "Brassicaceae", "cabbage / kale", "Kohl", "herbaceous",
        {"food_uses": "Leaves, flower buds, stems"}),
    ("Brassica rapa", "Brassicaceae", "turnip", "Rübe", "herbaceous",
        {"food_uses": "Root, leaves"}),
    ("Raphanus sativus", "Brassicaceae", "radish", "Rettich", "herbaceous",
        {"food_uses": "Root, leaves, seed pods"}),
    ("Allium cepa", "Amaryllidaceae", "onion", "Zwiebel", "herbaceous",
        {"food_uses": "Bulb, leaves"}),
    ("Allium sativum", "Amaryllidaceae", "garlic", "Knoblauch", "herbaceous",
        {"food_uses": "Bulb, scapes", "medicinal_uses": "Antimicrobial, cardiovascular"}),
    ("Allium schoenoprasum", "Amaryllidaceae", "chives", "Schnittlauch", "herbaceous",
        {"food_uses": "Leaves, flowers", "attracts_pollinators": True}),
    ("Petroselinum crispum", "Apiaceae", "parsley", "Petersilie", "herbaceous",
        {"food_uses": "Leaves, root"}),
    ("Apium graveolens", "Apiaceae", "celery", "Sellerie", "herbaceous",
        {"food_uses": "Stalks, root, leaves"}),
    ("Foeniculum vulgare", "Apiaceae", "fennel", "Fenchel", "herbaceous",
        {"food_uses": "Bulb, fronds, seeds", "attracts_beneficial_insects": True}),
    ("Pastinaca sativa", "Apiaceae", "parsnip", "Pastinake", "herbaceous",
        {"food_uses": "Root"}),
    ("Zea mays", "Poaceae", "corn", "Mais", "herbaceous",
        {"food_uses": "Kernels, cobs"}),
    ("Solanum melongena", "Solanaceae", "eggplant", "Melanzani", "herbaceous",
        {"food_uses": "Fruit"}),
    # Herbs
    ("Ocimum basilicum", "Lamiaceae", "basil", "Basilikum", "herbaceous",
        {"food_uses": "Leaves", "attracts_pollinators": True}),
    ("Origanum vulgare", "Lamiaceae", "oregano", "Oregano", "herbaceous",
        {"food_uses": "Leaves", "attracts_pollinators": True, "attracts_beneficial_insects": True}),
    ("Mentha x piperita", "Lamiaceae", "peppermint", "Pfefferminze", "herbaceous",
        {"food_uses": "Leaves (tea, culinary)", "medicinal_uses": "Digestive, headache relief", "invasiveness": "spreading"}),
    ("Rosmarinus officinalis", "Lamiaceae", "rosemary", "Rosmarin", "herbaceous",
        {"food_uses": "Leaves", "attracts_pollinators": True}),
    ("Anethum graveolens", "Apiaceae", "dill", "Dill", "herbaceous",
        {"food_uses": "Leaves, seeds", "attracts_beneficial_insects": True}),
    ("Coriandrum sativum", "Apiaceae", "coriander", "Koriander", "herbaceous",
        {"food_uses": "Leaves, seeds", "attracts_beneficial_insects": True}),
    ("Artemisia absinthium", "Asteraceae", "wormwood", "Wermut", "herbaceous",
        {"medicinal_uses": "Digestive, anti-parasitic", "other_uses": "Companion plant pest deterrent", "allelopathic": True}),
    ("Achillea millefolium", "Asteraceae", "yarrow", "Schafgarbe", "herbaceous",
        {"food_uses": "Young leaves (salad)", "medicinal_uses": "Wound healing, anti-inflammatory",
         "dynamic_accumulator": True, "dynamic_accumulator_nutrients": "K, P, Cu",
         "attracts_beneficial_insects": True, "attracts_pollinators": True}),
    ("Hypericum perforatum", "Hypericaceae", "St John's wort", "Johanniskraut", "herbaceous",
        {"medicinal_uses": "Antidepressant, wound healing", "attracts_pollinators": True}),
    ("Echinacea purpurea", "Asteraceae", "echinacea", "Sonnenhut", "herbaceous",
        {"medicinal_uses": "Immune stimulant", "attracts_pollinators": True, "wildlife_value": "Seeds for birds"}),
    ("Valeriana officinalis", "Caprifoliaceae", "valerian", "Baldrian", "herbaceous",
        {"medicinal_uses": "Sedative, sleep aid", "attracts_pollinators": True,
         "other_uses": "Earthworm attractant (biodynamic)"}),
    # Flowers & cover crops
    ("Tagetes patula", "Asteraceae", "French marigold", "Studentenblume", "herbaceous",
        {"other_uses": "Nematode suppression, companion plant", "attracts_pollinators": True}),
    ("Helianthus annuus", "Asteraceae", "sunflower", "Sonnenblume", "herbaceous",
        {"food_uses": "Seeds, oil", "attracts_pollinators": True, "wildlife_value": "Seeds for birds"}),
    ("Tropaeolum majus", "Tropaeolaceae", "nasturtium", "Kapuzinerkresse", "ground_cover",
        {"food_uses": "Leaves, flowers, seeds (capers)", "other_uses": "Trap crop for aphids"}),
    ("Centaurea cyanus", "Asteraceae", "cornflower", "Kornblume", "herbaceous",
        {"food_uses": "Flowers (edible garnish)", "attracts_pollinators": True, "attracts_beneficial_insects": True}),
    ("Sinapis alba", "Brassicaceae", "white mustard", "Weißer Senf", "herbaceous",
        {"food_uses": "Seeds, young leaves", "other_uses": "Green manure, biofumigant"}),
    ("Trifolium repens", "Fabaceae", "white clover", "Weißklee", "ground_cover",
        {"nitrogen_fixer": True, "food_uses": "Flowers (tea), young leaves",
         "ground_cover_quality": "excellent", "attracts_pollinators": True}),
    ("Medicago sativa", "Fabaceae", "alfalfa", "Luzerne", "herbaceous",
        {"nitrogen_fixer": True, "food_uses": "Sprouts",
         "dynamic_accumulator": True, "dynamic_accumulator_nutrients": "N, K, Ca, Mg, Fe",
         "other_uses": "Green manure, deep-rooting soil improver"}),
    # Fruit / Trees
    ("Prunus avium", "Rosaceae", "sweet cherry", "Süßkirsche", "canopy",
        {"food_uses": "Fruit", "attracts_pollinators": True, "wildlife_value": "Fruit for birds"}),
    ("Prunus cerasus", "Rosaceae", "sour cherry", "Sauerkirsche", "understory",
        {"food_uses": "Fruit (cooking, preserves)", "attracts_pollinators": True}),
    ("Pyrus communis", "Rosaceae", "pear", "Birne", "canopy",
        {"food_uses": "Fruit", "attracts_pollinators": True}),
    ("Ribes uva-crispa", "Grossulariaceae", "gooseberry", "Stachelbeere", "shrub",
        {"food_uses": "Berries"}),
    ("Rubus fruticosus", "Rosaceae", "blackberry", "Brombeere", "shrub",
        {"food_uses": "Berries, leaves (tea)", "attracts_pollinators": True,
         "wildlife_value": "Berries for birds, nesting habitat", "invasiveness": "spreading"}),
    ("Vaccinium myrtillus", "Ericaceae", "bilberry", "Heidelbeere", "shrub",
        {"food_uses": "Berries", "medicinal_uses": "Antioxidant, eye health"}),
    ("Hippophae rhamnoides", "Elaeagnaceae", "sea buckthorn", "Sanddorn", "shrub",
        {"nitrogen_fixer": True, "food_uses": "Berries (juice, oil)",
         "medicinal_uses": "High vitamin C, skin care",
         "other_uses": "Erosion control, windbreak"}),
    ("Morus alba", "Moraceae", "white mulberry", "Weiße Maulbeere", "canopy",
        {"food_uses": "Fruit, young leaves", "wildlife_value": "Fruit for birds"}),
]
def _create_missing_families(family_map):
    """Create every FAMILIES_NEEDED entry that is absent from ``family_map``.

    ``family_map`` (scientific family name -> id) is updated in place with the
    ids of newly created families. Returns the number of families created.
    """
    families_created = 0
    for fam_name, fam_info in FAMILIES_NEEDED.items():
        if fam_name in family_map:
            print(f" SKIP (exists): {fam_name}")
            continue
        payload = {
            "name_scientific": fam_name,
            "name_en": fam_info["name_en"],
            "name_de": fam_info["name_de"],
        }
        print(f" CREATE: {fam_name} ...", end=" ")
        result, status = api_post("/families", payload)
        if result and "id" in result:
            family_map[fam_name] = result["id"]
            print(f"OK ({result['id']})")
            families_created += 1
        else:
            print(f"FAILED (status={status})")
        # Throttle only after an actual request was sent.
        time.sleep(DELAY)
    return families_created


def _add_species(family_map, known_species):
    """Create every SPECIES entry that is not yet in ``known_species``.

    ``known_species`` (set of scientific names) is updated in place with each
    successfully created species, so a name listed twice in SPECIES is posted
    only once. Returns a ``(created, skipped, failed)`` tuple of counters.
    """
    created = skipped = failed = 0
    for sci_name, family, name_en, name_de, plant_layer, extras in SPECIES:
        if sci_name in known_species:
            print(f" SKIP (exists): {sci_name}")
            skipped += 1
            continue

        # A species can only be created once its family has an id.
        fam_id = family_map.get(family)
        if not fam_id:
            print(f" SKIP (no family '{family}'): {sci_name}")
            failed += 1
            continue

        # The GBIF name is logged for manual validation only; the curated
        # name_de from the catalog is what gets stored.
        gbif_de = gbif_get_german_name(sci_name)
        if gbif_de:
            print(f" GBIF name for {sci_name}: {gbif_de}")

        # Extra fields come last so they are applied after the core fields.
        payload = {
            "name_scientific": sci_name,
            "family_id": fam_id,
            "name_en": name_en,
            "name_de": name_de,
            "plant_layer": plant_layer,
            **extras,
        }
        print(f" CREATE: {sci_name} ({name_de}) ...", end=" ")
        result, status = api_post("/species", payload)
        if result and "id" in result:
            print(f"OK ({result['id']})")
            created += 1
            known_species.add(sci_name)
        else:
            print(f"FAILED (status={status})")
            failed += 1
        time.sleep(DELAY)
    return created, skipped, failed


def main():
    """Seed the HerbAPI with the families and species defined in this module.

    Steps: load existing families, create the missing ones, load existing
    species, create the missing ones, then print a summary.

    NOTE(review): only one request is made per listing (per_page=100 for
    families, per_page=200 for species). If the API paginates and holds more
    records than that, existing entries beyond the first page will not be
    detected -- confirm against the API's pagination behaviour.
    """
    # 1. Load existing families
    print("=== Loading existing families ===")
    fam_resp = api_get("/families?per_page=100")
    family_map = {f["name_scientific"]: f["id"] for f in fam_resp["data"]}
    print(f" Found {len(family_map)} existing families")

    # 2. Create missing families
    print("\n=== Creating missing families ===")
    families_created = _create_missing_families(family_map)
    print(f"\n Families created: {families_created}")

    # 3. Load existing species
    print("\n=== Loading existing species ===")
    sp_resp = api_get("/species?per_page=200")
    existing_species = {s["name_scientific"] for s in sp_resp["data"]}
    # Remember the starting count: the set grows while species are added.
    existing_count = len(existing_species)
    print(f" Found {existing_count} existing species")

    # 4. Add new species
    print("\n=== Adding new species ===")
    created, skipped, failed = _add_species(family_map, existing_species)

    print(f"\n{'='*50}")
    print("SUMMARY")
    print(f" Families created: {families_created}")
    print(f" Species created: {created}")
    print(f" Species skipped: {skipped}")
    print(f" Species failed: {failed}")
    print(f" Total species now: {existing_count + created}")


if __name__ == "__main__":
    main()