Files
herbapi/tools/enrichment/seed_interactions.py
T
florian.berthold 963f307cfc Add cross-domain interactions (migration 017)
Generic directed links between plant/fungus/insect entities (subject_kind/slug,
predicate, object_kind/slug). Predicates: pollinates, feeds_on, preys_on,
parasitizes, attracts, mycorrhizal_with, grows_on. Interaction model,
db/interactions.rs (flexible filtered query + per-entity lookup, upsert),
/api/v1/interactions + /{kind}/{ref}/interactions routes. Wired into
stats/export/llms. seed_interactions.py: derives preys_on links + curated
pollinator/mycorrhizal/pest links (existence-checked).
2026-06-05 22:01:56 +02:00

123 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""
Seed cross-domain interactions. Two sources:
1. Derived: each insect's `preys_on` list -> insect--preys_on-->insect.
2. Curated: well-documented pollinator/mycorrhizal/pest links — created ONLY
where both entities already exist in the DB (existence-checked, idempotent).
Run: HERBAPI_TOKEN=... python3 seed_interactions.py
"""
import json
import os
import urllib.error
import urllib.request
# Root URL of the HerbAPI v1 REST API; override with the HERBAPI_BASE env var.
# Request paths are appended verbatim as "/...", so a trailing slash on the
# configured value is dropped to avoid producing "//" in the final URL.
BASE = os.environ.get(
    "HERBAPI_BASE", "http://herbapi01.corp.sub-net.at:8080/api/v1"
).rstrip("/")

# Bearer token sent with every request (HERBAPI_TOKEN env var). Surrounding
# whitespace/newlines are stripped because they are not valid inside an HTTP
# header value; an empty token is rejected later by main().
TOKEN = os.environ.get("HERBAPI_TOKEN", "").strip()
def api(path, method="GET", data=None, timeout=30):
    """Send one authenticated JSON request to the API and return the outcome.

    Args:
        path: Path (plus optional query string) appended to ``BASE``.
        method: HTTP verb, ``"GET"`` by default.
        data: Optional JSON-serialisable body; omitted when ``None``.
        timeout: Socket timeout in seconds so a stalled server cannot hang
            the script indefinitely.

    Returns:
        ``(status, payload)``. On success ``payload`` is the decoded JSON
        body, or ``None`` when the body is empty. On an HTTP error status it
        is the first 200 characters of the error body as text.

    Raises:
        urllib.error.URLError: for connection-level failures (not caught).
    """
    payload = json.dumps(data).encode() if data is not None else None
    req = urllib.request.Request(BASE + path, data=payload, method=method)
    req.add_header("Authorization", "Bearer " + TOKEN)
    req.add_header("Content-Type", "application/json")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
            # An empty success body (e.g. 204) is not valid JSON; report it
            # as None instead of crashing in json.loads.
            body = json.loads(raw.decode()) if raw.strip() else None
            return resp.status, body
    except urllib.error.HTTPError as e:
        # Error bodies are only used for diagnostics, so never let a bad
        # byte sequence mask the actual HTTP failure.
        return e.code, e.read().decode(errors="replace")[:200]
def all_slugs(path, per_page=100):
    """Collect the ``slug`` of every record behind a paginated list endpoint.

    Pages are requested as ``?per_page=<per_page>&page=<n>`` starting at
    page 1, and iteration stops at the first page that is empty or shorter
    than ``per_page``.

    Args:
        path: List endpoint path, e.g. ``"/species"``.
        per_page: Page size to request (defaults to the previous fixed 100).

    Returns:
        A ``set`` of slug strings.

    Raises:
        RuntimeError: if a page request fails or its payload has no
            ``"data"`` entry (previously an opaque ``TypeError``).
    """
    # NOTE(review): the short-page stop condition assumes the server honours
    # the requested per_page; a lower server-side cap would end paging early.
    slugs = set()
    page = 1
    while True:
        status, payload = api(f"{path}?per_page={per_page}&page={page}")
        # api() hands back a plain string (or None) instead of a dict when
        # the request did not yield a JSON object.
        if not isinstance(payload, dict) or "data" not in payload:
            raise RuntimeError(
                f"GET {path} page {page} failed: {status} {payload}"
            )
        chunk = payload["data"] or []
        slugs.update(item["slug"] for item in chunk)
        if len(chunk) < per_page:
            break
        page += 1
    return slugs
# Curated, widely-documented links. Each row below names one subject, the
# predicate, and the plant species it is linked to; the comprehension flattens
# the rows, in order, into (subj_kind, subj, predicate, obj_kind, obj) tuples.
CURATED = [
    (subj_kind, subj, predicate, "species", plant)
    for subj_kind, subj, predicate, plants in (
        # pollinators -> nectar/forage plants
        ("insect", "apis-mellifera", "pollinates", [
            "trifolium-pratense",
            "trifolium-repens",
            "borago-officinalis",
            "phacelia-tanacetifolia",
            "fagopyrum-esculentum",
            "helianthus-annuus",
            "calendula-officinalis",
        ]),
        ("insect", "bombus-terrestris", "pollinates", [
            "trifolium-pratense",
            "solanum-lycopersicum",
            "vicia-faba",
        ]),
        ("insect", "osmia-bicornis", "pollinates", [
            "malus-domestica",
            "prunus-avium",
        ]),
        ("insect", "episyrphus-balteatus", "pollinates", [
            "calendula-officinalis",
            "phacelia-tanacetifolia",
        ]),
        # mycorrhizal fungi -> host trees
        ("fungus", "boletus-edulis", "mycorrhizal_with", [
            "quercus-robur",
            "fagus-sylvatica",
            "picea-abies",
        ]),
        ("fungus", "cantharellus-cibarius", "mycorrhizal_with", [
            "fagus-sylvatica",
            "betula-pendula",
        ]),
        ("fungus", "amanita-phalloides", "mycorrhizal_with", ["quercus-robur"]),
        # saprobic/parasitic fungi -> host wood
        ("fungus", "grifola-frondosa", "grows_on", ["quercus-robur"]),
        ("fungus", "laetiporus-sulphureus", "grows_on", ["quercus-robur"]),
        # pest insects -> host crops
        ("insect", "leptinotarsa-decemlineata", "feeds_on", ["solanum-tuberosum"]),
        ("insect", "pieris-brassicae", "feeds_on", ["brassica-oleracea"]),
        ("insect", "plutella-xylostella", "feeds_on", ["brassica-oleracea"]),
        ("insect", "delia-radicum", "feeds_on", ["brassica-oleracea"]),
        ("insect", "phyllotreta-nemorum", "feeds_on", ["brassica-oleracea"]),
        ("insect", "myzus-persicae", "feeds_on", [
            "prunus-persica",
            "capsicum-annuum",
        ]),
    )
    for plant in plants
]
def _fetch_all(path, per_page=100):
    """Return every record of a paginated list endpoint, across all pages."""
    records = []
    page = 1
    while True:
        status, payload = api(f"{path}?per_page={per_page}&page={page}")
        # api() returns a non-dict payload when the request failed.
        if not isinstance(payload, dict) or "data" not in payload:
            raise SystemExit(f"GET {path} page {page} failed: {status} {payload}")
        chunk = payload["data"] or []
        records.extend(chunk)
        if len(chunk) < per_page:
            return records
        page += 1


def main():
    """Build the candidate interaction links and POST each to /interactions.

    Candidates come from two sources: every insect's ``preys_on`` list
    (kept only when the prey slug is itself a known insect) and the
    ``CURATED`` table (kept only when both endpoints exist). One line is
    printed per link, followed by a summary of created vs. failed requests.

    Raises:
        SystemExit: if ``HERBAPI_TOKEN`` is empty or the insect listing
            cannot be fetched.
    """
    if not TOKEN:
        raise SystemExit("HERBAPI_TOKEN not set")

    species_slugs = all_slugs("/species")
    fungus_slugs = all_slugs("/fungi")
    # Fetch the full insect records once, over ALL pages: they supply both
    # the slug set and the preys_on lists. Reading only the first page here
    # would silently drop predators beyond the first 100 insects.
    insects = _fetch_all("/insects")
    slugs = {
        "species": species_slugs,
        "fungus": fungus_slugs,
        "insect": {ins["slug"] for ins in insects},
    }
    print(f"slugs: species={len(slugs['species'])} fungi={len(slugs['fungus'])} insects={len(slugs['insect'])}")

    links = []
    # 1. derived preys_on from insects
    for ins in insects:
        links.extend(
            ("insect", ins["slug"], "preys_on", "insect", prey)
            for prey in ins.get("preys_on") or []
            if prey in slugs["insect"]
        )
    # 2. curated, existence-checked
    links.extend(
        (s_kind, s, pred, o_kind, o)
        for s_kind, s, pred, o_kind, o in CURATED
        if s in slugs.get(s_kind, set()) and o in slugs.get(o_kind, set())
    )

    created = errors = 0
    for s_kind, s, pred, o_kind, o in links:
        body = {"subject_kind": s_kind, "subject_slug": s, "predicate": pred,
                "object_kind": o_kind, "object_slug": o}
        code, resp = api("/interactions", "POST", body)
        # Any 2xx means the write was accepted; a 201/204 reply must not be
        # reported as a failure.
        if 200 <= code < 300:
            print(f" {s_kind}:{s} --{pred}--> {o_kind}:{o}")
            created += 1
        else:
            print(f" ERROR {s} {pred} {o}: {code} {resp}")
            errors += 1
    print(f"\ncandidates={len(links)} created/upserted={created} errors={errors}")


if __name__ == "__main__":
    main()