963f307cfc
Generic directed links between plant/fungus/insect entities (subject_kind/slug,
predicate, object_kind/slug). Predicates: pollinates, feeds_on, preys_on,
parasitizes, attracts, mycorrhizal_with, grows_on. Interaction model,
db/interactions.rs (flexible filtered query + per-entity lookup, upsert),
/api/v1/interactions + /{kind}/{ref}/interactions routes. Wired into
stats/export/llms. seed_interactions.py: derives preys_on links + curated
pollinator/mycorrhizal/pest links (existence-checked).
123 lines
5.3 KiB
Python
123 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Seed cross-domain interactions. Two sources:
|
|
1. Derived: each insect's `preys_on` list -> insect--preys_on-->insect.
|
|
2. Curated: well-documented pollinator/mycorrhizal/pest links — created ONLY
|
|
where both entities already exist in the DB (existence-checked, idempotent).
|
|
|
|
Run: HERBAPI_TOKEN=... python3 seed_interactions.py
|
|
"""
|
|
import json
|
|
import os
|
|
import urllib.error
|
|
import urllib.request
|
|
|
|
# Root URL of the API. Every request path in this script starts with "/", so a
# trailing slash on the configured value is stripped to avoid "//" in URLs.
BASE = os.environ.get(
    "HERBAPI_BASE", "http://herbapi01.corp.sub-net.at:8080/api/v1"
).rstrip("/")
# Bearer token sent with every request; main() refuses to run when it is empty.
TOKEN = os.environ.get("HERBAPI_TOKEN", "")
|
|
|
|
|
|
def api(path, method="GET", data=None, timeout=30):
    """Send one JSON request to the API and return ``(status, body)``.

    Args:
        path: Path (plus optional query string) appended to ``BASE``.
        method: HTTP method name.
        data: JSON-serialisable payload; ``None`` sends no request body.
        timeout: Socket timeout in seconds, so a stalled server cannot hang
            the script indefinitely.

    Returns:
        ``(status, parsed_json)`` for a successful response, where
        ``parsed_json`` is ``None`` if the response body is empty.
        ``(status, text)`` for an HTTP error response, where ``text`` is the
        first 200 characters of the error body.

    Raises:
        urllib.error.URLError: On connection-level failures (not caught here).
        json.JSONDecodeError: If a successful response is not valid JSON.
    """
    payload = json.dumps(data).encode() if data is not None else None
    req = urllib.request.Request(BASE + path, data=payload, method=method)
    req.add_header("Authorization", "Bearer " + TOKEN)
    req.add_header("Content-Type", "application/json")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            status = resp.status
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # The error body is only used for diagnostics, so never let a
        # non-UTF-8 payload mask the real HTTP failure.
        return e.code, e.read().decode(errors="replace")[:200]
    if not raw.strip():
        # e.g. an empty 200/204 response: there is nothing to parse.
        return status, None
    return status, json.loads(raw.decode())
|
|
|
|
|
|
def all_slugs(path, per_page=100):
    """Collect the slug of every entity served by a paginated list endpoint.

    Pages are requested one after another (starting at page 1) until an empty
    or short page is returned.

    Args:
        path: List endpoint path relative to ``BASE`` (e.g. ``"/species"``).
            A query string is appended, so it must not already contain one.
        per_page: Page size to request; a page shorter than this ends the walk.

    Returns:
        A set with the ``slug`` value of every item seen.

    Raises:
        RuntimeError: If a page request does not yield a JSON object with a
            ``data`` key (for example an HTTP error response).
    """
    slugs = set()
    page = 1
    while True:
        status, body = api(f"{path}?per_page={per_page}&page={page}")
        # On HTTP errors api() returns a text snippet instead of a dict;
        # fail with context rather than an opaque TypeError on indexing.
        if not isinstance(body, dict) or "data" not in body:
            raise RuntimeError(
                f"GET {path} page {page} failed: {status} {body}"
            )
        chunk = body["data"]
        if not chunk:
            break
        slugs.update(item["slug"] for item in chunk)
        if len(chunk) < per_page:
            break
        page += 1
    return slugs
|
|
|
|
|
|
# Curated, widely-documented links, grouped per subject:
# (subj_kind, subj, predicate, [object species slugs]).
# Every curated object is a plant, so the object kind is always "species".
_CURATED_BY_SUBJECT = [
    # pollinators -> nectar/forage plants
    ("insect", "apis-mellifera", "pollinates", [
        "trifolium-pratense",
        "trifolium-repens",
        "borago-officinalis",
        "phacelia-tanacetifolia",
        "fagopyrum-esculentum",
        "helianthus-annuus",
        "calendula-officinalis",
    ]),
    ("insect", "bombus-terrestris", "pollinates", [
        "trifolium-pratense",
        "solanum-lycopersicum",
        "vicia-faba",
    ]),
    ("insect", "osmia-bicornis", "pollinates", [
        "malus-domestica",
        "prunus-avium",
    ]),
    ("insect", "episyrphus-balteatus", "pollinates", [
        "calendula-officinalis",
        "phacelia-tanacetifolia",
    ]),
    # mycorrhizal fungi -> host trees
    ("fungus", "boletus-edulis", "mycorrhizal_with", [
        "quercus-robur",
        "fagus-sylvatica",
        "picea-abies",
    ]),
    ("fungus", "cantharellus-cibarius", "mycorrhizal_with", [
        "fagus-sylvatica",
        "betula-pendula",
    ]),
    ("fungus", "amanita-phalloides", "mycorrhizal_with", ["quercus-robur"]),
    # saprobic/parasitic fungi -> host wood
    ("fungus", "grifola-frondosa", "grows_on", ["quercus-robur"]),
    ("fungus", "laetiporus-sulphureus", "grows_on", ["quercus-robur"]),
    # pest insects -> host crops
    ("insect", "leptinotarsa-decemlineata", "feeds_on", ["solanum-tuberosum"]),
    ("insect", "pieris-brassicae", "feeds_on", ["brassica-oleracea"]),
    ("insect", "plutella-xylostella", "feeds_on", ["brassica-oleracea"]),
    ("insect", "delia-radicum", "feeds_on", ["brassica-oleracea"]),
    ("insect", "phyllotreta-nemorum", "feeds_on", ["brassica-oleracea"]),
    ("insect", "myzus-persicae", "feeds_on", [
        "prunus-persica",
        "capsicum-annuum",
    ]),
]

# Flat link tuples: (subj_kind, subj, predicate, obj_kind, obj)
CURATED = [
    (subj_kind, subj, predicate, "species", obj)
    for subj_kind, subj, predicate, objects in _CURATED_BY_SUBJECT
    for obj in objects
]
|
|
|
|
|
|
def main():
    """Seed derived and curated interaction links through the HTTP API.

    Steps:
      1. Load the known slugs for species, fungi and insects.
      2. Derive ``preys_on`` links from every insect's ``preys_on`` list,
         keeping only prey slugs that are themselves known insects.
      3. Add each ``CURATED`` link whose subject and object both exist.
      4. POST every candidate link to ``/interactions`` and print a summary.

    Raises:
        SystemExit: If ``HERBAPI_TOKEN`` is not set, or if the insect listing
            cannot be fetched.
    """
    if not TOKEN:
        raise SystemExit("HERBAPI_TOKEN not set")
    slugs = {
        "species": all_slugs("/species"),
        "fungus": all_slugs("/fungi"),
        "insect": all_slugs("/insects"),
    }
    print(f"slugs: species={len(slugs['species'])} fungi={len(slugs['fungus'])} insects={len(slugs['insect'])}")

    links = []
    # 1. derived preys_on from insects.
    # Walk every page: reading only the first page would silently drop the
    # preys_on lists of all insects beyond the first 100.
    page = 1
    while True:
        code, payload = api(f"/insects?per_page=100&page={page}")
        if code != 200 or not isinstance(payload, dict):
            raise SystemExit(f"GET /insects page {page} failed: {code} {payload}")
        insects = payload.get("data") or []
        for ins in insects:
            for prey in ins.get("preys_on") or []:
                if prey in slugs["insect"]:
                    links.append(("insect", ins["slug"], "preys_on", "insect", prey))
        if len(insects) < 100:
            break
        page += 1
    # 2. curated, existence-checked
    for s_kind, s, pred, o_kind, o in CURATED:
        if s in slugs.get(s_kind, set()) and o in slugs.get(o_kind, set()):
            links.append((s_kind, s, pred, o_kind, o))

    created = errors = 0
    for s_kind, s, pred, o_kind, o in links:
        body = {"subject_kind": s_kind, "subject_slug": s, "predicate": pred,
                "object_kind": o_kind, "object_slug": o}
        code, resp = api("/interactions", "POST", body)
        if code == 200:
            print(f" {s_kind}:{s} --{pred}--> {o_kind}:{o}")
            created += 1
        else:
            print(f" ERROR {s} {pred} {o}: {code} {resp}")
            errors += 1
    print(f"\ncandidates={len(links)} created/upserted={created} errors={errors}")
|
|
|
|
|
|
# Script entry point: seed only when executed directly, never on import.
if __name__ == "__main__":
    # main() returns None, so this exits with status 0 after a normal run.
    raise SystemExit(main())
|