Enrich: NaturaDB writes structured wildlife fields + full pagination; min_temp from USDA zone
- scrape_naturadb.py: write structured count fields (nectar/pollen/bee/butterfly/caterpillar/hoverfly/beetle/bird/mammal), native_status, and naturadb_tags (not just the text wildlife_value); paginate all species; env-overridable base/token; only fill empty fields.
- enrich_botanical.py: derive min_temp from the minimum temperature of the USDA hardiness zone.
This commit is contained in:
@@ -7,6 +7,8 @@ scraping, just botanical facts:
|
||||
equals hardiness_zone_usda.
|
||||
* nitrogen_fixer — true for Fabaceae (legumes) and actinorhizal genera
|
||||
(Frankia symbiosis), false otherwise. This is family/genus-level botany.
|
||||
* min_temp — cold hardiness in °C, derived from the lower USDA zone
|
||||
in hardiness_zone_usda (each zone has a standardized minimum temperature).
|
||||
|
||||
Idempotent: GET full species -> merge only missing/derivable fields -> PUT.
|
||||
Run: HERBAPI_TOKEN=... python3 enrich_botanical.py [--base URL] [--dry-run]
|
||||
@@ -61,6 +63,24 @@ def fixes_nitrogen(family_slug, scientific):
|
||||
return family_slug == "fabaceae" or genus in ACTINORHIZAL_GENERA
|
||||
|
||||
|
||||
# Standardized USDA hardiness-zone minimum temperatures (°C), rounded.
# Keys are whole zone numbers; half-zone suffixes ("a"/"b") are not tracked.
USDA_ZONE_MIN_C = {
    1: -51, 2: -46, 3: -40, 4: -34, 5: -29, 6: -23, 7: -18,
    8: -12, 9: -7, 10: -1, 11: 4, 12: 10, 13: 16,
}


def min_temp_from_zone(zone_str):
    """Return the minimum temperature (°C) of the first zone in *zone_str*.

    The first integer found in the value is taken as the coldest zone, so
    '5-9', '5b-9a', 'Zone 5 to 9' and '5–9' (en dash) all yield -29.0.

    Args:
        zone_str: A USDA hardiness zone or zone range; any value is accepted
            and converted with ``str()``.

    Returns:
        The zone's minimum temperature as a float, or None when the value is
        empty, contains no ASCII digits, or names a zone that is not in
        USDA_ZONE_MIN_C.
    """
    import re  # local import keeps this block self-contained

    if not zone_str:
        return None
    # Match ASCII digits only: str.isdigit() also accepts characters such as
    # superscript "²" that int() rejects, and splitting on "-" alone would fuse
    # ranges written with an en dash or "to" into a single bogus number.
    match = re.search(r"[0-9]+", str(zone_str))
    if match is None:
        return None
    min_c = USDA_ZONE_MIN_C.get(int(match.group()))
    return None if min_c is None else float(min_c)
|
||||
|
||||
|
||||
def main():
|
||||
global BASE
|
||||
ap = argparse.ArgumentParser()
|
||||
@@ -75,7 +95,7 @@ def main():
|
||||
species = all_species()
|
||||
print(f"{len(species)} species, {len(families)} families")
|
||||
|
||||
changed = {"hardiness_zone_at": 0, "nitrogen_fixer": 0}
|
||||
changed = {"hardiness_zone_at": 0, "nitrogen_fixer": 0, "min_temp": 0}
|
||||
for s in species:
|
||||
updates = {}
|
||||
if not s.get("hardiness_zone_at") and s.get("hardiness_zone_usda"):
|
||||
@@ -84,6 +104,10 @@ def main():
|
||||
updates["nitrogen_fixer"] = fixes_nitrogen(
|
||||
families.get(s["family_id"]), s.get("name_scientific")
|
||||
)
|
||||
if s.get("min_temp") is None:
|
||||
mt = min_temp_from_zone(s.get("hardiness_zone_usda"))
|
||||
if mt is not None:
|
||||
updates["min_temp"] = mt
|
||||
if not updates:
|
||||
continue
|
||||
for k in updates:
|
||||
|
||||
Reference in New Issue
Block a user