#!/usr/bin/env python3
"""Fetch keystone.guru tiles + enemy data for every dungeon in the registry.

The registry is data/kg_dungeons.json. Its optional "_expansion" key selects
the tile expansion (default "classic"); its "dungeons" list names the targets.

Outputs:
  data/kg/<tile_key>/floor<N>/z<zoom>/<x>_<y>.png   raw tiles
  data/kg/<tile_key>/split_floors.js
  data/kg/<tile_key>/lang.js
  data/kg/_summary.json

The compiled-data path includes a build hash; we discover it once from a
known route page and use it for every fetch in this run.
"""
from __future__ import annotations

import argparse
import concurrent.futures
import http.client
import json
import re
import sys
import time
import urllib.request
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
KG_OUT = ROOT / "data" / "kg"
REGISTRY = ROOT / "data" / "kg_dungeons.json"

UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
)
TILE_BASE = "https://assets.keystone.guru/tiles"
DATA_BASE = "https://assets.keystone.guru/compiled"
ROUTE_PROBE_URL = "https://aws.keystone.guru/route/razorfen-downs/2bhiRi8/ascension-m-rfd/1"
HASH_RE = re.compile(r"compiled/([0-9a-f]{40})/")

# Failures treated as "this fetch did not work" rather than as bugs:
# URLError/HTTPError, timeouts, socket/TLS and file-system errors are all
# OSError subclasses; HTTPException covers protocol-level failures such as
# IncompleteRead or BadStatusLine.
FETCH_ERRORS = (OSError, http.client.HTTPException)


def http_get(url: str, timeout: int = 15) -> bytes:
    """Return the response body of a GET request to *url*.

    Raises whatever ``urllib.request.urlopen`` raises (e.g. ``URLError``,
    ``HTTPError``, timeouts); callers decide how to handle failures.
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA})
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return r.read()


def http_head_ok(url: str, timeout: int = 5) -> bool:
    """Return True if a HEAD request to *url* answers with status 200.

    Any fetch failure (HTTP error status, timeout, connection problem) is
    reported as False, so a transient network error is indistinguishable
    from a missing resource.
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA}, method="HEAD")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r:
            return r.status == 200
    except FETCH_ERRORS:
        return False


def discover_compiled_hash() -> str:
    """Return the 40-hex-digit compiled-asset hash found on the probe route page.

    Raises:
        RuntimeError: if the page contains no ``compiled/<hash>/`` reference.
    """
    html = http_get(ROUTE_PROBE_URL).decode("utf-8", errors="replace")
    m = HASH_RE.search(html)
    if not m:
        raise RuntimeError("could not find compiled-asset hash on route page")
    return m.group(1)


def discover_floors(tile_key: str, expansion: str, max_zoom: int = 4) -> list[int]:
    """Probe floor numbers 1..19 and return those found before the first miss.

    Uses zoom=1 for the existence check. *max_zoom* is currently unused and
    is kept only so existing callers keep working.
    """
    floors = []
    for f in range(1, 20):
        if not http_head_ok(f"{TILE_BASE}/{expansion}/{tile_key}/{f}/1/0_0.png"):
            break
        floors.append(f)
    return floors


def discover_grid(tile_key: str, expansion: str, floor: int, z: int) -> tuple[int, int]:
    """Return ``(cols, rows)`` tile counts for *floor* at zoom *z*.

    Walks along row 0 and then column 0 until the first tile that does not
    answer 200, so tile 0_0 is assumed to exist.
    """
    max_x = 0
    while http_head_ok(f"{TILE_BASE}/{expansion}/{tile_key}/{floor}/{z}/{max_x + 1}_0.png"):
        max_x += 1
    max_y = 0
    while http_head_ok(f"{TILE_BASE}/{expansion}/{tile_key}/{floor}/{z}/0_{max_y + 1}.png"):
        max_y += 1
    return max_x + 1, max_y + 1  # counts (cols, rows)


def fetch_tile(args) -> tuple[Path, bool]:
    """Download one tile; *args* is a ``(url, dest)`` pair.

    Returns ``(dest, ok)``. A non-empty existing *dest* counts as already
    fetched. A response that does not start with the PNG magic bytes, or any
    fetch/write failure, yields ``ok=False`` and leaves *dest* absent.
    """
    url, dest = args
    if dest.exists() and dest.stat().st_size > 0:
        return dest, True
    dest.parent.mkdir(parents=True, exist_ok=True)
    try:
        data = http_get(url, timeout=30)
        if not data or data[:4] != b"\x89PNG":
            return dest, False
        # Write to a sibling temp file and rename: an interrupted write must
        # never leave a truncated, non-empty file that the size check above
        # would accept as a valid cached tile on the next run.
        tmp = dest.with_name(dest.name + ".part")
        tmp.write_bytes(data)
        tmp.replace(dest)
    except FETCH_ERRORS:
        return dest, False
    return dest, True


def fetch_dungeon_tiles(d: dict, expansion: str, max_zoom: int, workers: int) -> dict:
    """For one dungeon, discover floors + grid, parallel-download all tiles.

    *d* must provide ``"tile_key"`` and ``"name"``. Returns a summary dict
    with keys ``tile_key``, ``name``, ``expansion``, ``max_zoom`` and
    ``floors`` (a list of ``{"index", "cols", "rows"}``; empty when no floor
    was found).
    """
    tile_key = d["tile_key"]
    name = d["name"]
    out_root = KG_OUT / tile_key
    out_root.mkdir(parents=True, exist_ok=True)

    floors = discover_floors(tile_key, expansion)
    info = {"tile_key": tile_key, "name": name, "expansion": expansion,
            "max_zoom": max_zoom, "floors": []}
    if not floors:
        print(f"  WARN no floors for {tile_key}", file=sys.stderr)
        return info

    jobs = []
    for f in floors:
        cols, rows = discover_grid(tile_key, expansion, f, max_zoom)
        info["floors"].append({"index": f, "cols": cols, "rows": rows})
        floor_dir = out_root / f"floor{f}" / f"z{max_zoom}"
        for x in range(cols):
            for y in range(rows):
                url = f"{TILE_BASE}/{expansion}/{tile_key}/{f}/{max_zoom}/{x}_{y}.png"
                jobs.append((url, floor_dir / f"{x}_{y}.png"))

    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
        ok = sum(1 for _, success in pool.map(fetch_tile, jobs) if success)

    print(f"  {tile_key}: {ok}/{len(jobs)} tiles, floors={[f['index'] for f in info['floors']]}, "
          f"grid=" + ", ".join(f"f{f['index']}:{f['cols']}x{f['rows']}" for f in info["floors"]))
    return info


def fetch_dungeon_data(d: dict, compiled_hash: str) -> bool:
    """Download split_floors.js + en_US.js for one dungeon.

    Returns False without touching the disk or network when *d* lacks a
    ``data_slug`` or ``mapping_id``, and False (after logging to stderr) when
    either download or write fails. The language file is saved as ``lang.js``.
    """
    if not d.get("data_slug") or not d.get("mapping_id"):
        return False
    slug = d["data_slug"]
    mid = d["mapping_id"]
    tile_key = d["tile_key"]
    out_root = KG_OUT / tile_key
    out_root.mkdir(parents=True, exist_ok=True)

    splits_url = f"{DATA_BASE}/{compiled_hash}/mapcontext/data/{slug}/{mid}/split_floors.js"
    lang_url = f"{DATA_BASE}/{compiled_hash}/mapcontext/data/{slug}/en_US.js"
    try:
        # Download both before writing either, so a failure on the second
        # request cannot leave a freshly written split_floors.js next to a
        # stale or missing lang.js.
        splits = http_get(splits_url)
        lang = http_get(lang_url)
        (out_root / "split_floors.js").write_bytes(splits)
        (out_root / "lang.js").write_bytes(lang)
    except FETCH_ERRORS as e:
        print(f"  data fetch failed for {tile_key}: {e}", file=sys.stderr)
        return False
    return True


def main() -> int:
    """Command-line entry point; returns the process exit status.

    Returns 2 when ``--dungeon`` names a tile_key missing from the registry,
    otherwise 0 after writing ``_summary.json``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--workers", type=int, default=24)
    ap.add_argument("--zoom", type=int, default=4)
    ap.add_argument("--dungeon", help="only fetch this tile_key")
    args = ap.parse_args()

    registry = json.loads(REGISTRY.read_text(encoding="utf-8"))
    expansion = registry.get("_expansion", "classic")

    # Validate the selection before any network traffic so a mistyped
    # tile_key fails immediately.
    targets = registry["dungeons"]
    if args.dungeon:
        targets = [d for d in targets if d["tile_key"] == args.dungeon]
        if not targets:
            print(f"no dungeon with tile_key={args.dungeon}", file=sys.stderr)
            return 2

    compiled_hash = discover_compiled_hash()
    print(f"compiled hash: {compiled_hash}")

    summary = {"compiled_hash": compiled_hash, "dungeons": []}
    for d in targets:
        print(f"==> {d['name']} ({d['tile_key']})")
        info = fetch_dungeon_tiles(d, expansion, args.zoom, args.workers)
        info["data_fetched"] = fetch_dungeon_data(d, compiled_hash)
        summary["dungeons"].append(info)

    # The per-dungeon code creates KG_OUT as a side effect; make sure it
    # exists even when the registry lists no dungeons at all.
    KG_OUT.mkdir(parents=True, exist_ok=True)
    (KG_OUT / "_summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
    print(f"\nwrote {KG_OUT}/_summary.json — {len(summary['dungeons'])} dungeons")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())