Desynth page and improved item info api. Added string substitution to utils.

This commit is contained in:
Aodhan
2025-07-10 03:20:33 +01:00
parent b9b47c96f6
commit ef9b64adfe
38 changed files with 5703 additions and 4489 deletions

View File

@@ -0,0 +1,78 @@
import json
from pathlib import Path
def convert_hq_entries(csv_path: Path, backup: bool = True) -> None:
"""Convert HQ1:,HQ2:,HQ3: columns in the desynthesis recipes CSV into a
single JSON-like dictionary string stored in the HQ column.
The CSV currently looks like::
Item,Crystal,Ingredients,HQ,Cap
Distilled Water x3,Lightning,Tahrongi Cactus,HQ1: Distilled Water x6,HQ2: Distilled Water x9,HQ3: Distilled Water x12,2
After conversion it will be::
Distilled Water x3,Lightning,Tahrongi Cactus,{"HQ1":"Distilled Water x6","HQ2":"Distilled Water x9","HQ3":"Distilled Water x12"},2
"""
csv_path = Path(csv_path)
if not csv_path.exists():
raise FileNotFoundError(csv_path)
text = csv_path.read_text(encoding="utf-8").splitlines()
output_lines = []
for line in text:
# Keep skill category lines (e.g., "Alchemy") unchanged.
if "," not in line:
output_lines.append(line)
continue
parts = [p.strip() for p in line.split(",")]
# Header rows pass through unchanged.
if parts[:5] == ["Item", "Crystal", "Ingredients", "HQ", "Cap"]:
output_lines.append(",".join(parts))
continue
# Rows with five or fewer columns need no consolidation; leave them as-is.
if len(parts) <= 5:
output_lines.append(",".join(parts))
continue
# Otherwise consolidate HQ columns.
item, crystal, ingredients = parts[:3]
cap = parts[-1]
hq_parts = parts[3:-1]
hq_dict = {}
unnamed_counter = 1
for h in hq_parts:
h = h.strip()
if not h:
continue
if ":" in h:
key, value = h.split(":", 1)
hq_dict[key.strip()] = value.strip()
else:
# Handle unlabeled HQ values by assigning sequential keys.
key = f"HQ{unnamed_counter}"
unnamed_counter += 1
hq_dict[key] = h
# Serialise the consolidated HQ columns as a compact JSON object, matching the example in the docstring.
hq_json_readable = json.dumps(hq_dict, separators=(",", ":"))
new_line = ",".join([item, crystal, ingredients, hq_json_readable, cap])
output_lines.append(new_line)
# Backup original file.
if backup:
backup_path = csv_path.with_suffix(csv_path.suffix + ".bak")
if not backup_path.exists():
backup_path.write_text("\n".join(text), encoding="utf-8")
csv_path.write_text("\n".join(output_lines) + "\n", encoding="utf-8")
if __name__ == "__main__":
target = Path(__file__).resolve().parents[1] / "datasets" / "desythesis_recipes.csv"
convert_hq_entries(target)
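A minimal usage sketch for the converter above; the import path is hypothetical, so adjust it to wherever this script actually lives. It writes a throwaway copy of the CSV, runs the conversion, and prints the collapsed HQ cell:

import tempfile
from pathlib import Path

# Hypothetical import path for the converter defined above.
from convert_hq_entries import convert_hq_entries

sample = (
    "Alchemy\n"
    "Item,Crystal,Ingredients,HQ,Cap\n"
    "Distilled Water x3,Lightning,Tahrongi Cactus,"
    "HQ1: Distilled Water x6,HQ2: Distilled Water x9,HQ3: Distilled Water x12,2\n"
)

with tempfile.TemporaryDirectory() as tmp:
    csv_path = Path(tmp) / "desythesis_recipes.csv"
    csv_path.write_text(sample, encoding="utf-8")
    convert_hq_entries(csv_path)  # also writes desythesis_recipes.csv.bak alongside
    print(csv_path.read_text(encoding="utf-8"))
    # The data row now ends with a single JSON-like HQ cell followed by the cap.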

View File

@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""Load datasets/desythesis_recipes.csv into PostgreSQL.
This script parses the *desynthesis* recipe CSV which is structured slightly
differently from the v2 crafting CSVs. Recipes are grouped under craft
headings (e.g. "Alchemy", "Smithing"), followed by a header row.
Recent edits mean each recipe row now lists **multiple HQ columns (HQ1, HQ2, HQ3)**
_directly_ in the CSV instead of a single JSON cell. A typical section now looks
like::
Alchemy
Item,Crystal,Ingredients,HQ1,HQ2,HQ3,Cap
Distilled Water x3,Lightning,Tahrongi Cactus,HQ1: Distilled Water x6,HQ2: Distilled Water x9,HQ3: Distilled Water x12,2
Some legacy sections may still use the shorter header ``Item,Crystal,Ingredients,HQ,Cap``
with the HQ values spread across several columns. Pragmatically we treat **all
columns between ``Ingredients`` and the final ``Cap`` column as HQ fields** and
extract at most three of them (hq1-3) for insertion into Postgres.
The resulting database table schema is::
CREATE TABLE recipes_desynthesis (
id SERIAL PRIMARY KEY,
craft TEXT NOT NULL,
cap INT,
item TEXT NOT NULL,
crystal TEXT NOT NULL,
ingredients TEXT NOT NULL,
hq1 TEXT,
hq2 TEXT,
hq3 TEXT
);
Run:
python scripts/load_desynth_recipes_to_db.py
"""
from __future__ import annotations
import asyncio
import csv
import json
import pathlib
import re
from typing import Dict, List, Tuple, Optional
import asyncpg
PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
CONF_PATH = PROJECT_ROOT / "db.conf"
CSV_PATH = PROJECT_ROOT / "datasets" / "desythesis_recipes.csv"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
RE_CONF = re.compile(r"^([A-Z0-9_]+)=(.*)$")
def parse_db_conf(path: pathlib.Path) -> Dict[str, str]:
"""Simple KEY=VALUE parser (quotes stripped)."""
if not path.exists():
raise FileNotFoundError("db.conf not found")
conf: Dict[str, str] = {}
for line in path.read_text().splitlines():
line = line.strip()
if not line or line.startswith("#"):
continue
m = RE_CONF.match(line)
if m:
k, v = m.group(1), m.group(2).strip().strip("'\"")
conf[k] = v
required = {"PSQL_HOST", "PSQL_PORT", "PSQL_USER", "PSQL_PASSWORD", "PSQL_DBNAME"}
missing = required - conf.keys()
if missing:
raise RuntimeError(f"Missing keys in db.conf: {', '.join(sorted(missing))}")
return conf
def parse_csv(csv_path: pathlib.Path) -> List[Tuple[str, Optional[int], str, str, str, Optional[str], Optional[str], Optional[str]]]:
"""Parse the *desythesis_recipes.csv* file and return rows for COPY.
The parser is tolerant of the two currently-seen layouts:
1. ``Item,Crystal,Ingredients,HQ1,HQ2,HQ3,Cap``
2. ``Item,Crystal,Ingredients,HQ,Cap`` (legacy header but still multiple HQ
columns in the data rows).
The strategy is therefore:
• first three columns are *always* Item, Crystal, Ingredients.
• *last* column is CAP.
• everything between is treated as HQ fields; the first three of those
(if present) are saved as hq1-3.
"""
rows: List[Tuple[str, Optional[int], str, str, str, Optional[str], Optional[str], Optional[str]]] = []
current_craft: Optional[str] = None
with csv_path.open(newline="", encoding="utf-8") as fh:
reader = csv.reader(fh)
for raw in reader:
# ------------------------------------------------------------------
# Detect craft headings (single-cell rows, e.g. "Alchemy")
# ------------------------------------------------------------------
if len(raw) == 1:
current_craft = raw[0].strip()
continue
# Skip blank lines or header rows
if not raw or raw[0].strip().startswith("Item") or current_craft is None:
continue
if len(raw) < 4:
# Not enough columns for a valid recipe; skip it
continue
# Standard columns
item = raw[0].strip()
crystal = raw[1].strip()
ingredients = raw[2].strip()
# CAP is *always* the final column
cap_raw = raw[-1].strip()
try:
cap = int(cap_raw) if cap_raw.isdigit() else None
except ValueError:
cap = None
# HQ columns: everything between ingredients and cap
hq_columns = [c.strip() for c in raw[3:-1]]
hq1 = hq_columns[0] if len(hq_columns) > 0 and hq_columns[0] else None
hq2 = hq_columns[1] if len(hq_columns) > 1 and hq_columns[1] else None
hq3 = hq_columns[2] if len(hq_columns) > 2 and hq_columns[2] else None
# Clean prefixes like "HQ1: "
def _clean(hq_val: Optional[str]) -> Optional[str]:
if hq_val and ":" in hq_val:
return hq_val.split(":", 1)[1].strip()
return hq_val
hq1, hq2, hq3 = map(_clean, (hq1, hq2, hq3))
rows.append((current_craft, cap, item, crystal, ingredients, hq1, hq2, hq3))
return rows
async def recreate_table(conn: asyncpg.Connection) -> None:
await conn.execute(
"""
DROP TABLE IF EXISTS recipes_desynthesis;
CREATE TABLE recipes_desynthesis (
id SERIAL PRIMARY KEY,
craft TEXT NOT NULL,
cap INT,
item TEXT NOT NULL,
crystal TEXT NOT NULL,
ingredients TEXT NOT NULL,
hq1 TEXT,
hq2 TEXT,
hq3 TEXT
);
"""
)
async def copy_rows(conn: asyncpg.Connection, rows):
await conn.copy_records_to_table(
"recipes_desynthesis",
records=rows,
columns=[
"craft",
"cap",
"item",
"crystal",
"ingredients",
"hq1",
"hq2",
"hq3",
],
)
async def main() -> None:
if not CSV_PATH.exists():
raise SystemExit("CSV file not found run conversion first")
conf = parse_db_conf(CONF_PATH)
rows = parse_csv(CSV_PATH)
print(f"Parsed {len(rows)} recipes from CSV.")
conn = await asyncpg.connect(
host=conf["PSQL_HOST"],
port=int(conf["PSQL_PORT"]),
user=conf["PSQL_USER"],
password=conf["PSQL_PASSWORD"],
database=conf["PSQL_DBNAME"],
)
try:
await recreate_table(conn)
await copy_rows(conn, rows)
print("Loaded recipes_desynthesis table.")
finally:
await conn.close()
if __name__ == "__main__":
asyncio.run(main())
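A quick sanity check after the loader has run, sketched with asyncpg. The table and column names come from the schema above; the connection parameters are placeholders and would normally be read from db.conf exactly as the loader does:

import asyncio
import asyncpg

async def check() -> None:
    # Placeholder credentials; read them from db.conf in real use.
    conn = await asyncpg.connect(host="localhost", port=5432, user="postgres",
                                 password="secret", database="ffxi")
    try:
        total = await conn.fetchval("SELECT COUNT(*) FROM recipes_desynthesis;")
        sample = await conn.fetch(
            "SELECT craft, item, hq1, hq2, hq3, cap "
            "FROM recipes_desynthesis WHERE craft = $1 LIMIT 5;",
            "Alchemy",
        )
        print(f"{total} desynthesis recipes loaded")
        for row in sample:
            print(dict(row))
    finally:
        await conn.close()

asyncio.run(check())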

View File

@@ -82,6 +82,7 @@ async def ensure_inventory_table(conn: asyncpg.Connection) -> None:
storage_type TEXT NOT NULL,
item_name TEXT NOT NULL,
quantity INT NOT NULL,
item_id INT,
last_updated TIMESTAMPTZ DEFAULT NOW()
);
"""
@@ -93,7 +94,13 @@ async def truncate_inventory(conn: asyncpg.Connection) -> None:
await conn.execute("TRUNCATE TABLE inventory;")
async def copy_csv_to_db(conn: asyncpg.Connection, rows: List[Tuple[str, str, str, int]]) -> None:
async def fetch_item_ids(conn: asyncpg.Connection, item_names: List[str]) -> Dict[str, int]:
"""Fetch item IDs from the database."""
rows = await conn.fetch("SELECT id, name FROM all_items WHERE name = ANY($1::text[])", item_names)
return {row["name"]: row["id"] for row in rows}
async def copy_csv_to_db(conn: asyncpg.Connection, rows: List[Tuple[str, str, str, int, int, _dt.datetime]]) -> None:
"""Bulk copy the parsed CSV rows into the DB using ``copy_records_to_table``."""
await conn.copy_records_to_table(
"inventory",
@@ -102,6 +109,7 @@ async def copy_csv_to_db(conn: asyncpg.Connection, rows: List[Tuple[str, str, st
"character_name",
"storage_type",
"item_name",
"item_id",
"quantity",
"last_updated",
],
@@ -130,15 +138,24 @@ async def load_inventory(csv_path: pathlib.Path) -> None:
await truncate_inventory(conn)
# Parse CSV
rows: List[Tuple[str, str, str, int]] = []
rows: List[Tuple[str, str, str, int, int, _dt.datetime]] = []
with csv_path.open(newline="", encoding="utf-8") as f:
reader = csv.DictReader(f, delimiter=";", quotechar='"')
names_set = set()
for r in reader:
names_set.add(r["item"].strip())
# fetch ids
id_rows = await conn.fetch("SELECT id,name FROM all_items WHERE name = ANY($1::text[])", list(names_set))
id_map = {row["name"]: row["id"] for row in id_rows}
f.seek(0)
next(reader) # skip header again
for r in reader:
char = r["char"].strip()
storage = r["storage"].strip()
item = r["item"].strip()
qty = int(r["quantity"].strip()) if r["quantity"].strip() else 0
rows.append((char, storage, item, qty, _dt.datetime.utcnow()))
item_id = id_map.get(item)
rows.append((char, storage, item, item_id, qty, _dt.datetime.utcnow()))
await copy_csv_to_db(conn, rows)
print(f"Inserted {len(rows)} inventory rows.")