Desynth page and improved item info API. Added string substitution to utils.
scripts/convert_hq_entries.py (Normal file, 78 lines)
@@ -0,0 +1,78 @@
import json
from pathlib import Path


def convert_hq_entries(csv_path: Path, backup: bool = True) -> None:
    """Convert HQ1:,HQ2:,HQ3: columns in the desynthesis recipes CSV into a
    single JSON-like dictionary string stored in the HQ column.

    The CSV currently looks like::

        Item,Crystal,Ingredients,HQ,Cap
        Distilled Water x3,Lightning,Tahrongi Cactus,HQ1: Distilled Water x6,HQ2: Distilled Water x9,HQ3: Distilled Water x12,2

    After conversion it will be::

        Distilled Water x3,Lightning,Tahrongi Cactus,{"HQ1": "Distilled Water x6","HQ2": "Distilled Water x9","HQ3": "Distilled Water x12"},2
    """
    csv_path = Path(csv_path)
    if not csv_path.exists():
        raise FileNotFoundError(csv_path)

    text = csv_path.read_text(encoding="utf-8").splitlines()
    output_lines = []

    for line in text:
        # Keep skill category lines (e.g., "Alchemy") unchanged.
        if "," not in line:
            output_lines.append(line)
            continue

        parts = [p.strip() for p in line.split(",")]

        # The header line already has the correct shape (5 columns).
        if parts[:5] == ["Item", "Crystal", "Ingredients", "HQ", "Cap"]:
            output_lines.append(",".join(parts))
            continue

        # If this row already has 5 or fewer columns, leave it as-is.
        if len(parts) <= 5:
            output_lines.append(",".join(parts))
            continue

        # Otherwise consolidate the HQ columns.
        item, crystal, ingredients = parts[:3]
        cap = parts[-1]
        hq_parts = parts[3:-1]

        hq_dict = {}
        unnamed_counter = 1
        for h in hq_parts:
            h = h.strip()
            if not h:
                continue
            if ":" in h:
                key, value = h.split(":", 1)
                hq_dict[key.strip()] = value.strip()
            else:
                # Handle unlabeled HQ values by assigning sequential keys.
                key = f"HQ{unnamed_counter}"
                unnamed_counter += 1
                hq_dict[key] = h

        # Build a JSON-like dictionary string; values containing quotes fall
        # back to json.dumps for proper escaping.
        hq_json_readable = "{" + ",".join(
            f'"{k}": "{v}"' if '"' not in v else f'"{k}": {json.dumps(v)}'
            for k, v in hq_dict.items()
        ) + "}"

        new_line = ",".join([item, crystal, ingredients, hq_json_readable, cap])
        output_lines.append(new_line)

    # Backup original file.
    if backup:
        backup_path = csv_path.with_suffix(csv_path.suffix + ".bak")
        if not backup_path.exists():
            backup_path.write_text("\n".join(text), encoding="utf-8")

    csv_path.write_text("\n".join(output_lines) + "\n", encoding="utf-8")


if __name__ == "__main__":
    target = Path(__file__).resolve().parents[1] / "datasets" / "desythesis_recipes.csv"
    convert_hq_entries(target)
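A quick way to sanity-check the converter is to run it against a throwaway copy of the data rather than the real CSV. A minimal sketch (assuming the function above is importable, e.g. by running from the scripts/ directory) could look like:

import tempfile
from pathlib import Path

from convert_hq_entries import convert_hq_entries  # assumes scripts/ is on sys.path

# Two-row sample in the same layout the docstring describes.
sample = (
    "Alchemy\n"
    "Item,Crystal,Ingredients,HQ,Cap\n"
    "Distilled Water x3,Lightning,Tahrongi Cactus,"
    "HQ1: Distilled Water x6,HQ2: Distilled Water x9,HQ3: Distilled Water x12,2\n"
)

with tempfile.TemporaryDirectory() as tmp:
    csv_path = Path(tmp) / "sample.csv"
    csv_path.write_text(sample, encoding="utf-8")
    convert_hq_entries(csv_path, backup=False)
    print(csv_path.read_text(encoding="utf-8"))
    # The data row should now end with a single consolidated HQ cell:
    # ...,{"HQ1": "Distilled Water x6","HQ2": "Distilled Water x9","HQ3": "Distilled Water x12"},2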
scripts/load_desynth_recipes_to_db.py (Normal file, 207 lines)
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""Load datasets/desythesis_recipes.csv into PostgreSQL.

This script parses the *desynthesis* recipe CSV, which is structured slightly
differently from the v2 crafting CSVs. Recipes are grouped under craft
headings (e.g. "Alchemy", "Smithing"), each followed by a header row.

Recent edits mean each recipe row now lists **multiple HQ columns (HQ1, HQ2, HQ3)**
_directly_ in the CSV instead of a single JSON cell. A typical section now looks
like::

    Alchemy
    Item,Crystal,Ingredients,HQ1,HQ2,HQ3,Cap
    Distilled Water x3,Lightning,Tahrongi Cactus,HQ1: Distilled Water x6,HQ2: Distilled Water x9,HQ3: Distilled Water x12,2

Some legacy sections may still use the shorter header ``Item,Crystal,Ingredients,HQ,Cap``
with the HQ values spread across several columns. Pragmatically we treat **all
columns between ``Ingredients`` and the final ``Cap`` column as HQ fields** and
extract at most three of them (hq1-3) for insertion into Postgres.

The resulting database table schema is::

    CREATE TABLE recipes_desynthesis (
        id SERIAL PRIMARY KEY,
        craft TEXT NOT NULL,
        cap INT,
        item TEXT NOT NULL,
        crystal TEXT NOT NULL,
        ingredients TEXT NOT NULL,
        hq1 TEXT,
        hq2 TEXT,
        hq3 TEXT
    );

Run:
    python scripts/load_desynth_recipes_to_db.py
"""
from __future__ import annotations

import asyncio
import csv
import json
import pathlib
import re
from typing import Dict, List, Optional, Tuple

import asyncpg

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
CONF_PATH = PROJECT_ROOT / "db.conf"
CSV_PATH = PROJECT_ROOT / "datasets" / "desythesis_recipes.csv"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
RE_CONF = re.compile(r"^([A-Z0-9_]+)=(.*)$")


def parse_db_conf(path: pathlib.Path) -> Dict[str, str]:
    """Simple KEY=VALUE parser (quotes stripped)."""
    if not path.exists():
        raise FileNotFoundError("db.conf not found")
    conf: Dict[str, str] = {}
    for line in path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        m = RE_CONF.match(line)
        if m:
            k, v = m.group(1), m.group(2).strip().strip("'\"")
            conf[k] = v
    required = {"PSQL_HOST", "PSQL_PORT", "PSQL_USER", "PSQL_PASSWORD", "PSQL_DBNAME"}
    missing = required - conf.keys()
    if missing:
        raise RuntimeError(f"Missing keys in db.conf: {', '.join(sorted(missing))}")
    return conf


def parse_csv(csv_path: pathlib.Path) -> List[Tuple[str, Optional[int], str, str, str, Optional[str], Optional[str], Optional[str]]]:
    """Parse the *desythesis_recipes.csv* file and return rows for COPY.

    The parser is tolerant of the two currently-seen layouts:

    1. ``Item,Crystal,Ingredients,HQ1,HQ2,HQ3,Cap``
    2. ``Item,Crystal,Ingredients,HQ,Cap`` (legacy header but still multiple HQ
       columns in the data rows).

    The strategy is therefore:
      • the first three columns are *always* Item, Crystal, Ingredients.
      • the *last* column is Cap.
      • everything in between is treated as HQ fields – the first three of those
        (if present) are saved as hq1-3.
    """
    rows: List[Tuple[str, Optional[int], str, str, str, Optional[str], Optional[str], Optional[str]]] = []
    current_craft: Optional[str] = None

    with csv_path.open(newline="", encoding="utf-8") as fh:
        reader = csv.reader(fh)
        for raw in reader:
            # ------------------------------------------------------------------
            # Detect craft headings (single-cell rows, e.g. "Alchemy")
            # ------------------------------------------------------------------
            if len(raw) == 1:
                current_craft = raw[0].strip()
                continue

            # Skip blank lines, header rows, and rows before the first craft heading.
            if not raw or raw[0].strip().startswith("Item") or current_craft is None:
                continue

            if len(raw) < 4:
                # Not enough columns for a valid recipe – skip.
                continue

            # Standard columns
            item = raw[0].strip()
            crystal = raw[1].strip()
            ingredients = raw[2].strip()

            # Cap is *always* the final column.
            cap_raw = raw[-1].strip()
            try:
                cap = int(cap_raw) if cap_raw.isdigit() else None
            except ValueError:
                cap = None

            # HQ columns: everything between ingredients and cap.
            hq_columns = [c.strip() for c in raw[3:-1]]
            hq1 = hq_columns[0] if len(hq_columns) > 0 and hq_columns[0] else None
            hq2 = hq_columns[1] if len(hq_columns) > 1 and hq_columns[1] else None
            hq3 = hq_columns[2] if len(hq_columns) > 2 and hq_columns[2] else None

            # Clean prefixes like "HQ1: ".
            def _clean(hq_val: Optional[str]) -> Optional[str]:
                if hq_val and ":" in hq_val:
                    return hq_val.split(":", 1)[1].strip()
                return hq_val

            hq1, hq2, hq3 = map(_clean, (hq1, hq2, hq3))

            rows.append((current_craft, cap, item, crystal, ingredients, hq1, hq2, hq3))

    return rows


async def recreate_table(conn: asyncpg.Connection) -> None:
    await conn.execute(
        """
        DROP TABLE IF EXISTS recipes_desynthesis;
        CREATE TABLE recipes_desynthesis (
            id SERIAL PRIMARY KEY,
            craft TEXT NOT NULL,
            cap INT,
            item TEXT NOT NULL,
            crystal TEXT NOT NULL,
            ingredients TEXT NOT NULL,
            hq1 TEXT,
            hq2 TEXT,
            hq3 TEXT
        );
        """
    )


async def copy_rows(conn: asyncpg.Connection, rows):
    await conn.copy_records_to_table(
        "recipes_desynthesis",
        records=rows,
        columns=[
            "craft",
            "cap",
            "item",
            "crystal",
            "ingredients",
            "hq1",
            "hq2",
            "hq3",
        ],
    )


async def main() -> None:
    if not CSV_PATH.exists():
        raise SystemExit("CSV file not found – run conversion first")

    conf = parse_db_conf(CONF_PATH)
    rows = parse_csv(CSV_PATH)
    print(f"Parsed {len(rows)} recipes from CSV.")

    conn = await asyncpg.connect(
        host=conf["PSQL_HOST"],
        port=int(conf["PSQL_PORT"]),
        user=conf["PSQL_USER"],
        password=conf["PSQL_PASSWORD"],
        database=conf["PSQL_DBNAME"],
    )
    try:
        await recreate_table(conn)
        await copy_rows(conn, rows)
        print("Loaded recipes_desynthesis table.")
    finally:
        await conn.close()


if __name__ == "__main__":
    asyncio.run(main())
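The column strategy described in the module docstring (everything between Ingredients and the trailing Cap is treated as an HQ field) can be illustrated with a small standalone sketch that never touches the database; the sample data below is illustrative only:

import csv
import io

sample = io.StringIO(
    "Alchemy\n"
    "Item,Crystal,Ingredients,HQ1,HQ2,HQ3,Cap\n"
    "Distilled Water x3,Lightning,Tahrongi Cactus,"
    "HQ1: Distilled Water x6,HQ2: Distilled Water x9,HQ3: Distilled Water x12,2\n"
)

craft = None
for raw in csv.reader(sample):
    if len(raw) == 1:                      # craft heading
        craft = raw[0].strip()
        continue
    if not raw or raw[0].startswith("Item"):
        continue
    item, crystal, ingredients = (c.strip() for c in raw[:3])
    cap = raw[-1].strip()
    # Strip "HQn:" prefixes from whatever sits between Ingredients and Cap.
    hqs = [c.split(":", 1)[1].strip() if ":" in c else c.strip() for c in raw[3:-1]]
    print(craft, item, crystal, ingredients, hqs[:3], cap)
    # -> Alchemy Distilled Water x3 Lightning Tahrongi Cactus
    #    ['Distilled Water x6', 'Distilled Water x9', 'Distilled Water x12'] 2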
@@ -82,6 +82,7 @@ async def ensure_inventory_table(conn: asyncpg.Connection) -> None:
             storage_type TEXT NOT NULL,
             item_name TEXT NOT NULL,
             quantity INT NOT NULL,
+            item_id INT,
             last_updated TIMESTAMPTZ DEFAULT NOW()
         );
         """
@@ -93,7 +94,13 @@ async def truncate_inventory(conn: asyncpg.Connection) -> None:
     await conn.execute("TRUNCATE TABLE inventory;")
 
 
-async def copy_csv_to_db(conn: asyncpg.Connection, rows: List[Tuple[str, str, str, int]]) -> None:
+async def fetch_item_ids(conn: asyncpg.Connection, item_names: List[str]) -> Dict[str, int]:
+    """Fetch item IDs from the database."""
+    rows = await conn.fetch("SELECT id, name FROM all_items WHERE name = ANY($1::text[])", item_names)
+    return {row["name"]: row["id"] for row in rows}
+
+
+async def copy_csv_to_db(conn: asyncpg.Connection, rows: List[Tuple[str, str, str, int, int, _dt.datetime]]) -> None:
     """Bulk copy the parsed CSV rows into the DB using ``copy_records_to_table``."""
     await conn.copy_records_to_table(
         "inventory",
@@ -102,6 +109,7 @@ async def copy_csv_to_db(conn: asyncpg.Connection, rows: List[Tuple[str, str, st
             "character_name",
             "storage_type",
             "item_name",
+            "item_id",
             "quantity",
             "last_updated",
         ],
@@ -130,15 +138,24 @@ async def load_inventory(csv_path: pathlib.Path) -> None:
     await truncate_inventory(conn)
 
     # Parse CSV
-    rows: List[Tuple[str, str, str, int]] = []
+    rows: List[Tuple[str, str, str, int, int, _dt.datetime]] = []
     with csv_path.open(newline="", encoding="utf-8") as f:
         reader = csv.DictReader(f, delimiter=";", quotechar='"')
+        names_set = set()
+        for r in reader:
+            names_set.add(r["item"].strip())
+        # fetch ids
+        id_rows = await conn.fetch("SELECT id,name FROM all_items WHERE name = ANY($1::text[])", list(names_set))
+        id_map = {row["name"]: row["id"] for row in id_rows}
+        f.seek(0)
+        next(reader)  # skip header again
         for r in reader:
             char = r["char"].strip()
             storage = r["storage"].strip()
             item = r["item"].strip()
             qty = int(r["quantity"].strip()) if r["quantity"].strip() else 0
-            rows.append((char, storage, item, qty, _dt.datetime.utcnow()))
+            item_id = id_map.get(item)
+            rows.append((char, storage, item, item_id, qty, _dt.datetime.utcnow()))
 
     await copy_csv_to_db(conn, rows)
     print(f"Inserted {len(rows)} inventory rows.")
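The new two-pass flow in load_inventory (collect item names on a first read, resolve them to ids, then rebuild the row tuples with item_id included) can be sketched offline by substituting a plain dict for the all_items query; the sample data and placeholder ids below are illustrative only:

import csv
import datetime as _dt
import io

data = io.StringIO("char;storage;item;quantity\nMog;Safe;Fire Crystal;12\n")

reader = csv.DictReader(data, delimiter=";", quotechar='"')
names = {r["item"].strip() for r in reader}          # first pass: collect names
id_map = {name: 4096 + i for i, name in enumerate(sorted(names))}  # stands in for the all_items lookup

data.seek(0)
next(reader)                                          # skip the header row again
rows = []
for r in reader:                                      # second pass: build row tuples
    item = r["item"].strip()
    qty = int(r["quantity"].strip()) if r["quantity"].strip() else 0
    rows.append((r["char"].strip(), r["storage"].strip(), item,
                 id_map.get(item), qty, _dt.datetime.utcnow()))
print(rows)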