Initial commit
This commit is contained in:
88 scripts/README.md Normal file
@@ -0,0 +1,88 @@
# Recipe ETL Scripts

This directory contains helper scripts for extracting Woodworking recipe data
from the raw **datasets/Woodworking.txt** file and loading it into the project
PostgreSQL database.

## File overview

| File | Purpose |
|------|---------|
| **woodworking_to_csv.py** | Legacy first-pass parser → `datasets/Woodworking.csv`. |
| **woodworking_to_csv_v2.py** | Improved parser that matches the spec (category, level, sub-crafts, ingredients, HQ yields, etc.) → `datasets/Woodworking_v2.csv`. |
| **recipes_to_csv_v2.py** | Generic parser. `python recipes_to_csv_v2.py <Craft>` processes one craft; use `python recipes_to_csv_v2.py --all` **or simply omit the argument** to parse every `.txt` file under `datasets/`, producing `datasets/<Craft>_v2.csv` for each. |
| **load_woodworking_to_db.py** | Loader for the legacy CSV (kept for reference). |
| **load_woodworking_v2_to_db.py** | Drops & recreates the **recipes_woodworking** table and bulk-loads `Woodworking_v2.csv`. |
| **load_recipes_v2_to_db.py** | Generic loader. `python load_recipes_v2_to_db.py <Craft>` loads one craft; omit the argument to load **all** generated CSVs into their respective `recipes_<craft>` tables. |
| **requirements.txt** | Minimal Python dependencies for the scripts. |
| **venv/** | Local virtual environment created by the setup steps below. |

## Prerequisites

* Python ≥ 3.9
* PostgreSQL instance reachable with credentials in `db.conf` at the project root:

```ini
PSQL_HOST=…
PSQL_PORT=…
PSQL_USER=…
PSQL_PASSWORD=…
PSQL_DBNAME=…
```

## Quick start (Woodworking example)

```bash
# 1. From project root
cd scripts

# 2. Create & activate virtualenv (only once)
python3 -m venv venv
source venv/bin/activate

# 3. Install dependencies
pip install -r requirements.txt

# 4. Generate CSVs for **all** crafts
python recipes_to_csv_v2.py --all   # or simply `python recipes_to_csv_v2.py`

# 5. Load all crafts into the DB (drops/recreates each table)
python load_recipes_v2_to_db.py
```

To work with a **single craft**, specify its name instead:

```bash
python recipes_to_csv_v2.py Smithing        # generate Smithing_v2.csv
python load_recipes_v2_to_db.py Smithing    # load only Smithing recipes
```

The loader will output something like:

```
Wrote 480 recipes -> datasets/Woodworking_v2.csv
Loaded recipes into new recipes_woodworking table.
```

## CSV schema (v2)

Column | Notes
------ | -----
`category` | Craft rank without level range (e.g. "Amateur")
`level` | Recipe level (integer)
`subcrafts` | JSON list, e.g. `[["Smithing",2],["Alchemy",7]]`
`name` | NQ product name
`crystal` | Element used (Wind, Earth, etc.)
`key_item` | Required key item (blank if none)
`ingredients` | JSON list, e.g. `[["Arrowwood Log",1]]`
`hq_yields` | JSON list ordered HQ1–HQ3, e.g. `[["Arrowwood Lumber",6],["Arrowwood Lumber",9],["Arrowwood Lumber",12]]`
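The three JSON columns are stored as plain strings inside the CSV, so consumers must decode them. A minimal sketch using only the standard library (the relative path assumes you run it from `scripts/`, as in the quick start):

```python
import csv
import json

# Decode the JSON-encoded columns of a generated v2 CSV.
with open("../datasets/Woodworking_v2.csv", newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        subcrafts = json.loads(row["subcrafts"] or "[]")      # e.g. [["Smithing", 2]]
        ingredients = json.loads(row["ingredients"] or "[]")  # e.g. [["Arrowwood Log", 1]]
        hq_yields = json.loads(row["hq_yields"] or "[]")      # HQ1..HQ3, null where N/A
        print(row["name"], row["level"], ingredients)
```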
## Parsing rules

* Item quantities are detected only when the suffix uses an "x" (e.g. `Lumber x6`).
* Strings such as `Bronze Leggings +1` are treated as the **full item name**; the `+1/+2/+3` suffix is preserved.
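For illustration, both rules follow from the `RE_ITEM_QTY` pattern used in `recipes_to_csv_v2.py`; a small self-contained sketch:

```python
import re

# Quantity suffix must be "xN"; a "+1/+2/+3" suffix stays part of the name.
RE_ITEM_QTY = re.compile(r"(.+?)\s*x(\d+)$", re.IGNORECASE)

def split_item_qty(text: str) -> tuple[str, int]:
    m = RE_ITEM_QTY.match(text.strip())
    return (m.group(1), int(m.group(2))) if m else (text.strip(), 1)

assert split_item_qty("Lumber x6") == ("Lumber", 6)
assert split_item_qty("Bronze Leggings +1") == ("Bronze Leggings +1", 1)
```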
## Developing / debugging

* Edit the parsers as needed, then rerun them to regenerate the CSVs.
* Feel free to add new scripts here; remember to update **requirements.txt** and this README.
167 scripts/load_recipes_v2_to_db.py Normal file
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""Load <Craft>_v2.csv into PostgreSQL.

Usage:
    python load_recipes_v2_to_db.py [<CRAFT>]

The script drops and recreates the table `recipes_<craft>` (lowercased) with
the generic v2 schema, then bulk-loads it from the CSV produced by
recipes_to_csv_v2.py. If <CRAFT> is omitted, every *_v2.csv under datasets/
is loaded.
"""
from __future__ import annotations

import argparse
import asyncio
import csv
import json
import pathlib
import re
from typing import Dict

import asyncpg

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
CONF_PATH = PROJECT_ROOT / "db.conf"
DATASETS_DIR = PROJECT_ROOT / "datasets"

RE_KEY = re.compile(r"^([A-Z0-9_]+)=(.*)$")


# ---------------------------------------------------------------------------
# Category mapping
# ---------------------------------------------------------------------------
# Ranges deliberately overlap; category_for_level returns the first match,
# so e.g. level 9 maps to "Amateur", not "Recruit".
CATEGORY_RANGES = [
    ("Amateur", 1, 10),
    ("Recruit", 8, 20),
    ("Initiate", 18, 30),
    ("Novice", 28, 40),
    ("Apprentice", 38, 50),
    ("Journeyman", 48, 60),
    ("Craftsman", 58, 70),
    ("Artisan", 68, 80),
    ("Adept", 78, 90),
    ("Veteran", 88, 100),
    ("Expert", 98, 110),
    ("Authority", 111, 120),
]


def category_for_level(level: int) -> str:
    for name, lo, hi in CATEGORY_RANGES:
        if lo <= level <= hi:
            return name
    return "Unknown"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def parse_db_conf(path: pathlib.Path) -> Dict[str, str]:
    """Parse simple KEY=VALUE lines (ignoring blanks and # comments)."""
    conf: Dict[str, str] = {}
    for line in path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if (m := RE_KEY.match(line)):
            k, v = m.group(1), m.group(2).strip().strip("'\"")
            conf[k] = v
    return conf


async def recreate_table(conn: asyncpg.Connection, craft: str):
    # Craft names come from trusted dataset filenames, so interpolating the
    # table name into the SQL is acceptable here.
    table = f"recipes_{craft.lower()}"
    await conn.execute(f"DROP TABLE IF EXISTS {table};")
    await conn.execute(
        f"""
        CREATE TABLE {table} (
            id SERIAL PRIMARY KEY,
            category TEXT NOT NULL,
            level INT NOT NULL,
            subcrafts JSONB,
            name TEXT NOT NULL,
            crystal TEXT NOT NULL,
            key_item TEXT,
            ingredients JSONB,
            hq_yields JSONB
        );
        """
    )


async def insert_csv(conn: asyncpg.Connection, craft: str, csv_path: pathlib.Path):
    table = f"recipes_{craft.lower()}"
    with csv_path.open(encoding="utf-8") as f:
        reader = csv.DictReader(f)
        records = []
        for row in reader:
            records.append(
                (
                    # Recompute the category from the level rather than
                    # trusting the CSV's category column.
                    category_for_level(int(row["level"])),
                    int(row["level"]),
                    # Round-trip through json to validate; asyncpg accepts
                    # the resulting JSON text for JSONB columns.
                    json.dumps(json.loads(row["subcrafts"] or "[]")),
                    row["name"],
                    row["crystal"],
                    row["key_item"] or None,
                    json.dumps(json.loads(row["ingredients"] or "[]")),
                    json.dumps(json.loads(row["hq_yields"] or "[]")),
                )
            )
        # COPY protocol: much faster than row-by-row INSERTs.
        await conn.copy_records_to_table(
            table,
            records=records,
            columns=[
                "category",
                "level",
                "subcrafts",
                "name",
                "crystal",
                "key_item",
                "ingredients",
                "hq_yields",
            ],
        )


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

async def process_craft(conn: asyncpg.Connection, craft: str):
    csv_path = DATASETS_DIR / f"{craft}_v2.csv"
    if not csv_path.exists():
        print(f"CSV not found for {craft}, skipping.")
        return
    await recreate_table(conn, craft)
    await insert_csv(conn, craft, csv_path)
    print(f"Loaded {craft} -> recipes_{craft.lower()} table.")


async def main_async(craft: str | None):
    conf = parse_db_conf(CONF_PATH)
    conn = await asyncpg.connect(
        host=conf["PSQL_HOST"],
        port=int(conf["PSQL_PORT"]),
        user=conf["PSQL_USER"],
        password=conf["PSQL_PASSWORD"],
        database=conf["PSQL_DBNAME"],
    )
    try:
        if craft:
            await process_craft(conn, craft)
        else:
            # No craft given: scan the datasets dir for generated CSVs.
            for p in DATASETS_DIR.glob("*_v2.csv"):
                c = p.stem.replace("_v2", "")
                await process_craft(conn, c)
    finally:
        await conn.close()


def main() -> None:
    p = argparse.ArgumentParser(description="Load <Craft>_v2.csv into DB")
    p.add_argument("craft", nargs="?", help="Craft name; if omitted, load all *_v2.csv files")
    args = p.parse_args()
    craft_arg = args.craft.strip() if args.craft else None
    asyncio.run(main_async(craft_arg))


if __name__ == "__main__":
    main()
146 scripts/load_woodworking_to_db.py Normal file
@@ -0,0 +1,146 @@
#!/usr/bin/env python3
"""Create the recipes_woodworking table and load data from datasets/Woodworking.csv.

Usage:
    python3 scripts/load_woodworking_to_db.py

The script reads database connection details from db.conf located at the project root.
It is idempotent: the table is created only if it doesn't already exist, and it is
truncated before inserting so reruns don't produce duplicate rows.
"""
from __future__ import annotations

import asyncio
import csv
import pathlib
import re
from typing import Dict, List, Optional

import asyncpg

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
CONF_PATH = PROJECT_ROOT / "db.conf"
CSV_PATH = PROJECT_ROOT / "datasets/Woodworking.csv"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def parse_db_conf(path: pathlib.Path) -> Dict[str, str]:
    """Parse simple KEY=VALUE lines into a dict."""
    conf: Dict[str, str] = {}
    pattern = re.compile(r"^([A-Z0-9_]+)=(.*)$")
    for line in path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        m = pattern.match(line)
        if m:
            key, value = m.group(1), m.group(2)
            # Remove surrounding quotes if present
            value = value.strip().strip("'\"")
            conf[key] = value
    required = {"PSQL_HOST", "PSQL_PORT", "PSQL_USER", "PSQL_PASSWORD", "PSQL_DBNAME"}
    missing = required - conf.keys()
    if missing:
        raise RuntimeError(f"Missing keys in db.conf: {', '.join(sorted(missing))}")
    return conf


async def create_table(conn: asyncpg.Connection) -> None:
    await conn.execute(
        """
        CREATE TABLE IF NOT EXISTS recipes_woodworking (
            id SERIAL PRIMARY KEY,
            category TEXT NOT NULL,
            level INT NOT NULL,
            product_name TEXT NOT NULL,
            nq_yield INT,
            hq1_yield INT,
            hq2_yield INT,
            hq3_yield INT,
            crystal TEXT,
            ingredients TEXT
        );
        """
    )


async def truncate_table(conn: asyncpg.Connection) -> None:
    await conn.execute("TRUNCATE TABLE recipes_woodworking;")


async def insert_rows(conn: asyncpg.Connection, rows: List[Dict[str, str]]) -> None:
    """Bulk insert via the COPY protocol for speed."""
    # Convert blanks to None and numeric fields to int before copying.
    tuples = []
    for r in rows:
        tuples.append(
            (
                r["category"],
                int(r["level"]),
                r["product_name"],
                _to_int_or_none(r["nq_yield"]),
                _to_int_or_none(r["hq1_yield"]),
                _to_int_or_none(r["hq2_yield"]),
                _to_int_or_none(r["hq3_yield"]),
                r["crystal"] or None,
                r["ingredients"] or None,
            )
        )

    await conn.copy_records_to_table(
        "recipes_woodworking",
        records=tuples,
        columns=[
            "category",
            "level",
            "product_name",
            "nq_yield",
            "hq1_yield",
            "hq2_yield",
            "hq3_yield",
            "crystal",
            "ingredients",
        ],
    )


def _to_int_or_none(s: str) -> Optional[int]:
    s = s.strip()
    return int(s) if s else None


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

async def main() -> None:
    if not CSV_PATH.exists():
        raise SystemExit("CSV file not found. Run woodworking_to_csv.py first.")

    conf = parse_db_conf(CONF_PATH)

    conn = await asyncpg.connect(
        host=conf["PSQL_HOST"],
        port=int(conf["PSQL_PORT"]),
        user=conf["PSQL_USER"],
        password=conf["PSQL_PASSWORD"],
        database=conf["PSQL_DBNAME"],
    )
    try:
        await create_table(conn)
        await truncate_table(conn)

        with CSV_PATH.open(newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            rows = list(reader)

        await insert_rows(conn, rows)
        print(f"Inserted {len(rows)} rows into recipes_woodworking.")
    finally:
        await conn.close()


if __name__ == "__main__":
    asyncio.run(main())
134 scripts/load_woodworking_v2_to_db.py Normal file
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""Load datasets/Woodworking_v2.csv into PostgreSQL (recipes_woodworking table).

Drops the old table if present and creates a new one matching the v2 schema.
"""
from __future__ import annotations

import asyncio
import csv
import json
import pathlib
import re
from typing import Dict

import asyncpg

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
CONF_PATH = PROJECT_ROOT / "db.conf"
CSV_PATH = PROJECT_ROOT / "datasets/Woodworking_v2.csv"

RE_KEY = re.compile(r"^([A-Z0-9_]+)=(.*)$")


def parse_db_conf(path: pathlib.Path) -> Dict[str, str]:
    data: Dict[str, str] = {}
    for line in path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        m = RE_KEY.match(line)
        if m:
            k, v = m.group(1), m.group(2).strip().strip("'\"")
            data[k] = v
    return data


async def recreate_table(conn: asyncpg.Connection):
    await conn.execute("DROP TABLE IF EXISTS recipes_woodworking;")
    await conn.execute(
        """
        CREATE TABLE recipes_woodworking (
            id SERIAL PRIMARY KEY,
            category TEXT NOT NULL,
            level INT NOT NULL,
            subcrafts JSONB,
            name TEXT NOT NULL,
            crystal TEXT NOT NULL,
            key_item TEXT,
            ingredients JSONB,
            hq_yields JSONB
        );
        """
    )


# Ranges deliberately overlap; see category_for_level below.
CATEGORY_RANGES = [
    ("Amateur", 1, 10),
    ("Recruit", 8, 20),
    ("Initiate", 18, 30),
    ("Novice", 28, 40),
    ("Apprentice", 38, 50),
    ("Journeyman", 48, 60),
    ("Craftsman", 58, 70),
    ("Artisan", 68, 80),
    ("Adept", 78, 90),
    ("Veteran", 88, 100),
    ("Expert", 98, 110),
    ("Authority", 111, 120),
]


def category_for_level(level: int) -> str:
    """Return the category name whose range includes the given level.

    If multiple ranges overlap, the first match in CATEGORY_RANGES is
    returned, e.g. level 9 -> "Amateur" rather than "Recruit".
    """
    for name, lo, hi in CATEGORY_RANGES:
        if lo <= level <= hi:
            return name
    return "Unknown"


async def insert_csv(conn: asyncpg.Connection):
    with CSV_PATH.open(encoding="utf-8") as f:
        reader = csv.DictReader(f)
        records = []
        for row in reader:
            records.append(
                (
                    category_for_level(int(row["level"])),
                    int(row["level"]),
                    json.dumps(json.loads(row["subcrafts"] or "[]")),  # jsonb text
                    row["name"],
                    row["crystal"],
                    row["key_item"] or None,
                    json.dumps(json.loads(row["ingredients"] or "[]")),  # jsonb text
                    json.dumps(json.loads(row["hq_yields"] or "[]")),  # jsonb text
                )
            )
        await conn.copy_records_to_table(
            "recipes_woodworking",
            records=records,
            columns=[
                "category",
                "level",
                "subcrafts",
                "name",
                "crystal",
                "key_item",
                "ingredients",
                "hq_yields",
            ],
        )


async def main():
    if not CSV_PATH.exists():
        raise SystemExit("CSV v2 not found; run the parser first.")
    conf = parse_db_conf(CONF_PATH)
    conn = await asyncpg.connect(
        host=conf["PSQL_HOST"],
        port=int(conf["PSQL_PORT"]),
        user=conf["PSQL_USER"],
        password=conf["PSQL_PASSWORD"],
        database=conf["PSQL_DBNAME"],
    )
    try:
        await recreate_table(conn)
        await insert_csv(conn)
        print("Loaded recipes into new recipes_woodworking table.")
    finally:
        await conn.close()


if __name__ == "__main__":
    asyncio.run(main())
146 scripts/populate_spells_from_scrolls.py Normal file
@@ -0,0 +1,146 @@
"""Populate the spells table using scroll information from usable_items.

Assumptions
-----------
1. The `usable_items` table has (at least) the following columns:
   - name (text)
   - description (text)
   - type_description (text), where scrolls have the value `SCROLL`
2. The spell name can be derived from the item name by stripping common prefixes like
   "Scroll of ", "Scroll: ", etc. This heuristic can be adjusted if necessary.
3. Job / level information is embedded in the description in patterns like
   "RDM Lv. 1", "WHM Lv.75", etc. Multiple jobs may appear in one description.
4. The database URL is provided via the `DATABASE_URL` environment variable, e.g.
   postgresql+psycopg2://user:password@host/dbname
   (falling back to db.conf at the project root when the variable is unset).

Usage
-----
$ export DATABASE_URL=postgresql+psycopg2://...
$ python scripts/populate_spells_from_scrolls.py

The script will insert new rows or update existing rows in the `spells` table.
"""
from __future__ import annotations

import os
import re
from pathlib import Path
from typing import Dict, List

from sqlalchemy import MetaData, Table, create_engine, insert, select, update
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session

# Job abbreviations; the spells table is assumed to have one integer column
# per job, named after the abbreviation.
JOBS: List[str] = [
    "run", "whm", "blm", "rdm", "pld", "drk", "brd", "nin", "smn", "cor", "sch", "geo",
]

# Regex to capture patterns like "RDM Lv. 1" or "RDM Lv.1" (space optional)
JOB_LV_PATTERN = re.compile(r"([A-Z]{3})\s*Lv\.?\s*(\d+)")


def _derive_spell_name(scroll_name: str) -> str:
    """Convert a scroll item name to a spell name.

    Examples
    --------
    >>> _derive_spell_name("Scroll of Fire")
    'Fire'
    >>> _derive_spell_name("Scroll: Cure IV")
    'Cure IV'
    """
    # Remove common prefixes
    prefixes = ["Scroll of ", "Scroll: ", "Scroll "]
    for p in prefixes:
        if scroll_name.startswith(p):
            return scroll_name[len(p):].strip()
    return scroll_name.strip()


def _parse_job_levels(description: str) -> Dict[str, int]:
    """Extract job-level mappings from a description string."""
    mapping: Dict[str, int] = {}
    for job, lvl in JOB_LV_PATTERN.findall(description):
        job_l = job.lower()
        if job_l in JOBS:
            mapping[job_l] = int(lvl)
    return mapping


def _get_engine() -> Engine:
    """Return a SQLAlchemy engine using DATABASE_URL or db.conf."""
    url = os.getenv("DATABASE_URL")
    if not url:
        # Attempt to build the URL from db.conf at the project root
        conf_path = Path(__file__).resolve().parents[1] / "db.conf"
        if not conf_path.exists():
            raise RuntimeError("DATABASE_URL env var not set and db.conf not found")
        cfg: Dict[str, str] = {}
        with conf_path.open() as fh:
            for line in fh:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                if "=" not in line:
                    continue
                k, v = line.split("=", 1)
                cfg[k.strip()] = v.strip().strip("'\"")  # remove quotes if any
        try:
            url = (
                f"postgresql+psycopg2://{cfg['PSQL_USER']}:{cfg['PSQL_PASSWORD']}@"
                f"{cfg['PSQL_HOST']}:{cfg.get('PSQL_PORT', '5432')}/{cfg['PSQL_DBNAME']}"
            )
        except KeyError as e:
            raise RuntimeError(f"Missing key in db.conf: {e}") from e
    return create_engine(url)


def main() -> None:
    engine = _get_engine()

    meta = MetaData()
    usable_items = Table("usable_items", meta, autoload_with=engine)
    spells = Table("spells", meta, autoload_with=engine)

    with Session(engine) as session:
        # Fetch scroll items
        scroll_rows = session.execute(
            select(
                usable_items.c.name,
                usable_items.c.description,
            ).where(usable_items.c.type_description == "SCROLL")
        ).all()

        for name, description in scroll_rows:
            spell_name = _derive_spell_name(name)
            job_levels = _parse_job_levels(description or "")

            # Build the full values dict, defaulting every job column to None
            values = {job: None for job in JOBS}
            values.update(job_levels)
            values["name"] = spell_name

            # Upsert logic: update when the spell already exists, insert otherwise
            existing = session.execute(
                select(spells.c.name).where(spells.c.name == spell_name)
            ).first()

            if existing:
                # Update only the jobs actually found; skip when the
                # description yielded no job info (values() needs arguments).
                if job_levels:
                    stmt = (
                        update(spells)
                        .where(spells.c.name == spell_name)
                        .values(**job_levels)
                    )
                    session.execute(stmt)
            else:
                stmt = insert(spells).values(**values)
                session.execute(stmt)

        session.commit()

    print(f"Processed {len(scroll_rows)} scrolls. Spells table updated.")


if __name__ == "__main__":
    main()
279 scripts/recipes_to_csv_v2.py Normal file
@@ -0,0 +1,279 @@
#!/usr/bin/env python3
"""Generic recipe parser for crafting disciplines -> <Craft>_v2.csv

Usage:
    python recipes_to_csv_v2.py <CRAFT>
    python recipes_to_csv_v2.py --all      # or omit the argument entirely

Where <CRAFT> matches the name of a .txt file in the datasets directory,
for example `Woodworking`, `Smithing`, `Goldsmithing`, `Alchemy`, etc.

The script produces a CSV `<Craft>_v2.csv` inside the datasets directory
having the following columns (identical to the Woodworking v2 spec):

    category, level, subcrafts, name, crystal, key_item, ingredients, hq_yields

See scripts/README.md for details of each column.
"""
from __future__ import annotations

import argparse
import csv
import json
import pathlib
import re
from typing import List, Optional, Tuple

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
DATASETS_DIR = PROJECT_ROOT / "datasets"

# ---------------------------------------------------------------------------
# Regex helpers (craft-dependent patterns are compiled at runtime)
# ---------------------------------------------------------------------------

RE_CATEGORY = re.compile(r"^([A-Za-z' ]+) \([0-9]+-[0-9]+\)$")
RE_SUBCRAFTS = re.compile(r"Sub Craft\(s\): (.+)")
RE_KEY_ITEM = re.compile(r"Key Item: (.+)")
RE_NQ = re.compile(r"NQ:\s*(.+)")
RE_HQ = re.compile(r"HQ(\d):\s*(.+)")
# Quantity delimiter is strictly 'xN' (e.g., "Lumber x6").
# Patterns like "+1" denote an HQ variant and are preserved in the name.
RE_ITEM_QTY = re.compile(r"(.+?)\s*x(\d+)$", re.IGNORECASE)


# ---------------------------------------------------------------------------
# Helper functions / dataclasses
# ---------------------------------------------------------------------------

def norm(s: str) -> str:
    """Normalise whitespace inside a string."""
    return re.sub(r"\s+", " ", s.strip())


def split_item_qty(text: str) -> Tuple[str, int]:
    """Split "Foo x3" → ("Foo", 3). Quantity defaults to 1."""
    text = norm(text)
    m = RE_ITEM_QTY.match(text)
    if m:
        name, qty = m.group(1), int(m.group(2))
    else:
        name, qty = text, 1
    return name, qty


class Recipe:
    __slots__ = (
        "category",
        "level",
        "subcrafts",
        "name",
        "crystal",
        "key_item",
        "ingredients",
        "hq_yields",
    )

    def __init__(self, category: str, level: int):
        self.category = category
        self.level = level
        self.subcrafts: List[Tuple[str, int]] = []
        self.name: str = ""
        self.crystal: str = ""
        self.key_item: Optional[str] = None
        self.ingredients: List[Tuple[str, int]] = []
        self.hq_yields: List[Optional[Tuple[str, int]]] = [None, None, None]

    def row(self) -> List[str]:
        return [
            self.category,
            str(self.level),
            json.dumps(self.subcrafts, ensure_ascii=False),
            self.name,
            self.crystal,
            self.key_item or "",
            json.dumps(self.ingredients, ensure_ascii=False),
            json.dumps(self.hq_yields, ensure_ascii=False),
        ]


# ---------------------------------------------------------------------------
# Core parse routine
# ---------------------------------------------------------------------------

def parse(txt_path: pathlib.Path, craft_name: str) -> List[Recipe]:
    """Parse a crafting text file into Recipe objects.

    The parsing strategy is now:
    1. "Main Craft:" marks the *metadata* for the upcoming recipe – level,
       optional sub-crafts, and key item.
    2. Ingredient lines follow until an "NQ:" line is reached. The first
       ingredient that contains the word "Crystal" determines the crystal type
       and is removed from the ingredients list.
    3. An "NQ:" line finalises the recipe: we capture the product name and then
       look ahead for up to three "HQx:" lines that describe HQ yields.

    Crucially, *"NQ:" now acts as the definitive boundary between recipes*.
    This allows the parser to cope with datasets where multiple successive
    "Main Craft:" lines appear without an intervening "NQ:".
    """

    lines = txt_path.read_text(encoding="utf-8", errors="ignore").splitlines()
    n = len(lines)
    i = 0

    current_category: str = ""
    recipes: List[Recipe] = []

    # Craft-specific regex (compiled once per run)
    RE_MAIN = re.compile(rf"Main Craft: {re.escape(craft_name)} - \(([^)]*)\)")

    while i < n:
        line = lines[i].strip()

        # 1) Category header
        if (m_cat := RE_CATEGORY.match(line)):
            current_category = m_cat.group(1)
            i += 1
            continue

        # 2) Start of a recipe – line beginning with "NQ:"
        if not line.startswith("NQ:"):
            i += 1
            continue

        # -------------------------------
        # New recipe initialised
        # -------------------------------
        rec = Recipe(current_category, level=0)
        rec.name, _ = split_item_qty(line[len("NQ:"):].strip())

        # Collect block until next NQ or EOF
        block_lines: List[str] = []
        i += 1
        while i < n and not lines[i].lstrip().startswith("NQ:"):
            block_lines.append(lines[i])
            i += 1

        # ------------------------------------
        # Parse metadata & ingredients in block
        # ------------------------------------
        for raw in block_lines:
            look = raw.strip()
            if not look:
                continue

            # Skip icon decorator lines early
            if look.endswith("-Icon.gif"):
                continue

            # Main Craft – level
            if (m_main := RE_MAIN.search(look)):
                level_raw = m_main.group(1)
                # Handle ranges like "115~120" or "115-120" by taking the lower bound
                m_range = re.match(r"(\d+)", level_raw)
                if m_range:
                    rec.level = int(m_range.group(1))
                else:
                    rec.level = 0
                continue

            # Sub crafts
            if (m_sc := RE_SUBCRAFTS.match(look)):
                for part in m_sc.group(1).split(','):
                    part = part.strip()
                    if m := re.match(r"([A-Za-z]+) - \((\d+)\)", part):
                        rec.subcrafts.append((m.group(1), int(m.group(2))))
                continue

            # Key item
            if (m_ki := RE_KEY_ITEM.match(look)):
                rec.key_item = m_ki.group(1)
                continue

            # HQ lines
            if look.startswith("HQ"):
                if (m_hq := RE_HQ.match(look)):
                    idx = int(m_hq.group(1)) - 1
                    name, qty = split_item_qty(m_hq.group(2))
                    rec.hq_yields[idx] = (name, qty)
                continue

            # Otherwise treat as ingredient
            name, qty = split_item_qty(look)
            rec.ingredients.append((name, qty))

        # Determine crystal & clean ingredient list
        for name, qty in rec.ingredients:
            if "Crystal" in name:
                rec.crystal = name.split()[0]  # "Earth Crystal" -> "Earth"
                break
        if rec.crystal:
            rec.ingredients = [p for p in rec.ingredients if "Crystal" not in p[0]]
        else:
            rec.crystal = "Unknown"

        recipes.append(rec)

    return recipes


# ---------------------------------------------------------------------------
# Entrypoint
# ---------------------------------------------------------------------------

def main() -> None:
    """CLI entrypoint.

    Usage examples:
        # Process a single craft
        python recipes_to_csv_v2.py Woodworking

        # Process all *.txt files in the datasets directory
        python recipes_to_csv_v2.py --all

        # Omit the positional arg – defaults to --all
        python recipes_to_csv_v2.py
    """
    argp = argparse.ArgumentParser(description="Parse <Craft>.txt into CSV.")
    argp.add_argument("craft", nargs="?", help="Craft name, e.g. Woodworking, Smithing")
    argp.add_argument("--all", action="store_true", help="Process every .txt file in datasets/")
    args = argp.parse_args()

    # Determine which crafts to process
    if args.all or not args.craft:
        crafts = [p.stem for p in DATASETS_DIR.glob("*.txt")]
        if not crafts:
            raise SystemExit(f"No .txt files found in {DATASETS_DIR}")
    else:
        crafts = [args.craft.strip()]

    for craft in crafts:
        txt_path = DATASETS_DIR / f"{craft}.txt"
        if not txt_path.exists():
            print(f"[WARN] Dataset file not found: {txt_path}")
            continue

        csv_path = DATASETS_DIR / f"{craft}_v2.csv"
        recipes = parse(txt_path, craft)

        csv_path.parent.mkdir(parents=True, exist_ok=True)
        with csv_path.open("w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([
                "category",
                "level",
                "subcrafts",
                "name",
                "crystal",
                "key_item",
                "ingredients",
                "hq_yields",
            ])
            for r in recipes:
                writer.writerow(r.row())

        rel = csv_path.relative_to(PROJECT_ROOT)
        print(f"Wrote {len(recipes)} recipes -> {rel}")


if __name__ == "__main__":
    main()
2 scripts/requirements.txt Normal file
@@ -0,0 +1,2 @@
asyncpg==0.29.0
python-dotenv==1.0.1
201 scripts/woodworking_to_csv.py Normal file
@@ -0,0 +1,201 @@
#!/usr/bin/env python3
"""Parse the semi-structured datasets/Woodworking.txt file and export it as a CSV.

The output CSV will be written to datasets/Woodworking.csv with the following columns:
    category     – Text group header, e.g. "Amateur (1-10)"
    level        – Woodworking skill level for the recipe (integer)
    product_name – Produced item (without quantity suffix)
    nq_yield     – Quantity produced on a normal-quality synth (may be empty)
    hq1_yield    – Quantity produced on an HQ1 synth (may be empty)
    hq2_yield    – Quantity produced on an HQ2 synth (may be empty)
    hq3_yield    – Quantity produced on an HQ3 synth (may be empty)
    crystal      – Crystal used for the synth (Wind Crystal, Earth Crystal, etc.)
    ingredients  – Semicolon-separated list of remaining ingredients (excluding the crystal)

Run the script from the project root:
    python3 scripts/woodworking_to_csv.py
"""
from __future__ import annotations

import csv
import pathlib
import re
from typing import List

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
TXT_PATH = PROJECT_ROOT / "datasets/Woodworking.txt"
CSV_PATH = PROJECT_ROOT / "datasets/Woodworking.csv"

# --- Regex patterns ---------------------------------------------------------
RE_CATEGORY = re.compile(r"^[A-Za-z' ]+ \([0-9]+-[0-9]+\)$")
RE_MAIN_CRAFT = re.compile(r"Main Craft: Woodworking - \((\d+)\)")
RE_NQ = re.compile(r"NQ:\s*(.+)")
RE_HQ = re.compile(r"HQ(\d):\s*(.+)")
RE_ITEM_QTY = re.compile(r"(.+?)\s+x(\d+)$")


def normalise_whitespace(text: str) -> str:
    """Collapse internal runs of whitespace and trim."""
    return re.sub(r"\s+", " ", text.strip())


class Recipe:
    __slots__ = (
        "category",
        "level",
        "product_name",
        "nq_yield",
        "hq1_yield",
        "hq2_yield",
        "hq3_yield",
        "crystal",
        "ingredients",
    )

    def __init__(self, category: str, level: int):
        self.category: str = category
        self.level: int = level
        self.product_name: str = ""
        self.nq_yield: str = ""
        self.hq1_yield: str = ""
        self.hq2_yield: str = ""
        self.hq3_yield: str = ""
        self.crystal: str = ""
        self.ingredients: List[str] = []

    # ------------------------------------------------------------------
    # Helper methods
    # ------------------------------------------------------------------
    def _set_product(self, text: str, nq: bool = False) -> None:
        name, qty = self._split_item_qty(text)
        self.product_name = name
        if nq:
            self.nq_yield = qty

    def _add_hq(self, idx: int, text: str) -> None:
        _name, qty = self._split_item_qty(text)
        if idx == 1:
            self.hq1_yield = qty
        elif idx == 2:
            self.hq2_yield = qty
        elif idx == 3:
            self.hq3_yield = qty

    @staticmethod
    def _split_item_qty(text: str) -> tuple[str, str]:
        text = normalise_whitespace(text)
        m = RE_ITEM_QTY.match(text)
        if m:
            return m.group(1), m.group(2)
        return text, ""

    def to_row(self) -> List[str]:
        return [
            self.category,
            str(self.level),
            self.product_name,
            self.nq_yield,
            self.hq1_yield,
            self.hq2_yield,
            self.hq3_yield,
            self.crystal,
            "; ".join(self.ingredients),
        ]


# ----------------------------------------------------------------------------
# Parsing logic
# ----------------------------------------------------------------------------

def parse_file(txt_path: pathlib.Path) -> List[Recipe]:
    recipes: List[Recipe] = []
    current_category = ""
    lines = txt_path.read_text(encoding="utf-8", errors="ignore").splitlines()
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # Update category headers, e.g. "Amateur (1-10)"
        if RE_CATEGORY.match(line):
            current_category = line
            i += 1
            continue

        # Detect start of a recipe block
        m_main = RE_MAIN_CRAFT.search(line)
        if m_main:
            level = int(m_main.group(1))
            rec = Recipe(current_category, level)
            i += 1

            # Collect ingredients until we hit the NQ line
            while i < n and not lines[i].lstrip().startswith("NQ:"):
                ing_line = lines[i].strip()
                if ing_line:
                    rec.ingredients.append(normalise_whitespace(ing_line))
                i += 1

            # Extract crystal (first ingredient if it contains "Crystal")
            if rec.ingredients and "Crystal" in rec.ingredients[0]:
                rec.crystal = rec.ingredients.pop(0)

            # Now we should be at the NQ line
            if i < n and (m_nq := RE_NQ.match(lines[i].strip())):
                rec._set_product(m_nq.group(1), nq=True)
                i += 1
            else:
                # Malformed entry – skip ahead
                i += 1
                continue

            # Collect HQ lines (0–3 lines)
            while i < n and lines[i].lstrip().startswith("HQ"):
                m_hq = RE_HQ.match(lines[i].strip())
                if m_hq:
                    idx = int(m_hq.group(1))
                    rec._add_hq(idx, m_hq.group(2))
                i += 1

            recipes.append(rec)
            continue  # `i` already points at the next unprocessed line

        # Fallback increment
        i += 1

    return recipes


# ----------------------------------------------------------------------------
# CSV writer
# ----------------------------------------------------------------------------

def write_csv(recipes: List[Recipe], csv_path: pathlib.Path) -> None:
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    with csv_path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(
            [
                "category",
                "level",
                "product_name",
                "nq_yield",
                "hq1_yield",
                "hq2_yield",
                "hq3_yield",
                "crystal",
                "ingredients",
            ]
        )
        for r in recipes:
            writer.writerow(r.to_row())


if __name__ == "__main__":
    if not TXT_PATH.exists():
        raise SystemExit(f"Input file not found: {TXT_PATH}")

    recs = parse_file(TXT_PATH)
    write_csv(recs, CSV_PATH)
    print(f"Parsed {len(recs)} recipes -> {CSV_PATH.relative_to(PROJECT_ROOT)}")
210 scripts/woodworking_to_csv_v2.py Normal file
@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""Improved parser for Woodworking recipes -> Woodworking_v2.csv

This version follows the spec provided by the user:

    Category:    category name (without level range)
    Level:       recipe level (integer)
    Sub-Crafts:  JSON list of [name, level]
    Name:        product NQ name
    Crystal:     crystal element (Earth, Wind, etc.)
    Key Item:    key item name or null
    Ingredients: JSON list of [name, quantity]
    HQ Yields:   JSON list ordered HQ1..HQ3 of [name, quantity] (nulls if N/A)
"""
from __future__ import annotations

import csv
import json
import pathlib
import re
from typing import List, Optional, Tuple

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
TXT_PATH = PROJECT_ROOT / "datasets/Woodworking.txt"
CSV_PATH = PROJECT_ROOT / "datasets/Woodworking_v2.csv"

RE_CATEGORY = re.compile(r"^([A-Za-z' ]+) \([0-9]+-[0-9]+\)$")
RE_MAIN = re.compile(r"Main Craft: Woodworking - \((\d+)\)")
RE_SUBCRAFTS = re.compile(r"Sub Craft\(s\): (.+)")
RE_KEY_ITEM = re.compile(r"Key Item: (.+)")
RE_NQ = re.compile(r"NQ:\s*(.+)")
RE_HQ = re.compile(r"HQ(\d):\s*(.+)")
# Quantities are detected only via an 'xN' suffix; per the spec in
# scripts/README.md, '+1/+2/+3' HQ suffixes stay part of the item name.
RE_ITEM_QTY = re.compile(r"(.+?)\s*x(\d+)$")


def norm(s: str) -> str:
    return re.sub(r"\s+", " ", s.strip())


def split_item_qty(text: str) -> Tuple[str, int]:
    text = norm(text)
    m = RE_ITEM_QTY.match(text)
    if m:
        name, qty = m.group(1), int(m.group(2))
    else:
        name, qty = text, 1
    return name, qty


class Recipe:
    __slots__ = (
        "category",
        "level",
        "subcrafts",
        "name",
        "crystal",
        "key_item",
        "ingredients",
        "hq_yields",
    )

    def __init__(self, category: str, level: int):
        self.category = category
        self.level = level
        self.subcrafts: List[Tuple[str, int]] = []
        self.name: str = ""
        self.crystal: str = ""
        self.key_item: Optional[str] = None
        self.ingredients: List[Tuple[str, int]] = []
        # index 0,1,2 for HQ1..3; (name, qty) or None
        self.hq_yields: List[Optional[Tuple[str, int]]] = [None, None, None]

    def row(self) -> List[str]:
        return [
            self.category,
            str(self.level),
            json.dumps(self.subcrafts, ensure_ascii=False),
            self.name,
            self.crystal,
            self.key_item or "",
            json.dumps(self.ingredients, ensure_ascii=False),
            json.dumps(self.hq_yields, ensure_ascii=False),
        ]


# ---------------------------------------------------------------------------

def parse() -> List[Recipe]:
    lines = TXT_PATH.read_text(encoding="utf-8", errors="ignore").splitlines()
    n = len(lines)
    i = 0
    current_category = ""
    recipes: List[Recipe] = []

    while i < n:
        line = lines[i].strip()

        # Category header
        m_cat = RE_CATEGORY.match(line)
        if m_cat:
            current_category = m_cat.group(1)
            i += 1
            continue

        # Main Craft start
        m_main = RE_MAIN.search(line)
        if m_main:
            level = int(m_main.group(1))
            rec = Recipe(current_category, level)
            i += 1

            # Look ahead for optional Sub Craft(s) & Key Item lines
            while i < n:
                look = lines[i].strip()
                if not look:
                    i += 1
                    continue
                if (m_sub := RE_SUBCRAFTS.match(look)):
                    # Split by commas
                    for part in m_sub.group(1).split(','):
                        part = part.strip()
                        m_sc = re.match(r"([A-Za-z]+) - \((\d+)\)", part)
                        if m_sc:
                            rec.subcrafts.append((m_sc.group(1), int(m_sc.group(2))))
                    i += 1
                    continue
                if (m_ki := RE_KEY_ITEM.match(look)):
                    rec.key_item = m_ki.group(1)
                    i += 1
                    continue
                # Anything else (ingredient names, NQ: or HQ lines) ends the
                # metadata block; ingredients are collected in the next loop.
                break

            # Collect ingredients until the NQ line is encountered
            while i < n and not lines[i].lstrip().startswith("NQ:"):
                ingr_line = lines[i].strip()
                if ingr_line and not ingr_line.endswith("-Icon.gif"):
                    name, qty = split_item_qty(ingr_line)
                    rec.ingredients.append((name, qty))
                i += 1

            # Determine crystal (must contain 'Crystal')
            for name, qty in rec.ingredients:
                if "Crystal" in name:
                    rec.crystal = name.split()[0]  # "Earth Crystal" -> "Earth"
                    break
            if rec.crystal:
                # Remove crystal from ingredient list
                rec.ingredients = [pair for pair in rec.ingredients if "Crystal" not in pair[0]]
            else:
                rec.crystal = "Unknown"

            # NQ line
            if i < n and (m_nq := RE_NQ.match(lines[i].strip())):
                rec.name, _ = split_item_qty(m_nq.group(1))
                i += 1
            else:
                i += 1

            # Skip blank lines before HQ entries
            while i < n and not lines[i].strip():
                i += 1
            # HQ lines
            while i < n and lines[i].lstrip().startswith("HQ"):
                m_hq = RE_HQ.match(lines[i].strip())
                if m_hq:
                    idx = int(m_hq.group(1)) - 1
                    name, qty = split_item_qty(m_hq.group(2))
                    rec.hq_yields[idx] = (name, qty)
                i += 1

            recipes.append(rec)
            continue

        i += 1

    return recipes


# ---------------------------------------------------------------------------

def main() -> None:
    recs = parse()
    CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    with CSV_PATH.open("w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "category",
            "level",
            "subcrafts",
            "name",
            "crystal",
            "key_item",
            "ingredients",
            "hq_yields",
        ])
        for r in recs:
            writer.writerow(r.row())
    print(f"Wrote {len(recs)} recipes -> {CSV_PATH.relative_to(PROJECT_ROOT)}")


if __name__ == "__main__":
    main()