280 lines
9.1 KiB
Python
280 lines
9.1 KiB
Python
#!/usr/bin/env python3
|
||
"""Generic recipe parser for crafting disciplines -> <Craft>_v2.csv

Usage:
    python recipes_to_csv_v2.py <CRAFT>
    python recipes_to_csv_v2.py --all

Where <CRAFT> matches the name of the .txt file in the datasets directory,
for example `Woodworking`, `Smithing`, `Goldsmithing`, `Alchemy`, etc.
With `--all`, or when <CRAFT> is omitted, every .txt file in the datasets
directory is processed.

The script produces a CSV `<Craft>_v2.csv` inside the datasets directory
having the following columns (identical to the Woodworking v2 spec):

    category, level, subcrafts, name, crystal, key_item, ingredients, hq_yields

See scripts/README.md for details of each column.
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import csv
|
||
import json
|
||
import pathlib
|
||
import re
|
||
from typing import List, Optional, Tuple
|
||
|
||
# Repository root: two levels above this file (the script lives one directory
# below the project root).
PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
# Directory holding the <Craft>.txt inputs and the generated <Craft>_v2.csv files.
DATASETS_DIR = PROJECT_ROOT / "datasets"

# ---------------------------------------------------------------------------
# Regex helpers (compiled at runtime where craft-dependent)
# ---------------------------------------------------------------------------

# Category header: a name followed by a level range, e.g. "Name (1-10)".
RE_CATEGORY = re.compile(r"^([A-Za-z' ]+) \([0-9]+-[0-9]+\)$")
# "Sub Craft(s): A - (n), B - (m)" -> captures the comma-separated list.
RE_SUBCRAFTS = re.compile(r"Sub Craft\(s\): (.+)")
# "Key Item: <name>" -> captures the key item name.
RE_KEY_ITEM = re.compile(r"Key Item: (.+)")
# "NQ: <product>" -> captures the product text. parse() currently detects
# these lines with a plain prefix check rather than this pattern.
RE_NQ = re.compile(r"NQ:\s*(.+)")
# "HQ<tier>: <product>". Only tiers 1-3 exist in the output schema (three
# hq_yields slots), so any other digit is rejected here instead of producing
# an out-of-range (or negative, wrap-around) slot index downstream.
RE_HQ = re.compile(r"HQ([1-3]):\s*(.+)")
# Quantity delimiter is strictly 'xN' (e.g., "Lumber x6").
# Patterns like "+1" denote HQ variant and should be preserved in the name.
RE_ITEM_QTY = re.compile(r"(.+?)\s*x(\d+)$", re.IGNORECASE)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Helper functions / dataclasses
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def norm(s: str) -> str:
    """Trim *s* and collapse each internal run of whitespace to one space."""
    trimmed = s.strip()
    collapsed = re.sub(r"\s+", " ", trimmed)
    return collapsed
|
||
|
||
|
||
def split_item_qty(text: str) -> Tuple[str, int]:
    """Separate a trailing ``xN`` quantity from an item label.

    The text is whitespace-normalised first. ``"Foo x3"`` yields
    ``("Foo", 3)``; text without a trailing ``xN`` is returned unchanged
    with a quantity of 1.
    """
    cleaned = norm(text)
    found = RE_ITEM_QTY.match(cleaned)
    if not found:
        # No quantity suffix: the whole text is the item name.
        return cleaned, 1
    item, count = found.group(1), found.group(2)
    return item, int(count)
|
||
|
||
|
||
class Recipe:
    """A single parsed recipe that can be flattened into one CSV row.

    ``category`` and ``level`` are supplied at construction; every other
    field starts empty and is filled in by the caller. ``hq_yields`` always
    has exactly three slots, each either ``None`` or a ``(name, qty)`` pair.
    """

    # Field names, in the same order as the cells produced by row().
    __slots__ = (
        "category", "level", "subcrafts", "name",
        "crystal", "key_item", "ingredients", "hq_yields",
    )

    def __init__(self, category: str, level: int):
        self.category = category
        self.level = level
        # Scalar fields.
        self.name: str = ""
        self.crystal: str = ""
        self.key_item: Optional[str] = None
        # Collection fields; each instance gets its own fresh lists.
        self.subcrafts: List[Tuple[str, int]] = []
        self.ingredients: List[Tuple[str, int]] = []
        self.hq_yields: List[Optional[Tuple[str, int]]] = [None] * 3

    def row(self) -> List[str]:
        """Return the recipe as a list of eight CSV cell strings.

        List-valued fields are JSON-encoded (non-ASCII characters kept
        as-is); a missing key item becomes an empty string.
        """

        def as_json(value) -> str:
            return json.dumps(value, ensure_ascii=False)

        key_item_cell = self.key_item if self.key_item else ""
        cells = [self.category, str(self.level), as_json(self.subcrafts)]
        cells += [self.name, self.crystal, key_item_cell]
        cells += [as_json(self.ingredients), as_json(self.hq_yields)]
        return cells
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Core parse routine
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# One entry of a "Sub Craft(s):" list, e.g. "Smithing - (12)".
_RE_SUBCRAFT_ENTRY = re.compile(r"([A-Za-z]+) - \((\d+)\)")
# Leading integer of a level spec such as "115~120" or "115-120".
_RE_LEADING_INT = re.compile(r"(\d+)")


def _apply_block_line(rec: Recipe, look: str, re_main: "re.Pattern[str]") -> None:
    """Fold one stripped, non-empty line that follows an "NQ:" line into *rec*."""
    # Icon decorator lines carry no recipe data.
    if look.endswith("-Icon.gif"):
        return

    # Main Craft – level. Ranges are reduced to their lower bound; a level
    # spec without a leading number falls back to 0.
    if (m_main := re_main.search(look)):
        m_level = _RE_LEADING_INT.match(m_main.group(1))
        rec.level = int(m_level.group(1)) if m_level else 0
        return

    # Sub crafts – entries that do not look like "Name - (n)" are ignored.
    if (m_sc := RE_SUBCRAFTS.match(look)):
        for part in m_sc.group(1).split(","):
            if (m_entry := _RE_SUBCRAFT_ENTRY.match(part.strip())):
                rec.subcrafts.append((m_entry.group(1), int(m_entry.group(2))))
        return

    # Key item
    if (m_ki := RE_KEY_ITEM.match(look)):
        rec.key_item = m_ki.group(1)
        return

    # HQ lines – anything starting with "HQ" is consumed here and never
    # treated as an ingredient, even when it is malformed.
    if look.startswith("HQ"):
        if (m_hq := RE_HQ.match(look)):
            slot = int(m_hq.group(1)) - 1
            # Guard the slot index: "HQ0" would otherwise wrap around to the
            # last slot and "HQ4"+ would raise IndexError.
            if 0 <= slot < len(rec.hq_yields):
                rec.hq_yields[slot] = split_item_qty(m_hq.group(2))
        return

    # Otherwise treat as ingredient
    rec.ingredients.append(split_item_qty(look))


def _extract_crystal(rec: Recipe) -> None:
    """Move the first "Crystal" ingredient of *rec* into ``rec.crystal``.

    Only that one entry is removed from the ingredient list; the crystal is
    set to "Unknown" when no ingredient mentions "Crystal".
    """
    for pos, (name, _qty) in enumerate(rec.ingredients):
        if "Crystal" in name:
            rec.crystal = name.split()[0]
            del rec.ingredients[pos]
            return
    rec.crystal = "Unknown"


def parse(txt_path: pathlib.Path, craft_name: str) -> List[Recipe]:
    """Parse a crafting text file into Recipe objects.

    The file is read as UTF-8 (undecodable bytes are dropped) and scanned
    once, line by line:

    1. A category header such as "Name (1-10)" sets the category given to
       every recipe that starts after it. Headers are recognised anywhere
       in the file and are never recorded as ingredients.
    2. A line beginning with "NQ:" starts a new recipe. The product name is
       the text after the prefix with any trailing " xN" quantity removed.
    3. Every following line, up to the next "NQ:" line or end of file, is
       attributed to that recipe: "Main Craft: <craft_name> - (<level>)"
       sets the level (lower bound of a range), "Sub Craft(s):" and
       "Key Item:" fill the matching fields, "HQ1:".."HQ3:" fill the HQ
       yield slots, "-Icon.gif" lines are skipped, and anything else is
       recorded as an ingredient.
    4. When the recipe is closed, the first ingredient containing the word
       "Crystal" determines the crystal type and is removed from the
       ingredient list; without one the crystal is "Unknown".

    Lines before the first "NQ:" line are ignored apart from category
    headers.

    Args:
        txt_path: Path of the ``<Craft>.txt`` dataset to read.
        craft_name: Craft name expected in "Main Craft:" lines; it is
            matched literally.

    Returns:
        The recipes in file order.

    Raises:
        OSError: If the dataset file cannot be read.
    """
    lines = txt_path.read_text(encoding="utf-8", errors="ignore").splitlines()

    # Craft-specific regex (compiled once per run)
    re_main = re.compile(rf"Main Craft: {re.escape(craft_name)} - \(([^)]*)\)")

    current_category: str = ""
    recipes: List[Recipe] = []
    rec: Optional[Recipe] = None  # recipe currently being filled, if any

    for raw in lines:
        line = raw.strip()

        # "NQ:" is the boundary between recipes: close the open one and
        # start the next.
        if line.startswith("NQ:"):
            if rec is not None:
                _extract_crystal(rec)
                recipes.append(rec)
            rec = Recipe(current_category, level=0)
            rec.name, _ = split_item_qty(line[len("NQ:"):].strip())
            continue

        # Category headers must be honoured between recipes too, otherwise
        # every recipe would inherit the first category of the file and the
        # header text would end up in an ingredient list.
        if (m_cat := RE_CATEGORY.match(line)):
            current_category = m_cat.group(1)
            continue

        if rec is not None and line:
            _apply_block_line(rec, line, re_main)

    # Close the last recipe, which has no following "NQ:" line.
    if rec is not None:
        _extract_crystal(rec)
        recipes.append(rec)

    return recipes
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Entrypoint
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def main() -> None:
    """CLI entrypoint.

    Converts each selected ``<Craft>.txt`` in the datasets directory into
    ``<Craft>_v2.csv`` next to it. A craft whose dataset file is missing is
    reported with a warning and skipped.

    Usage examples:
        # Process a single craft
        python recipes_to_csv_v2.py Woodworking

        # Process all *.txt files in the datasets directory
        python recipes_to_csv_v2.py --all

        # Omit positional arg – defaults to --all
        python recipes_to_csv_v2.py

    Raises:
        SystemExit: If all crafts are requested but the datasets directory
            contains no .txt file.
    """
    argp = argparse.ArgumentParser(description="Parse <Craft>.txt into CSV.")
    argp.add_argument("craft", nargs="?", help="Craft name, e.g. Woodworking, Smithing")
    argp.add_argument("--all", action="store_true", help="Process every .txt file in datasets/")
    args = argp.parse_args()

    # Determine which crafts to process
    if args.all or not args.craft:
        # glob() yields entries in filesystem order; sort so that processing
        # and log output are deterministic across machines and runs.
        crafts = sorted(p.stem for p in DATASETS_DIR.glob("*.txt"))
        if not crafts:
            raise SystemExit(f"No .txt files found in {DATASETS_DIR}")
    else:
        crafts = [args.craft.strip()]

    for craft in crafts:
        txt_path = DATASETS_DIR / f"{craft}.txt"
        if not txt_path.exists():
            print(f"[WARN] Dataset file not found: {txt_path}")
            continue

        csv_path = DATASETS_DIR / f"{craft}_v2.csv"
        recipes = parse(txt_path, craft)

        csv_path.parent.mkdir(parents=True, exist_ok=True)
        # newline="" lets the csv module control line endings itself.
        with csv_path.open("w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([
                "category",
                "level",
                "subcrafts",
                "name",
                "crystal",
                "key_item",
                "ingredients",
                "hq_yields",
            ])
            writer.writerows(r.row() for r in recipes)

        rel = csv_path.relative_to(PROJECT_ROOT)
        print(f"Wrote {len(recipes)} recipes -> {rel}")


if __name__ == "__main__":
    main()
|