Initial commit
This commit is contained in:
279
scripts/recipes_to_csv_v2.py
Normal file
279
scripts/recipes_to_csv_v2.py
Normal file
@@ -0,0 +1,279 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generic recipe parser for crafting disciplines -> <Craft>_v2.csv
|
||||
|
||||
Usage:
|
||||
python recipes_to_csv_v2.py <CRAFT>
|
||||
|
||||
Where <CRAFT> matches the name of the .txt file in the datasets directory,
|
||||
for example `Woodworking`, `Smithing`, `Goldsmithing`, `Alchemy`, etc.
|
||||
|
||||
The script produces a CSV `<Craft>_v2.csv` inside the datasets directory
|
||||
having the following columns (identical to the Woodworking v2 spec):
|
||||
|
||||
category, level, subcrafts, name, crystal, key_item, ingredients, hq_yields
|
||||
|
||||
See scripts/README.md for details of each column.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import pathlib
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
# Directory two levels up from this file (the parent of the directory that
# contains the script).
PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
# Folder holding the <Craft>.txt inputs and the generated <Craft>_v2.csv files.
DATASETS_DIR = PROJECT_ROOT.joinpath("datasets")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regex helpers (compiled at runtime where craft-dependent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Category header: a name made of letters, apostrophes and spaces, followed by
# a parenthesised numeric range, e.g. "Amateur (1-10)". Group 1 is the name.
RE_CATEGORY = re.compile(r"^([A-Za-z' ]+) \([0-9]+-[0-9]+\)$")

# "Sub Craft(s): <comma separated list>" - group 1 is the raw list.
RE_SUBCRAFTS = re.compile(r"Sub Craft\(s\): (.+)")

# "Key Item: <name>" - group 1 is the key item name.
RE_KEY_ITEM = re.compile(r"Key Item: (.+)")

# "NQ: <product>" - group 1 is the product text.
RE_NQ = re.compile(r"NQ:\s*(.+)")

# "HQ<digit>: <product>" - group 1 is the tier digit, group 2 the product text.
RE_HQ = re.compile(r"HQ(\d):\s*(.+)")

# Trailing quantity in the form "xN" (e.g. "Lumber x6"): group 1 is the item
# name, group 2 the count. Suffixes such as "+1" are not quantities; they mark
# an HQ variant and therefore stay part of the name.
RE_ITEM_QTY = re.compile(r"(.+?)\s*x(\d+)$", re.IGNORECASE)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper functions / dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def norm(s: str) -> str:
    """Trim *s* and collapse every run of whitespace into a single space."""
    # str.split() with no separator drops leading/trailing whitespace and
    # splits on whitespace runs, so re-joining yields the normalised text.
    return " ".join(s.split())
|
||||
|
||||
|
||||
def split_item_qty(text: str) -> Tuple[str, int]:
    """Separate an item label from its trailing "xN" quantity.

    The text is whitespace-normalised first. "Foo x3" gives ("Foo", 3); when
    no "xN" suffix is present the whole normalised text is the name and the
    quantity is 1.
    """
    cleaned = norm(text)
    found = RE_ITEM_QTY.match(cleaned)
    if found is None:
        return cleaned, 1
    return found.group(1), int(found.group(2))
|
||||
|
||||
|
||||
class Recipe:
    """A single parsed recipe; its fields mirror the output CSV columns."""

    __slots__ = (
        "category",
        "level",
        "subcrafts",
        "name",
        "crystal",
        "key_item",
        "ingredients",
        "hq_yields",
    )

    def __init__(self, category: str, level: int):
        # Values supplied by the caller.
        self.category = category
        self.level = level

        # Scalar fields start out empty and are filled in while parsing.
        self.name: str = ""
        self.crystal: str = ""
        self.key_item: Optional[str] = None

        # (name, number) pairs; for ingredients the number is a quantity.
        self.subcrafts: List[Tuple[str, int]] = []
        self.ingredients: List[Tuple[str, int]] = []

        # Three slots, one per HQ tier; a tier without data stays None.
        self.hq_yields: List[Optional[Tuple[str, int]]] = [None, None, None]

    def row(self) -> List[str]:
        """Return the recipe as a list of strings in CSV column order.

        List-valued fields are JSON-encoded (non-ASCII characters are kept
        as-is); a missing key item becomes an empty string.
        """

        def as_json(value) -> str:
            return json.dumps(value, ensure_ascii=False)

        key_item = self.key_item if self.key_item else ""
        return [
            self.category,
            str(self.level),
            as_json(self.subcrafts),
            self.name,
            self.crystal,
            key_item,
            as_json(self.ingredients),
            as_json(self.hq_yields),
        ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core parse routine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse(txt_path: pathlib.Path, craft_name: str) -> List[Recipe]:
    """Parse a crafting text file into Recipe objects.

    The file is read as UTF-8 (undecodable bytes are dropped) and scanned
    line by line:

    1. A category header (e.g. "Amateur (1-10)") sets the category that is
       assigned to every recipe started after it.
    2. A line starting with "NQ:" starts a new recipe; the text after the
       prefix (minus any trailing "xN" quantity) becomes the recipe name.
    3. Every line up to the next "NQ:" line (or end of file) forms that
       recipe's block. Within the block:
         * blank lines and lines ending in "-Icon.gif" are ignored;
         * a category header updates the category for *later* recipes;
         * "Main Craft: <craft_name> - (<level>)" sets the level, taking the
           leading integer (so "115~120" gives 115) or 0 if there is none;
         * "Sub Craft(s): ..." adds (craft, level) pairs;
         * "Key Item: ..." sets the key item;
         * "HQ1:" / "HQ2:" / "HQ3:" fill the matching HQ yield slot; other
           lines starting with "HQ" are dropped;
         * anything else is recorded as an ingredient.
    4. The first ingredient whose name contains "Crystal" supplies the
       crystal type (its first word) and all ingredients containing
       "Crystal" are removed; without one, the crystal is "Unknown".

    NOTE(review): block lines are attributed to the "NQ:" line that
    *precedes* them. If a dataset lists "Main Craft:"/ingredient lines before
    their "NQ:" line, metadata would be attached to the previous recipe -
    confirm against the actual dataset layout.

    Args:
        txt_path: Path of the <Craft>.txt dataset to read.
        craft_name: Craft name expected in "Main Craft:" lines.

    Returns:
        The recipes in file order.
    """
    lines = txt_path.read_text(encoding="utf-8", errors="ignore").splitlines()
    n = len(lines)
    i = 0

    current_category: str = ""
    recipes: List[Recipe] = []

    # Patterns used for every block; compiled once per call.
    re_main = re.compile(rf"Main Craft: {re.escape(craft_name)} - \(([^)]*)\)")
    re_leading_int = re.compile(r"(\d+)")
    re_subcraft_part = re.compile(r"([A-Za-z]+) - \((\d+)\)")

    while i < n:
        line = lines[i].strip()

        # Category header seen before/between recipes.
        if (m_cat := RE_CATEGORY.match(line)):
            current_category = m_cat.group(1)
            i += 1
            continue

        # Anything that is not an "NQ:" line at this level is skipped.
        if not line.startswith("NQ:"):
            i += 1
            continue

        rec = Recipe(current_category, level=0)
        rec.name, _ = split_item_qty(line[len("NQ:"):].strip())

        # The block runs up to (not including) the next "NQ:" line or EOF.
        i += 1
        block_start = i
        while i < n and not lines[i].lstrip().startswith("NQ:"):
            i += 1

        for raw in lines[block_start:i]:
            look = raw.strip()
            if not look or look.endswith("-Icon.gif"):
                continue

            # A category header inside the block would otherwise be swallowed
            # here: it must update the category for the following recipes and
            # must not be recorded as an ingredient of this one.
            if (m_cat := RE_CATEGORY.match(look)):
                current_category = m_cat.group(1)
                continue

            # Main Craft - level (lower bound when a range is given).
            if (m_main := re_main.search(look)):
                m_level = re_leading_int.match(m_main.group(1))
                rec.level = int(m_level.group(1)) if m_level else 0
                continue

            # Sub crafts: comma separated "<Craft> - (<level>)" parts.
            if (m_sc := RE_SUBCRAFTS.match(look)):
                for part in m_sc.group(1).split(","):
                    if (m_part := re_subcraft_part.match(part.strip())):
                        rec.subcrafts.append(
                            (m_part.group(1), int(m_part.group(2)))
                        )
                continue

            # Key item
            if (m_ki := RE_KEY_ITEM.match(look)):
                rec.key_item = m_ki.group(1)
                continue

            # HQ lines; malformed ones are dropped rather than kept as
            # ingredients.
            if look.startswith("HQ"):
                if (m_hq := RE_HQ.match(look)):
                    idx = int(m_hq.group(1)) - 1
                    # Only tiers 1-3 have a slot. Without this guard "HQ0"
                    # would write to index -1 and "HQ4"+ would raise
                    # IndexError.
                    if 0 <= idx < len(rec.hq_yields):
                        rec.hq_yields[idx] = split_item_qty(m_hq.group(2))
                continue

            # Otherwise treat as ingredient.
            rec.ingredients.append(split_item_qty(look))

        # Determine crystal and clean the ingredient list.
        crystal_name = next(
            (name for name, _qty in rec.ingredients if "Crystal" in name),
            None,
        )
        if crystal_name is not None:
            rec.crystal = crystal_name.split()[0]
            rec.ingredients = [
                pair for pair in rec.ingredients if "Crystal" not in pair[0]
            ]
        else:
            rec.crystal = "Unknown"

        recipes.append(rec)

    return recipes
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entrypoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main() -> None:
    """Command-line entry point: convert dataset text files to CSV.

    Invocations:
        python recipes_to_csv_v2.py Woodworking   # one craft
        python recipes_to_csv_v2.py --all         # every *.txt in datasets/
        python recipes_to_csv_v2.py               # same as --all

    For each selected craft the file <craft>.txt in the datasets directory is
    parsed and <craft>_v2.csv is written next to it. A missing input file
    only produces a warning. If "all" mode finds no .txt files the program
    exits with an error message.
    """
    cli = argparse.ArgumentParser(description="Parse <Craft>.txt into CSV.")
    cli.add_argument("craft", nargs="?", help="Craft name, e.g. Woodworking, Smithing")
    cli.add_argument("--all", action="store_true", help="Process every .txt file in datasets/")
    opts = cli.parse_args()

    # A single craft is processed only when one was named and --all is absent.
    if opts.craft and not opts.all:
        targets = [opts.craft.strip()]
    else:
        targets = [entry.stem for entry in DATASETS_DIR.glob("*.txt")]
        if not targets:
            raise SystemExit(f"No .txt files found in {DATASETS_DIR}")

    header = (
        "category",
        "level",
        "subcrafts",
        "name",
        "crystal",
        "key_item",
        "ingredients",
        "hq_yields",
    )

    for craft_name in targets:
        source_file = DATASETS_DIR / f"{craft_name}.txt"
        if not source_file.exists():
            print(f"[WARN] Dataset file not found: {source_file}")
            continue

        target_file = DATASETS_DIR / f"{craft_name}_v2.csv"
        parsed = parse(source_file, craft_name)

        target_file.parent.mkdir(parents=True, exist_ok=True)
        with target_file.open("w", newline="", encoding="utf-8") as handle:
            out = csv.writer(handle)
            out.writerow(list(header))
            out.writerows(recipe.row() for recipe in parsed)

        shown = target_file.relative_to(PROJECT_ROOT)
        print(f"Wrote {len(parsed)} recipes -> {shown}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user