211 lines
6.6 KiB
Python
211 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Improved parser for Woodworking recipes -> Woodworking_v2.csv
|
|
|
|
This version follows the spec provided by the user:
|
|
|
|
Category: category name (without level range)
|
|
Level: recipe level (integer)
|
|
Sub-Crafts: JSON list of [name, level]
|
|
Name: product NQ name
|
|
Crystal: crystal element (Earth, Wind, etc.)
|
|
Key Item: key item name or null (a missing key item is written as an empty CSV cell)
|
|
Ingredients: JSON list of [name, quantity]
|
|
HQ Yields: JSON list ordered HQ1..HQ3 of [name, quantity] (nulls if N/A)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import json
|
|
import pathlib
|
|
import re
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
|
|
TXT_PATH = PROJECT_ROOT / "datasets/Woodworking.txt"
|
|
CSV_PATH = PROJECT_ROOT / "datasets/Woodworking_v2.csv"
|
|
|
|
RE_CATEGORY = re.compile(r"^([A-Za-z' ]+) \([0-9]+-[0-9]+\)$")
|
|
RE_MAIN = re.compile(r"Main Craft: Woodworking - \((\d+)\)")
|
|
RE_SUBCRAFTS = re.compile(r"Sub Craft\(s\): (.+)")
|
|
RE_KEY_ITEM = re.compile(r"Key Item: (.+)")
|
|
RE_NQ = re.compile(r"NQ:\s*(.+)")
|
|
RE_HQ = re.compile(r"HQ(\d):\s*(.+)")
|
|
RE_ITEM_QTY = re.compile(r"(.+?)\s*(?:x|\+)(\d+)$")
|
|
|
|
|
|
def norm(s: str) -> str:
    """Return *s* trimmed, with each inner whitespace run reduced to one space.

    Leading and trailing whitespace is stripped first; afterwards every run
    of one or more whitespace characters is replaced by a single space.
    """
    trimmed = s.strip()
    collapsed = re.sub(r"\s+", " ", trimmed)
    return collapsed
|
|
|
|
|
|
def split_item_qty(text: str) -> Tuple[str, int]:
    """Split an item description into ``(name, quantity)``.

    The text is whitespace-normalised with ``norm`` and then matched against
    ``RE_ITEM_QTY``. On a match, the first group is the name and the second
    group (digits) is the quantity. Without a match the whole normalised
    text is the name and the quantity defaults to 1.
    """
    cleaned = norm(text)
    found = RE_ITEM_QTY.match(cleaned)
    if found is None:
        # No trailing quantity marker: a single unit of the item.
        return cleaned, 1
    return found.group(1), int(found.group(2))
|
|
|
|
|
|
class Recipe:
    """A single parsed recipe and its conversion to a CSV row."""

    __slots__ = (
        "category", "level", "subcrafts", "name",
        "crystal", "key_item", "ingredients", "hq_yields",
    )

    def __init__(self, category: str, level: int):
        """Create a recipe with the given category and level; other fields start empty."""
        self.category = category
        self.level = level
        self.name: str = ""
        self.crystal: str = ""
        self.key_item: Optional[str] = None
        # (name, level) pairs for each sub-craft.
        self.subcrafts: List[Tuple[str, int]] = []
        # (name, quantity) pairs for each ingredient.
        self.ingredients: List[Tuple[str, int]] = []
        # Slots 0, 1, 2 hold HQ1, HQ2, HQ3 as (name, quantity), or None.
        self.hq_yields: List[Optional[Tuple[str, int]]] = [None] * 3

    def row(self) -> List[str]:
        """Return the recipe as a list of eight strings, one per CSV column.

        List-valued fields are serialised as JSON text; a missing key item
        becomes an empty string.
        """
        def as_json(value) -> str:
            return json.dumps(value, ensure_ascii=False)

        key_item_cell = self.key_item if self.key_item else ""
        level_cell = str(self.level)
        return [
            self.category,
            level_cell,
            as_json(self.subcrafts),
            self.name,
            self.crystal,
            key_item_cell,
            as_json(self.ingredients),
            as_json(self.hq_yields),
        ]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def parse() -> List[Recipe]:
    """Parse the text file at ``TXT_PATH`` into a list of ``Recipe`` objects.

    The file is scanned line by line:

    * A line matching ``RE_CATEGORY`` sets the category used for the
      recipes that follow it.
    * A line matching ``RE_MAIN`` starts a recipe. It may be followed by
      optional "Sub Craft(s):" and "Key Item:" lines, then ingredient lines,
      then an "NQ:" line, then "HQ<n>:" lines.
    * Any other line outside a recipe is skipped.

    The first ingredient whose name contains "Crystal" supplies the crystal
    element (its first word); all ingredients containing "Crystal" are then
    removed from the ingredient list. If none is found the crystal is set
    to "Unknown".

    Returns:
        The recipes in the order they appear in the file.
    """
    lines = TXT_PATH.read_text(encoding="utf-8", errors="ignore").splitlines()
    n = len(lines)
    i = 0
    current_category = ""
    recipes: List[Recipe] = []

    while i < n:
        line = lines[i].strip()

        # Category header: remember it for the recipes that follow.
        m_cat = RE_CATEGORY.match(line)
        if m_cat:
            current_category = m_cat.group(1)
            i += 1
            continue

        # Anything that is not a "Main Craft" line is skipped here.
        m_main = RE_MAIN.search(line)
        if not m_main:
            i += 1
            continue

        rec = Recipe(current_category, int(m_main.group(1)))
        i += 1

        # Optional "Sub Craft(s)" / "Key Item" lines; blank lines are skipped.
        # The first line that is neither ends the metadata section.
        while i < n:
            look = lines[i].strip()
            if not look:
                i += 1
                continue

            m_sub = RE_SUBCRAFTS.match(look)
            if m_sub:
                # Sub-crafts are comma separated: "Smithing - (3), ..."
                for part in m_sub.group(1).split(","):
                    m_sc = re.match(r"([A-Za-z]+) - \((\d+)\)", part.strip())
                    if m_sc:
                        rec.subcrafts.append((m_sc.group(1), int(m_sc.group(2))))
                i += 1
                continue

            m_key = RE_KEY_ITEM.match(look)
            if m_key:
                rec.key_item = m_key.group(1)
                i += 1
                continue

            break

        # Collect ingredients until the NQ line. Also stop at the next
        # recipe or category header so that a recipe with a missing NQ line
        # does not swallow the recipe that follows it.
        while i < n:
            ingr_line = lines[i].strip()
            if ingr_line.startswith("NQ:"):
                break
            if RE_MAIN.search(ingr_line) or RE_CATEGORY.match(ingr_line):
                break
            if ingr_line and not ingr_line.endswith("-Icon.gif"):
                rec.ingredients.append(split_item_qty(ingr_line))
            i += 1

        # Determine the crystal element, e.g. "Earth Crystal" -> "Earth".
        crystal_name = next(
            (name for name, _ in rec.ingredients if "Crystal" in name),
            None,
        )
        if crystal_name is not None:
            rec.crystal = crystal_name.split()[0]
            # The crystal is reported in its own column, not as an ingredient.
            rec.ingredients = [
                pair for pair in rec.ingredients if "Crystal" not in pair[0]
            ]
        else:
            rec.crystal = "Unknown"

        # NQ line: consumed only when it really is one, so a header that
        # ended the ingredient section is left for the outer loop.
        if i < n and lines[i].lstrip().startswith("NQ:"):
            m_nq = RE_NQ.match(lines[i].strip())
            if m_nq:
                rec.name, _ = split_item_qty(m_nq.group(1))
            i += 1

        # Skip blank lines before HQ entries.
        while i < n and not lines[i].strip():
            i += 1

        # HQ lines. Only HQ1..HQ3 have a slot; other digits are ignored
        # instead of raising IndexError (HQ4+) or overwriting HQ3 (HQ0).
        while i < n and lines[i].lstrip().startswith("HQ"):
            m_hq = RE_HQ.match(lines[i].strip())
            if m_hq:
                idx = int(m_hq.group(1)) - 1
                if 0 <= idx < len(rec.hq_yields):
                    rec.hq_yields[idx] = split_item_qty(m_hq.group(2))
            i += 1

        recipes.append(rec)

    return recipes
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main() -> None:
    """Parse the recipe text file and write the recipes to ``CSV_PATH``.

    The output directory is created if needed. The CSV starts with a header
    row followed by one row per recipe, and a one-line summary is printed
    at the end.
    """
    header = [
        "category",
        "level",
        "subcrafts",
        "name",
        "crystal",
        "key_item",
        "ingredients",
        "hq_yields",
    ]
    parsed = parse()

    CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    with CSV_PATH.open("w", newline="", encoding="utf-8") as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(recipe.row() for recipe in parsed)

    shown_path = CSV_PATH.relative_to(PROJECT_ROOT)
    print(f"Wrote {len(parsed)} recipes -> {shown_path}")
|
|
|
|
|
|
# Script entry point: run the conversion only when executed directly.
if __name__ == "__main__":
    main()
|