Add get_character_from_url tool for direct URL-based fetching
Allows users to pass a specific URL to fetch character data from. Fandom URLs are auto-detected and processed with the structured MediaWiki API pipeline; non-Fandom URLs use a generic HTML scrape. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -115,6 +115,28 @@ Deletes a specific character from the cache.
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
### `get_character_from_url`

Fetches character data from a specific URL you provide. Fandom wiki URLs are detected automatically and processed with the same structured extraction as `get_character`, giving richer results. Non-Fandom URLs are scraped generically.

**Parameters:**

| Name | Type | Description |
|---|---|---|
| `url` | string | Full URL to the character page |
| `name` | string | Character name (optional — inferred from the page title for Fandom URLs) |
| `franchise` | string | Franchise name (optional) |

**Example prompt:**

> Use get_character_from_url to fetch data from https://finalfantasy.fandom.com/wiki/Tifa_Lockhart

**When to use instead of `get_character`:**

- The character's wiki page isn't found by the automatic search
- You want data from a specific page (e.g. a particular version or alternate wiki)
- The franchise isn't in the supported list and you have a direct Fandom URL
- You have a non-Fandom source you want to pull from

---
### `generate_image_prompt`
|
### `generate_image_prompt`
|
||||||
|
|
||||||
Builds a comma-separated tag list for image generation tools (Stable Diffusion, Midjourney, DALL-E, etc.).
|
Builds a comma-separated tag list for image generation tools (Stable Diffusion, Midjourney, DALL-E, etc.).
|
||||||
|
|||||||
@@ -415,6 +415,167 @@ def _build_character(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# URL-based fetching
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_FANDOM_URL_RE = re.compile(
|
||||||
|
r"^https?://(?P<wiki>[a-z0-9-]+)\.fandom\.com/wiki/(?P<title>.+?)(?:#.*)?$",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_character_from_url(url: str, name: str = "", franchise: str = "") -> CharacterData:
    """Fetch character data from a user-supplied URL.

    If the URL points to a Fandom wiki page, the MediaWiki API is used for
    structured section extraction (same as the franchise-based path).
    Otherwise, a plain HTML scrape is performed.

    Args:
        url: Full URL to the character page.
        name: Character name; if empty, inferred from the page title for
            Fandom URLs, else falls back to "Unknown".
        franchise: Franchise name; falls back to "Unknown" when omitted.
    """
    from urllib.parse import unquote  # stdlib; imported locally to keep this patch self-contained

    sections: dict[str, str] = {}
    sources: list[str] = [url]

    m = _FANDOM_URL_RE.match(url.strip())

    async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, headers=HEADERS) as client:
        if m:
            wiki = m.group("wiki")
            # Browser-copied URLs percent-encode characters such as
            # apostrophes ("Cloud%27s_..."); decode before querying the
            # MediaWiki API, which expects the plain page title.
            page_title = unquote(m.group("title")).replace("_", " ")
            fandom = await _fetch_fandom_by_title(client, page_title, wiki)
            if fandom:
                sections.update(fandom["sections"])
                # Use the canonical URL returned by the API
                sources = [fandom["url"]]
                if not name:
                    name = page_title
        else:
            # Generic HTML scrape for non-Fandom URLs
            scraped = await _scrape_generic(client, url)
            if scraped:
                sections.update(scraped)

    if not name:
        name = "Unknown"
    if not franchise:
        franchise = "Unknown"

    return _build_character(name, franchise, sections, sources)
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_fandom_by_title(
    client: httpx.AsyncClient, title: str, wiki: str
) -> dict | None:
    """Fetch a Fandom page by exact title (no search step needed).

    Returns a dict with "sections" (classified section texts keyed by
    lower-cased section title, plus "description" for the lead) and "url"
    (canonical page URL), or None when the page cannot be resolved.
    """
    base = f"https://{wiki}.fandom.com"
    api = f"{base}/api.php"

    # Resolve title to pageid.  "redirects" makes the API follow wiki
    # redirects, so a redirect stub resolves to the real article instead
    # of a pageid whose parse output is empty.
    try:
        resp = await client.get(
            api,
            params={"action": "query", "titles": title, "redirects": 1, "format": "json"},
        )
    except httpx.HTTPError:
        return None

    if resp.status_code != 200:
        return None

    pages = resp.json().get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    pageid = page.get("pageid")
    if not pageid:
        # Missing pages are returned with a negative key and no pageid.
        return None

    canonical_title = page.get("title", title)

    # Get section list
    try:
        resp = await client.get(
            api,
            params={"action": "parse", "pageid": pageid, "prop": "sections", "format": "json"},
        )
    except httpx.HTTPError:
        return None

    if resp.status_code != 200:
        return None

    all_sections = resp.json().get("parse", {}).get("sections", [])

    sections: dict[str, str] = {}
    # Section index 0 is the lead text before the first heading.
    lead_text = await _fetch_fandom_section(client, api, pageid, 0)
    if lead_text:
        sections["description"] = lead_text[:1500]

    for sec in all_sections:
        sec_title = _strip_html(sec.get("line", ""))
        # _classify_section acts as a relevance filter; unclassified
        # sections (trivia, gallery, ...) are skipped.
        if _classify_section(sec_title) is None:
            continue
        index = sec.get("index", "")
        if not index:
            continue
        text = await _fetch_fandom_section(client, api, pageid, index)
        if text:
            sections[sec_title.lower()] = text[:3000]

    return {
        "sections": sections,
        "url": f"{base}/wiki/{canonical_title.replace(' ', '_')}",
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def _scrape_generic(client: httpx.AsyncClient, url: str) -> dict[str, str] | None:
    """Best-effort HTML scrape for non-Fandom pages.

    Pulls a lead "description" from the first substantial paragraphs and
    collects any <h2>/<h3> sections whose heading classifies as relevant.
    Returns None when nothing usable was extracted or the fetch failed.
    """
    try:
        resp = await client.get(url)
    except httpx.HTTPError:
        return None
    if resp.status_code != 200:
        return None

    soup = BeautifulSoup(resp.text, "html.parser")

    # Drop page chrome that would pollute the extracted text.
    for junk in soup.select("nav, header, footer, script, style, aside, .sidebar, .navbox"):
        junk.decompose()

    result: dict[str, str] = {}

    # Lead description: first few substantial paragraphs, capped ~1500 chars.
    collected: list[str] = []
    total_len = 0
    for para in soup.find_all("p")[:10]:
        chunk = para.get_text(separator=" ", strip=True)
        if len(chunk) > 30:  # skip tiny boilerplate paragraphs
            collected.append(chunk)
            total_len += len(chunk)
            if total_len > 1500:
                break
    if collected:
        result["description"] = " ".join(collected)[:1500]

    # Headed sections: gather text between a relevant heading and the next one.
    for heading in soup.find_all(["h2", "h3"]):
        heading_text = heading.get_text(strip=True)
        if _classify_section(heading_text) is None:
            continue
        body_parts: list[str] = []
        for sib in heading.find_next_siblings():
            if sib.name in {"h2", "h3", "h1"}:
                break
            sib_text = sib.get_text(separator=" ", strip=True)
            if sib_text:
                body_parts.append(sib_text)
        if body_parts:
            result[heading_text.lower()] = " ".join(body_parts)[:3000]

    return result if result else None
|
||||||
|
|
||||||
|
|
||||||
def _extract_list_items(text: str) -> list[str]:
|
def _extract_list_items(text: str) -> list[str]:
|
||||||
"""Extract bullet items or split prose into sentences if no bullet structure."""
|
"""Extract bullet items or split prose into sentences if no bullet structure."""
|
||||||
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from mcp.server.fastmcp import FastMCP
|
from mcp.server.fastmcp import FastMCP
|
||||||
|
|
||||||
from .cache import delete_cached, get_cached, list_cached, save_cache
|
from .cache import delete_cached, get_cached, list_cached, save_cache
|
||||||
from .fetcher import fetch_character
|
from .fetcher import fetch_character, fetch_character_from_url
|
||||||
from .models import CharacterData
|
from .models import CharacterData
|
||||||
|
|
||||||
mcp = FastMCP("character-details")
|
mcp = FastMCP("character-details")
|
||||||
@@ -123,6 +123,29 @@ async def remove_character(name: str, franchise: str) -> str:
|
|||||||
return f"{name} ({franchise}) was not in the cache."
|
return f"{name} ({franchise}) was not in the cache."
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def get_character_from_url(
    url: str,
    name: str = "",
    franchise: str = "",
) -> str:
    """
    Fetch character details from a specific URL.

    If the URL points to a Fandom wiki page, structured section extraction
    is used automatically for richer results. Otherwise a generic HTML
    scrape is performed.

    Args:
        url: Full URL to the character page (e.g. "https://finalfantasy.fandom.com/wiki/Tifa_Lockhart")
        name: Character name (optional — inferred from the page title for Fandom URLs)
        franchise: Franchise or series (optional)
    """
    # Fetch, persist to the cache, then render for the client.
    fetched = await fetch_character_from_url(url, name, franchise)
    save_cache(fetched)
    return _format_character(fetched)
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def generate_image_prompt(
|
async def generate_image_prompt(
|
||||||
name: str,
|
name: str,
|
||||||
|
|||||||
Reference in New Issue
Block a user