Add get_character_from_url tool for direct URL-based fetching
Allows users to pass a specific URL to fetch character data from. Fandom URLs are auto-detected and processed with the structured MediaWiki API pipeline; non-Fandom URLs use a generic HTML scrape. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -115,6 +115,28 @@ Deletes a specific character from the cache.
|
||||
|
||||
---
|
||||
|
||||
### `get_character_from_url`
|
||||
|
||||
Fetches character data from a specific URL you provide. Fandom wiki URLs are detected automatically and processed with the same structured extraction as `get_character`, giving richer results. Non-Fandom URLs are scraped generically.
|
||||
|
||||
**Parameters:**
|
||||
| Name | Type | Description |
|---|---|---|
| `url` | string | Full URL to the character page |
| `name` | string | Character name (optional — inferred from the page title for Fandom URLs) |
| `franchise` | string | Franchise name (optional) |
|
||||
**Example prompt:**
|
||||
> Use get_character_from_url to fetch data from https://finalfantasy.fandom.com/wiki/Tifa_Lockhart
|
||||
|
||||
**When to use instead of `get_character`:**
|
||||
- The character's wiki page isn't found by the automatic search
|
||||
- You want data from a specific page (e.g. a particular version or alternate wiki)
|
||||
- The franchise isn't in the supported list and you have a direct Fandom URL
|
||||
- You have a non-Fandom source you want to pull from
|
||||
|
||||
---
|
||||
|
||||
### `generate_image_prompt`
|
||||
|
||||
Builds a comma-separated tag list for image generation tools (Stable Diffusion, Midjourney, DALL-E, etc.).
|
||||
|
||||
@@ -415,6 +415,167 @@ def _build_character(
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# URL-based fetching
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_FANDOM_URL_RE = re.compile(
|
||||
r"^https?://(?P<wiki>[a-z0-9-]+)\.fandom\.com/wiki/(?P<title>.+?)(?:#.*)?$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
async def fetch_character_from_url(url: str, name: str = "", franchise: str = "") -> CharacterData:
    """Fetch character data from a user-supplied URL.

    If the URL points to a Fandom wiki page, the MediaWiki API is used for
    structured section extraction (same as the franchise-based path).
    Otherwise, a plain HTML scrape is performed. If the Fandom API lookup
    fails (missing page, network error), the generic scrape is used as a
    fallback instead of returning an empty character.

    Args:
        url: Full URL of the character page.
        name: Character name; inferred from the Fandom page title when empty,
            otherwise defaults to "Unknown".
        franchise: Franchise name; defaults to "Unknown" when empty.

    Returns:
        A CharacterData built from whatever sections could be extracted.
    """
    from urllib.parse import unquote

    sections: dict[str, str] = {}
    sources: list[str] = [url]

    m = _FANDOM_URL_RE.match(url.strip())

    async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, headers=HEADERS) as client:
        if m:
            wiki = m.group("wiki")
            # Fandom titles are percent-encoded in URLs (e.g. %27 for an
            # apostrophe); decode before querying the MediaWiki API, which
            # expects plain, space-separated titles.
            page_title = unquote(m.group("title")).replace("_", " ")
            fandom = await _fetch_fandom_by_title(client, page_title, wiki)
            if fandom:
                sections.update(fandom["sections"])
                # Use the canonical URL returned by the API
                sources = [fandom["url"]]
                if not name:
                    name = page_title
            else:
                # API lookup failed — fall back to scraping the page HTML
                # directly rather than returning an empty character.
                scraped = await _scrape_generic(client, url)
                if scraped:
                    sections.update(scraped)
        else:
            # Generic HTML scrape for non-Fandom URLs
            scraped = await _scrape_generic(client, url)
            if scraped:
                sections.update(scraped)

    if not name:
        name = "Unknown"
    if not franchise:
        franchise = "Unknown"

    return _build_character(name, franchise, sections, sources)
|
||||
|
||||
|
||||
async def _fetch_fandom_by_title(
    client: httpx.AsyncClient, title: str, wiki: str
) -> dict | None:
    """Fetch a Fandom page by exact title (no search step needed).

    Args:
        client: Shared async HTTP client.
        title: Page title with spaces (not underscores), already URL-decoded.
        wiki: Fandom subdomain, e.g. "finalfantasy".

    Returns:
        ``{"sections": {...}, "url": canonical_page_url}`` on success, or
        ``None`` on any network/API failure or if the page does not exist.
    """
    base = f"https://{wiki}.fandom.com"
    api = f"{base}/api.php"

    # Resolve title to pageid. "redirects": 1 makes the API follow wiki
    # redirects, so alias titles resolve to the real article instead of a
    # near-empty redirect page.
    try:
        resp = await client.get(
            api,
            params={"action": "query", "titles": title, "redirects": 1, "format": "json"},
        )
    except httpx.HTTPError:
        return None

    if resp.status_code != 200:
        return None

    pages = resp.json().get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    pageid = page.get("pageid")
    if not pageid:
        # Missing pages are reported without a "pageid" field.
        return None

    canonical_title = page.get("title", title)

    # Get section list
    try:
        resp = await client.get(
            api,
            params={"action": "parse", "pageid": pageid, "prop": "sections", "format": "json"},
        )
    except httpx.HTTPError:
        return None

    if resp.status_code != 200:
        return None

    all_sections = resp.json().get("parse", {}).get("sections", [])

    sections: dict[str, str] = {}
    # Section index 0 is the lead text (above the first heading).
    lead_text = await _fetch_fandom_section(client, api, pageid, 0)
    if lead_text:
        sections["description"] = lead_text[:1500]

    for sec in all_sections:
        sec_title = _strip_html(sec.get("line", ""))
        # _classify_section acts as a relevance filter: unclassified
        # headings are skipped entirely.
        if _classify_section(sec_title) is None:
            continue
        index = sec.get("index", "")
        if not index:
            continue
        text = await _fetch_fandom_section(client, api, pageid, index)
        if text:
            sections[sec_title.lower()] = text[:3000]

    return {
        "sections": sections,
        # NOTE(review): special characters in the title are left unescaped
        # here; browsers accept this, but consider urllib.parse.quote if
        # these URLs are consumed programmatically.
        "url": f"{base}/wiki/{canonical_title.replace(' ', '_')}",
    }
|
||||
|
||||
|
||||
async def _scrape_generic(client: httpx.AsyncClient, url: str) -> dict[str, str] | None:
    """Best-effort HTML scrape for non-Fandom pages.

    Pulls a leading-paragraph description plus any h2/h3-headed sections
    that `_classify_section` recognises. Returns None when nothing usable
    could be extracted or the request failed.
    """
    try:
        resp = await client.get(url)
    except httpx.HTTPError:
        return None
    if resp.status_code != 200:
        return None

    soup = BeautifulSoup(resp.text, "html.parser")

    # Strip page chrome that would pollute the extracted text.
    for junk in soup.select("nav, header, footer, script, style, aside, .sidebar, .navbox"):
        junk.decompose()

    sections: dict[str, str] = {}

    # Description: accumulate substantial paragraphs from the top of the
    # page until ~1500 characters have been gathered.
    collected: list[str] = []
    total = 0
    for para in soup.find_all("p")[:10]:
        chunk = para.get_text(separator=" ", strip=True)
        if len(chunk) > 30:
            collected.append(chunk)
            total += len(chunk)
        if total > 1500:
            break
    if collected:
        sections["description"] = " ".join(collected)[:1500]

    # Headed sections: keep only headings the classifier recognises.
    for hdr in soup.find_all(["h2", "h3"]):
        hdr_title = hdr.get_text(strip=True)
        if _classify_section(hdr_title) is None:
            continue
        # Walk forward through siblings until the next heading closes
        # this section.
        body_parts: list[str] = []
        for node in hdr.find_next_siblings():
            if node.name in {"h2", "h3", "h1"}:
                break
            node_text = node.get_text(separator=" ", strip=True)
            if node_text:
                body_parts.append(node_text)
        if body_parts:
            sections[hdr_title.lower()] = " ".join(body_parts)[:3000]

    return sections or None
|
||||
|
||||
|
||||
def _extract_list_items(text: str) -> list[str]:
|
||||
"""Extract bullet items or split prose into sentences if no bullet structure."""
|
||||
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from .cache import delete_cached, get_cached, list_cached, save_cache
|
||||
from .fetcher import fetch_character
|
||||
from .fetcher import fetch_character, fetch_character_from_url
|
||||
from .models import CharacterData
|
||||
|
||||
mcp = FastMCP("character-details")
|
||||
@@ -123,6 +123,29 @@ async def remove_character(name: str, franchise: str) -> str:
|
||||
return f"{name} ({franchise}) was not in the cache."
|
||||
|
||||
|
||||
# NOTE(review): FastMCP typically surfaces the docstring below verbatim as
# the tool description shown to MCP clients — keep it client-facing when
# editing. TODO confirm against the FastMCP version in use.
@mcp.tool()
async def get_character_from_url(
    url: str,
    name: str = "",
    franchise: str = "",
) -> str:
    """
    Fetch character details from a specific URL.

    If the URL points to a Fandom wiki page, structured section extraction
    is used automatically for richer results. Otherwise a generic HTML
    scrape is performed.

    Args:
        url: Full URL to the character page (e.g. "https://finalfantasy.fandom.com/wiki/Tifa_Lockhart")
        name: Character name (optional — inferred from the page title for Fandom URLs)
        franchise: Franchise or series (optional)
    """
    # Delegate the fetch/URL-detection logic to the fetcher module.
    character = await fetch_character_from_url(url, name, franchise)
    # Persist the result to the local cache before formatting the reply.
    save_cache(character)
    return _format_character(character)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def generate_image_prompt(
|
||||
name: str,
|
||||
|
||||
Reference in New Issue
Block a user