Files
character-browser/services/llm.py
Aodhan Collins 32a73b02f5 Add semantic tagging, search, favourite/NSFW filtering, and LLM job queue
Replaces old list-format tags (which duplicated prompt content) with structured
dict tags per category (origin_series, outfit_type, participants, style_type,
scene_type, etc.). Tags are now purely organizational metadata — removed from
the prompt pipeline entirely.

Adds is_favourite and is_nsfw columns to all 8 resource models. Favourite is
DB-only (user preference); NSFW is mirrored in JSON tags for rescan persistence.
All library pages get filter controls and favourites-first sorting.

Introduces a parallel LLM job queue (_enqueue_task + _llm_queue_worker) for
background tag regeneration, with the same status polling UI as ComfyUI jobs.
Fixes call_llm() to use has_request_context() fallback for background threads.

Adds global search (/search) across resources and gallery images, with navbar
search bar. Adds gallery image sidecar JSON for per-image favourite/NSFW metadata.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 03:22:09 +00:00

230 lines
8.9 KiB
Python

import os
import json
import asyncio
import requests
from flask import has_request_context, request as flask_request
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from models import Settings
# OpenAI-compatible function-calling schemas offered to the LLM.
# These entries are forwarded verbatim in the "tools" field of the chat
# request; when the model requests one, call_llm executes it through the
# danbooru-mcp container via call_mcp_tool and feeds the result back.
DANBOORU_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_tags",
            "description": "Prefix/full-text search for Danbooru tags. Returns rich tag objects ordered by relevance.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search string. Trailing * added automatically."},
                    "limit": {"type": "integer", "description": "Max results (1-200)", "default": 20},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "validate_tags",
            "description": "Exact-match validation for a list of tags. Splits into valid, deprecated, and invalid.",
            "parameters": {
                "type": "object",
                "properties": {
                    "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags to validate."}
                },
                "required": ["tags"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "suggest_tags",
            "description": "Autocomplete-style suggestions for a partial or approximate tag. Sorted by post count.",
            "parameters": {
                "type": "object",
                "properties": {
                    "partial": {"type": "string", "description": "Partial tag or rough approximation."},
                    "limit": {"type": "integer", "description": "Max suggestions (1-50)", "default": 10},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["partial"]
            }
        }
    }
]
async def _run_mcp_tool(name, arguments):
    """Spawn the danbooru-mcp Docker image and invoke one tool over stdio.

    A fresh container is started per call; the session is initialized,
    the tool is invoked, and the text of the first content item of the
    result is returned.
    """
    params = StdioServerParameters(
        command="docker",
        args=["run", "--rm", "-i", "danbooru-mcp:latest"],
    )
    async with stdio_client(params) as (reader, writer):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            reply = await session.call_tool(name, arguments)
            return reply.content[0].text
def call_mcp_tool(name, arguments):
    """Synchronous wrapper around _run_mcp_tool.

    Failures are logged and converted to a JSON error payload so the
    string can still be handed back to the LLM as tool output.
    """
    try:
        output = asyncio.run(_run_mcp_tool(name, arguments))
    except Exception as exc:
        print(f"MCP Tool Error: {exc}")
        output = json.dumps({"error": str(exc)})
    return output
async def _run_character_mcp_tool(name, arguments):
    """Spawn the character-mcp Docker image and invoke one tool over stdio.

    Mounts the character-cache volume so the container can persist
    character details between runs. Returns the text of the first
    content item of the tool result.
    """
    params = StdioServerParameters(
        command="docker",
        args=[
            "run", "--rm", "-i",
            "-v", "character-cache:/root/.local/share/character_details",
            "character-mcp:latest",
        ],
    )
    async with stdio_client(params) as (reader, writer):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            reply = await session.call_tool(name, arguments)
            return reply.content[0].text
def call_character_mcp_tool(name, arguments):
    """Synchronous wrapper around _run_character_mcp_tool.

    Unlike call_mcp_tool, failures yield None rather than a JSON error
    payload; the error is only logged.
    """
    try:
        output = asyncio.run(_run_character_mcp_tool(name, arguments))
    except Exception as exc:
        print(f"Character MCP Tool Error: {exc}")
        output = None
    return output
def load_prompt(filename):
    """Return the contents of a prompt template from data/prompts.

    Args:
        filename: Bare file name inside the data/prompts directory
            (resolved relative to the current working directory).

    Returns:
        The file's text, or None if the file does not exist.
    """
    path = os.path.join('data/prompts', filename)
    # EAFP: open directly instead of exists()+open — avoids the TOCTOU race
    # between the check and the read, and pins the encoding so the result
    # doesn't depend on the platform's default codec.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        return None
def call_llm(prompt, system_prompt="You are a creative assistant."):
    """Run a chat completion against the configured provider, with tool calling.

    Sends *prompt* (plus *system_prompt*) to either OpenRouter or a local
    OpenAI-compatible endpoint (Ollama/LMStudio), offering the Danbooru MCP
    tools. Tool calls requested by the model are executed via call_mcp_tool
    and their results fed back until the model produces a plain text answer.

    Args:
        prompt: The user message.
        system_prompt: The system message establishing the assistant role.

    Returns:
        The assistant's final message content (str).

    Raises:
        ValueError: If required settings are missing or unconfigured.
        RuntimeError: On API failure, persistent response-format errors, or
            when the conversation exceeds the maximum number of turns.
    """
    settings = Settings.query.first()
    if not settings:
        raise ValueError("Settings not configured.")
    is_local = settings.llm_provider != 'openrouter'
    if not is_local:
        if not settings.openrouter_api_key:
            raise ValueError("OpenRouter API Key not configured. Please configure it in Settings.")
        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
            # has_request_context() lets this run from background worker
            # threads where no Flask request is active.
            "HTTP-Referer": flask_request.url_root if has_request_context() else "http://localhost:5000/",
            "X-Title": "Character Browser"
        }
        model = settings.openrouter_model or 'google/gemini-2.0-flash-001'
    else:
        # Local provider (Ollama or LMStudio)
        if not settings.local_base_url:
            raise ValueError(f"{settings.llm_provider.title()} Base URL not configured.")
        url = f"{settings.local_base_url.rstrip('/')}/chat/completions"
        headers = {"Content-Type": "application/json"}
        model = settings.local_model
        if not model:
            raise ValueError(f"No local model selected for {settings.llm_provider.title()}. Please select one in Settings.")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]
    max_turns = 15
    tool_turns_remaining = 8  # stop offering tools after this many tool-calling turns
    use_tools = True
    format_retries = 3  # retries allowed for unexpected response format
    while max_turns > 0:
        max_turns -= 1
        data = {
            "model": model,
            "messages": messages,
        }
        # Only add tools if supported/requested and we haven't exhausted tool turns
        if use_tools and tool_turns_remaining > 0:
            data["tools"] = DANBOORU_TOOLS
            data["tool_choice"] = "auto"
        # Initialized so the except blocks can tell whether the POST got far
        # enough to produce a response (it stays None on e.g. connection errors).
        response = None
        try:
            response = requests.post(url, headers=headers, json=data, timeout=120)
            # If 400 Bad Request and we were using tools, try once without tools
            if response.status_code == 400 and use_tools:
                print(f"LLM Provider {settings.llm_provider} rejected tools. Retrying without tool calling...")
                use_tools = False
                max_turns += 1  # Reset turn for the retry
                continue
            response.raise_for_status()
            result = response.json()
            # Validate expected OpenAI-compatible response shape
            if 'choices' not in result or not result['choices']:
                raise KeyError('choices')
            message = result['choices'][0].get('message')
            if message is None:
                raise KeyError('message')
            if message.get('tool_calls'):
                tool_turns_remaining -= 1
                messages.append(message)
                for tool_call in message['tool_calls']:
                    name = tool_call['function']['name']
                    args = json.loads(tool_call['function']['arguments'])
                    print(f"Executing MCP tool: {name}({args})")
                    tool_result = call_mcp_tool(name, args)
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call['id'],
                        "name": name,
                        "content": tool_result
                    })
                if tool_turns_remaining <= 0:
                    print("Tool turn limit reached — next request will not offer tools")
                continue
            return message['content']
        except requests.exceptions.RequestException as e:
            # Previously this read response.text inside a bare except, which
            # hid the NameError raised when the POST itself failed and
            # `response` was never bound. Guard explicitly instead.
            error_body = f" - Body: {response.text}" if response is not None else ""
            raise RuntimeError(f"LLM API request failed: {str(e)}{error_body}") from e
        except (KeyError, IndexError) as e:
            # Log the raw response to help diagnose the issue
            raw = response.text[:500] if response is not None else ""
            print(f"Unexpected LLM response format (key={e}). Raw response: {raw}")
            if format_retries > 0:
                format_retries -= 1
                max_turns += 1  # don't burn a turn on a format error
                # Ask the model to try again with the correct format
                messages.append({
                    "role": "user",
                    "content": (
                        "Your previous response was not in the expected format. "
                        "Please respond with valid JSON only, exactly as specified in the system prompt. "
                        "Do not include any explanation or markdown — only the raw JSON object."
                    )
                })
                print(f"Retrying after format error ({format_retries} retries left)…")
                continue
            raise RuntimeError(f"Unexpected LLM response format after retries: {str(e)}") from e
    raise RuntimeError("LLM tool calling loop exceeded maximum turns")