Add extra prompts, endless generation, random character default, and small fixes
- Add extra positive/negative prompt textareas to all 9 detail pages with session persistence
- Add Endless generation button to all detail pages (continuous preview generation until stopped)
- Default character selector to "Random Character" on all secondary detail pages
- Fix queue clear endpoint (remove spurious auth check)
- Refactor app.py into routes/ and services/ modules
- Update CLAUDE.md with new architecture documentation
- Various data file updates and cleanup

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
203
services/llm.py
Normal file
203
services/llm.py
Normal file
@@ -0,0 +1,203 @@
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
import requests
|
||||
from flask import request as flask_request
|
||||
from mcp import ClientSession, StdioServerParameters
|
||||
from mcp.client.stdio import stdio_client
|
||||
from models import Settings
|
||||
|
||||
# OpenAI-compatible function schemas advertised to the LLM so it can look up
# Danbooru tags.  Each entry follows the Chat Completions "tools" format; the
# actual execution happens through the danbooru-mcp Docker image (see
# _run_mcp_tool / call_mcp_tool in this module).
DANBOORU_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_tags",
            "description": "Prefix/full-text search for Danbooru tags. Returns rich tag objects ordered by relevance.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search string. Trailing * added automatically."},
                    "limit": {"type": "integer", "description": "Max results (1-200)", "default": 20},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "validate_tags",
            "description": "Exact-match validation for a list of tags. Splits into valid, deprecated, and invalid.",
            "parameters": {
                "type": "object",
                "properties": {
                    "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags to validate."}
                },
                "required": ["tags"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "suggest_tags",
            "description": "Autocomplete-style suggestions for a partial or approximate tag. Sorted by post count.",
            "parameters": {
                "type": "object",
                "properties": {
                    "partial": {"type": "string", "description": "Partial tag or rough approximation."},
                    "limit": {"type": "integer", "description": "Max suggestions (1-50)", "default": 10},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["partial"]
            }
        }
    }
]
|
||||
|
||||
|
||||
async def _run_mcp_tool(name, arguments):
    """Invoke a single MCP tool inside a fresh danbooru-mcp Docker container.

    Spawns the container over stdio, initializes an MCP session, calls the
    named tool with ``arguments``, and returns the text of the first content
    item in the result.  A new container is started per call.
    """
    params = StdioServerParameters(
        command="docker",
        args=["run", "--rm", "-i", "danbooru-mcp:latest"],
    )
    async with stdio_client(params) as (reader, writer):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            outcome = await session.call_tool(name, arguments)
            return outcome.content[0].text
|
||||
|
||||
|
||||
def call_mcp_tool(name, arguments):
    """Synchronous wrapper around the async MCP tool runner.

    Never raises: any failure (Docker unavailable, tool error, etc.) is
    reported as a JSON object of the form {"error": "..."} so the LLM
    tool-calling loop can feed it back to the model and continue.
    """
    try:
        result = asyncio.run(_run_mcp_tool(name, arguments))
    except Exception as e:
        print(f"MCP Tool Error: {e}")
        return json.dumps({"error": str(e)})
    return result
|
||||
|
||||
|
||||
def load_prompt(filename):
    """Load an LLM prompt template from the data/prompts directory.

    Args:
        filename: Bare file name (e.g. "scene.txt") under data/prompts.

    Returns:
        The file contents as a string, or None when the file does not exist.
    """
    path = os.path.join('data/prompts', filename)
    try:
        # EAFP: open directly instead of exists()+open() — avoids the
        # check-then-use race and a redundant stat call.  Explicit UTF-8 so
        # prompt files read identically regardless of the host locale.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers a path component being a regular file,
        # which os.path.exists() also reported as "missing" → None.
        return None
|
||||
|
||||
|
||||
def call_llm(prompt, system_prompt="You are a creative assistant."):
    """Send a chat-completion request to the configured LLM provider.

    Supports OpenRouter and local OpenAI-compatible endpoints (Ollama /
    LMStudio).  Runs a tool-calling loop: while the model requests one of the
    DANBOORU_TOOLS, each tool is executed via MCP and its result appended to
    the conversation, until the model returns plain content or the turn
    budget runs out.

    Args:
        prompt: User message content.
        system_prompt: System message content.

    Returns:
        The assistant message content (str) from the final response.

    Raises:
        ValueError: When settings are missing or incomplete.
        RuntimeError: On transport failures (including timeout), persistently
            malformed responses, or when the tool loop exceeds max turns.
    """
    settings = Settings.query.first()
    if not settings:
        raise ValueError("Settings not configured.")

    is_local = settings.llm_provider != 'openrouter'

    if not is_local:
        if not settings.openrouter_api_key:
            raise ValueError("OpenRouter API Key not configured. Please configure it in Settings.")

        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": flask_request.url_root,
            "X-Title": "Character Browser"
        }
        model = settings.openrouter_model or 'google/gemini-2.0-flash-001'
    else:
        # Local provider (Ollama or LMStudio)
        if not settings.local_base_url:
            raise ValueError(f"{settings.llm_provider.title()} Base URL not configured.")

        url = f"{settings.local_base_url.rstrip('/')}/chat/completions"
        headers = {"Content-Type": "application/json"}
        model = settings.local_model
        if not model:
            raise ValueError(f"No local model selected for {settings.llm_provider.title()}. Please select one in Settings.")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]

    max_turns = 10
    use_tools = True
    format_retries = 3  # retries allowed for unexpected response format

    while max_turns > 0:
        max_turns -= 1
        data = {
            "model": model,
            "messages": messages,
        }

        # Only add tools if supported/requested
        if use_tools:
            data["tools"] = DANBOORU_TOOLS
            data["tool_choice"] = "auto"

        # Initialized so the error handlers below can safely test whether a
        # response was ever received (requests.post itself may raise).
        response = None
        try:
            # Explicit timeout: without one a hung provider blocks this
            # worker forever.  300s allows slow local models to respond.
            response = requests.post(url, headers=headers, json=data, timeout=300)

            # If 400 Bad Request and we were using tools, try once without tools
            if response.status_code == 400 and use_tools:
                print(f"LLM Provider {settings.llm_provider} rejected tools. Retrying without tool calling...")
                use_tools = False
                max_turns += 1  # Reset turn for the retry
                continue

            response.raise_for_status()
            result = response.json()

            # Validate expected OpenAI-compatible response shape
            if 'choices' not in result or not result['choices']:
                raise KeyError('choices')

            message = result['choices'][0].get('message')
            if message is None:
                raise KeyError('message')

            if message.get('tool_calls'):
                # Echo the assistant turn, run every requested tool, and
                # append the tool results so the next iteration continues
                # the conversation.
                messages.append(message)
                for tool_call in message['tool_calls']:
                    name = tool_call['function']['name']
                    args = json.loads(tool_call['function']['arguments'])
                    print(f"Executing MCP tool: {name}({args})")
                    tool_result = call_mcp_tool(name, args)
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call['id'],
                        "name": name,
                        "content": tool_result
                    })
                continue

            return message['content']
        except requests.exceptions.RequestException as e:
            # Include the response body when one exists — it usually carries
            # the provider's actual error message.
            error_body = f" - Body: {response.text}" if response is not None else ""
            raise RuntimeError(f"LLM API request failed: {str(e)}{error_body}") from e
        except (KeyError, IndexError) as e:
            # Log the raw response to help diagnose the issue
            raw = response.text[:500] if response is not None else ""
            print(f"Unexpected LLM response format (key={e}). Raw response: {raw}")
            if format_retries > 0:
                format_retries -= 1
                max_turns += 1  # don't burn a turn on a format error
                # Ask the model to try again with the correct format
                messages.append({
                    "role": "user",
                    "content": (
                        "Your previous response was not in the expected format. "
                        "Please respond with valid JSON only, exactly as specified in the system prompt. "
                        "Do not include any explanation or markdown — only the raw JSON object."
                    )
                })
                print(f"Retrying after format error ({format_retries} retries left)…")
                continue
            raise RuntimeError(f"Unexpected LLM response format after retries: {str(e)}") from e

    raise RuntimeError("LLM tool calling loop exceeded maximum turns")
|
||||
Reference in New Issue
Block a user