import asyncio
import json
import logging
import os

import requests
from flask import has_request_context, request as flask_request
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

from models import Settings

logger = logging.getLogger('gaze')

# OpenAI-compatible tool schemas offered to the LLM so it can look up
# Danbooru tags (via the MCP docker server) while composing a reply.
DANBOORU_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_tags",
            "description": "Prefix/full-text search for Danbooru tags. Returns rich tag objects ordered by relevance.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search string. Trailing * added automatically."},
                    "limit": {"type": "integer", "description": "Max results (1-200)", "default": 20},
                    "category": {
                        "type": "string",
                        "enum": ["general", "artist", "copyright", "character", "meta"],
                        "description": "Optional category filter."
                    }
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "validate_tags",
            "description": "Exact-match validation for a list of tags. Splits into valid, deprecated, and invalid.",
            "parameters": {
                "type": "object",
                "properties": {
                    "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags to validate."}
                },
                "required": ["tags"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "suggest_tags",
            "description": "Autocomplete-style suggestions for a partial or approximate tag. Sorted by post count.",
            "parameters": {
                "type": "object",
                "properties": {
                    "partial": {"type": "string", "description": "Partial tag or rough approximation."},
                    "limit": {"type": "integer", "description": "Max suggestions (1-50)", "default": 10},
                    "category": {
                        "type": "string",
                        "enum": ["general", "artist", "copyright", "character", "meta"],
                        "description": "Optional category filter."
                    }
                },
                "required": ["partial"]
            }
        }
    }
]


async def _call_stdio_tool(server_params, name, arguments):
    """Spawn an MCP stdio server, call one tool, and return its first text payload.

    Shared plumbing for the Danbooru and character MCP servers: connect,
    initialize the session, invoke the tool, and return ``content[0].text``.
    """
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(name, arguments)
            return result.content[0].text


async def _run_mcp_tool(name, arguments):
    """Run a Danbooru MCP tool in a throwaway docker container."""
    server_params = StdioServerParameters(
        command="docker",
        args=["run", "--rm", "-i", "danbooru-mcp:latest"],
    )
    return await _call_stdio_tool(server_params, name, arguments)


def call_mcp_tool(name, arguments):
    """Synchronous wrapper around the Danbooru MCP tool.

    Returns the tool's text payload on success. On any failure, returns a JSON
    error object (string) so the result can always be fed back to the LLM as a
    tool message instead of crashing the tool-calling loop.
    """
    try:
        return asyncio.run(_run_mcp_tool(name, arguments))
    except Exception as e:
        logger.error("MCP Tool Error: %s", e)
        return json.dumps({"error": str(e)})


async def _run_character_mcp_tool(name, arguments):
    """Run a character MCP tool in docker, mounting a persistent cache volume."""
    server_params = StdioServerParameters(
        command="docker",
        args=[
            "run", "--rm", "-i",
            "-v", "character-cache:/root/.local/share/character_details",
            "character-mcp:latest",
        ],
    )
    return await _call_stdio_tool(server_params, name, arguments)


def call_character_mcp_tool(name, arguments):
    """Synchronous wrapper around the character MCP tool.

    Returns the tool's text payload, or ``None`` on any failure (callers treat
    a missing result as "no character details available").
    """
    try:
        return asyncio.run(_run_character_mcp_tool(name, arguments))
    except Exception as e:
        logger.error("Character MCP Tool Error: %s", e)
        return None


def load_prompt(filename):
    """Read a prompt template from ``data/prompts/<filename>``.

    Returns the file contents, or ``None`` when the file does not exist.
    Uses EAFP (open + FileNotFoundError) rather than an exists() pre-check to
    avoid the check-then-open race.
    """
    path = os.path.join('data/prompts', filename)
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        return None


def _resolve_llm_endpoint(settings):
    """Derive ``(url, headers, model)`` for the configured LLM provider.

    Raises:
        ValueError: when the selected provider is missing required settings.
    """
    if settings.llm_provider == 'openrouter':
        if not settings.openrouter_api_key:
            raise ValueError("OpenRouter API Key not configured. Please configure it in Settings.")
        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
            # OpenRouter attribution headers; fall back to localhost when
            # called outside a Flask request context (e.g. background jobs).
            "HTTP-Referer": flask_request.url_root if has_request_context() else "http://localhost:5000/",
            "X-Title": "Character Browser"
        }
        model = settings.openrouter_model or 'google/gemini-2.0-flash-001'
    else:
        # Local provider (Ollama or LMStudio) speaking the OpenAI-compatible API
        if not settings.local_base_url:
            raise ValueError(f"{settings.llm_provider.title()} Base URL not configured.")
        url = f"{settings.local_base_url.rstrip('/')}/chat/completions"
        headers = {"Content-Type": "application/json"}
        model = settings.local_model
        if not model:
            raise ValueError(f"No local model selected for {settings.llm_provider.title()}. Please select one in Settings.")
    return url, headers, model


def _execute_tool_calls(message, messages):
    """Execute every tool call in *message*, appending results to *messages*.

    The assistant message itself is appended first (the API requires the
    tool-call message to precede its tool results in the conversation).
    """
    messages.append(message)
    for tool_call in message['tool_calls']:
        name = tool_call['function']['name']
        # Some providers send "" instead of "{}" for a no-argument call.
        raw_args = tool_call['function'].get('arguments') or "{}"
        args = json.loads(raw_args)
        logger.debug("Executing MCP tool: %s(%s)", name, args)
        tool_result = call_mcp_tool(name, args)
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call['id'],
            "name": name,
            "content": tool_result
        })


def call_llm(prompt, system_prompt="You are a creative assistant."):
    """Send *prompt* to the configured LLM and return its final text reply.

    Drives an OpenAI-compatible chat-completions loop with Danbooru tag
    tool-calling. Falls back to a tool-free request when the provider rejects
    tools (HTTP 400), stops offering tools after a fixed number of tool turns,
    and retries a few times when the response shape is not OpenAI-compatible.

    Raises:
        ValueError: when required settings are missing or not configured.
        RuntimeError: on transport errors, on persistently malformed
            responses, or when the loop exceeds its turn budget.
    """
    settings = Settings.query.first()
    if not settings:
        raise ValueError("Settings not configured.")
    url, headers, model = _resolve_llm_endpoint(settings)

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]

    max_turns = 15
    tool_turns_remaining = 8  # stop offering tools after this many tool-calling turns
    use_tools = True
    format_retries = 3  # retries allowed for unexpected response format

    while max_turns > 0:
        max_turns -= 1
        data = {
            "model": model,
            "messages": messages,
        }
        # Only add tools if supported/requested and we haven't exhausted tool turns
        if use_tools and tool_turns_remaining > 0:
            data["tools"] = DANBOORU_TOOLS
            data["tool_choice"] = "auto"

        # Reset each turn so the error paths below can never report the body
        # of a *previous* turn's response (or hit an unbound name).
        response = None
        try:
            response = requests.post(url, headers=headers, json=data, timeout=120)

            # If 400 Bad Request and we were using tools, try once without tools
            if response.status_code == 400 and use_tools:
                logger.warning("LLM Provider %s rejected tools. Retrying without tool calling...", settings.llm_provider)
                use_tools = False
                max_turns += 1  # Reset turn for the retry
                continue

            response.raise_for_status()
            result = response.json()

            # Validate expected OpenAI-compatible response shape
            if 'choices' not in result or not result['choices']:
                raise KeyError('choices')
            message = result['choices'][0].get('message')
            if message is None:
                raise KeyError('message')

            if message.get('tool_calls'):
                tool_turns_remaining -= 1
                _execute_tool_calls(message, messages)
                if tool_turns_remaining <= 0:
                    logger.warning("Tool turn limit reached — next request will not offer tools")
                continue

            return message['content']

        except requests.exceptions.RequestException as e:
            error_body = f" - Body: {response.text}" if response is not None else ""
            raise RuntimeError(f"LLM API request failed: {str(e)}{error_body}") from e

        except (KeyError, IndexError) as e:
            # Log the raw response to help diagnose the issue
            raw = response.text[:500] if response is not None else ""
            logger.warning("Unexpected LLM response format (key=%s). Raw response: %s", e, raw)
            if format_retries > 0:
                format_retries -= 1
                max_turns += 1  # don't burn a turn on a format error
                # Ask the model to try again with the correct format
                messages.append({
                    "role": "user",
                    "content": (
                        "Your previous response was not in the expected format. "
                        "Please respond with valid JSON only, exactly as specified in the system prompt. "
                        "Do not include any explanation or markdown — only the raw JSON object."
                    )
                })
                logger.info("Retrying after format error (%d retries left)…", format_retries)
                continue
            raise RuntimeError(f"Unexpected LLM response format after retries: {str(e)}") from e

    raise RuntimeError("LLM tool calling loop exceeded maximum turns")