Files
character-browser/services/llm.py
Aodhan Collins 32a73b02f5 Add semantic tagging, search, favourite/NSFW filtering, and LLM job queue
Replaces old list-format tags (which duplicated prompt content) with structured
dict tags per category (origin_series, outfit_type, participants, style_type,
scene_type, etc.). Tags are now purely organizational metadata — removed from
the prompt pipeline entirely.

Adds is_favourite and is_nsfw columns to all 8 resource models. Favourite is
DB-only (user preference); NSFW is mirrored in JSON tags for rescan persistence.
All library pages get filter controls and favourites-first sorting.

Introduces a parallel LLM job queue (_enqueue_task + _llm_queue_worker) for
background tag regeneration, with the same status polling UI as ComfyUI jobs.
Fixes call_llm() to use has_request_context() fallback for background threads.

Adds global search (/search) across resources and gallery images, with navbar
search bar. Adds gallery image sidecar JSON for per-image favourite/NSFW metadata.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 03:22:09 +00:00

230 lines
8.9 KiB
Python

import os
import json
import asyncio
import requests
from flask import has_request_context, request as flask_request
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from models import Settings
# OpenAI-compatible function-calling schemas offered to the LLM.
# These entries are forwarded verbatim in the "tools" field of the chat
# request; when the model requests one, call_llm executes it through the
# danbooru-mcp container via call_mcp_tool and feeds the result back.
DANBOORU_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_tags",
            "description": "Prefix/full-text search for Danbooru tags. Returns rich tag objects ordered by relevance.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search string. Trailing * added automatically."},
                    "limit": {"type": "integer", "description": "Max results (1-200)", "default": 20},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "validate_tags",
            "description": "Exact-match validation for a list of tags. Splits into valid, deprecated, and invalid.",
            "parameters": {
                "type": "object",
                "properties": {
                    "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags to validate."}
                },
                "required": ["tags"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "suggest_tags",
            "description": "Autocomplete-style suggestions for a partial or approximate tag. Sorted by post count.",
            "parameters": {
                "type": "object",
                "properties": {
                    "partial": {"type": "string", "description": "Partial tag or rough approximation."},
                    "limit": {"type": "integer", "description": "Max suggestions (1-50)", "default": 10},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["partial"]
            }
        }
    }
]
async def _run_mcp_tool(name, arguments):
    """Spawn the danbooru-mcp Docker image and invoke one tool over stdio.

    A fresh container is started per call; the session is initialized,
    the tool is invoked, and the text of the first content item of the
    result is returned.
    """
    params = StdioServerParameters(
        command="docker",
        args=["run", "--rm", "-i", "danbooru-mcp:latest"],
    )
    async with stdio_client(params) as (reader, writer):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            reply = await session.call_tool(name, arguments)
            return reply.content[0].text
def call_mcp_tool(name, arguments):
    """Synchronous wrapper around _run_mcp_tool.

    Failures are logged and converted to a JSON error payload so the
    string can still be handed back to the LLM as tool output.
    """
    try:
        output = asyncio.run(_run_mcp_tool(name, arguments))
    except Exception as exc:
        print(f"MCP Tool Error: {exc}")
        output = json.dumps({"error": str(exc)})
    return output
async def _run_character_mcp_tool(name, arguments):
    """Spawn the character-mcp Docker image and invoke one tool over stdio.

    Mounts the character-cache volume so the container can persist
    character details between runs. Returns the text of the first
    content item of the tool result.
    """
    params = StdioServerParameters(
        command="docker",
        args=[
            "run", "--rm", "-i",
            "-v", "character-cache:/root/.local/share/character_details",
            "character-mcp:latest",
        ],
    )
    async with stdio_client(params) as (reader, writer):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            reply = await session.call_tool(name, arguments)
            return reply.content[0].text
def call_character_mcp_tool(name, arguments):
    """Synchronous wrapper around _run_character_mcp_tool.

    Unlike call_mcp_tool, failures yield None rather than a JSON error
    payload; the error is only logged.
    """
    try:
        output = asyncio.run(_run_character_mcp_tool(name, arguments))
    except Exception as exc:
        print(f"Character MCP Tool Error: {exc}")
        output = None
    return output
def load_prompt(filename):
    """Return the contents of a prompt template from data/prompts.

    Args:
        filename: Bare file name inside the data/prompts directory
            (resolved relative to the current working directory).

    Returns:
        The file's text, or None if the file does not exist.
    """
    path = os.path.join('data/prompts', filename)
    # EAFP: open directly instead of exists()+open — avoids the TOCTOU race
    # between the check and the read, and pins the encoding so the result
    # doesn't depend on the platform's default codec.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        return None
def call_llm(prompt, system_prompt="You are a creative assistant."):
    """Run a chat completion against the configured provider, with tool calling.

    Sends *prompt* (plus *system_prompt*) to either OpenRouter or a local
    OpenAI-compatible endpoint (Ollama/LMStudio), offering the Danbooru MCP
    tools. Tool calls requested by the model are executed via call_mcp_tool
    and their results fed back until the model produces a plain text answer.

    Args:
        prompt: The user message.
        system_prompt: The system message establishing the assistant role.

    Returns:
        The assistant's final message content (str).

    Raises:
        ValueError: If required settings are missing or unconfigured.
        RuntimeError: On API failure, persistent response-format errors, or
            when the conversation exceeds the maximum number of turns.
    """
    settings = Settings.query.first()
    if not settings:
        raise ValueError("Settings not configured.")
    is_local = settings.llm_provider != 'openrouter'
    if not is_local:
        if not settings.openrouter_api_key:
            raise ValueError("OpenRouter API Key not configured. Please configure it in Settings.")
        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
            # has_request_context() lets this run from background worker
            # threads where no Flask request is active.
            "HTTP-Referer": flask_request.url_root if has_request_context() else "http://localhost:5000/",
            "X-Title": "Character Browser"
        }
        model = settings.openrouter_model or 'google/gemini-2.0-flash-001'
    else:
        # Local provider (Ollama or LMStudio)
        if not settings.local_base_url:
            raise ValueError(f"{settings.llm_provider.title()} Base URL not configured.")
        url = f"{settings.local_base_url.rstrip('/')}/chat/completions"
        headers = {"Content-Type": "application/json"}
        model = settings.local_model
        if not model:
            raise ValueError(f"No local model selected for {settings.llm_provider.title()}. Please select one in Settings.")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]
    max_turns = 15
    tool_turns_remaining = 8  # stop offering tools after this many tool-calling turns
    use_tools = True
    format_retries = 3  # retries allowed for unexpected response format
    while max_turns > 0:
        max_turns -= 1
        data = {
            "model": model,
            "messages": messages,
        }
        # Only add tools if supported/requested and we haven't exhausted tool turns
        if use_tools and tool_turns_remaining > 0:
            data["tools"] = DANBOORU_TOOLS
            data["tool_choice"] = "auto"
        # Initialized so the except blocks can tell whether the POST got far
        # enough to produce a response (it stays None on e.g. connection errors).
        response = None
        try:
            response = requests.post(url, headers=headers, json=data, timeout=120)
            # If 400 Bad Request and we were using tools, try once without tools
            if response.status_code == 400 and use_tools:
                print(f"LLM Provider {settings.llm_provider} rejected tools. Retrying without tool calling...")
                use_tools = False
                max_turns += 1  # Reset turn for the retry
                continue
            response.raise_for_status()
            result = response.json()
            # Validate expected OpenAI-compatible response shape
            if 'choices' not in result or not result['choices']:
                raise KeyError('choices')
            message = result['choices'][0].get('message')
            if message is None:
                raise KeyError('message')
            if message.get('tool_calls'):
                tool_turns_remaining -= 1
                messages.append(message)
                for tool_call in message['tool_calls']:
                    name = tool_call['function']['name']
                    args = json.loads(tool_call['function']['arguments'])
                    print(f"Executing MCP tool: {name}({args})")
                    tool_result = call_mcp_tool(name, args)
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call['id'],
                        "name": name,
                        "content": tool_result
                    })
                if tool_turns_remaining <= 0:
                    print("Tool turn limit reached — next request will not offer tools")
                continue
            return message['content']
        except requests.exceptions.RequestException as e:
            # Previously this read response.text inside a bare except, which
            # hid the NameError raised when the POST itself failed and
            # `response` was never bound. Guard explicitly instead.
            error_body = f" - Body: {response.text}" if response is not None else ""
            raise RuntimeError(f"LLM API request failed: {str(e)}{error_body}") from e
        except (KeyError, IndexError) as e:
            # Log the raw response to help diagnose the issue
            raw = response.text[:500] if response is not None else ""
            print(f"Unexpected LLM response format (key={e}). Raw response: {raw}")
            if format_retries > 0:
                format_retries -= 1
                max_turns += 1  # don't burn a turn on a format error
                # Ask the model to try again with the correct format
                messages.append({
                    "role": "user",
                    "content": (
                        "Your previous response was not in the expected format. "
                        "Please respond with valid JSON only, exactly as specified in the system prompt. "
                        "Do not include any explanation or markdown — only the raw JSON object."
                    )
                })
                print(f"Retrying after format error ({format_retries} retries left)…")
                continue
            raise RuntimeError(f"Unexpected LLM response format after retries: {str(e)}") from e
    raise RuntimeError("LLM tool calling loop exceeded maximum turns")