Add extra prompts, endless generation, random character default, and small fixes
- Add extra positive/negative prompt textareas to all 9 detail pages with session persistence
- Add Endless generation button to all detail pages (continuous preview generation until stopped)
- Default character selector to "Random Character" on all secondary detail pages
- Fix queue clear endpoint (remove spurious auth check)
- Refactor app.py into routes/ and services/ modules
- Update CLAUDE.md with new architecture documentation
- Various data file updates and cleanup

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
203
services/llm.py
Normal file
203
services/llm.py
Normal file
@@ -0,0 +1,203 @@
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
import requests
|
||||
from flask import request as flask_request
|
||||
from mcp import ClientSession, StdioServerParameters
|
||||
from mcp.client.stdio import stdio_client
|
||||
from models import Settings
|
||||
|
||||
# OpenAI-compatible function schemas advertised to the LLM so it can look up
# Danbooru tags.  Each entry follows the Chat Completions "tools" format; the
# actual execution happens through the danbooru-mcp Docker image (see
# _run_mcp_tool / call_mcp_tool in this module).
DANBOORU_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_tags",
            "description": "Prefix/full-text search for Danbooru tags. Returns rich tag objects ordered by relevance.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search string. Trailing * added automatically."},
                    "limit": {"type": "integer", "description": "Max results (1-200)", "default": 20},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "validate_tags",
            "description": "Exact-match validation for a list of tags. Splits into valid, deprecated, and invalid.",
            "parameters": {
                "type": "object",
                "properties": {
                    "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags to validate."}
                },
                "required": ["tags"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "suggest_tags",
            "description": "Autocomplete-style suggestions for a partial or approximate tag. Sorted by post count.",
            "parameters": {
                "type": "object",
                "properties": {
                    "partial": {"type": "string", "description": "Partial tag or rough approximation."},
                    "limit": {"type": "integer", "description": "Max suggestions (1-50)", "default": 10},
                    "category": {"type": "string", "enum": ["general", "artist", "copyright", "character", "meta"], "description": "Optional category filter."}
                },
                "required": ["partial"]
            }
        }
    }
]
|
||||
|
||||
|
||||
async def _run_mcp_tool(name, arguments):
    """Invoke a single MCP tool inside a fresh danbooru-mcp Docker container.

    Spawns the container over stdio, initializes an MCP session, calls the
    named tool with ``arguments``, and returns the text of the first content
    item in the result.  A new container is started per call.
    """
    params = StdioServerParameters(
        command="docker",
        args=["run", "--rm", "-i", "danbooru-mcp:latest"],
    )
    async with stdio_client(params) as (reader, writer):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            outcome = await session.call_tool(name, arguments)
            return outcome.content[0].text
|
||||
|
||||
|
||||
def call_mcp_tool(name, arguments):
    """Synchronous wrapper around the async MCP tool runner.

    Never raises: any failure (Docker unavailable, tool error, etc.) is
    reported as a JSON object of the form {"error": "..."} so the LLM
    tool-calling loop can feed it back to the model and continue.
    """
    try:
        result = asyncio.run(_run_mcp_tool(name, arguments))
    except Exception as e:
        print(f"MCP Tool Error: {e}")
        return json.dumps({"error": str(e)})
    return result
|
||||
|
||||
|
||||
def load_prompt(filename):
    """Load an LLM prompt template from the data/prompts directory.

    Args:
        filename: Bare file name (e.g. "scene.txt") under data/prompts.

    Returns:
        The file contents as a string, or None when the file does not exist.
    """
    path = os.path.join('data/prompts', filename)
    try:
        # EAFP: open directly instead of exists()+open() — avoids the
        # check-then-use race and a redundant stat call.  Explicit UTF-8 so
        # prompt files read identically regardless of the host locale.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers a path component being a regular file,
        # which os.path.exists() also reported as "missing" → None.
        return None
|
||||
|
||||
|
||||
def call_llm(prompt, system_prompt="You are a creative assistant."):
    """Send a chat-completion request to the configured LLM provider.

    Supports OpenRouter and local OpenAI-compatible endpoints (Ollama /
    LMStudio).  Runs a tool-calling loop: while the model requests one of the
    DANBOORU_TOOLS, each tool is executed via MCP and its result appended to
    the conversation, until the model returns plain content or the turn
    budget runs out.

    Args:
        prompt: User message content.
        system_prompt: System message content.

    Returns:
        The assistant message content (str) from the final response.

    Raises:
        ValueError: When settings are missing or incomplete.
        RuntimeError: On transport failures (including timeout), persistently
            malformed responses, or when the tool loop exceeds max turns.
    """
    settings = Settings.query.first()
    if not settings:
        raise ValueError("Settings not configured.")

    is_local = settings.llm_provider != 'openrouter'

    if not is_local:
        if not settings.openrouter_api_key:
            raise ValueError("OpenRouter API Key not configured. Please configure it in Settings.")

        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": flask_request.url_root,
            "X-Title": "Character Browser"
        }
        model = settings.openrouter_model or 'google/gemini-2.0-flash-001'
    else:
        # Local provider (Ollama or LMStudio)
        if not settings.local_base_url:
            raise ValueError(f"{settings.llm_provider.title()} Base URL not configured.")

        url = f"{settings.local_base_url.rstrip('/')}/chat/completions"
        headers = {"Content-Type": "application/json"}
        model = settings.local_model
        if not model:
            raise ValueError(f"No local model selected for {settings.llm_provider.title()}. Please select one in Settings.")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]

    max_turns = 10
    use_tools = True
    format_retries = 3  # retries allowed for unexpected response format

    while max_turns > 0:
        max_turns -= 1
        data = {
            "model": model,
            "messages": messages,
        }

        # Only add tools if supported/requested
        if use_tools:
            data["tools"] = DANBOORU_TOOLS
            data["tool_choice"] = "auto"

        # Initialized so the error handlers below can safely test whether a
        # response was ever received (requests.post itself may raise).
        response = None
        try:
            # Explicit timeout: without one a hung provider blocks this
            # worker forever.  300s allows slow local models to respond.
            response = requests.post(url, headers=headers, json=data, timeout=300)

            # If 400 Bad Request and we were using tools, try once without tools
            if response.status_code == 400 and use_tools:
                print(f"LLM Provider {settings.llm_provider} rejected tools. Retrying without tool calling...")
                use_tools = False
                max_turns += 1  # Reset turn for the retry
                continue

            response.raise_for_status()
            result = response.json()

            # Validate expected OpenAI-compatible response shape
            if 'choices' not in result or not result['choices']:
                raise KeyError('choices')

            message = result['choices'][0].get('message')
            if message is None:
                raise KeyError('message')

            if message.get('tool_calls'):
                # Echo the assistant turn, run every requested tool, and
                # append the tool results so the next iteration continues
                # the conversation.
                messages.append(message)
                for tool_call in message['tool_calls']:
                    name = tool_call['function']['name']
                    args = json.loads(tool_call['function']['arguments'])
                    print(f"Executing MCP tool: {name}({args})")
                    tool_result = call_mcp_tool(name, args)
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call['id'],
                        "name": name,
                        "content": tool_result
                    })
                continue

            return message['content']
        except requests.exceptions.RequestException as e:
            # Include the response body when one exists — it usually carries
            # the provider's actual error message.
            error_body = f" - Body: {response.text}" if response is not None else ""
            raise RuntimeError(f"LLM API request failed: {str(e)}{error_body}") from e
        except (KeyError, IndexError) as e:
            # Log the raw response to help diagnose the issue
            raw = response.text[:500] if response is not None else ""
            print(f"Unexpected LLM response format (key={e}). Raw response: {raw}")
            if format_retries > 0:
                format_retries -= 1
                max_turns += 1  # don't burn a turn on a format error
                # Ask the model to try again with the correct format
                messages.append({
                    "role": "user",
                    "content": (
                        "Your previous response was not in the expected format. "
                        "Please respond with valid JSON only, exactly as specified in the system prompt. "
                        "Do not include any explanation or markdown — only the raw JSON object."
                    )
                })
                print(f"Retrying after format error ({format_retries} retries left)…")
                continue
            raise RuntimeError(f"Unexpected LLM response format after retries: {str(e)}") from e

    raise RuntimeError("LLM tool calling loop exceeded maximum turns")
|
||||
Reference in New Issue
Block a user