SQLite + sqlite-vec replaces JSON memory files with semantic search, follow-up injection, privacy levels, and lifecycle management. Six prompt styles (quick/standard/creative/roleplayer/game-master/storyteller) with per-style Claude model tiering (Haiku/Sonnet/Opus), temperature control, and section stripping. Characters can set default style and per-style overrides. Dream character import and GAZE character linking in the dashboard editor with auto-populated fields, cover image resolution, and preset assignment. Bridge: session isolation (conversation_id / 12h satellite buckets), model routing refactor, PUT/DELETE support, memory REST endpoints. Dashboard: mobile-responsive sidebar, retry button, style picker in chat, follow-up banner, memory lifecycle/privacy UI, cloud model options in editor. Wyoming TTS: upgraded to v1.8.0 for HA 1.7.2 compatibility. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
866 lines
28 KiB
Python
#!/usr/bin/env python3
"""
HomeAI Memory Store — SQLite + Vector Search

Replaces flat JSON memory files with a structured SQLite database
using sqlite-vec for semantic similarity search.

Used by:
- openclaw-http-bridge.py (memory retrieval + follow-up injection)
- memory-ctl skill (CLI memory management)
- Dashboard API (REST endpoints via bridge)
"""
|
|
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import struct
|
|
import time
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import sqlite_vec
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Configuration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Root data directory; override with the DATA_DIR environment variable.
DATA_DIR = Path(os.environ.get("DATA_DIR", os.path.expanduser("~/homeai-data")))
MEMORIES_DIR = DATA_DIR / "memories"
DB_PATH = MEMORIES_DIR / "memories.db"
EMBEDDING_DIM = 384  # all-MiniLM-L6-v2

# Privacy keywords for rule-based classification
# Matched as lowercase substrings of the memory text (see classify_memory).
# 'local_only' rows can be filtered out of cloud-routed retrieval via
# retrieve_memories(exclude_private_for_cloud=True); 'sensitive' rows are
# flagged but still retrievable.
PRIVACY_KEYWORDS = {
    "local_only": [
        "health", "illness", "sick", "doctor", "medical", "medication", "surgery",
        "salary", "bank", "financial", "debt", "mortgage", "tax",
        "depression", "anxiety", "therapy", "divorce", "breakup",
    ],
    "sensitive": [
        "address", "phone", "email", "password", "birthday",
    ],
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Embedding model (lazy-loaded singleton)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_embedder = None  # cached SentenceTransformer instance; populated by _get_embedder()
|
|
|
|
|
|
def _get_embedder():
    """Return the module-wide SentenceTransformer, loading it on first use.

    The import is deferred so that code paths which never embed text avoid
    the heavy sentence-transformers startup cost.
    """
    global _embedder
    if _embedder is not None:
        return _embedder
    from sentence_transformers import SentenceTransformer
    _embedder = SentenceTransformer("all-MiniLM-L6-v2")
    return _embedder
|
|
|
|
|
|
def get_embedding(text: str) -> list[float]:
    """Encode *text* into a normalized 384-dim embedding (all-MiniLM-L6-v2)."""
    encoded = _get_embedder().encode(text, normalize_embeddings=True)
    return encoded.tolist()
|
|
|
|
|
|
def _serialize_f32(vec: list[float]) -> bytes:
|
|
"""Serialize a float list to little-endian bytes for sqlite-vec."""
|
|
return struct.pack(f"<{len(vec)}f", *vec)
|
|
|
|
|
|
def _deserialize_f32(blob: bytes) -> list[float]:
|
|
"""Deserialize sqlite-vec float bytes back to a list."""
|
|
n = len(blob) // 4
|
|
return list(struct.unpack(f"<{n}f", blob))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Database initialization
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_db: Optional[sqlite3.Connection] = None  # process-wide connection singleton; set by init_db()
|
|
|
|
|
|
def init_db() -> sqlite3.Connection:
    """Initialize the SQLite database with schema and sqlite-vec extension.

    Idempotent: returns the cached module-level connection on subsequent
    calls. The connection uses check_same_thread=False, so serializing
    cross-thread access is the caller's responsibility.

    Returns:
        The shared sqlite3.Connection with row_factory set to sqlite3.Row.
    """
    global _db
    if _db is not None:
        return _db

    MEMORIES_DIR.mkdir(parents=True, exist_ok=True)
    db = sqlite3.connect(str(DB_PATH), check_same_thread=False)
    # Enable extension loading only around the sqlite-vec load, then turn
    # it back off so no other code can load arbitrary extensions.
    db.enable_load_extension(True)
    sqlite_vec.load(db)
    db.enable_load_extension(False)
    db.row_factory = sqlite3.Row

    # Main memory table. Timestamp columns (created_at, last_accessed,
    # expires_at, follow_up_due) hold ISO-8601 strings; follow_up_due may
    # also be the sentinel 'next_interaction'. tags is a JSON-encoded list.
    db.executescript("""
        CREATE TABLE IF NOT EXISTS memories (
            id TEXT PRIMARY KEY,
            character_id TEXT NOT NULL,
            content TEXT NOT NULL,
            memory_type TEXT NOT NULL DEFAULT 'semantic',
            category TEXT NOT NULL DEFAULT 'other',
            privacy_level TEXT NOT NULL DEFAULT 'standard',
            importance REAL NOT NULL DEFAULT 0.5,
            lifecycle_state TEXT NOT NULL DEFAULT 'active',
            follow_up_due TEXT,
            follow_up_context TEXT,
            source TEXT DEFAULT 'user_explicit',
            created_at TEXT NOT NULL,
            last_accessed TEXT,
            expires_at TEXT,
            previous_value TEXT,
            tags TEXT,
            surfaced_count INTEGER DEFAULT 0
        );

        CREATE INDEX IF NOT EXISTS idx_memories_character
            ON memories(character_id);
        CREATE INDEX IF NOT EXISTS idx_memories_lifecycle
            ON memories(lifecycle_state);
        CREATE INDEX IF NOT EXISTS idx_memories_type
            ON memories(memory_type);
    """)

    # Create the vec0 virtual table for vector search.
    # sqlite-vec requires this specific syntax.
    db.execute(f"""
        CREATE VIRTUAL TABLE IF NOT EXISTS memory_embeddings USING vec0(
            id TEXT PRIMARY KEY,
            embedding float[{EMBEDDING_DIM}]
        )
    """)

    # Partial index for follow-ups (created manually since executescript can't
    # handle IF NOT EXISTS for partial indexes cleanly on all versions).
    try:
        db.execute("""
            CREATE INDEX idx_memories_followup
            ON memories(lifecycle_state, follow_up_due)
            WHERE lifecycle_state = 'pending_followup'
        """)
    except sqlite3.OperationalError:
        pass  # index already exists

    db.commit()
    _db = db
    return db
|
|
|
|
|
|
def _get_db() -> sqlite3.Connection:
    """Return the singleton connection, initializing the database on first use."""
    return _db if _db is not None else init_db()
|
|
|
|
|
|
def _row_to_dict(row: sqlite3.Row) -> dict:
|
|
"""Convert a sqlite3.Row to a plain dict."""
|
|
return dict(row)
|
|
|
|
|
|
def _generate_id() -> str:
|
|
"""Generate a unique memory ID."""
|
|
return f"m_{int(time.time() * 1000)}"
|
|
|
|
|
|
def _now_iso() -> str:
|
|
"""Current UTC time as ISO string."""
|
|
return datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Write-time classification (rule-based, Phase 1)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def classify_memory(content: str) -> dict:
    """Rule-based defaults for a memory's type, privacy level, and importance.

    Returns defaults that explicit parameters (e.g. in add_memory) override.
    """
    lowered = content.lower()

    # Privacy: first keyword bucket with any substring match wins;
    # 'local_only' is checked before 'sensitive' by dict insertion order.
    privacy = next(
        (
            level
            for level, keywords in PRIVACY_KEYWORDS.items()
            if any(kw in lowered for kw in keywords)
        ),
        "standard",
    )

    # Type: temporal markers suggest an episodic (time-bound) memory.
    temporal_markers = (
        "today", "yesterday", "tonight", "this morning", "just now",
        "feeling", "right now", "this week", "earlier",
    )
    memory_type = (
        "episodic" if any(marker in lowered for marker in temporal_markers)
        else "semantic"
    )

    # Importance: more private material is weighted higher by default.
    importance = {"local_only": 0.7, "sensitive": 0.6}.get(privacy, 0.5)

    return {
        "memory_type": memory_type,
        "privacy_level": privacy,
        "importance": importance,
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CRUD operations
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def add_memory(
    character_id: str,
    content: str,
    memory_type: str | None = None,
    category: str = "other",
    importance: float | None = None,
    privacy_level: str | None = None,
    tags: list[str] | None = None,
    follow_up_due: str | None = None,
    follow_up_context: str | None = None,
    source: str = "user_explicit",
    expires_at: str | None = None,
) -> dict:
    """Add a new memory record and its embedding.

    Fields not explicitly provided (memory_type, privacy_level, importance)
    are filled in by classify_memory(). Supplying follow_up_due or
    follow_up_context marks the memory 'pending_followup'; a missing due
    time defaults to the 'next_interaction' sentinel.

    Returns the stored record as a dict.

    Bug fix: expires_at was previously accepted but silently dropped — it
    is now persisted to the existing expires_at column and included in the
    returned dict.
    """
    db = _get_db()
    classified = classify_memory(content)

    memory_type = memory_type or classified["memory_type"]
    privacy_level = privacy_level or classified["privacy_level"]
    importance = importance if importance is not None else classified["importance"]

    lifecycle_state = "active"
    if follow_up_due or follow_up_context:
        lifecycle_state = "pending_followup"
        if not follow_up_due:
            follow_up_due = "next_interaction"

    mem_id = _generate_id()
    now = _now_iso()

    # Compute the embedding before writing, so a model failure leaves no
    # partial row behind.
    embedding = get_embedding(content)

    db.execute("""
        INSERT INTO memories (
            id, character_id, content, memory_type, category,
            privacy_level, importance, lifecycle_state,
            follow_up_due, follow_up_context, source,
            created_at, expires_at, tags, surfaced_count
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0)
    """, (
        mem_id, character_id, content, memory_type, category,
        privacy_level, importance, lifecycle_state,
        follow_up_due, follow_up_context, source,
        now, expires_at, json.dumps(tags) if tags else None,
    ))

    # Mirror the row into the vec0 virtual table for similarity search.
    db.execute(
        "INSERT INTO memory_embeddings (id, embedding) VALUES (?, ?)",
        (mem_id, _serialize_f32(embedding)),
    )

    db.commit()

    return {
        "id": mem_id,
        "character_id": character_id,
        "content": content,
        "memory_type": memory_type,
        "category": category,
        "privacy_level": privacy_level,
        "importance": importance,
        "lifecycle_state": lifecycle_state,
        "follow_up_due": follow_up_due,
        "follow_up_context": follow_up_context,
        "source": source,
        "created_at": now,
        "expires_at": expires_at,
        "tags": tags,
    }
|
|
|
|
|
|
def update_memory(memory_id: str, **fields) -> dict | None:
    """Apply whitelisted field updates to a memory and return the fresh row.

    Returns None when the memory does not exist; returns the unchanged row
    when no updatable fields were supplied. Changing `content` also
    refreshes the embedding and preserves the old text in `previous_value`.
    """
    db = _get_db()

    existing = db.execute(
        "SELECT * FROM memories WHERE id = ?", (memory_id,)
    ).fetchone()
    if existing is None:
        return None

    allowed = {
        "content", "memory_type", "category", "privacy_level", "importance",
        "lifecycle_state", "follow_up_due", "follow_up_context", "source",
        "last_accessed", "expires_at", "previous_value", "tags", "surfaced_count",
    }
    updates = {key: val for key, val in fields.items() if key in allowed}
    if not updates:
        return _row_to_dict(existing)

    if "content" in updates:
        # Keep the superseded text and re-embed the new content
        # (vec0 rows are replaced via delete + insert).
        updates["previous_value"] = existing["content"]
        new_embedding = get_embedding(updates["content"])
        db.execute("DELETE FROM memory_embeddings WHERE id = ?", (memory_id,))
        db.execute(
            "INSERT INTO memory_embeddings (id, embedding) VALUES (?, ?)",
            (memory_id, _serialize_f32(new_embedding)),
        )

    if isinstance(updates.get("tags"), list):
        updates["tags"] = json.dumps(updates["tags"])

    assignments = ", ".join(f"{column} = ?" for column in updates)
    db.execute(
        f"UPDATE memories SET {assignments} WHERE id = ?",
        [*updates.values(), memory_id],
    )
    db.commit()

    refreshed = db.execute(
        "SELECT * FROM memories WHERE id = ?", (memory_id,)
    ).fetchone()
    return _row_to_dict(refreshed) if refreshed else None
|
|
|
|
|
|
def delete_memory(memory_id: str) -> bool:
    """Remove a memory and its embedding; True only if the row existed."""
    db = _get_db()
    found = db.execute(
        "SELECT id FROM memories WHERE id = ?", (memory_id,)
    ).fetchone()
    if found is None:
        return False
    db.execute("DELETE FROM memories WHERE id = ?", (memory_id,))
    db.execute("DELETE FROM memory_embeddings WHERE id = ?", (memory_id,))
    db.commit()
    return True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Retrieval
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def retrieve_memories(
    character_id: str,
    context_text: str = "",
    limit: int = 20,
    exclude_private_for_cloud: bool = False,
) -> list[dict]:
    """Dual retrieval: semantic similarity + recency, merged and ranked.

    If context_text is empty, falls back to recency-only retrieval.

    Three candidate pools are merged (first writer wins per id):
      1. up to 5 recent memories with importance > 0.8 (score 1.0),
      2. semantic neighbours of context_text (0.6*similarity + 0.1*importance),
      3. memories from the last 7 days (0.3*recency + 0.1*importance).

    When exclude_private_for_cloud is True, 'local_only' rows are filtered
    out (for cloud-routed models). Side effect: last_accessed is bumped on
    every returned memory.
    """
    db = _get_db()

    # Constant SQL fragment interpolated below — not user input.
    privacy_filter = ""
    if exclude_private_for_cloud:
        privacy_filter = "AND m.privacy_level != 'local_only'"

    # Pool 1: always include high-importance memories regardless of context.
    high_importance = db.execute(f"""
        SELECT * FROM memories m
        WHERE m.character_id = ?
          AND m.lifecycle_state IN ('active', 'pending_followup')
          AND m.importance > 0.8
          {privacy_filter}
        ORDER BY m.created_at DESC
        LIMIT 5
    """, (character_id,)).fetchall()

    seen_ids = {r["id"] for r in high_importance}
    results = {r["id"]: {**_row_to_dict(r), "_score": 1.0} for r in high_importance}

    # Pool 2: semantic search (if context provided and embeddings exist).
    if context_text:
        try:
            query_emb = get_embedding(context_text)
            # vec0 KNN over-fetches 30 neighbours across ALL characters;
            # the character filter is applied in the follow-up query below.
            vec_rows = db.execute("""
                SELECT id, distance
                FROM memory_embeddings
                WHERE embedding MATCH ?
                  AND k = 30
            """, (_serialize_f32(query_emb),)).fetchall()

            vec_ids = [r["id"] for r in vec_rows if r["id"] not in seen_ids]
            vec_distances = {r["id"]: r["distance"] for r in vec_rows}

            if vec_ids:
                placeholders = ",".join("?" * len(vec_ids))
                sem_rows = db.execute(f"""
                    SELECT * FROM memories m
                    WHERE m.id IN ({placeholders})
                      AND m.character_id = ?
                      AND m.lifecycle_state IN ('active', 'pending_followup')
                      {privacy_filter}
                """, (*vec_ids, character_id)).fetchall()

                for r in sem_rows:
                    d = _row_to_dict(r)
                    # Convert cosine distance to similarity (sqlite-vec returns L2 distance for vec0)
                    # NOTE(review): for normalized vectors L2 distance spans
                    # [0, 2], so 1 - dist is a monotonic proxy rather than a
                    # true cosine similarity — confirm this is intended.
                    dist = vec_distances.get(r["id"], 1.0)
                    semantic_score = max(0.0, 1.0 - dist)
                    d["_score"] = 0.6 * semantic_score + 0.1 * d["importance"]
                    results[r["id"]] = d
                    seen_ids.add(r["id"])
        except Exception as e:
            # Best-effort: on any embedding/vector failure, fall back to
            # recency-only retrieval instead of failing the request.
            print(f"[MemoryStore] Vector search error: {e}")

    # Pool 3: recency search — last 7 days, ordered by importance + recency.
    # NOTE(review): created_at is ISO-8601 with 'T' while datetime('now')
    # yields 'YYYY-MM-DD HH:MM:SS'; the string comparison is only
    # approximate at the exact 7-day boundary — confirm acceptable.
    recency_rows = db.execute(f"""
        SELECT * FROM memories m
        WHERE m.character_id = ?
          AND m.lifecycle_state IN ('active', 'pending_followup')
          AND m.created_at > datetime('now', '-7 days')
          {privacy_filter}
        ORDER BY m.importance DESC, m.created_at DESC
        LIMIT 10
    """, (character_id,)).fetchall()

    for r in recency_rows:
        if r["id"] not in seen_ids:
            d = _row_to_dict(r)
            # Recency score based on age in days (newer = higher).
            try:
                created = datetime.fromisoformat(d["created_at"])
                age_days = (datetime.now(timezone.utc) - created).total_seconds() / 86400
                recency_score = max(0.0, 1.0 - (age_days / 7.0))
            except (ValueError, TypeError):
                recency_score = 0.5  # unparseable/naive timestamp: neutral score
            d["_score"] = 0.3 * recency_score + 0.1 * d["importance"]
            results[r["id"]] = d
            seen_ids.add(r["id"])

    # Sort by score descending, return top N.
    ranked = sorted(results.values(), key=lambda x: x.get("_score", 0), reverse=True)

    # Update last_accessed for returned memories; the internal _score key
    # is stripped before the dicts are handed back to callers.
    returned = ranked[:limit]
    now = _now_iso()
    for mem in returned:
        mem.pop("_score", None)
        db.execute(
            "UPDATE memories SET last_accessed = ? WHERE id = ?",
            (now, mem["id"]),
        )
    db.commit()

    return returned
|
|
|
|
|
|
def get_pending_followups(character_id: str) -> list[dict]:
    """Return up to five follow-ups that are due now — or marked
    'next_interaction' — for the given character."""
    db = _get_db()
    current = _now_iso()

    pending = db.execute("""
        SELECT * FROM memories
        WHERE character_id = ?
          AND lifecycle_state = 'pending_followup'
          AND (follow_up_due <= ? OR follow_up_due = 'next_interaction')
        ORDER BY importance DESC, created_at DESC
        LIMIT 5
    """, (character_id, current)).fetchall()

    return [_row_to_dict(row) for row in pending]
|
|
|
|
|
|
def search_memories(
    character_id: str,
    query: str,
    memory_type: str | None = None,
    limit: int = 10,
) -> list[dict]:
    """Semantic search over a character's memories.

    Over-fetches limit*3 nearest neighbours from the vector table, narrows
    them to this character (and optional memory_type), then ranks the
    survivors by ascending vector distance.
    """
    db = _get_db()

    neighbours = db.execute("""
        SELECT id, distance
        FROM memory_embeddings
        WHERE embedding MATCH ?
          AND k = ?
    """, (_serialize_f32(get_embedding(query)), limit * 3)).fetchall()
    if not neighbours:
        return []

    candidate_ids = [n["id"] for n in neighbours]
    distance_by_id = {n["id"]: n["distance"] for n in neighbours}
    placeholders = ",".join("?" * len(candidate_ids))

    type_filter = "AND m.memory_type = ?" if memory_type else ""
    params: list = [*candidate_ids, character_id]
    if memory_type:
        params.append(memory_type)

    rows = db.execute(f"""
        SELECT * FROM memories m
        WHERE m.id IN ({placeholders})
          AND m.character_id = ?
          {type_filter}
        ORDER BY m.created_at DESC
    """, params).fetchall()

    # Rank by vector distance (closest first) and truncate.
    ranked = sorted(
        (_row_to_dict(row) for row in rows),
        key=lambda d: distance_by_id.get(d["id"], 1.0),
    )
    return ranked[:limit]
|
|
|
|
|
|
def list_memories(
    character_id: str,
    memory_type: str | None = None,
    lifecycle_state: str | None = None,
    category: str | None = None,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """List a character's memories, newest first, with optional filters."""
    db = _get_db()

    conditions = ["character_id = ?"]
    params: list = [character_id]
    optional_filters = {
        "memory_type": memory_type,
        "lifecycle_state": lifecycle_state,
        "category": category,
    }
    for column, value in optional_filters.items():
        if value:
            conditions.append(f"{column} = ?")
            params.append(value)

    rows = db.execute(f"""
        SELECT * FROM memories
        WHERE {" AND ".join(conditions)}
        ORDER BY created_at DESC
        LIMIT ? OFFSET ?
    """, [*params, limit, offset]).fetchall()

    return [_row_to_dict(row) for row in rows]
|
|
|
|
|
|
def count_memories(character_id: str) -> int:
    """Number of memory rows stored for a character (any lifecycle state)."""
    result = _get_db().execute(
        "SELECT COUNT(*) as cnt FROM memories WHERE character_id = ?",
        (character_id,),
    ).fetchone()
    return result["cnt"] if result else 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Lifecycle management
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def resolve_followup(memory_id: str) -> bool:
    """Mark a pending follow-up as resolved; True if a row was changed."""
    db = _get_db()
    cursor = db.execute("""
        UPDATE memories
        SET lifecycle_state = 'resolved',
            follow_up_due = NULL
        WHERE id = ? AND lifecycle_state = 'pending_followup'
    """, (memory_id,))
    db.commit()
    return cursor.rowcount > 0
|
|
|
|
|
|
def archive_memory(memory_id: str) -> bool:
    """Move a memory to the 'archived' state (kept for relational
    inference, never surfaced); True if a row was changed."""
    db = _get_db()
    cursor = db.execute("""
        UPDATE memories
        SET lifecycle_state = 'archived'
        WHERE id = ?
    """, (memory_id,))
    db.commit()
    return cursor.rowcount > 0
|
|
|
|
|
|
def auto_resolve_expired_followups() -> int:
    """Resolve follow-ups more than 48 hours past due; returns rows changed.

    'next_interaction' markers are exempt — their timer only starts once
    the follow-up is surfaced (see increment_surfaced_count).
    """
    db = _get_db()
    deadline = (datetime.now(timezone.utc) - timedelta(hours=48)).isoformat()
    cursor = db.execute("""
        UPDATE memories
        SET lifecycle_state = 'resolved',
            follow_up_due = NULL
        WHERE lifecycle_state = 'pending_followup'
          AND follow_up_due != 'next_interaction'
          AND follow_up_due < ?
    """, (deadline,))
    db.commit()
    return cursor.rowcount
|
|
|
|
|
|
def auto_archive_old_resolved() -> int:
    """Archive resolved memories created more than 7 days ago; returns
    the number of rows changed."""
    db = _get_db()
    threshold = (datetime.now(timezone.utc) - timedelta(days=7)).isoformat()
    cursor = db.execute("""
        UPDATE memories
        SET lifecycle_state = 'archived'
        WHERE lifecycle_state = 'resolved'
          AND created_at < ?
    """, (threshold,))
    db.commit()
    return cursor.rowcount
|
|
|
|
|
|
def increment_surfaced_count(memory_id: str) -> int:
    """Increment surfaced_count and return the new value.

    Auto-resolves when the new count reaches 2 (surfaced twice without
    user engagement) — note: an earlier docstring said ">= 1", which did
    not match the code. On the first surfacing, a 'next_interaction' due
    marker is replaced with the current timestamp so the 48h auto-resolve
    timer (see auto_resolve_expired_followups) starts running.

    Returns 0 if the memory does not exist.
    """
    db = _get_db()
    row = db.execute(
        "SELECT surfaced_count FROM memories WHERE id = ?", (memory_id,)
    ).fetchone()
    if not row:
        return 0

    # surfaced_count may be NULL on legacy rows; treat as 0.
    new_count = (row["surfaced_count"] or 0) + 1
    if new_count >= 2:
        # Auto-resolve: surfaced twice without user engagement
        db.execute("""
            UPDATE memories
            SET surfaced_count = ?, lifecycle_state = 'resolved', follow_up_due = NULL
            WHERE id = ?
        """, (new_count, memory_id))
    else:
        # Update next_interaction to actual timestamp so the 48h timer starts
        db.execute("""
            UPDATE memories
            SET surfaced_count = ?,
                follow_up_due = CASE
                    WHEN follow_up_due = 'next_interaction' THEN ?
                    ELSE follow_up_due
                END
            WHERE id = ?
        """, (new_count, _now_iso(), memory_id))
    db.commit()
    return new_count
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Deduplication
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def find_similar(
    character_id: str,
    content: str,
    memory_type: str = "semantic",
    threshold: float = 0.85,
) -> dict | None:
    """Look for an existing active memory semantically close to *content*.

    Checks the five nearest neighbours in the vector table; the first one
    whose (1 - distance) similarity clears *threshold* and belongs to this
    character with the requested memory_type is returned, else None.
    """
    db = _get_db()
    probe = _serialize_f32(get_embedding(content))

    neighbours = db.execute("""
        SELECT id, distance
        FROM memory_embeddings
        WHERE embedding MATCH ?
          AND k = 5
    """, (probe,)).fetchall()

    for candidate in neighbours:
        if max(0.0, 1.0 - candidate["distance"]) < threshold:
            continue
        match = db.execute("""
            SELECT * FROM memories
            WHERE id = ? AND character_id = ? AND memory_type = ?
              AND lifecycle_state = 'active'
        """, (candidate["id"], character_id, memory_type)).fetchone()
        if match:
            return _row_to_dict(match)

    return None
|
|
|
|
|
|
def add_or_merge_memory(
    character_id: str,
    content: str,
    memory_type: str | None = None,
    category: str = "other",
    importance: float | None = None,
    privacy_level: str | None = None,
    tags: list[str] | None = None,
    follow_up_due: str | None = None,
    follow_up_context: str | None = None,
    source: str = "user_explicit",
    expires_at: str | None = None,
    dedup_threshold: float = 0.85,
) -> dict:
    """Insert a memory, merging into an existing near-duplicate when possible.

    Only semantic memories are deduplicated: if an active semantic memory
    is similar above dedup_threshold, its content is updated in place
    instead of inserting a new row. Episodic memories are always added.
    """
    effective_type = memory_type if memory_type else classify_memory(content)["memory_type"]

    if effective_type == "semantic":
        duplicate = find_similar(character_id, content, "semantic", dedup_threshold)
        if duplicate is not None:
            merged = update_memory(duplicate["id"], content=content)
            if merged is not None:
                return merged

    return add_memory(
        character_id=character_id,
        content=content,
        memory_type=memory_type,
        category=category,
        importance=importance,
        privacy_level=privacy_level,
        tags=tags,
        follow_up_due=follow_up_due,
        follow_up_context=follow_up_context,
        source=source,
        expires_at=expires_at,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Migration from JSON
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Mapping from old JSON categories to new memory types
# (episodic = time-bound events/feelings; semantic = stable facts).
_CATEGORY_TO_TYPE = {
    "preference": "semantic",
    "personal_info": "semantic",
    "interaction": "episodic",
    "emotional": "episodic",
    "system": "semantic",
    "tool_usage": "semantic",
    "home_layout": "semantic",
    "device": "semantic",
    "routine": "semantic",
    "other": "semantic",
}

# Default importance (0..1, same scale as the importance column) assigned
# to each legacy category during migration.
_CATEGORY_TO_IMPORTANCE = {
    "personal_info": 0.7,
    "preference": 0.6,
    "emotional": 0.5,
    "interaction": 0.4,
    "system": 0.4,
    "tool_usage": 0.3,
    "home_layout": 0.5,
    "device": 0.4,
    "routine": 0.5,
    "other": 0.4,
}

# Legacy categories that migrate with an elevated privacy level;
# everything else becomes 'standard'.
_CATEGORY_TO_PRIVACY = {
    "emotional": "sensitive",
    "personal_info": "sensitive",
}
|
|
|
|
|
|
def migrate_from_json(memories_dir: str | None = None) -> dict:
    """Migrate legacy JSON memory files into SQLite.

    Scans <dir>/personal/*.json (per-character files) and <dir>/general.json
    (shared memories), inserts each record via add_memory(), restores the
    original createdAt timestamp, and renames processed files to *.json.bak.

    Returns {"migrated": int, "skipped": int, "errors": [str]}.

    Bug fixes vs. the original:
    - The created_at fix-up previously targeted the last row by
      "ORDER BY rowid DESC LIMIT 1", which updates the WRONG row if any
      other writer inserts concurrently; it now uses the id returned by
      add_memory() directly.
    - Files are renamed to .bak after the `with open(...)` block closes
      them, so the rename also works on platforms that forbid renaming
      open files.
    """
    db = _get_db()
    mem_dir = Path(memories_dir) if memories_dir else MEMORIES_DIR

    migrated = 0
    skipped = 0
    errors: list[str] = []

    def _import_record(character_id: str, mem: dict, label: str, *, category_privacy: bool) -> None:
        # Insert one legacy record, preserving its original timestamp.
        nonlocal migrated, skipped
        content = mem.get("content", "").strip()
        if not content:
            skipped += 1
            return
        category = mem.get("category", "other")
        created_at = mem.get("createdAt", _now_iso())
        privacy = (
            _CATEGORY_TO_PRIVACY.get(category, "standard")
            if category_privacy else "standard"
        )
        try:
            record = add_memory(
                character_id=character_id,
                content=content,
                memory_type=_CATEGORY_TO_TYPE.get(category, "semantic"),
                category=category,
                importance=_CATEGORY_TO_IMPORTANCE.get(category, 0.5),
                privacy_level=privacy,
                source="migrated_json",
            )
            # Restore the legacy timestamp on the row we just created.
            db.execute(
                "UPDATE memories SET created_at = ? WHERE id = ?",
                (created_at, record["id"]),
            )
            db.commit()
            migrated += 1
        except Exception as e:
            errors.append(f"{label}: {e}")

    # Migrate personal (per-character) memories.
    personal_dir = mem_dir / "personal"
    if personal_dir.exists():
        for json_file in personal_dir.glob("*.json"):
            try:
                with open(json_file) as f:
                    data = json.load(f)
                character_id = data.get("characterId", json_file.stem)
                for mem in data.get("memories", []):
                    _import_record(
                        character_id, mem, f"personal/{json_file.name}",
                        category_privacy=True,
                    )
                # Keep the source file as a backup once processed.
                json_file.rename(json_file.with_suffix(".json.bak"))
            except Exception as e:
                errors.append(f"personal/{json_file.name}: {e}")

    # Migrate general (shared) memories.
    general_file = mem_dir / "general.json"
    if general_file.exists():
        try:
            with open(general_file) as f:
                data = json.load(f)
            for mem in data.get("memories", []):
                _import_record("shared", mem, "general.json", category_privacy=False)
            general_file.rename(general_file.with_suffix(".json.bak"))
        except Exception as e:
            errors.append(f"general.json: {e}")

    return {"migrated": migrated, "skipped": skipped, "errors": errors}
|