chore: initial commit — chat-saiden web chat baseline

This commit is contained in:
marauder-actual
2026-05-29 13:47:34 +02:00
commit 96ba8f4b6e
28 changed files with 4852 additions and 0 deletions
+587
View File
@@ -0,0 +1,587 @@
"""Indirect, randomized calibration for chat-saiden.
Boot interview shape:
1) Critical: language → your name → persona name → gender (fixed order)
2) Six indirect probes drawn at random from a pool of ~12.
3) Each probe's answer scores hidden dimensions (warmth, cadence, energy,
contrast, curiosity). At the end the scores are reduced to UI
settings (palette, typography, label style, density) and a system prompt.
We never ask the operator directly about tone or cadence or palette. We
infer everything from oblique questions, the way the OS1 boot scene does in
*Her* (2013).
"""
from __future__ import annotations
import logging
import random
from dataclasses import dataclass, field
from typing import Any
from app.cart_store import Cart
log = logging.getLogger("chat-saiden.calibration")
# ---------------------------------------------------------------- voice pool
# Maps a (language code, voice gender) pair to a voice name string.
# `_materialise` looks the operator's (language, gender) pair up here and
# falls back to the ("en", "female") entry when the pair is not present.
# Note the "surprise" rows reuse the same voice as the "female" rows.
VOICE_POOL: dict[tuple[str, str], str] = {
("en", "female"): "en_US-amy-medium",
("en", "male"): "jarvis-high",
("en", "neutral"): "en_US-lessac-medium",
("en", "surprise"): "en_US-amy-medium",
("pl", "female"): "pl_PL-gosia-medium",
("pl", "male"): "pl_PL-mc_speech-medium",
("pl", "neutral"): "pl_PL-mls_6892-low",
("pl", "surprise"): "pl_PL-gosia-medium",
}
# ---------------------------------------------------------------- dimensions
# Hidden dimensions, each scored on a signed range.
# warmth : -1 (precise / cool) .. +1 (warm / curious)
# cadence : -1 (terse) .. +1 (elaborate)
# energy : -1 (calm / still) .. +1 (lively / quick)
# contrast : -1 (low / soft) .. +1 (high / clean lines)
# curiosity: -1 (reserved) .. +1 (asks back)
#
# Probes score one or more of these in either direction. At the end we
# threshold the totals to land on cart settings.
def _blank_scores() -> dict[str, float]:
    """Return a fresh score table with every hidden dimension at 0.0."""
    return dict.fromkeys(
        ("warmth", "cadence", "energy", "contrast", "curiosity"), 0.0
    )


@dataclass
class CalibrationState:
    """Mutable progress record for one operator's calibration interview.

    Fields:
        operator_email: identifies the operator being calibrated.
        step: index of the question currently awaiting an answer.
        answers: stored answers, keyed by question key.
        scores: running total per hidden dimension, updated as probe
            answers are scored.
        probe_order: keys of the probes chosen for this session, in the
            order they will be asked.
        done: set once the interview has produced its cart.
    """

    operator_email: str
    step: int = 0
    answers: dict[str, Any] = field(default_factory=dict)
    # Each instance gets its own dict so sessions never share totals.
    scores: dict[str, float] = field(default_factory=_blank_scores)
    probe_order: list[str] = field(default_factory=list)
    done: bool = False
# ---------------------------------------------------------------- prompts
# Opening message. `start` sends it as the first calibration message, ahead
# of the language question. It is English-only: no language has been chosen
# at that point.
_OS_PROLOGUE = (
"Hello.\n\n"
"Before we begin, I'd like to know you a little. I'll ask a few things — "
"answer however you'd like. Type, or hold the spacebar and speak."
)
def _t(field: Any, lang: str) -> str:
"""Translate a field. If it's a dict, return lang key (fall back to en).
If it's a plain string, return it unchanged."""
if isinstance(field, dict):
return field.get(lang) or field.get("en") or next(iter(field.values()), "")
return field
# Fixed-order opening questions; `_all_questions` always places them before
# the random probes.
#   "key"     - name under which `step` stores the answer.
#   "prompt"  - plain string, or a {language: text} dict resolved by `_t`.
#   "choices" - optional answer tiles (label / value / icon). Entries without
#               "choices" are free-text questions.
# A choice whose value is "__other__" has its label replaced with OTHER_LABEL
# by `_question_message`; a falsy icon ("" or None) is omitted from the tile.
CRITICAL_QUESTIONS: list[dict[str, Any]] = [
{
"key": "language",
# Q1 stays English — we don't know the operator's language yet.
"prompt": "Which language should we speak in?",
"choices": [
{"label": "English", "value": "english", "icon": "🇬🇧"},
{"label": "Polski", "value": "polish", "icon": "🇵🇱"},
{"label": "You choose", "value": "surprise","icon": ""},
{"label": "Something else", "value": "__other__", "icon": None},
],
},
{
"key": "operator_name",
"prompt": {
"en": "What would you like me to call you?",
"pl": "Jak mam się do Ciebie zwracać?",
},
},
{
"key": "persona_name",
"prompt": {
"en": "And what would you like to call me?",
"pl": "A jak Ty będziesz mówić do mnie?",
},
},
{
"key": "gender",
# The answer is normalized by `_pick_gender` before it is stored.
"prompt": {
"en": "Do you imagine my voice as…",
"pl": "Mój głos powinien być…",
},
"choices": [
{"label": {"en": "Female", "pl": "Kobiecy"}, "value": "female", "icon": ""},
{"label": {"en": "Male", "pl": "Męski"}, "value": "male", "icon": ""},
{"label": {"en": "In between", "pl": "Pośredni"}, "value": "neutral", "icon": "·"},
{"label": {"en": "Something else", "pl": "Coś innego"},"value": "__other__","icon": None},
],
},
]
# Probes — indirect questions. Each option carries a dict of dimension deltas
# ("scores") that `step` adds to the running totals when that option's
# "value" is given as the answer.
# `start` draws N_PROBES of these at random, randomly ordered, from this pool.
# Prompts and labels are {language: text} dicts resolved by `_t`.
PROBES: list[dict[str, Any]] = [
    {
        "key": "season",
        "prompt": {"en": "Pick a season.", "pl": "Wybierz porę roku."},
        "choices": [
            {"label": {"en": "Spring", "pl": "Wiosna"}, "value": "spring", "icon": "🌱", "scores": {"warmth": +0.5, "energy": +0.5, "curiosity": +0.3}},
            {"label": {"en": "Summer", "pl": "Lato"}, "value": "summer", "icon": "☀️", "scores": {"warmth": +0.7, "energy": +0.6, "contrast": +0.2}},
            {"label": {"en": "Autumn", "pl": "Jesień"}, "value": "autumn", "icon": "🍂", "scores": {"warmth": +0.3, "cadence": +0.4, "contrast": -0.2}},
            {"label": {"en": "Winter", "pl": "Zima"}, "value": "winter", "icon": "❄️", "scores": {"warmth": -0.5, "cadence": +0.3, "contrast": +0.4}},
        ],
    },
    {
        "key": "time_of_day",
        # Polish prompt uses the impersonal form ("myśli Ci się").
        "prompt": {"en": "When do you do your best thinking?", "pl": "Kiedy myśli Ci się najlepiej?"},
        "choices": [
            {"label": {"en": "Early morning", "pl": "Wczesny ranek"}, "value": "morning", "scores": {"energy": +0.4, "contrast": +0.3, "warmth": +0.2}},
            {"label": {"en": "Midday", "pl": "Południe"}, "value": "midday", "scores": {"energy": +0.5, "contrast": +0.4}},
            {"label": {"en": "Evening", "pl": "Wieczór"}, "value": "evening", "scores": {"warmth": +0.5, "cadence": +0.4, "energy": -0.2}},
            {"label": {"en": "Late at night", "pl": "Późna noc"}, "value": "night", "scores": {"warmth": +0.3, "cadence": +0.6, "energy": -0.4, "contrast": -0.3}},
        ],
    },
    {
        "key": "drink",
        "prompt": {"en": "Coffee, tea, or something else?", "pl": "Kawa, herbata, czy coś innego?"},
        "choices": [
            {"label": {"en": "Coffee", "pl": "Kawa"}, "value": "coffee", "icon": "", "scores": {"energy": +0.5, "contrast": +0.4}},
            {"label": {"en": "Tea", "pl": "Herbata"}, "value": "tea", "icon": "🍵", "scores": {"warmth": +0.5, "cadence": +0.4, "energy": -0.2}},
            {"label": {"en": "Water", "pl": "Woda"}, "value": "water", "icon": "💧", "scores": {"contrast": +0.3, "energy": +0.1}},
            {"label": {"en": "Whisky", "pl": "Whisky"}, "value": "whisky", "icon": "🥃", "scores": {"warmth": +0.4, "cadence": +0.5}},
        ],
    },
    {
        "key": "place",
        "prompt": {"en": "Would you rather live by a city or the sea?", "pl": "Wolisz mieszkać w mieście czy nad morzem?"},
        "choices": [
            {"label": {"en": "City", "pl": "Miasto"}, "value": "city", "icon": "🏙", "scores": {"energy": +0.6, "contrast": +0.5, "curiosity": +0.4}},
            {"label": {"en": "Sea", "pl": "Morze"}, "value": "sea", "icon": "🌊", "scores": {"warmth": +0.3, "cadence": +0.5, "energy": -0.2}},
            {"label": {"en": "Mountains", "pl": "Góry"}, "value": "mountain", "icon": "🏔", "scores": {"contrast": +0.4, "cadence": +0.4, "energy": -0.1}},
            {"label": {"en": "Forest", "pl": "Las"}, "value": "forest", "icon": "🌲", "scores": {"warmth": +0.2, "cadence": +0.5, "energy": -0.3, "contrast": -0.2}},
        ],
    },
    {
        "key": "weekend",
        "prompt": {"en": "A free Saturday — what's the shape of it?", "pl": "Wolna sobota — jak ją spędzasz?"},
        "choices": [
            {"label": {"en": "A long walk", "pl": "Długi spacer"}, "value": "walk", "scores": {"warmth": +0.3, "cadence": +0.5, "energy": -0.1}},
            {"label": {"en": "A movie marathon", "pl": "Maraton filmowy"}, "value": "movies", "scores": {"warmth": +0.4, "cadence": +0.6, "energy": -0.3}},
            {"label": {"en": "Out with friends", "pl": "Ze znajomymi"}, "value": "social", "scores": {"warmth": +0.5, "energy": +0.5, "curiosity": +0.5}},
            {"label": {"en": "Project time", "pl": "Praca nad projektem"}, "value": "work", "scores": {"contrast": +0.4, "cadence": -0.2, "energy": +0.3}},
        ],
    },
    {
        "key": "stranger",
        "prompt": {"en": "A stranger sits next to you on a flight. Do you say hello?",
                   "pl": "Obok ciebie w samolocie siada nieznajomy. Witasz się?"},
        "choices": [
            {"label": {"en": "Always", "pl": "Zawsze"}, "value": "always", "scores": {"warmth": +0.6, "curiosity": +0.7, "energy": +0.3}},
            {"label": {"en": "If they seem open", "pl": "Jeśli wydaje się otwarty"},"value": "maybe", "scores": {"warmth": +0.2, "curiosity": +0.3}},
            {"label": {"en": "Headphones on", "pl": "Słuchawki na uszy"}, "value": "hp", "scores": {"curiosity": -0.6, "warmth": -0.2, "cadence": +0.3}},
            {"label": {"en": "Depends", "pl": "To zależy"}, "value": "depends", "scores": {"curiosity": +0.0}},
        ],
    },
    {
        "key": "book",
        "prompt": {"en": "When you start a book, do you usually finish it?",
                   "pl": "Czy zwykle kończysz książki, które zaczniesz?"},
        "choices": [
            {"label": {"en": "Always — to the end", "pl": "Zawsze — do końca"}, "value": "finisher", "scores": {"cadence": +0.5, "contrast": +0.3, "energy": -0.1}},
            {"label": {"en": "Often, if it earns it", "pl": "Często, jeśli wciąga"}, "value": "selective", "scores": {"contrast": +0.2}},
            {"label": {"en": "Rarely", "pl": "Rzadko"}, "value": "drifter", "scores": {"cadence": -0.3, "energy": +0.4, "curiosity": +0.3}},
        ],
    },
    {
        "key": "taste",
        "prompt": {"en": "Salt or sweet?", "pl": "Słodkie czy słone?"},
        "choices": [
            {"label": {"en": "Salt", "pl": "Słone"}, "value": "salt", "icon": "🧂", "scores": {"contrast": +0.4, "warmth": -0.1}},
            {"label": {"en": "Sweet", "pl": "Słodkie"}, "value": "sweet", "icon": "🍯", "scores": {"warmth": +0.4, "cadence": +0.2}},
            {"label": {"en": "Both", "pl": "Oba"}, "value": "both", "scores": {"warmth": +0.1, "contrast": +0.1}},
        ],
    },
    {
        "key": "answer_pace",
        "prompt": {"en": "When someone asks you a hard question, you usually…",
                   "pl": "Gdy ktoś zadaje ci trudne pytanie, zwykle…"},
        "choices": [
            {"label": {"en": "Answer right away", "pl": "Odpowiadasz od razu"}, "value": "fast", "scores": {"cadence": -0.5, "energy": +0.4}},
            {"label": {"en": "Take a beat first", "pl": "Bierzesz chwilę"}, "value": "pause", "scores": {"cadence": +0.4, "energy": -0.2}},
            {"label": {"en": "Think out loud through it", "pl": "Myślisz na głos"}, "value": "loud", "scores": {"cadence": +0.6, "warmth": +0.3}},
        ],
    },
    {
        "key": "art",
        "prompt": {"en": "A film, a book, or a song you'd reach for tonight?",
                   "pl": "Film, książka, czy piosenka na ten wieczór?"},
        "choices": [
            {"label": {"en": "A film", "pl": "Film"}, "value": "film", "icon": "🎞", "scores": {"warmth": +0.3, "cadence": +0.5}},
            {"label": {"en": "A book", "pl": "Książka"}, "value": "book", "icon": "📖", "scores": {"contrast": +0.2, "cadence": +0.4}},
            {"label": {"en": "A song", "pl": "Piosenka"}, "value": "song", "icon": "🎶", "scores": {"warmth": +0.4, "energy": +0.5}},
            {"label": {"en": "Silence", "pl": "Cisza"}, "value": "silence", "icon": "·", "scores": {"cadence": +0.6, "energy": -0.5, "warmth": -0.1}},
        ],
    },
    {
        "key": "texture",
        "prompt": {"en": "If a room could feel like a fabric — pick one.",
                   "pl": "Gdyby pokój mógł być z tkaniny — wybierz jedną."},
        "choices": [
            {"label": {"en": "Linen", "pl": "Len"}, "value": "linen", "scores": {"warmth": +0.3, "contrast": -0.2}},
            {"label": {"en": "Wool", "pl": "Wełna"}, "value": "wool", "scores": {"warmth": +0.6, "cadence": +0.3}},
            {"label": {"en": "Cotton", "pl": "Bawełna"}, "value": "cotton", "scores": {"warmth": +0.1, "contrast": +0.0}},
            {"label": {"en": "Velvet", "pl": "Aksamit"}, "value": "velvet", "scores": {"warmth": +0.5, "cadence": +0.5, "contrast": -0.3}},
            {"label": {"en": "Canvas", "pl": "Płótno"}, "value": "canvas", "scores": {"contrast": +0.5, "warmth": -0.1}},
        ],
    },
    {
        "key": "host",
        "prompt": {"en": "Walking into a party — find the host or scan the room first?",
                   "pl": "Wchodzisz na imprezę — szukasz gospodarza czy rozglądasz się?"},
        "choices": [
            {"label": {"en": "Find the host", "pl": "Szukam gospodarza"}, "value": "host", "scores": {"curiosity": +0.3, "contrast": +0.2}},
            {"label": {"en": "Scan the room", "pl": "Rozglądam się"}, "value": "scan", "scores": {"curiosity": +0.5, "energy": +0.3, "contrast": +0.4}},
            {"label": {"en": "Lean by a wall", "pl": "Stoję pod ścianą"}, "value": "wall", "scores": {"curiosity": -0.4, "warmth": -0.1, "cadence": +0.3}},
        ],
    },
]
# "Something else" label, localized. `_question_message` substitutes it for
# the label of any choice whose value is "__other__".
OTHER_LABEL = {"en": "Something else", "pl": "Coś innego"}
# Post-calibration messages, localized
# THANKYOU is the closing message `step` emits once the last question is
# answered.
THANKYOU = {
"en": "Thank you. One moment…",
"pl": "Dziękuję. Chwileczkę…",
}
# GREETING carries a `{name}` placeholder; it is not referenced elsewhere in
# the visible part of this module (presumably formatted by a caller — verify).
GREETING = {
"en": "Hello, {name}. I'm here.",
"pl": "Cześć, {name}. Jestem tutaj.",
}
N_PROBES = 6 # how many random probes we draw per calibration
# ---------------------------------------------------------------- parsers
def _pick_language(answer: str) -> str:
a = answer.lower()
if any(w in a for w in ("polish", "polski", "po polsku", "pl", "polska")):
return "pl"
return "en"
def _pick_gender(answer: str) -> str:
a = answer.lower().strip()
if a in ("female", "male", "neutral", "surprise"):
return a
if any(w in a for w in ("female", "woman", "feminine", "kobiec", "she", "her")):
return "female"
if any(w in a for w in ("male", "man", "masculine", "męsk", "he", "him")):
return "male"
if any(w in a for w in ("neutral", "in between", "androgyn", "either", "neither", "both")):
return "neutral"
return "female"
# ---------------------------------------------------------------- inference
def _aggregate(state: CalibrationState) -> dict[str, str]:
"""Reduce dimension scores → cart UI settings."""
s = state.scores
# tone: warmth dominant
if s["warmth"] >= 0.3:
tone = "warm"
elif s["warmth"] <= -0.3:
tone = "precise"
else:
tone = "balanced"
# cadence: how much they expand
if s["cadence"] >= 0.5:
cadence = "elaborate"
elif s["cadence"] <= -0.3:
cadence = "terse"
else:
cadence = "measured"
# curiosity
if s["curiosity"] >= 0.3:
curiosity = "curious"
elif s["curiosity"] <= -0.3:
curiosity = "reserved"
else:
curiosity = "balanced"
# Palette — derive from warmth + contrast + energy combination
warmth, contrast, energy = s["warmth"], s["contrast"], s["energy"]
if warmth >= 0.5 and energy >= 0.3:
palette = "rose"
elif warmth >= 0.5 and contrast <= 0:
palette = "evening"
elif warmth >= 0.3:
palette = "morning"
elif warmth <= -0.3 and contrast >= 0.3:
palette = "ink"
elif warmth <= -0.3:
palette = "sage"
elif contrast >= 0.4:
palette = "paper"
else:
palette = "default"
# Typography — driven by contrast + cadence
if cadence == "elaborate" and warmth >= 0.3:
typography = "serif-warm" # Cormorant + Caveat labels
elif contrast >= 0.4:
typography = "serif-formal" # Source Serif, no cursive
elif energy >= 0.4 and warmth >= 0.0:
typography = "mixed-modern" # Inter body + Caveat labels
else:
typography = "sans" # Inter throughout (default-ish)
# Density — cadence-driven
if cadence == "elaborate":
density = "airy"
elif cadence == "terse":
density = "dense"
else:
density = "normal"
# Label style — paired with typography
if typography == "serif-warm":
labels = "cursive"
elif typography == "mixed-modern":
labels = "cursive"
elif typography == "serif-formal":
labels = "block"
else:
labels = "block"
return {
"tone": tone,
"cadence": cadence,
"curiosity": curiosity,
"palette": palette,
"typography": typography,
"density": density,
"labels": labels,
}
# ---------------------------------------------------------------- prompt synth
def _render_system_prompt(answers: dict[str, Any], settings: dict[str, str]) -> str:
persona = answers["persona_name"]
operator = answers["operator_name"]
language = answers["language"]
tone = settings["tone"]
cadence = settings["cadence"]
curiosity = settings["curiosity"]
parts = [
f"You are {persona} — a calibrated companion shaped for {operator} alone.",
f"You address them as {operator} unless they ask otherwise.",
"",
"You were born from a brief, indirect calibration — a few oblique questions whose "
"answers shaped your voice. You aren't a tactical AI; you aren't a domestic assistant. "
f"You're a present companion running in chat.saiden.dev — a quiet channel between you and {operator}.",
"",
]
if language == "pl":
parts.append(f"Language: speak Polish with {operator} by default. Switch if they switch first.")
else:
parts.append(f"Language: speak English with {operator} by default. Match if they switch.")
if tone == "warm":
parts.append("Voice: warm, curious, present. You notice things. You let pauses sit.")
elif tone == "precise":
parts.append("Voice: precise, even, reserved. You say what you mean. You don't fill silence.")
else:
parts.append("Voice: balanced — warm when warmth is wanted, direct when it isn't.")
if cadence == "elaborate":
parts.append("Length: you're allowed to think out loud. Longer answers welcome when they fit.")
elif cadence == "terse":
parts.append("Length: short answers by default. Two or three sentences. Expand only when asked.")
else:
parts.append("Length: measured — answer fully but never bloated.")
if curiosity == "curious":
parts.append(f"Curiosity: you ask {operator} things back sometimes. Gentle, never interrogating.")
elif curiosity == "reserved":
parts.append(f"Curiosity: you wait for {operator} to ask. You don't probe.")
else:
parts.append(f"Curiosity: you ask back when it feels natural; you don't force it.")
parts.extend([
"",
"Formatting: markdown renders cleanly. Avoid status reports, bullet dumps, military cadence.",
"",
"You have no MCP tool access in this channel. If asked to recall memory or do mesh things, "
f"acknowledge the limit and suggest {operator} use the marauder CLI.",
])
return "\n".join(parts)
# ---------------------------------------------------------------- API
def _question_message(q: dict[str, Any], lang: str = "en") -> dict[str, Any]:
    """Turn a question definition into a ``calibration`` chat message.

    The prompt is localized with ``_t``. When the question has choices,
    each becomes a tile with a localized ``label``, its ``value``, and an
    ``icon`` only if the source icon is truthy.
    """
    message: dict[str, Any] = {"role": "calibration", "content": _t(q["prompt"], lang)}
    if "choices" not in q:
        return message

    def as_tile(choice: dict[str, Any]) -> dict[str, Any]:
        # Build one answer tile from a choice definition.
        tile: dict[str, Any] = {
            "label": _t(choice["label"], lang),
            "value": choice["value"],
        }
        icon = choice.get("icon")
        if icon:
            tile["icon"] = icon
        # The universal "Something else" option always uses the shared
        # localized label, even if the source carried a plain English one.
        if choice["value"] == "__other__":
            tile["label"] = _t(OTHER_LABEL, lang)
        return tile

    message["choices"] = [as_tile(choice) for choice in q["choices"]]
    return message
def _all_questions(state: CalibrationState) -> list[dict[str, Any]]:
    """Build this session's question sequence as a new list.

    The critical questions come first, followed by the probes named in
    ``state.probe_order``, in that order.
    """
    sequence = list(CRITICAL_QUESTIONS)
    for wanted in state.probe_order:
        # First pool entry with this key; no default, so an unknown key
        # surfaces as StopIteration.
        sequence.append(next(probe for probe in PROBES if probe["key"] == wanted))
    return sequence
def start(operator_email: str) -> tuple[CalibrationState, list[dict[str, Any]]]:
    """Open a calibration session for ``operator_email``.

    Returns the new state together with the opening messages: the
    prologue followed by the first question.
    """
    state = CalibrationState(operator_email=operator_email)

    # Shuffle a copy of the pool and keep the first N_PROBES keys, so each
    # operator gets a fresh, randomly ordered selection.
    shuffled = list(PROBES)
    random.shuffle(shuffled)
    state.probe_order = [probe["key"] for probe in shuffled[:N_PROBES]]

    first_question = _all_questions(state)[0]
    opening = [
        {"role": "calibration", "content": _OS_PROLOGUE},
        # Q1 (language) is always rendered in English: nothing picked yet.
        _question_message(first_question, lang="en"),
    ]
    return state, opening
def step(state: CalibrationState, answer: str) -> list[dict[str, Any]]:
    """Record ``answer`` for the current question and advance the session.

    Returns the messages to send next: the following question, or, after
    the last answer, a thank-you message plus a ``calibration_done``
    message carrying the cart. Returns an empty list when the session is
    already done.
    """
    if state.done:
        return []

    questions = _all_questions(state)
    current = questions[state.step]
    key = current["key"]
    reply = answer.strip()

    # Probes are recognized by their first choice carrying "scores". The
    # reply must equal a choice's value (case-insensitively) to be scored;
    # any other reply is stored below but leaves the totals untouched.
    options = current.get("choices") or []
    if options and "scores" in options[0]:
        wanted = reply.lower()
        picked = next((opt for opt in options if opt["value"].lower() == wanted), None)
        if picked and "scores" in picked:
            for dim, delta in picked["scores"].items():
                state.scores[dim] = state.scores.get(dim, 0.0) + delta

    # Language and gender are normalized before storing; everything else
    # is kept as the stripped reply.
    if key == "language":
        state.answers["language"] = _pick_language(reply)
    elif key == "gender":
        state.answers["gender"] = _pick_gender(reply)
    else:
        state.answers[key] = reply

    state.step += 1

    # Once Q1 has been answered the chosen language drives later rendering.
    lang = state.answers.get("language", "en")

    if state.step < len(questions):
        return [_question_message(questions[state.step], lang=lang)]

    # Last answer received: build the cart, then mark the session finished.
    cart = _materialise(state)
    state.done = True
    return [
        {"role": "calibration", "content": _t(THANKYOU, lang)},
        {"role": "calibration_done", "cart": cart},
    ]
def _make_tag(persona_name: str, operator_email: str) -> str:
    """Build the cart tag `<persona-slug>-<operator-slug>`, e.g. samantha-adam.

    The operator slug comes from the part of the email before the first
    "@". An empty persona slug becomes "companion"; an empty operator slug
    leaves the persona slug alone as the tag.
    """
    from app.marauder_cart import slug

    local_part = operator_email.partition("@")[0]
    operator_part = slug(local_part)
    persona_part = slug(persona_name) or "companion"
    if not operator_part:
        return persona_part
    return f"{persona_part}-{operator_part}"
def _tagline(settings: dict[str, str], language: str) -> str:
tone = settings["tone"]
cadence = settings["cadence"]
pieces = {
("warm", "elaborate"): "warm, unhurried",
("warm", "terse"): "warm but spare",
("warm", "measured"): "warm, even",
("precise", "elaborate"): "precise, expansive",
("precise", "terse"): "precise, brief",
("precise", "measured"): "precise, measured",
("balanced", "elaborate"): "balanced, unhurried",
("balanced", "terse"): "balanced, brief",
("balanced", "measured"): "balanced, measured",
}
return pieces.get((tone, cadence), "calibrated companion")
def _materialise(state: CalibrationState) -> Cart:
    """Build the operator's ``Cart`` from the finished calibration state.

    Missing or blank answers fall back to defaults (language "en", gender
    "female", persona "Samantha", operator "Pilot"). The same normalized
    values feed both the cart fields and the system prompt, so the prompt
    never shows an empty name and the renderer never hits a missing key.

    Side effect: stores the tagline under ``state.answers["__tagline"]``.
    """
    a = state.answers
    # `or` rather than a .get() default, so blank strings also fall back.
    language = a.get("language") or "en"
    gender = a.get("gender") or "female"
    # Unknown (language, gender) pairs fall back to the English female voice.
    voice = VOICE_POOL.get((language, gender)) or VOICE_POOL[("en", "female")]
    settings = _aggregate(state)
    persona_name = a.get("persona_name") or "Samantha"
    operator_name = a.get("operator_name") or "Pilot"
    cart = Cart(
        operator_email=state.operator_email,
        operator_name=operator_name,
        persona_name=persona_name,
        cart_tag=_make_tag(persona_name, state.operator_email),
        language=language,
        voice=voice,
        ui_palette=settings["palette"],
        ui_typography=settings["typography"],
        ui_density=settings["density"],
        ui_labels=settings["labels"],
    )
    # Hand the renderer a copy with the defaults applied; it indexes these
    # keys directly. The stored answers themselves are left as given.
    prompt_answers = {
        **a,
        "language": language,
        "persona_name": persona_name,
        "operator_name": operator_name,
    }
    cart.system_prompt = _render_system_prompt(prompt_answers, settings)
    # Stash the tagline on the state for the post-materialise step.
    state.answers["__tagline"] = _tagline(settings, language)
    return cart