854ac6ddaa
📁 Files changed: 2 📝 Lines changed: 298 • models-db.md • models.db
9.0 KiB
9.0 KiB
Models Database Implementation
SQLite database (models.db) for local model metadata storage and CivitAI model information cache.
Dual Purpose
- Metadata Storage — Track local safetensor files with their SHA256 hashes and link them to CivitAI model info
- Information Source — Cache CivitAI API responses for offline queries, search, and model discovery
Schema Overview
Local Files Tracking
-- Your local safetensor files
local_files (
id INTEGER PRIMARY KEY,
file_path TEXT NOT NULL UNIQUE, -- Absolute path to file
sha256 TEXT NOT NULL, -- Full SHA256 hash
header_size INTEGER, -- Safetensor header size in bytes
tensor_count INTEGER, -- Number of tensors in file
civitai_model_id INTEGER, -- Links to models.civitai_id
civitai_version_id INTEGER, -- Links to model_versions.civitai_id
created_at TEXT,
updated_at TEXT
)
-- Key-value metadata extracted from safetensor headers
safetensor_metadata (
id INTEGER PRIMARY KEY,
local_file_id INTEGER NOT NULL, -- FK to local_files.id
key TEXT NOT NULL,
value TEXT,
UNIQUE(local_file_id, key)
)
CivitAI Model Cache
-- Model creators
creators (id, username, image_url)
-- Models from CivitAI
models (
id INTEGER PRIMARY KEY,
civitai_id INTEGER UNIQUE NOT NULL, -- CivitAI model ID
name TEXT NOT NULL,
description TEXT, -- HTML description
type TEXT NOT NULL, -- Checkpoint, LORA, etc.
nsfw INTEGER,
creator_id INTEGER, -- FK to creators
download_count INTEGER,
thumbs_up_count INTEGER,
...
)
-- Model versions (each model has multiple versions)
model_versions (
id INTEGER PRIMARY KEY,
civitai_id INTEGER UNIQUE NOT NULL, -- CivitAI version ID
model_id INTEGER NOT NULL, -- FK to models.id
name TEXT NOT NULL,
base_model TEXT, -- "SD 1.5", "SDXL 1.0", "Pony", etc.
download_url TEXT,
version_index INTEGER, -- 0 = latest
...
)
-- Trigger words for LoRAs
trained_words (version_id, word, position)
-- Downloadable files for each version
version_files (
id INTEGER PRIMARY KEY, -- Referenced by file_hashes.file_id
civitai_id INTEGER UNIQUE,
version_id INTEGER, -- FK to model_versions
name TEXT,
size_kb REAL,
format TEXT, -- safetensors, ckpt, etc.
fp TEXT, -- fp16, fp32, bf16
is_primary INTEGER,
download_url TEXT
)
-- File hashes (SHA256, AutoV1, AutoV2, etc.)
file_hashes (file_id, hash_type, hash_value)
Tags and Images
tags (id, name)
model_tags (model_id, tag_id)
-- Preview images with generation params
version_images (version_id, url, width, height, nsfw_level, ...)
image_generation_params (image_id, key, value) -- prompt, sampler, cfg, etc.
image_resources (image_id, name, type, hash, weight) -- LoRAs used in image
Views
-- Models with their latest version info
v_models_with_latest:
id, civitai_id, name, type, nsfw, creator, latest_version, base_model, download_count, thumbs_up_count
-- Local files with linked CivitAI info
v_local_files_full:
file_path, sha256, model_name, model_type, version_name, base_model, creator
Implementation Strategy
1. Scan Command (tsr db scan)
Scan local model directories and populate local_files:
def scan_models(directory: Path, db: Connection) -> None:
    """Scan a directory tree for safetensor files and record them in the database.

    Every ``*.safetensors`` file found recursively under ``directory`` is
    hashed, its header is read, and a row is inserted into (or refreshed in)
    ``local_files``.  The header's metadata entries are then upserted into
    ``safetensor_metadata`` under the id of the owning ``local_files`` row.

    Args:
        directory: Root directory to search recursively.
        db: Open database connection.  Nothing is committed here; transaction
            handling is left to the caller.
    """
    for path in directory.rglob("*.safetensors"):
        file_path = str(path)
        sha256 = compute_sha256(path)
        header = read_safetensor_header(path)

        # Insert or update local_files.  On a re-scan the header fields are
        # refreshed together with the hash so they cannot go stale when a
        # file is replaced in place.
        db.execute(
            """
            INSERT INTO local_files (file_path, sha256, header_size, tensor_count)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(file_path) DO UPDATE SET
                sha256 = excluded.sha256,
                header_size = excluded.header_size,
                tensor_count = excluded.tensor_count,
                updated_at = datetime('now')
            """,
            (file_path, sha256, header['size'], header['tensor_count']),
        )

        # Resolve the row id with an explicit lookup: the upsert may have
        # taken the UPDATE branch, in which case no new row id was produced.
        (file_id,) = db.execute(
            "SELECT id FROM local_files WHERE file_path = ?",
            (file_path,),
        ).fetchone()

        # Store metadata.  The conflict target is spelled out because
        # SQLite releases before 3.35 reject DO UPDATE without one.
        for key, value in header['metadata'].items():
            db.execute(
                """
                INSERT INTO safetensor_metadata (local_file_id, key, value)
                VALUES (?, ?, ?)
                ON CONFLICT(local_file_id, key) DO UPDATE SET value = excluded.value
                """,
                (file_id, key, json.dumps(value)),
            )
2. Link Command (tsr db link)
Match local files to CivitAI by hash lookup:
def link_to_civitai(db: Connection, api_key: str | None) -> None:
"""Link local files to CivitAI models using hash matching."""
unlinked = db.execute("""
SELECT id, sha256 FROM local_files
WHERE civitai_model_id IS NULL
""").fetchall()
for file_id, sha256 in unlinked:
# Check local hash cache first
version = db.execute("""
SELECT mv.civitai_id, mv.model_id
FROM file_hashes fh
JOIN version_files vf ON fh.file_id = vf.id
JOIN model_versions mv ON vf.version_id = mv.id
WHERE fh.hash_value = ? AND fh.hash_type = 'SHA256'
""", (sha256,)).fetchone()
if not version:
# Fall back to API lookup
data = fetch_civitai_by_hash(sha256, api_key)
if data:
store_model_version(db, data)
version = (data['id'], data['modelId'])
if version:
db.execute("""
UPDATE local_files
SET civitai_version_id = ?, civitai_model_id = ?
WHERE id = ?
""", (version[0], version[1], file_id))
3. Cache Command (tsr db cache)
Fetch and store full model details from CivitAI:
def cache_model(model_id: int, db: Connection, api_key: str | None) -> None:
    """Fetch and cache complete model data from CivitAI.

    Outline of the caching flow: the model payload is requested through
    ``fetch_civitai_model``; when nothing comes back the function returns
    without touching the database.  Otherwise the creator (if present) is
    upserted by username, the model row is upserted by ``civitai_id``, and
    every entry of the payload's ``modelVersions`` list is handed to
    ``store_version`` together with its list position as ``version_index``.

    NOTE(review): the ``...`` placeholders in the model upsert stand for an
    elided column list and parameter tuple; this snippet is a sketch and is
    not runnable as written.

    Args:
        model_id: Model id passed to ``fetch_civitai_model``.
        db: Open database connection.  No commit is issued here.
        api_key: Optional CivitAI API key, forwarded to the fetch helper.
    """
    data = fetch_civitai_model(model_id, api_key)
    if not data:
        return
    # Upsert creator
    # NOTE(review): the creator's row id is not captured here; presumably
    # models.creator_id has to be resolved from it in the elided model
    # upsert below — confirm.
    creator = data.get('creator', {})
    if creator:
        db.execute("""
INSERT INTO creators (username, image_url) VALUES (?, ?)
ON CONFLICT(username) DO UPDATE SET image_url = excluded.image_url
""", (creator['username'], creator.get('image')))
    # Upsert model
    db.execute("""
INSERT INTO models (civitai_id, name, description, type, nsfw, ...)
VALUES (?, ?, ?, ?, ?, ...)
ON CONFLICT(civitai_id) DO UPDATE SET ...
""", ...)
    # Process versions, files, hashes, images, trained words
    # NOTE(review): model_id here is the id the caller passed in (the one used
    # for the CivitAI fetch), while the schema describes
    # model_versions.model_id as a FK to models.id — confirm that
    # store_version maps between the two.
    for idx, version in enumerate(data.get('modelVersions', [])):
        store_version(db, model_id, version, version_index=idx)
4. Query Commands
List local models with CivitAI info:
def list_local_models(db: Connection) -> list[dict]:
    """List all local files with their linked CivitAI metadata.

    Reads every row of the ``v_local_files_full`` view, ordered by
    ``model_name``.

    Args:
        db: Open database connection.

    Returns:
        One dict per row, keyed by the view's column names.  The dicts are
        built from the cursor's column description, so the result has the
        declared ``list[dict]`` shape whatever ``row_factory`` the
        connection uses.
    """
    cursor = db.execute("""
        SELECT * FROM v_local_files_full ORDER BY model_name
    """)
    columns = [column[0] for column in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]
Search cached models:
def search_cached(query: str, model_type: str | None, db: Connection) -> list[dict]:
"""Search cached models without hitting the API."""
sql = """
SELECT m.*, mv.base_model, mv.download_url
FROM models m
JOIN model_versions mv ON mv.model_id = m.id AND mv.version_index = 0
WHERE m.name LIKE ?
"""
params = [f'%{query}%']
if model_type:
sql += " AND m.type = ?"
params.append(model_type)
return db.execute(sql, params).fetchall()
Find trigger words for a local LoRA:
def get_trigger_words(file_path: str, db: Connection) -> list[str]:
    """Get trigger words for a local LoRA file.

    Follows the file's ``civitai_version_id`` to the cached model version
    and reads that version's ``trained_words`` in ``position`` order.

    Args:
        file_path: Path of the file as stored in ``local_files.file_path``.
        db: Open database connection.

    Returns:
        The trigger words as plain strings, ordered by position.  The list is
        empty when the file is unknown, unlinked, or has no trained words.
    """
    rows = db.execute("""
        SELECT tw.word
        FROM trained_words tw
        JOIN model_versions mv ON tw.version_id = mv.id
        JOIN local_files lf ON lf.civitai_version_id = mv.civitai_id
        WHERE lf.file_path = ?
        ORDER BY tw.position
    """, (file_path,)).fetchall()
    # Each row holds a single column; unwrap it so callers receive the
    # declared list[str] rather than a list of one-element rows.
    return [row[0] for row in rows]
Database Location
Following XDG conventions, the database should live at:
from tensors.config import DATA_DIR
# Location of the SQLite models database inside the application's data directory.
DB_PATH = DATA_DIR / "models.db" # ~/.local/share/tensors/models.db
CLI Integration
# Scan models directory
tsr db scan /models/
# Link local files to CivitAI (uses API for unknown hashes)
tsr db link
# Cache a specific model's full data
tsr db cache 999258
# List local models with CivitAI info
tsr db list
# Search cached models (offline)
tsr db search "bimbo" --type lora
# Show trigger words for a LoRA
tsr db triggers /models/loras/70s_VPMS.safetensors
# Show generation params from example images
tsr db prompts 999258
Benefits
- Offline First — Query cached data without API calls
- Hash Deduplication — Detect duplicate files by SHA256
- Metadata Enrichment — Combine safetensor header info with CivitAI metadata
- Trigger Word Lookup — Find correct prompts for LoRAs
- Example Prompts — Extract working prompts from preview images
- Version Tracking — Know which version you have vs. latest available