dynamic checkpoint presets with orientation, correct VAEs, and auto-resolved sampler/scheduler/cfg/steps

2026-04-20 22:08:01 +02:00
parent b074a7dd50
commit 7162db1ab9
7 changed files with 237 additions and 75 deletions
@@ -0,0 +1,11 @@
+{
+  "prompt": "a cat on a windowsill",
+  "width": 1024,
+  "height": 1024,
+  "steps": 20,
+  "cfg": 7.0,
+  "seed": -1,
+  "sampler": "euler",
+  "scheduler": "normal",
+  "lora_strength": 0.8
+}
@@ -8,6 +8,7 @@ from importlib.metadata import version
 from pathlib import Path
 from typing import Annotated, Any

+import click
 import typer
 from rich.console import Console
 from rich.table import Table
@@ -769,37 +770,116 @@ def serve(
 # =============================================================================


-@app.command()
+@app.command(context_settings={"allow_extra_args": False})
 def generate(  # noqa: PLR0915
-    prompt: Annotated[str, typer.Argument(help="Positive prompt text")],
+    ctx: typer.Context,
+    prompt: Annotated[str | None, typer.Argument(help="Positive prompt text", show_default=False)] = None,
    model: Annotated[str | None, typer.Option("-m", "--model", help="Checkpoint model name")] = None,
-    width: Annotated[int, typer.Option("-W", "--width", help="Image width")] = 1024,
-    height: Annotated[int, typer.Option("-H", "--height", help="Image height")] = 1024,
-    steps: Annotated[int, typer.Option("--steps", help="Sampling steps")] = 20,
-    cfg: Annotated[float, typer.Option("--cfg", help="CFG scale")] = 7.0,
+    width: Annotated[int | None, typer.Option("-W", "--width", help="Image width (auto from checkpoint)")] = None,
+    height: Annotated[int | None, typer.Option("-H", "--height", help="Image height (auto from checkpoint)")] = None,
+    steps: Annotated[int | None, typer.Option("--steps", help="Sampling steps (auto from checkpoint)")] = None,
+    cfg: Annotated[float | None, typer.Option("--cfg", help="CFG scale (auto from checkpoint)")] = None,
    seed: Annotated[int, typer.Option("--seed", "-s", help="Random seed (-1 for random)")] = -1,
-    sampler: Annotated[str, typer.Option("--sampler", help="Sampler name")] = "euler",
-    scheduler: Annotated[str, typer.Option("--scheduler", help="Scheduler name")] = "normal",
-    vae: Annotated[str | None, typer.Option("--vae", help="VAE model name")] = None,
+    sampler: Annotated[str | None, typer.Option("--sampler", help="Sampler name (auto from checkpoint)")] = None,
+    scheduler: Annotated[str | None, typer.Option("--scheduler", help="Scheduler name (auto from checkpoint)")] = None,
+    vae: Annotated[str | None, typer.Option("--vae", help="VAE model name (auto from checkpoint)")] = None,
+    orientation: Annotated[str, typer.Option("-O", "--orientation", help="Resolution: square, portrait, landscape")] = "square",
    lora: Annotated[str | None, typer.Option("-l", "--lora", help="LoRA model name")] = None,
    lora_strength: Annotated[float, typer.Option("--lora-strength", help="LoRA strength")] = 0.8,
    negative: Annotated[str, typer.Option("-n", "--negative-prompt", help="Negative prompt")] = "",
    output: Annotated[Path | None, typer.Option("-o", "--output", help="Save path (default: current dir)")] = None,
    remote: Annotated[str | None, typer.Option("-r", "--remote", help="Remote server name or URL")] = None,
    json_output: Annotated[bool, typer.Option("--json", "-j", help="Output as JSON")] = False,
+    json_input: Annotated[
+        str | None, typer.Option("--input", "-I", help="JSON params (keys match CLI options)")
+    ] = None,
 ) -> None:
    """Generate an image using text-to-image.

    Calls ComfyUI directly when local, or the remote tensors API when --remote is given.
+    Accepts --input as an inline JSON object or a path to a JSON file; keys are the parameter names (e.g. "steps", "lora_strength"). CLI flags override JSON values.

    Examples:
        tsr generate "a cat on a windowsill"
        tsr generate "portrait photo" -m "flux1-dev-fp8.safetensors" --steps 30
        tsr generate "cyberpunk city" -o output.png
        tsr generate "landscape" --remote junkpile
+        tsr generate --input '{"prompt": "a mech", "model": "flux1-dev-fp8.safetensors", "steps": 30}'
    """
    import random as rng  # noqa: PLC0415

+    # ---- JSON input merging ----
+    if json_input is not None:
+        # Support file paths and raw JSON strings
+        json_path = Path(json_input)
+        if json_path.is_file():
+            json_text = json_path.read_text()
+        elif json_input.lstrip().startswith("{"):
+            json_text = json_input
+        else:
+            console.print(f"[red]Not a JSON string or file:[/red] {json_input}")
+            raise typer.Exit(1)
+
+        try:
+            ji = json.loads(json_text)
+        except json.JSONDecodeError as e:
+            console.print(f"[red]Invalid JSON input:[/red] {e}")
+            raise typer.Exit(1) from e
+
+        if not isinstance(ji, dict):
+            console.print("[red]JSON input must be an object[/red]")
+            raise typer.Exit(1)
+
+        # Map JSON keys to parameter names (handle aliases)
+        key_map = {"negative_prompt": "negative", "lora_name": "lora"}
+        mapped: dict[str, Any] = {}
+        for k, v in ji.items():
+            mapped[key_map.get(k, k)] = v
+
+        # Determine which CLI params the user explicitly set
+        click_ctx = ctx._context if hasattr(ctx, "_context") else ctx
+        explicit = {
+            p.name
+            for p in click_ctx.command.params
+            if click_ctx.get_parameter_source(p.name) == click.core.ParameterSource.COMMANDLINE
+        } if hasattr(click_ctx, "get_parameter_source") else set()
+
+        # Apply JSON values for anything not explicitly set on CLI
+        if "prompt" in mapped and ("prompt" not in explicit and prompt is None):
+            prompt = mapped["prompt"]
+        if "model" in mapped and "model" not in explicit:
+            model = mapped["model"]
+        if "width" in mapped and "width" not in explicit:
+            width = int(mapped["width"])
+        if "height" in mapped and "height" not in explicit:
+            height = int(mapped["height"])
+        if "steps" in mapped and "steps" not in explicit:
+            steps = int(mapped["steps"])
+        if "cfg" in mapped and "cfg" not in explicit:
+            cfg = float(mapped["cfg"])
+        if "seed" in mapped and "seed" not in explicit:
+            seed = int(mapped["seed"])
+        if "sampler" in mapped and "sampler" not in explicit:
+            sampler = mapped["sampler"]
+        if "scheduler" in mapped and "scheduler" not in explicit:
+            scheduler = mapped["scheduler"]
+        if "vae" in mapped and "vae" not in explicit:
+            vae = mapped["vae"]
+        if "lora" in mapped and "lora" not in explicit:
+            lora = mapped["lora"]
+        if "lora_strength" in mapped and "lora_strength" not in explicit:
+            lora_strength = float(mapped["lora_strength"])
+        if "negative" in mapped and "negative" not in explicit:
+            negative = mapped["negative"]
+        if "output" in mapped and "output" not in explicit:
+            output = Path(mapped["output"])
+        if "remote" in mapped and "remote" not in explicit:
+            remote = mapped["remote"]
+
+    if not prompt:
+        console.print("[red]Prompt is required (as argument or in --input JSON)[/red]")
+        raise typer.Exit(1)
+
    from tensors.config import resolve_remote as do_resolve_remote  # noqa: PLC0415

    # Resolve remote (explicit flag, or default from config)
@@ -883,6 +963,7 @@ def generate(  # noqa: PLR0915
            lora_name=lora,
            lora_strength=lora_strength,
            vae=vae,
+            orientation=orientation,
        )

        if not result_local:
@@ -1576,13 +1657,14 @@ def comfy_generate(  # noqa: PLR0915
    url: Annotated[str | None, typer.Option("--url", "-u", help="ComfyUI server URL")] = None,
    negative: Annotated[str, typer.Option("-n", "--negative", help="Negative prompt")] = "",
    model: Annotated[str | None, typer.Option("-m", "--model", help="Checkpoint model name")] = None,
-    width: Annotated[int, typer.Option("-W", "--width", help="Image width")] = 1024,
-    height: Annotated[int, typer.Option("-H", "--height", help="Image height")] = 1024,
-    steps: Annotated[int, typer.Option("--steps", help="Sampling steps")] = 20,
-    cfg: Annotated[float, typer.Option("--cfg", help="CFG scale")] = 7.0,
+    width: Annotated[int | None, typer.Option("-W", "--width", help="Image width (auto from checkpoint)")] = None,
+    height: Annotated[int | None, typer.Option("-H", "--height", help="Image height (auto from checkpoint)")] = None,
+    steps: Annotated[int | None, typer.Option("--steps", help="Sampling steps (auto from checkpoint)")] = None,
+    cfg: Annotated[float | None, typer.Option("--cfg", help="CFG scale (auto from checkpoint)")] = None,
    seed: Annotated[int, typer.Option("--seed", "-s", help="Random seed (-1 for random)")] = -1,
-    sampler: Annotated[str, typer.Option("--sampler", help="Sampler name")] = "euler",
-    scheduler: Annotated[str, typer.Option("--scheduler", help="Scheduler name")] = "normal",
+    sampler: Annotated[str | None, typer.Option("--sampler", help="Sampler name (auto from checkpoint)")] = None,
+    scheduler: Annotated[str | None, typer.Option("--scheduler", help="Scheduler name (auto from checkpoint)")] = None,
+    orientation: Annotated[str, typer.Option("-O", "--orientation", help="Resolution: square, portrait, landscape")] = "square",
    output: Annotated[Path | None, typer.Option("-o", "--output", help="Output file path")] = None,
    count: Annotated[int, typer.Option("-c", "--count", help="Number of images to generate")] = 1,
    lora: Annotated[str | None, typer.Option("-l", "--lora", help="LoRA model name")] = None,
@@ -1686,6 +1768,7 @@ def comfy_generate(  # noqa: PLR0915
        lora_name=lora,
        lora_strength=lora_strength,
        batch_size=count,
+        orientation=orientation,
    )

    if not result:
@@ -709,47 +709,65 @@ DEFAULT_WORKFLOW_TEMPLATE: dict[str, Any] = {
    },
 }

-# Default VAE for SDXL/Illustrious/Pony models
-DEFAULT_VAE = "sdxl_vae.safetensors"
-
-
 def _build_workflow(
    prompt: str,
    negative_prompt: str = "",
    model: str | None = None,
-    width: int = 1024,
-    height: int = 1024,
-    steps: int = 20,
-    cfg: float = 7.0,
+    width: int | None = None,
+    height: int | None = None,
+    steps: int | None = None,
+    cfg: float | None = None,
    seed: int = -1,
-    sampler: str = "euler",
-    scheduler: str = "normal",
+    sampler: str | None = None,
+    scheduler: str | None = None,
    lora_name: str | None = None,
    lora_strength: float = 1.0,
    batch_size: int = 1,
    vae: str | None = None,
+    orientation: str = "square",
 ) -> dict[str, Any]:
    """Build a text-to-image workflow from parameters.

+    Parameters set to None are auto-resolved from the checkpoint's family preset
+    via config.get_model_generation_defaults(). User-provided values always win.
+
    Args:
        prompt: Positive prompt text
        negative_prompt: Negative prompt text
        model: Checkpoint filename (if None, uses first available)
-        width: Image width
-        height: Image height
-        steps: Number of sampling steps
-        cfg: CFG scale
+        width: Image width (None = use preset for orientation)
+        height: Image height (None = use preset for orientation)
+        steps: Number of sampling steps (None = use preset)
+        cfg: CFG scale (None = use preset)
        seed: Random seed (-1 for random)
-        sampler: Sampler name
-        scheduler: Scheduler name
+        sampler: Sampler name (None = use preset)
+        scheduler: Scheduler name (None = use preset)
        lora_name: LoRA model filename (optional)
        lora_strength: LoRA strength (default 1.0)
        batch_size: Number of images to generate in one workflow (default 1)
-        vae: VAE filename (defaults to sdxl_vae.safetensors)
+        vae: VAE filename (None = use preset; if the preset has none, the checkpoint's built-in VAE is used)
+        orientation: Resolution orientation: "square", "portrait", or "landscape"

    Returns:
        ComfyUI workflow dict
    """
+    from tensors.config import get_model_generation_defaults, resolve_orientation  # noqa: PLC0415
+
+    # Get preset defaults for this checkpoint family
+    defaults = get_model_generation_defaults(model or "") if model else get_model_generation_defaults("")
+
+    # Resolve orientation-based resolution
+    res_w, res_h = resolve_orientation(defaults.get("family"), orientation)
+
+    # Merge: user overrides > preset defaults
+    resolved_sampler = sampler if sampler is not None else defaults.get("sampler", "euler")
+    resolved_scheduler = scheduler if scheduler is not None else defaults.get("scheduler", "normal")
+    resolved_cfg = cfg if cfg is not None else defaults.get("cfg", 7.0)
+    resolved_steps = steps if steps is not None else defaults.get("steps", 20)
+    resolved_width = width if width is not None else res_w
+    resolved_height = height if height is not None else res_h
+    resolved_vae = vae if vae is not None else defaults.get("vae")
+
    workflow = copy.deepcopy(DEFAULT_WORKFLOW_TEMPLATE)

    # Set seed (random if -1)
@@ -757,30 +775,30 @@ def _build_workflow(

    # Update KSampler settings
    workflow["3"]["inputs"]["seed"] = actual_seed
-    workflow["3"]["inputs"]["steps"] = steps
-    workflow["3"]["inputs"]["cfg"] = cfg
-    workflow["3"]["inputs"]["sampler_name"] = sampler
-    workflow["3"]["inputs"]["scheduler"] = scheduler
+    workflow["3"]["inputs"]["steps"] = resolved_steps
+    workflow["3"]["inputs"]["cfg"] = resolved_cfg
+    workflow["3"]["inputs"]["sampler_name"] = resolved_sampler
+    workflow["3"]["inputs"]["scheduler"] = resolved_scheduler

    # Set model
    if model:
        workflow["4"]["inputs"]["ckpt_name"] = model

    # Set dimensions and batch size
-    workflow["5"]["inputs"]["width"] = width
-    workflow["5"]["inputs"]["height"] = height
+    workflow["5"]["inputs"]["width"] = resolved_width
+    workflow["5"]["inputs"]["height"] = resolved_height
    workflow["5"]["inputs"]["batch_size"] = batch_size

    # Set prompts
    workflow["6"]["inputs"]["text"] = prompt
    workflow["7"]["inputs"]["text"] = negative_prompt

-    # Set VAE - use external VAE if specified, otherwise use checkpoint's built-in VAE
-    if vae:
+    # Set VAE - use the explicit or preset VAE if one was resolved, otherwise use checkpoint's built-in
+    if resolved_vae:
        # Use external VAE loader (node 11)
-        workflow["11"]["inputs"]["vae_name"] = vae
+        workflow["11"]["inputs"]["vae_name"] = resolved_vae
    else:
-        # Use VAE from checkpoint (node 4, output index 2) - works for SD 1.5 models
+        # Use VAE from checkpoint (node 4, output index 2) - fallback for unknown models
        # Remove VAELoader node and connect VAEDecode directly to checkpoint
        del workflow["11"]
        workflow["8"]["inputs"]["vae"] = ["4", 2]
@@ -812,13 +830,13 @@ def generate_image(
    url: str | None = None,
    negative_prompt: str = "",
    model: str | None = None,
-    width: int = 1024,
-    height: int = 1024,
-    steps: int = 20,
-    cfg: float = 7.0,
+    width: int | None = None,
+    height: int | None = None,
+    steps: int | None = None,
+    cfg: float | None = None,
    seed: int = -1,
-    sampler: str = "euler",
-    scheduler: str = "normal",
+    sampler: str | None = None,
+    scheduler: str | None = None,
    console: Console | None = None,
    on_progress: ProgressCallback | None = None,
    timeout: float = 600.0,
@@ -826,28 +844,33 @@ def generate_image(
    lora_strength: float = 1.0,
    batch_size: int = 1,
    vae: str | None = None,
+    orientation: str = "square",
 ) -> GenerationResult | None:
    """Generate an image using a simple text-to-image workflow.

+    Parameters set to None are auto-resolved from the checkpoint's family preset.
+    User-provided values always override preset defaults.
+
    Args:
        prompt: Positive prompt text
        url: ComfyUI base URL
        negative_prompt: Negative prompt text
        model: Checkpoint filename (if None, must be pre-loaded in ComfyUI)
-        width: Image width
-        height: Image height
-        steps: Number of sampling steps
-        cfg: CFG scale
+        width: Image width (None = use preset for orientation)
+        height: Image height (None = use preset for orientation)
+        steps: Number of sampling steps (None = use preset)
+        cfg: CFG scale (None = use preset)
        seed: Random seed (-1 for random)
-        sampler: Sampler name (euler, dpm_2, etc.)
-        scheduler: Scheduler name (normal, karras, etc.)
+        sampler: Sampler name (None = use preset)
+        scheduler: Scheduler name (None = use preset)
        console: Rich console for progress output
        on_progress: Optional callback for progress updates
        timeout: Maximum wait time in seconds
        lora_name: LoRA model filename (optional)
        lora_strength: LoRA strength (default 1.0)
        batch_size: Number of images to generate in one workflow (default 1)
-        vae: VAE filename (defaults to sdxl_vae.safetensors)
+        vae: VAE filename (None = use preset)
+        orientation: Resolution orientation: "square", "portrait", or "landscape"

    Returns:
        GenerationResult with image paths, or None if generation failed
@@ -882,6 +905,7 @@ def generate_image(
        lora_strength=lora_strength,
        batch_size=batch_size,
        vae=vae,
+        orientation=orientation,
    )

    # Run workflow
@@ -512,29 +512,35 @@ MODEL_FAMILY_DEFAULTS: dict[str, dict[str, Any]] = {
        "negative_prompt": "score_5, score_4, ugly, deformed, blurry, bad anatomy, bad hands, missing fingers",
        "width": 1024,
        "height": 1024,
-        "cfg": 7.0,
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
+        "cfg": 6.5,
        "clip_skip": 2,
        "sampler": "euler_ancestral",
        "scheduler": "normal",
        "steps": 25,
-        "vae": "sdxl_vae.safetensors",
+        "vae": "ponyStandardVAE_v10.safetensors",
    },
    "illustrious": {
        "quality_prefix": "masterpiece, best quality, highres",
        "negative_prompt": "worst quality, bad quality, low quality, lowres, bad anatomy, bad hands, jpeg artifacts, watermark",
        "width": 1024,
        "height": 1024,
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
        "cfg": 6.0,
        "sampler": "euler_ancestral",
        "scheduler": "normal",
        "steps": 25,
-        "vae": "sdxl_vae.safetensors",
+        "vae": "illustriousXLV20_v10.safetensors",
    },
    "sdxl": {
        "quality_prefix": "",
        "negative_prompt": "ugly, deformed, bad anatomy, bad hands, extra fingers, missing fingers, blurry, watermark",
        "width": 1024,
        "height": 1024,
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
        "cfg": 7.0,
        "sampler": "dpmpp_2m",
        "scheduler": "karras",
@@ -546,21 +552,25 @@ MODEL_FAMILY_DEFAULTS: dict[str, dict[str, Any]] = {
        "negative_prompt": "ugly, deformed, bad anatomy, bad hands, extra fingers, missing fingers, blurry, watermark",
        "width": 1024,
        "height": 1024,
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
        "cfg": 2.0,
        "sampler": "euler",
        "scheduler": "sgm_uniform",
-        "steps": 8,  # Lightning models use fewer steps
+        "steps": 8,
        "vae": "sdxl_vae.safetensors",
    },
    "sdxl_turbo": {
        "quality_prefix": "",
-        "negative_prompt": "",  # Turbo models work best without negative prompts
+        "negative_prompt": "",
        "width": 1024,
        "height": 1024,
-        "cfg": 1.0,  # Very low CFG for turbo
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
+        "cfg": 1.0,
        "sampler": "euler_ancestral",
        "scheduler": "normal",
-        "steps": 4,  # Turbo models use very few steps
+        "steps": 4,
        "vae": "sdxl_vae.safetensors",
    },
    "sd15": {
@@ -571,28 +581,34 @@ MODEL_FAMILY_DEFAULTS: dict[str, dict[str, Any]] = {
        ),
        "width": 512,
        "height": 512,
+        "portrait": (512, 768),
+        "landscape": (768, 512),
        "cfg": 7.0,
-        "sampler": "dpmpp_2m",
-        "scheduler": "karras",
-        "steps": 20,
-        "vae": None,  # Use checkpoint's built-in VAE
+        "sampler": "euler_ancestral",
+        "scheduler": "normal",
+        "steps": 25,
+        "vae": "vae-ft-mse-840000-ema-pruned.safetensors",
    },
    "sd15_lcm": {
        "quality_prefix": "masterpiece, best quality",
-        "negative_prompt": "",  # LCM works best with minimal negative
+        "negative_prompt": "",
        "width": 512,
        "height": 512,
+        "portrait": (512, 768),
+        "landscape": (768, 512),
        "cfg": 1.5,
        "sampler": "lcm",
        "scheduler": "normal",
        "steps": 6,
-        "vae": None,  # Use checkpoint's built-in VAE
+        "vae": "vae-ft-mse-840000-ema-pruned.safetensors",
    },
    "flux": {
        "quality_prefix": "",
-        "negative_prompt": "",  # Flux doesn't use negative prompts effectively
+        "negative_prompt": "",
        "width": 1024,
        "height": 1024,
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
        "cfg": 3.5,
        "sampler": "euler",
        "scheduler": "simple",
@@ -604,21 +620,25 @@ MODEL_FAMILY_DEFAULTS: dict[str, dict[str, Any]] = {
        "negative_prompt": "",
        "width": 1024,
        "height": 1024,
-        "cfg": 1.0,  # Schnell uses low CFG
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
+        "cfg": 1.0,
        "sampler": "euler",
        "scheduler": "simple",
-        "steps": 4,  # Schnell is a distilled model, very few steps
+        "steps": 4,
        "vae": "ae.safetensors",
    },
    "zimage": {
        "quality_prefix": "",
-        "negative_prompt": "",  # Turbo models work best without negative prompts
+        "negative_prompt": "",
        "width": 1024,
        "height": 1024,
-        "cfg": 1.0,  # Very low CFG for turbo
+        "portrait": (832, 1216),
+        "landscape": (1216, 832),
+        "cfg": 1.0,
        "sampler": "euler",
        "scheduler": "simple",
-        "steps": 4,  # ZImageTurbo is a distilled model
+        "steps": 4,
        "vae": "ae.safetensors",
    },
 }
@@ -733,6 +753,30 @@ def get_model_generation_defaults(model_name: str, base_model: str | None = None
    return defaults


+def resolve_orientation(family: str | None, orientation: str = "square") -> tuple[int, int]:
+    """Get width/height for a model family and orientation.
+
+    Args:
+        family: Model family key (e.g. "pony", "sd15", "sdxl"); None or an unknown key falls back to "sdxl"
+        orientation: One of "square", "portrait", "landscape"; any other value is treated as "square"
+
+    Returns:
+        (width, height) tuple
+    """
+    defaults = MODEL_FAMILY_DEFAULTS.get(family or "sdxl", MODEL_FAMILY_DEFAULTS["sdxl"])
+    w: int = defaults["width"]
+    h: int = defaults["height"]
+    fallback = (w, h)
+
+    if orientation == "portrait":
+        pair: tuple[int, int] = defaults.get("portrait", fallback)
+        return pair
+    if orientation == "landscape":
+        pair = defaults.get("landscape", fallback)
+        return pair
+    return fallback
+
+
 def get_comfyui_url() -> str:
    """Get the ComfyUI server URL.

@@ -356,7 +356,7 @@ class TestModelFamilyDetection:
        assert defaults["sampler"] == "euler_ancestral"
        assert defaults["scheduler"] == "normal"
        assert defaults["steps"] == 25
-        assert defaults["cfg"] == 7.0
+        assert defaults["cfg"] == 6.5

    def test_get_model_generation_defaults_flux(self) -> None:
        """Test getting generation defaults for Flux models."""