"""Config Analysis & Improvement Tab - Merged Analysis Dashboard. This module combines Config Health, Config Improvement, and Prompts & Examples into a unified workflow with shared config input. """ import gradio as gr import json import spaces from typing import Dict, Any, List, Tuple from config_linting import ( get_config_health_summary, generate_mermaid_diagram, generate_story_flags ) # Import prompt templates and examples from prompts_and_outputs import ( WFStage2prompt, WFStage2prompt2, SimpleStructureTimeline1stPrompt, Outputof1stSimpleStructurePrompt, MoreRobustTimeline1stPrompt, Outputof1stMoreRobustPrompt, TimeRelatedConceptsForIdeaGeneration, Nonlinearprogressionideas, Adjectivebasedcombinationideatexts, Adjectivebasedcombinationideatextsv2, ) from story_elements import ( sterotypicalstoryendings, InterpersonalConflictIdeasBetraylacksympathyunfairexpectations, ) from game_configs import ( originalconfigatbeinningofthisspace, claude3_5_06072024configtips, tipsupdatedconfigatbeinningofthisspace, ) # ==================== STARTING POINT DETECTION & CHECKLISTS ==================== STARTING_POINT_CHECKLISTS = { "world_geography": { "name": "World Facts / Geography", "icon": "🌍", "checks": [ ("Location connectivity", "Are all locations reachable? 
Check for orphaned areas."), ("Spatial descriptions", "Do descriptions mention directions, distances, landmarks?"), ("Exploration rewards", "Are there secrets or discoveries in each location?"), ("Environmental variety", "Different biomes, weather, or atmospheres across locations?"), ("Travel consequences", "Does moving between locations have meaningful effects?"), ("Map coherence", "Would this make sense as a physical map?"), ] }, "video_writer": { "name": "Video/Film Ideas (Writer)", "icon": "🎬", "checks": [ ("Beat structure", "Does the narrative follow clear act breaks (setup, confrontation, resolution)?"), ("Pacing", "Are high-tension moments balanced with breathing room?"), ("Character arcs", "Do characters change from beginning to end?"), ("Dramatic question", "Is there a central question driving the story?"), ("Scene purpose", "Does each state advance plot, reveal character, or both?"), ("Climax placement", "Is there a clear climactic moment near the end?"), ] }, "video_videographer": { "name": "Video/Film Ideas (Videographer)", "icon": "📹", "checks": [ ("Visual descriptions", "Do descriptions paint visual scenes (framing, lighting, composition)?"), ("Camera notes", "Are there implied camera movements or angles in the text?"), ("Transition flow", "Do scene changes have visual logic (cuts, fades, match cuts)?"), ("Shot variety", "Mix of wide establishing shots, medium, and close-ups implied?"), ("Visual motifs", "Are there recurring visual elements for thematic connection?"), ("Media prompts", "Do media fields contain cinematographic direction?"), ] }, "song_lyrics": { "name": "Song / Lyric Ideas", "icon": "🎵", "checks": [ ("Emotional arc", "Does the mood progress through verses/states?"), ("Verse-to-state mapping", "Does each state correspond to a lyrical section?"), ("Refrain/chorus", "Are there recurring states that echo the chorus?"), ("Rhythm in text", "Does the prose have musicality or poetic flow?"), ("Sensory language", "Rich use of sound, texture, 
and feeling words?"), ("Thematic unity", "Does everything connect to the song's central theme?"), ] }, "timeline_events": { "name": "Timeline Events", "icon": "⏱️", "checks": [ ("Temporal markers", "Do descriptions include time references (morning, later, after)?"), ("Cause-effect chains", "Do earlier choices affect later states?"), ("Event sequencing", "Is the order of events logical and clear?"), ("Parallel timelines", "If branching, do timelines stay internally consistent?"), ("Urgency mechanics", "Are there time-sensitive choices or deadlines?"), ("Flashback/forward", "If non-linear, are jumps clearly marked?"), ] }, "existing_structures": { "name": "Existing Structures (D&D, Templates)", "icon": "📚", "checks": [ ("Source fidelity", "Does the adaptation honor the source material?"), ("Mechanical translation", "Are game mechanics (stats, rolls) represented in choices?"), ("Lore consistency", "Do names, places, and terms match the source?"), ("Balance", "Are encounters/challenges appropriately difficult?"), ("Completeness", "Are all key elements from the source included?"), ("Adaptation creativity", "What new elements make this unique?"), ] }, "character_psychology": { "name": "Character Psychology", "icon": "🧠", "checks": [ ("Motivation clarity", "Are character wants and needs clear?"), ("Psychological depth", "Do characters have flaws, fears, contradictions?"), ("Dialogue authenticity", "Does each character have a distinct voice?"), ("Relationship dynamics", "Are interpersonal tensions and alliances present?"), ("Character agency", "Do NPCs make choices independent of player?"), ("Internal conflict", "Do characters struggle with themselves, not just external?"), ] }, "mystery_depth": { "name": "Mystery / Hidden Depth", "icon": "🔮", "checks": [ ("Information layering", "Is truth revealed gradually across layers?"), ("Red herrings", "Are there misleading clues that feel fair?"), ("Reveal pacing", "Are revelations spaced for maximum impact?"), ("Unreliable 
elements", "Can the player trust what they're told?"), ("Hidden connections", "Do seemingly unrelated elements connect?"), ("Core truth", "Is there a satisfying 'abyss' truth worth discovering?"), ] }, "faction_politics": { "name": "Faction Politics", "icon": "⚔️", "checks": [ ("Faction definition", "Are factions clearly distinct with different goals?"), ("Reputation tracking", "Do choices affect standing with groups?"), ("Alliance consequences", "Does joining one faction affect others?"), ("Political complexity", "Are there nuanced positions, not just good/evil?"), ("Power dynamics", "Is there a balance of power that can shift?"), ("Betrayal potential", "Can allegiances change or be tested?"), ] }, } # ==================== IMPROVEMENT WORKFLOWS ==================== IMPROVEMENT_WORKFLOWS = { "branching_first": { "name": "Branching First", "description": "Best for linear configs that need more player agency", "steps": [ {"round": 1, "focus": "choices", "instruction": "Add 3-4 choices per state"}, {"round": 2, "focus": "choices", "instruction": "Add conditional/hidden choices"}, {"round": 3, "focus": "detail", "instruction": "Flesh out descriptions"}, {"round": 4, "focus": "characters", "instruction": "Add NPC motivations"}, ] }, "detail_first": { "name": "Detail First", "description": "Best for skeleton configs with basic structure", "steps": [ {"round": 1, "focus": "detail", "instruction": "Rich descriptions (3-4 sentences)"}, {"round": 2, "focus": "environment", "instruction": "Environmental storytelling"}, {"round": 3, "focus": "choices", "instruction": "Meaningful branches"}, {"round": 4, "focus": "tension", "instruction": "Add stakes and pressure"}, ] }, "non_branching": { "name": "Non-Branching Depth", "description": "Best for configs where you want depth, not breadth", "steps": [ {"round": 1, "focus": "detail", "instruction": "Deepen existing paths"}, {"round": 2, "focus": "characters", "instruction": "NPC dialogue and reactions"}, {"round": 3, "focus": 
"environment", "instruction": "Discoverable lore"}, {"round": 4, "focus": "tension", "instruction": "Emotional beats"}, ] }, "mystery_deepening": { "name": "Mystery Deepening", "description": "Best for mystery/investigation configs", "steps": [ {"round": 1, "focus": "environment", "instruction": "Plant clues in descriptions"}, {"round": 2, "focus": "characters", "instruction": "Hidden motivations"}, {"round": 3, "focus": "choices", "instruction": "Investigation options"}, {"round": 4, "focus": "tension", "instruction": "Red herrings and reveals"}, ] }, } # Focus area definitions for prompt generation FOCUS_AREAS = { "balanced": { "name": "Balanced", "instruction": """Improve the config with balanced attention to: - Richer descriptions (2-3 sentences per state) - 3-4 meaningful choices per state where appropriate - Clear character motivations - Environmental details that support the mood""" }, "detail": { "name": "Add Detail", "instruction": """Focus on ADDING DETAIL to descriptions: - Expand each description to 3-4 vivid sentences - Include sensory details (sight, sound, smell, touch) - Add atmospheric elements (weather, lighting, ambient sounds) - Include small environmental storytelling details - Make locations feel lived-in and real""" }, "choices": { "name": "Add Choices", "instruction": """Focus on ADDING MORE CHOICES and branches: - Ensure each state has 3-4 distinct choices - Add choices that reflect different playstyles (cautious, bold, clever) - Include hidden or conditional choices - Create meaningful branches that reconverge later - Add optional side paths that reward exploration""" }, "characters": { "name": "Deepen Characters", "instruction": """Focus on DEEPENING CHARACTER MOTIVATIONS: - Give NPCs clear wants, fears, and secrets - Add dialogue that reveals personality - Create relationship dynamics between characters - Include character-specific choices and reactions - Show character growth through the narrative""" }, "environment": { "name": 
"Environmental Storytelling", "instruction": """Focus on ENVIRONMENTAL STORYTELLING: - Add details that imply history without exposition - Include objects that tell stories (old letters, worn paths, abandoned items) - Use weather and time of day to set mood - Create spaces that feel connected to events - Add discoverable lore through exploration""" }, "tension": { "name": "Increase Tension", "instruction": """Focus on INCREASING DRAMATIC TENSION: - Add time pressure or urgency where appropriate - Include moments of difficult moral choice - Create setbacks that raise stakes - Add foreshadowing of dangers - Include moments of false safety before reveals""" }, } # Config type instructions for prompt context CONFIG_TYPE_INSTRUCTIONS = { "world": """This is a WORLD/GEOGRAPHY exploration game config. The story unfolds through location discovery. Key aspects: Location atmosphere, natural connections, hidden areas, NPC placement, environmental storytelling.""", "timeline": """This is a TIMELINE/CHRONOLOGICAL narrative config. Events unfold in time sequence. Key aspects: Time-based mood, cause-effect, pacing, flashbacks/forwards, time pressure.""", "song": """This is an EMOTIONAL JOURNEY config based on musical/lyrical structure. Key aspects: Emotional transitions, verse/chorus structure, perspective shifts, sensory details.""", "mystery": """This is a MYSTERY/INVESTIGATION config using the iceberg model. Key aspects: Layered clues, red herrings, reveal pacing, unreliable elements, core truth.""", "dnd": """This is a D&D/FANTASY ADVENTURE config with encounters and exploration. Key aspects: Combat variety, social encounters, treasure, boss encounters, resource management.""", "film": """This is a FILM/VIDEO SCENES config structured around visual storytelling. Key aspects: Visual hooks, camera direction, transitions, dialogue subtext, visual motifs.""", "beats": """This is a STORY BEATS config following professional screenplay structure. 
Key aspects: Beat turns, character-driven plot, escalating stakes, emotional weight, payoffs.""", } def detect_starting_point(config_json: str) -> Tuple[str, Dict[str, float]]: """ Analyze config structure to guess the starting point type. Returns (best_guess, confidence_scores). """ try: config = json.loads(config_json) except json.JSONDecodeError: return "unknown", {} scores = { "world_geography": 0.0, "video_writer": 0.0, "video_videographer": 0.0, "song_lyrics": 0.0, "timeline_events": 0.0, "existing_structures": 0.0, "character_psychology": 0.0, "mystery_depth": 0.0, "faction_politics": 0.0, } all_text = "" location_count = 0 state_count = 0 for location_key, location_data in config.items(): if isinstance(location_data, dict): location_count += 1 for state_key, state_data in location_data.items(): if isinstance(state_data, dict): state_count += 1 desc = state_data.get("description", "") all_text += f" {desc} " for choice in state_data.get("choices", []): if isinstance(choice, dict): all_text += f" {choice.get('text', '')} " all_text_lower = all_text.lower() # World/Geography signals geo_keywords = ["north", "south", "east", "west", "path", "road", "village", "city", "forest", "mountain", "cave", "door", "corridor", "room", "building", "outside", "inside", "enter", "exit", "travel", "journey"] scores["world_geography"] = sum(1 for k in geo_keywords if k in all_text_lower) / len(geo_keywords) if location_count >= 5: scores["world_geography"] += 0.2 # Video Writer signals (dramatic structure) drama_keywords = ["suddenly", "meanwhile", "finally", "climax", "tension", "reveal", "twist", "confrontation", "resolution", "act", "scene", "dramatic", "stakes"] scores["video_writer"] = sum(1 for k in drama_keywords if k in all_text_lower) / len(drama_keywords) # Video Videographer signals (visual) visual_keywords = ["camera", "shot", "frame", "close-up", "wide", "pan", "zoom", "angle", "lighting", "shadow", "silhouette", "composition", "visual", "fade", "cut to"] 
scores["video_videographer"] = sum(1 for k in visual_keywords if k in all_text_lower) / len(visual_keywords) # Song/Lyrics signals (emotional, poetic) song_keywords = ["verse", "chorus", "rhythm", "melody", "heart", "soul", "feeling", "dream", "echo", "silence", "cry", "whisper", "voice", "song", "music"] scores["song_lyrics"] = sum(1 for k in song_keywords if k in all_text_lower) / len(song_keywords) # Timeline signals time_keywords = ["morning", "evening", "night", "day", "hour", "later", "before", "after", "then", "now", "yesterday", "tomorrow", "clock", "time", "wait", "moment"] scores["timeline_events"] = sum(1 for k in time_keywords if k in all_text_lower) / len(time_keywords) # D&D/Existing structures signals dnd_keywords = ["hp", "damage", "attack", "spell", "magic", "roll", "dice", "armor", "weapon", "potion", "dungeon", "dragon", "goblin", "quest", "level"] scores["existing_structures"] = sum(1 for k in dnd_keywords if k in all_text_lower) / len(dnd_keywords) # Character Psychology signals psych_keywords = ["feel", "think", "believe", "fear", "hope", "want", "need", "memory", "emotion", "trust", "betray", "guilt", "regret", "desire", "conflict"] scores["character_psychology"] = sum(1 for k in psych_keywords if k in all_text_lower) / len(psych_keywords) # Mystery/Depth signals mystery_keywords = ["secret", "hidden", "clue", "mystery", "discover", "reveal", "truth", "lie", "suspect", "evidence", "investigate", "puzzle", "unknown", "shadow"] scores["mystery_depth"] = sum(1 for k in mystery_keywords if k in all_text_lower) / len(mystery_keywords) # Faction signals faction_keywords = ["faction", "alliance", "reputation", "loyalty", "betray", "guild", "clan", "tribe", "kingdom", "empire", "rebel", "join", "side", "enemy", "ally"] scores["faction_politics"] = sum(1 for k in faction_keywords if k in all_text_lower) / len(faction_keywords) # Normalize and find best match max_score = max(scores.values()) if scores.values() else 0 if max_score > 0: scores = {k: 
round(v / max_score, 2) for k, v in scores.items()} best_guess = max(scores, key=scores.get) if max_score > 0 else "world_geography" return best_guess, scores def format_starting_point_checklist(starting_point: str) -> str: """Format the checklist for a starting point as markdown.""" if starting_point not in STARTING_POINT_CHECKLISTS: return "Unknown starting point selected." checklist = STARTING_POINT_CHECKLISTS[starting_point] lines = [f"## {checklist['icon']} {checklist['name']} Checklist\n"] lines.append("Review these criteria based on your creative approach:\n") for i, (check_name, check_desc) in enumerate(checklist["checks"], 1): lines.append(f"- [ ] **{check_name}**: {check_desc}") return "\n".join(lines) def format_detection_results(config_json: str) -> Tuple[str, str, str]: """ Detect starting point and return formatted results. Returns (detected_type, confidence_display, checklist). """ if not config_json or not config_json.strip(): return "world_geography", "*Paste a config to auto-detect*", "" detected, scores = detect_starting_point(config_json) # Format confidence display confidence_lines = ["**Detection Confidence:**\n"] sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True) for sp_key, score in sorted_scores[:5]: # Top 5 sp_info = STARTING_POINT_CHECKLISTS.get(sp_key, {}) icon = sp_info.get("icon", "") name = sp_info.get("name", sp_key) bar = "█" * int(score * 10) + "░" * (10 - int(score * 10)) confidence_lines.append(f"{icon} {name}: {bar} ({int(score*100)}%)") checklist = format_starting_point_checklist(detected) return detected, "\n".join(confidence_lines), checklist # ==================== METRICS FUNCTIONS ==================== def compute_config_metrics(config_json: str) -> Dict[str, Any]: """Compute quality metrics from a game config.""" try: config = json.loads(config_json) except json.JSONDecodeError: return {"error": "Invalid JSON"} # Count states and locations total_states = 0 total_locations = 0 all_choices = [] 
all_descriptions = [] states_with_consequences = 0 states_with_media = 0 for location_key, location_data in config.items(): if isinstance(location_data, dict): total_locations += 1 for state_key, state_data in location_data.items(): if isinstance(state_data, dict): total_states += 1 # Count choices choices = state_data.get("choices", []) if isinstance(choices, list): all_choices.append(len(choices)) # Count description length desc = state_data.get("description", "") if desc: all_descriptions.append(len(desc.split())) # Check for consequences if state_data.get("consequences") or state_data.get("on_enter"): states_with_consequences += 1 # Check for media if state_data.get("media") and len(state_data.get("media", [])) > 0: states_with_media += 1 # Calculate averages avg_choices = sum(all_choices) / len(all_choices) if all_choices else 0 avg_desc_length = sum(all_descriptions) / len(all_descriptions) if all_descriptions else 0 media_coverage = states_with_media / total_states if total_states > 0 else 0 return { "total_states": total_states, "total_locations": total_locations, "avg_choices_per_state": round(avg_choices, 2), "states_with_consequences": states_with_consequences, "avg_description_length": round(avg_desc_length, 1), "media_coverage": round(media_coverage * 100, 1), "states_with_media": states_with_media, } def compute_improvement_suggestions(config_json: str) -> str: """Generate improvement suggestions based on computed metrics.""" metrics = compute_config_metrics(config_json) if "error" in metrics: return f"**Error:** {metrics['error']}" suggestions = [] # Check state variety if metrics["total_states"] < 10: suggestions.append(f"**Limited content**: Only {metrics['total_states']} states. Consider adding more locations and states for variety.") # Check choice variety if metrics["avg_choices_per_state"] < 2.5: suggestions.append(f"**Low choice variety**: Average {metrics['avg_choices_per_state']} choices/state. 
Add more choices to reduce linearity.") # Check media coverage if metrics["media_coverage"] < 20: suggestions.append(f"**Limited media**: Only {metrics['media_coverage']}% of states have media. Add images/sounds for engagement.") # Check description length if metrics["avg_description_length"] < 15: suggestions.append(f"**Short descriptions**: Average {metrics['avg_description_length']} words. Expand for richer narrative.") # Check consequences consequence_ratio = metrics["states_with_consequences"] / metrics["total_states"] if metrics["total_states"] > 0 else 0 if consequence_ratio < 0.3: suggestions.append(f"**Few consequences**: Only {metrics['states_with_consequences']} states have consequences. Add more for player agency.") if not suggestions: return "**Config looks good!** No major issues detected based on structural metrics." return "## Improvement Suggestions\n\n" + "\n\n".join(suggestions) def format_metrics_display(config_json: str) -> str: """Format metrics as a readable markdown display.""" metrics = compute_config_metrics(config_json) if "error" in metrics: return f"**Error:** {metrics['error']}" return f"""## Config Metrics | Metric | Value | Target | |--------|-------|--------| | Total States | {metrics['total_states']} | 15+ | | Total Locations | {metrics['total_locations']} | 5+ | | Avg Choices/State | {metrics['avg_choices_per_state']} | 2.5+ | | States with Consequences | {metrics['states_with_consequences']} | 30%+ | | Avg Description Length | {metrics['avg_description_length']} words | 15+ | | Media Coverage | {metrics['media_coverage']}% | 50%+ | """ # ==================== ZERO GPU GENERATION ==================== # Lazy model loading (shared with D&D GM tab) _analysis_model = None _analysis_tokenizer = None _analysis_model_name = None def _ensure_torch(): """Lazy import torch.""" import torch return torch def _ensure_transformers(): """Lazy import transformers.""" import transformers return transformers def _ensure_analysis_model(model_name: 
str = "Qwen/Qwen3-4B"): """Lazy load model for config generation.""" global _analysis_model, _analysis_tokenizer, _analysis_model_name if _analysis_model is None or _analysis_model_name != model_name: tf = _ensure_transformers() print(f"Loading config generation model: {model_name}") if _analysis_model is not None: del _analysis_model del _analysis_tokenizer _torch = _ensure_torch() if _torch.cuda.is_available(): _torch.cuda.empty_cache() _analysis_tokenizer = tf.AutoTokenizer.from_pretrained(model_name) _analysis_model = tf.AutoModelForCausalLM.from_pretrained( model_name, torch_dtype="auto", device_map="auto" ) _analysis_model_name = model_name return _analysis_model, _analysis_tokenizer @spaces.GPU def generate_config_from_theme(theme: str, prompt_template: str) -> str: """Generate a game config from a theme using Zero GPU.""" if not theme.strip(): return "Please enter a theme." _torch = _ensure_torch() model, tokenizer = _ensure_analysis_model() zero = _torch.Tensor([0]).cuda() model.to(zero.device) # Build the prompt full_prompt = f"{prompt_template}\n\nTheme: {theme}\n\nGenerate a complete game config JSON:" messages = [{"role": "user", "content": full_prompt}] text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) inputs = tokenizer([text], return_tensors="pt").to(zero.device) with _torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=2048, temperature=0.7, do_sample=True, pad_token_id=tokenizer.eos_token_id, ) generated = tokenizer.decode(outputs[0], skip_special_tokens=True) # Extract JSON from response if "```json" in generated: start = generated.find("```json") + 7 end = generated.find("```", start) if end > start: return generated[start:end].strip() if "{" in generated: start = generated.find("{") # Find matching closing brace depth = 0 for i, char in enumerate(generated[start:], start): if char == "{": depth += 1 elif char == "}": depth -= 1 if depth == 0: return generated[start:i+1] return generated # 
==================== TAB CREATION ==================== def create_config_analysis_tab(): """Create the Config Analysis & Improvement tab.""" with gr.Tab("Config Analysis & Improvement"): gr.Markdown("## Config Analysis & Improvement") gr.Markdown("Analyze, improve, and generate game configs. Paste your config below or generate a new one.") # Shared config input at top with gr.Row(): config_input = gr.Textbox( label="Config JSON", lines=12, placeholder='{"location": {"state": {"description": "...", "choices": [...]}}}', scale=3 ) with gr.Column(scale=1): analyze_btn = gr.Button("Analyze Config", variant="primary", size="lg") clear_btn = gr.Button("Clear", size="sm") with gr.Tabs(): # ==================== SUB-TAB 1: ANALYZE ==================== with gr.TabItem("Analyze"): gr.Markdown("### Health Report & Metrics") with gr.Row(): with gr.Column(scale=1): health_report_output = gr.Markdown(value="*Paste config and click Analyze*") with gr.Column(scale=1): metrics_output = gr.Markdown(value="*Metrics will appear here*") with gr.Row(): with gr.Column(scale=1): gr.Markdown("### State Machine Diagram") gr.Markdown("*Copy to [mermaid.live](https://mermaid.live) to visualize*") mermaid_output = gr.Code(label="Mermaid Code", language=None, lines=12) mermaid_btn = gr.Button("Generate Diagram") with gr.Column(scale=1): gr.Markdown("### Story Flags") gr.Markdown("*Potential narrative issues*") flags_output = gr.Dataframe( headers=["Severity", "State", "Issue"], label="Story Flags", wrap=True ) with gr.Accordion("Legend", open=False): gr.Markdown(""" **Diagram Colors:** - Green: Start state - Pink: End state - Yellow: Dead-end (no exits) - Red: Orphaned (unreachable) **Severity Levels:** - Warning: Should probably fix - Info: Consider reviewing **Common Issues:** - *Disconnected choices*: Description doesn't mention the available choices - *Sparse content*: Very short descriptions - *Length outlier*: Description much longer/shorter than average - *Inconsistent media*: Some states 
have media, others don't """) # ==================== SUB-TAB 2: IMPROVEMENT GUIDE ==================== with gr.TabItem("Improvement Guide"): gr.Markdown("### Improvement Suggestions") suggestions_output = gr.Markdown(value="*Analyze a config to see suggestions*") # ==================== STARTING POINT CONTEXT ==================== with gr.Accordion("Starting Point Context (Tailored Checklists)", open=True): gr.Markdown("*Different creative approaches need different quality checks. Select or auto-detect your starting point.*") with gr.Row(): with gr.Column(scale=1): detect_btn = gr.Button("🔍 Auto-Detect from Config", variant="secondary") starting_point_dropdown = gr.Dropdown( choices=[ ("🌍 World Facts / Geography", "world_geography"), ("🎬 Video/Film (Writer)", "video_writer"), ("📹 Video/Film (Videographer)", "video_videographer"), ("🎵 Song / Lyric Ideas", "song_lyrics"), ("⏱️ Timeline Events", "timeline_events"), ("📚 Existing Structures (D&D)", "existing_structures"), ("🧠 Character Psychology", "character_psychology"), ("🔮 Mystery / Hidden Depth", "mystery_depth"), ("⚔️ Faction Politics", "faction_politics"), ], value="world_geography", label="Select Starting Point", interactive=True ) with gr.Column(scale=1): detection_confidence = gr.Markdown(value="*Click 'Auto-Detect' to analyze your config*") checklist_output = gr.Markdown(value="*Select a starting point or auto-detect to see tailored checklist*") with gr.Accordion("Quick Evaluation Method", open=False): gr.Markdown(""" **Prompt to use with any SOTA LLM:** > This config is for a basic text based game engine. I don't have any structural metrics to assess the quality of the config. What JSON things can we look at to see if it may be too bland for a person testing the game? Then paste your config with the prompt. 
""") gr.Markdown(""" **What to look for (Claude 3.5 analysis):** - **State variety**: More locations and states = more diversity - **Average choices per state**: Higher = less linear feel - **Consequences**: More = greater sense of agency - **Description length**: Longer = richer narrative - **Media usage**: Images/sounds = more engagement - **Narrative branching**: More paths = better replayability """) with gr.Accordion("Before/After Example", open=False): gr.Markdown("Example of config improvement using LLM suggestions:") display_original = originalconfigatbeinningofthisspace.replace(' ', ' ').replace('\n', '
') display_tips = claude3_5_06072024configtips.replace(' ', ' ').replace('\n', '
') display_improved = tipsupdatedconfigatbeinningofthisspace.replace(' ', ' ').replace('\n', '
') gr.HTML(f"""
Original Config:
{display_original}
Claude 3.5 Tips:
{display_tips}
Improved Config:
{display_improved}
""") # ==================== SUB-TAB 3: PROMPTS & TEMPLATES ==================== with gr.TabItem("Prompts & Templates"): gr.Markdown("### LLM Prompts for Config Generation") gr.Markdown("Use these prompts with external LLMs or generate directly with Zero GPU.") with gr.Accordion("Config Creation Prompts", open=True): gr.Code(WFStage2prompt, label="Stage 2: Story to JSON (works on 70B+)") with gr.Row(): theme_input = gr.Textbox( label="Theme for Generation", placeholder="A haunted mansion mystery...", scale=3 ) generate_btn = gr.Button("Generate with Zero GPU", variant="primary", scale=1) generated_config_output = gr.Code( label="Generated Config", language="json", lines=15, visible=True ) gr.Code(WFStage2prompt2, label="Alternate Stage 2 prompt (simpler)") gr.Code("Lets critique this JSON to find areas to fix", label="Config Critique Prompt") gr.Code("Lets make a list for the prompts we will use to make media objects in this JSON. Make one for a person to interpret and one for direct media generators that focus on keywords:", label="Media Prompt Generation") with gr.Accordion("Ending & Conflict Ideas", open=False): gr.Code(sterotypicalstoryendings, label="30 Stereotypical Story Endings (llama-3.3-70B)") gr.Code(InterpersonalConflictIdeasBetraylacksympathyunfairexpectations, label="Interpersonal Conflict Ideas (o1)") with gr.Accordion("Concept Brainstorm Texts", open=False): with gr.Row(): gr.Textbox(value=TimeRelatedConceptsForIdeaGeneration, label="Time-Related Concepts", lines=15) gr.Textbox(value=Nonlinearprogressionideas, label="Non-linear Progression Ideas", lines=15) with gr.Row(): gr.Textbox(value=Adjectivebasedcombinationideatextsv2, label="Adjective Combinations v2", lines=15) gr.Textbox(value=Adjectivebasedcombinationideatexts, label="Adjective Combinations v1", lines=15) with gr.Accordion("Example Workflow Outputs", open=False): gr.Markdown("**Simple Structure Workflow:**") gr.Code(SimpleStructureTimeline1stPrompt, label="Simple Structure - Input Prompt") 
gr.Code(Outputof1stSimpleStructurePrompt, label="Simple Structure - Output") gr.Markdown("**More Robust Workflow:**") gr.Code(MoreRobustTimeline1stPrompt, label="More Robust - 1st Prompt") gr.Code(Outputof1stMoreRobustPrompt, label="More Robust - 1st Output") # ==================== SUB-TAB 4: LLM IMPROVEMENT LOOP ==================== with gr.TabItem("LLM Improvement Loop"): gr.Markdown("### Iterative Config Improvement with External LLM") gr.Markdown("Generate prompts for ChatGPT/Claude to improve your config round by round. Track changes and metrics across iterations.") # Session state improvement_session = gr.State(value={ "rounds": [], "current_config": "", "original_config": "", "session_active": False, "current_round": 0 }) # Config Input Section with gr.Group(): gr.Markdown("#### 1. Load Config") with gr.Row(): loop_config_input = gr.Textbox( label="Config JSON", lines=8, placeholder="Paste your config here or load from playtest...", scale=3 ) with gr.Column(scale=1): gr.Markdown("**Demo Options:**") load_demo_btn = gr.Button("Load Starter (7 states)", variant="secondary", size="sm") load_complete_demo_btn = gr.Button("Load Complete (29 states)", variant="secondary", size="sm") load_demo_session_btn = gr.Button("Load Full Demo Session", variant="primary", size="sm") gr.Markdown("**Your Config:**") load_from_playtest_btn = gr.Button("Load from Playtest", variant="secondary", size="sm") start_session_btn = gr.Button("Start New Session", variant="primary") session_status = gr.Markdown("*No active session*") # Side-by-Side Demo Comparison - All 5 Rounds with gr.Accordion("Demo Comparison: All Rounds Side-by-Side", open=False): gr.Markdown("**See how each round builds on the previous - state by state progression:**") # Metrics overview demo_metrics_display = gr.Markdown("*Click 'Load Comparison' to see metrics*") load_comparison_btn = gr.Button("Load All Rounds", variant="secondary") # State selector dropdown gr.Markdown("---") gr.Markdown("**Select a State to 
Compare Across All Rounds:**") with gr.Row(): state_selector = gr.Dropdown( choices=[], value=None, label="Select State", scale=2 ) with gr.Column(scale=1): state_status = gr.Markdown("*Load comparison first*") # 5-column round view with gr.Row(): with gr.Column(scale=1): gr.Markdown("##### R0: Starter") round0_view = gr.Textbox(lines=12, interactive=False, show_label=False, placeholder="Base state") with gr.Column(scale=1): gr.Markdown("##### R1: Environment") round1_view = gr.Textbox(lines=12, interactive=False, show_label=False, placeholder="+ clues, details") with gr.Column(scale=1): gr.Markdown("##### R2: Characters") round2_view = gr.Textbox(lines=12, interactive=False, show_label=False, placeholder="+ motivations") with gr.Column(scale=1): gr.Markdown("##### R3: Choices") round3_view = gr.Textbox(lines=12, interactive=False, show_label=False, placeholder="+ options") with gr.Column(scale=1): gr.Markdown("##### R4: Tension") round4_view = gr.Textbox(lines=12, interactive=False, show_label=False, placeholder="+ reveals") # Changes summary for selected state state_changes_display = gr.Markdown("*Select a state above to see progression across rounds*") # Suggested Workflows Section with gr.Accordion("Suggested Workflows (click step to auto-set focus)", open=True): gr.Markdown("*Choose a workflow path or manually select focus areas below*") with gr.Row(): # Branching First with gr.Column(scale=1): gr.Markdown("**Branching First**\n*For linear configs needing player agency*") wf_branch_1 = gr.Button("1: choices", size="sm") wf_branch_2 = gr.Button("2: choices", size="sm") wf_branch_3 = gr.Button("3: detail", size="sm") wf_branch_4 = gr.Button("4: characters", size="sm") # Detail First with gr.Column(scale=1): gr.Markdown("**Detail First**\n*For skeleton configs with basic structure*") wf_detail_1 = gr.Button("1: detail", size="sm") wf_detail_2 = gr.Button("2: environment", size="sm") wf_detail_3 = gr.Button("3: choices", size="sm") wf_detail_4 = gr.Button("4: 
tension", size="sm") # Non-Branching with gr.Column(scale=1): gr.Markdown("**Non-Branching Depth**\n*For depth over breadth*") wf_nonbranch_1 = gr.Button("1: detail", size="sm") wf_nonbranch_2 = gr.Button("2: characters", size="sm") wf_nonbranch_3 = gr.Button("3: environment", size="sm") wf_nonbranch_4 = gr.Button("4: tension", size="sm") # Mystery with gr.Column(scale=1): gr.Markdown("**Mystery Deepening**\n*For investigation configs*") wf_mystery_1 = gr.Button("1: environment", size="sm") wf_mystery_2 = gr.Button("2: characters", size="sm") wf_mystery_3 = gr.Button("3: choices", size="sm") wf_mystery_4 = gr.Button("4: tension", size="sm") # Current Round Section with gr.Group(): gr.Markdown("#### 2. Generate Improvement Prompt") round_indicator = gr.Markdown("**Current Round: Not started**") with gr.Row(): loop_config_type = gr.Dropdown( choices=[ ("Auto-detect", "auto"), ("World/Geography", "world"), ("Timeline", "timeline"), ("Emotional Journey", "song"), ("Mystery", "mystery"), ("D&D/Fantasy", "dnd"), ("Film/Video", "film"), ("Story Beats", "beats"), ], value="auto", label="Config Type", scale=1 ) loop_focus_area = gr.Dropdown( choices=[ ("Balanced", "balanced"), ("Add Detail", "detail"), ("Add Choices", "choices"), ("Deepen Characters", "characters"), ("Environmental Storytelling", "environment"), ("Increase Tension", "tension"), ], value="balanced", label="Focus Area", scale=1 ) generate_loop_prompt_btn = gr.Button("Generate Prompt", variant="primary", scale=1) loop_prompt_output = gr.Code( label="AI Prompt (copy to ChatGPT/Claude)", language=None, lines=15 ) # After LLM Response Section with gr.Group(): gr.Markdown("#### 3. Paste LLM Response & Accept") loop_response_input = gr.Textbox( label="Paste Improved Config from LLM", lines=8, placeholder="After getting the improved config from ChatGPT/Claude, paste it here..." 
def get_flags_as_table(config):
    """Turn story-flag dicts into [severity, state, message] rows for the table.

    Falls back to a single informational row when the linter produced no
    flags, or when it returned plain strings instead of flag dicts.
    """
    flags = generate_story_flags(config)
    # A string payload (or nothing at all) means there is no structured data.
    if not flags or (isinstance(flags, list) and len(flags) > 0 and isinstance(flags[0], str)):
        fallback_message = str(flags[0]) if flags else "No config to analyze"
        return [["Info", "N/A", fallback_message]]
    rows = [
        [
            flag.get("severity", "info").capitalize(),
            flag.get("state", ""),
            flag.get("message", ""),
        ]
        for flag in flags
    ]
    return rows or [["Info", "N/A", "No issues detected"]]

def analyze_all(config_json):
    """Run every analysis pass over one config and bundle the four results."""
    return (
        get_config_health_summary(config_json),
        format_metrics_display(config_json),
        compute_improvement_suggestions(config_json),
        get_flags_as_table(config_json),
    )
def run_detection(config_json):
    """Auto-detect starting point and update UI."""
    detected_point, confidence_md, checklist_md = format_detection_results(config_json)
    return detected_point, confidence_md, checklist_md

def detect_config_type(config_json: str) -> str:
    """Best-effort guess of a config's genre from marker substrings.

    Marker groups are checked in priority order; the first group with any
    match wins. Empty input, or input matching nothing, yields "film".
    """
    if not config_json:
        return "film"
    haystack = config_json.lower()
    # (needles, resulting type) in the same priority order as before.
    marker_groups = (
        (("world", "location_", "terrain"), "world"),
        (("timeline", "event_"), "timeline"),
        (("emotional_journey", "moment_"), "song"),
        (("investigation", "clue_"), "mystery"),
        (("adventure", "encounter_"), "dnd"),
        (("beat_", "act_"), "beats"),
    )
    for needles, config_type in marker_groups:
        if any(needle in haystack for needle in needles):
            return config_type
    return "film"
def start_improvement_session(config_json, session):
    """Initialize a new improvement session.

    Returns a 5-tuple matching the wired outputs:
    (session state, status markdown, round indicator markdown,
     revert-dropdown payload, history markdown).
    On any error the incoming ``session`` is returned unchanged.
    """
    # Guard: nothing pasted (or whitespace only) — do not start a session.
    if not config_json or not config_json.strip():
        return (
            session,
            "**Error:** Please paste a config first",
            "**Current Round: Not started**",
            [],
            "*No rounds completed yet*"
        )
    # Validate JSON
    try:
        json.loads(config_json)
    except json.JSONDecodeError as e:
        return (
            session,
            f"**Error:** Invalid JSON - {str(e)}",
            "**Current Round: Not started**",
            [],
            "*No rounds completed yet*"
        )
    # Initialize new session: config is stored twice so "original_config"
    # survives later rounds overwriting "current_config".
    new_session = {
        "rounds": [],
        "current_config": config_json,
        "original_config": config_json,
        "session_active": True,
        "current_round": 1
    }
    metrics = compute_config_metrics(config_json)
    status = f"**Session Started!** Original config: {metrics.get('total_states', 0)} states, {metrics.get('avg_choices_per_state', 0)} avg choices"
    # NOTE(review): the 4th element ([]) feeds the revert Dropdown output;
    # whether a bare list resets its choices or its value depends on the
    # Gradio version in use — confirm against gr.update(choices=[]).
    return (
        new_session,
        status,
        "**Current Round: 1** (Ready to generate prompt)",
        [],
        "*No rounds completed yet*"
    )
def generate_improvement_prompt(config_json, config_type, focus_area, session):
    """Generate a context-aware improvement prompt for an external LLM.

    Combines the config type (auto-detected when requested), the chosen
    focus area, the round history, and the current config into a single
    copy-pasteable prompt string. Returns a plain error string when no
    session is active.
    """
    if not session.get("session_active"):
        return "Please start a session first by clicking 'Start New Session'."
    current_config = session.get("current_config", config_json)
    if not current_config:
        return "No config available. Please start a session."
    # Auto-detect type if needed
    actual_type = config_type if config_type != "auto" else detect_config_type(current_config)
    # Get type instructions (fall back to "film", the detector's default)
    type_instruction = CONFIG_TYPE_INSTRUCTIONS.get(actual_type, CONFIG_TYPE_INSTRUCTIONS["film"])
    # Get focus instructions
    focus_instruction = FOCUS_AREAS.get(focus_area, FOCUS_AREAS["balanced"])["instruction"]
    # Count states: configs are {location: {state: {...}}}, so each dict
    # value at the top level contributes its own state count.
    try:
        config = json.loads(current_config)
        state_count = sum(len(loc) for loc in config.values() if isinstance(loc, dict))
    except (json.JSONDecodeError, AttributeError, TypeError):
        # Narrowed from a bare `except:` — only the failures this snippet
        # can produce (bad JSON, or a parsed value without .values()).
        state_count = "unknown"
    # Build round context so the LLM builds on earlier focus areas.
    round_num = session.get("current_round", 1)
    round_context = ""
    if round_num > 1 and session.get("rounds"):
        prev_focuses = [r.get("focus_area", "unknown") for r in session["rounds"]]
        round_context = f"\n**Previous rounds:** {', '.join(prev_focuses)}\nThis is round {round_num}. Build on previous improvements."
    prompt = f"""I have a game config JSON for an interactive narrative game. Please help me improve it.

**CONFIG TYPE:** {actual_type.upper()}
**CURRENT SIZE:** {state_count} states
**FOCUS FOR THIS ROUND:** {FOCUS_AREAS.get(focus_area, {}).get('name', focus_area)}
**ROUND:** {round_num}{round_context}

{type_instruction}

{focus_instruction}

**IMPORTANT RULES:**
1. Return ONLY valid JSON - no explanations before or after
2. Keep the exact same structure (location > state > properties)
3. Every state MUST have: "description", "choices" (list), "transitions" (dict mapping choices to state IDs)
4. Do NOT rename existing state IDs - only add new ones
5. Ensure all transitions point to valid state IDs
6. Keep "media" and "media_prompt" fields if present

**HERE IS MY CURRENT CONFIG:**
```json
{current_config}
```

Please return an improved version of this config with the enhancements described above. Remember: ONLY return valid JSON, nothing else."""
    return prompt
def accept_round_result(new_config_json, session):
    """Accept the LLM result and advance to the next round.

    Returns a 6-tuple matching the wired outputs: (session state, metrics
    comparison markdown, round indicator, revert-dropdown choices, history
    markdown, response-textbox value). Error paths leave the session
    untouched and use gr.update() no-ops so unrelated widgets keep state.
    """
    if not session.get("session_active"):
        return (
            session,
            "*Start a session first*",
            "**Current Round: Not started**",
            [],
            "*No rounds completed yet*",
            ""  # Clear the input
        )
    if not new_config_json or not new_config_json.strip():
        return (
            session,
            "**Error:** Please paste the improved config from the LLM",
            f"**Current Round: {session.get('current_round', 1)}**",
            gr.update(),
            gr.update(),
            gr.update()
        )
    # Validate JSON before accepting anything.
    try:
        json.loads(new_config_json)
    except json.JSONDecodeError as e:
        return (
            session,
            f"**Error:** Invalid JSON - {str(e)}",
            f"**Current Round: {session.get('current_round', 1)}**",
            gr.update(),
            gr.update(),
            gr.update()
        )
    # Compute metrics before and after
    old_metrics = compute_config_metrics(session.get("current_config", ""))
    new_metrics = compute_config_metrics(new_config_json)
    # Create round record (keeps both configs so revert/export can replay it)
    round_data = {
        "round_number": session.get("current_round", 1),
        "config_before": session.get("current_config", ""),
        "config_after": new_config_json,
        "metrics_before": old_metrics,
        "metrics_after": new_metrics,
    }
    # Update session (copy, never mutate the incoming state in place)
    new_session = session.copy()
    new_session["rounds"] = session.get("rounds", []) + [round_data]
    new_session["current_config"] = new_config_json
    new_session["current_round"] = session.get("current_round", 1) + 1
    # Format metrics comparison
    comparison = f"""### Round {round_data['round_number']} Metrics Change

| Metric | Before | After | Change |
|--------|--------|-------|--------|
| States | {old_metrics.get('total_states', 0)} | {new_metrics.get('total_states', 0)} | {new_metrics.get('total_states', 0) - old_metrics.get('total_states', 0):+d} |
| Avg Choices | {old_metrics.get('avg_choices_per_state', 0)} | {new_metrics.get('avg_choices_per_state', 0)} | {new_metrics.get('avg_choices_per_state', 0) - old_metrics.get('avg_choices_per_state', 0):+.2f} |
| Avg Desc Length | {old_metrics.get('avg_description_length', 0)} | {new_metrics.get('avg_description_length', 0)} | {new_metrics.get('avg_description_length', 0) - old_metrics.get('avg_description_length', 0):+.1f} |
| Media Coverage | {old_metrics.get('media_coverage', 0)}% | {new_metrics.get('media_coverage', 0)}% | {new_metrics.get('media_coverage', 0) - old_metrics.get('media_coverage', 0):+.1f}% |
"""
    # Format history
    history_lines = ["### Improvement History\n"]
    for r in new_session["rounds"]:
        rn = r.get("round_number", "?")
        states_change = r.get("metrics_after", {}).get("total_states", 0) - r.get("metrics_before", {}).get("total_states", 0)
        # Fixed: use the :+d sign spec (as the table above does) instead of a
        # hard-coded "+" prefix, which rendered negative deltas as "+-2".
        history_lines.append(f"**Round {rn}:** {states_change:+d} states")
    # Update revert dropdown choices
    revert_choices = [(f"Round {r['round_number']}", r['round_number']) for r in new_session["rounds"]]
    return (
        new_session,
        comparison,
        f"**Current Round: {new_session['current_round']}** (Ready for next improvement)",
        revert_choices,
        "\n".join(history_lines),
        ""  # Clear the response input
    )
def revert_to_round(round_num, session):
    """Revert to a previous round's config.

    ``round_num`` may arrive as an int (when the dropdown was filled with
    (label, number) tuples by accept_round_result) or as a string such as
    "Round 3" (demo session, or a typed value — the dropdown allows custom
    values), so both forms are accepted.
    Returns (session state, status markdown, config-textbox value/update).
    """
    if not session.get("session_active") or not session.get("rounds"):
        return session, "*No rounds to revert to*", gr.update()
    # Normalize string dropdown values ("Round 3", "3") to an int.
    if isinstance(round_num, str):
        digits = "".join(ch for ch in round_num if ch.isdigit())
        round_num = int(digits) if digits else -1
    # Find the round
    target_round = None
    for r in session["rounds"]:
        if r.get("round_number") == round_num:
            target_round = r
            break
    if not target_round:
        return session, f"*Round {round_num} not found*", gr.update()
    # Revert session
    new_session = session.copy()
    new_session["current_config"] = target_round.get("config_after", "")
    new_session["current_round"] = round_num + 1
    # Keep only rounds up to and including the reverted round
    new_session["rounds"] = [r for r in session["rounds"] if r.get("round_number", 0) <= round_num]
    return (
        new_session,
        f"*Reverted to Round {round_num}. Continue from Round {round_num + 1}*",
        target_round.get("config_after", "")
    )

def export_session_history(session):
    """Export session history as a JSON file; returns the path or None.

    Returns None when there is nothing to export (no completed rounds).
    """
    if not session.get("rounds"):
        return None
    import tempfile
    from datetime import datetime
    export_data = {
        "export_date": datetime.now().isoformat(),
        "original_config": session.get("original_config", ""),
        "final_config": session.get("current_config", ""),
        "total_rounds": len(session.get("rounds", [])),
        "rounds": session.get("rounds", [])
    }
    # Write to temp file. Fixed: the computed timestamped filename was being
    # discarded in favor of a literal placeholder path, so every export
    # overwrote the same non-.json file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"improvement_session_{timestamp}.json"
    filepath = f"{tempfile.gettempdir()}/{filename}"
    with open(filepath, "w") as f:
        json.dump(export_data, f, indent=2)
    return filepath

def set_focus_area(focus_value):
    """Identity passthrough used by the workflow buttons to set the focus dropdown."""
    return focus_value
def load_demo_mystery():
    """Return the 7-state starter mystery config (JSON string) for the tutorial."""
    return '''{
  "investigation": {
    "crime_scene": {
      "description": "A dimly lit office. Papers scattered everywhere. A broken window.",
      "choices": ["Examine the desk", "Check the window", "Leave"],
      "transitions": {
        "Examine the desk": "desk_clue",
        "Check the window": "window_clue",
        "Leave": "hallway"
      }
    },
    "desk_clue": {
      "description": "The desk has a half-written letter and an empty coffee cup.",
      "choices": ["Read the letter", "Back to scene"],
      "transitions": {
        "Read the letter": "letter_reveal",
        "Back to scene": "crime_scene"
      }
    },
    "window_clue": {
      "description": "Glass shards on the inside. Someone broke in from outside.",
      "choices": ["Look outside", "Back to scene"],
      "transitions": {
        "Look outside": "outside_view",
        "Back to scene": "crime_scene"
      }
    },
    "letter_reveal": {
      "description": "The letter mentions a meeting tonight. It's unfinished.",
      "choices": ["Continue investigating"],
      "transitions": {
        "Continue investigating": "crime_scene"
      }
    },
    "outside_view": {
      "description": "Footprints in the mud lead toward the parking lot.",
      "choices": ["Follow the footprints", "Back inside"],
      "transitions": {
        "Follow the footprints": "hallway",
        "Back inside": "crime_scene"
      }
    },
    "hallway": {
      "description": "The hallway is quiet. A security guard approaches.",
      "choices": ["Talk to guard", "Return to office"],
      "transitions": {
        "Talk to guard": "guard_talk",
        "Return to office": "crime_scene"
      }
    },
    "guard_talk": {
      "description": "The guard says he heard nothing unusual tonight.",
      "choices": ["Press for details", "Thank him and leave"],
      "transitions": {
        "Press for details": "guard_talk",
        "Thank him and leave": "hallway"
      }
    }
  }
}'''

def load_complete_demo():
    """Load the completed mystery demo showing the end result after 4 rounds.

    Reads demo_mystery_complete.json from the parent directory; returns a
    minimal inline config string when the file cannot be read.
    """
    import os
    demo_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "demo_mystery_complete.json")
    try:
        with open(demo_path, 'r') as f:
            return f.read()
    except OSError:
        # Narrowed from a bare `except:`: only file-access failures should
        # trigger the fallback, not programming errors or interrupts.
        # Fallback to inline minimal version
        return '{"investigation": {"crime_scene": {"description": "Complete demo file not found. Please ensure demo_mystery_complete.json exists.", "choices": ["OK"], "transitions": {"OK": "crime_scene"}}}}'
def load_demo_session():
    """Load a complete demo session showing 4 rounds of improvement.

    Returns the 6-tuple wired to (session state, config textbox, status,
    round indicator, revert dropdown, history markdown).
    """
    import os
    from demo_llm_loop_examples import DEMO_MYSTERY_STARTER, DEMO_AFTER_ROUND_1
    # Load the complete demo
    demo_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "demo_mystery_complete.json")
    try:
        with open(demo_path, 'r') as f:
            complete_config = f.read()
    except OSError:
        # Narrowed from a bare `except:` — only file-access failures fall back.
        complete_config = DEMO_AFTER_ROUND_1
    # Create simulated round history
    demo_session = {
        "rounds": [
            {
                "round_number": 1,
                "focus_area": "environment",
                "config_type": "mystery",
                "metrics_before": {"total_states": 7, "avg_choices": 2.0, "avg_desc_words": 12},
                "metrics_after": {"total_states": 18, "avg_choices": 2.8, "avg_desc_words": 45},
                "prompt_summary": "Focus: Environmental storytelling - plant clues in descriptions, add sensory details"
            },
            {
                "round_number": 2,
                "focus_area": "characters",
                "config_type": "mystery",
                "metrics_before": {"total_states": 18, "avg_choices": 2.8, "avg_desc_words": 45},
                "metrics_after": {"total_states": 22, "avg_choices": 3.1, "avg_desc_words": 52},
                "prompt_summary": "Focus: Character depth - NPC motivations, dialogue, relationships"
            },
            {
                "round_number": 3,
                "focus_area": "choices",
                "config_type": "mystery",
                "metrics_before": {"total_states": 22, "avg_choices": 3.1, "avg_desc_words": 52},
                "metrics_after": {"total_states": 25, "avg_choices": 3.6, "avg_desc_words": 55},
                "prompt_summary": "Focus: Investigation options - multiple approaches, hidden paths"
            },
            {
                "round_number": 4,
                "focus_area": "tension",
                "config_type": "mystery",
                "metrics_before": {"total_states": 25, "avg_choices": 3.6, "avg_desc_words": 55},
                "metrics_after": {"total_states": 29, "avg_choices": 3.8, "avg_desc_words": 62},
                "prompt_summary": "Focus: Tension & stakes - red herrings, dramatic reveals, time pressure"
            }
        ],
        "current_config": complete_config,
        "original_config": DEMO_MYSTERY_STARTER,
        "session_active": True,
        "current_round": 5
    }
    # Format history display
    history_lines = ["## Demo Session: Mystery Deepening Workflow\n"]
    history_lines.append("| Round | Focus | States | Choices | Desc Length |")
    history_lines.append("|-------|-------|--------|---------|-------------|")
    history_lines.append("| Start | - | 7 | 2.0 | 12 words |")
    for r in demo_session["rounds"]:
        m = r["metrics_after"]
        history_lines.append(f"| R{r['round_number']} | {r['focus_area']} | {m['total_states']} | {m['avg_choices']} | {m['avg_desc_words']} words |")
    history_lines.append("\n### Round Details\n")
    for r in demo_session["rounds"]:
        delta_states = r["metrics_after"]["total_states"] - r["metrics_before"]["total_states"]
        history_lines.append(f"**Round {r['round_number']}: {r['focus_area'].title()}**")
        history_lines.append(f"- {r['prompt_summary']}")
        history_lines.append(f"- Added {delta_states} new states\n")
    history_text = "\n".join(history_lines)
    # Revert dropdown options. Fixed for consistency with accept_round_result:
    # use (label, round_number) tuples so revert_to_round receives the int
    # round number instead of a bare "Round N" string it cannot match.
    revert_options = [(f"Round {r['round_number']}", r['round_number']) for r in demo_session["rounds"]]
    return (
        demo_session,
        complete_config,
        "**Demo Session Loaded!** 4 rounds of Mystery Deepening workflow. See Round History below.",
        "**Current Round: 5** (Demo complete - review history or start fresh)",
        revert_options,
        history_text
    )
def load_demo_comparison_data():
    """Load all 5 demo rounds and prepare comparison data.

    Returns a 4-tuple for the wired outputs: (comparison-data state dict,
    metrics markdown, dropdown update with per-state choices, status text).
    """
    from demo_llm_loop_examples import (
        DEMO_MYSTERY_STARTER, DEMO_AFTER_ROUND_1, DEMO_AFTER_ROUND_2,
        DEMO_AFTER_ROUND_3, DEMO_AFTER_ROUND_4
    )
    # Parse all rounds; any malformed demo payload aborts with an error view.
    try:
        rounds = [
            json.loads(DEMO_MYSTERY_STARTER),
            json.loads(DEMO_AFTER_ROUND_1),
            json.loads(DEMO_AFTER_ROUND_2),
            json.loads(DEMO_AFTER_ROUND_3),
            json.loads(DEMO_AFTER_ROUND_4),
        ]
    except Exception as e:
        return (
            {"rounds": [{}, {}, {}, {}, {}], "all_states": []},
            f"Error loading demo configs: {e}",
            gr.update(choices=[], value=None),
            "*Error loading data*"
        )
    # Get investigation states from each round (the demo's single location).
    round_states = [r.get("investigation", {}) for r in rounds]
    # Collect all state names across all rounds
    all_state_names = set()
    for rs in round_states:
        all_state_names.update(rs.keys())
    # Find states in R0 (starter) for "original" classification
    r0_states = set(round_states[0].keys())
    # NOTE(review): r4_states is computed but never used below — candidate
    # for removal.
    r4_states = set(round_states[4].keys())
    # Classify states
    original_states = sorted(r0_states)  # States that existed from the start
    new_states = sorted(all_state_names - r0_states)  # States added in later rounds
    # Build choices - original first, then new. Each entry is a
    # (display label, state name) pair for the dropdown.
    state_choices = []
    for state in original_states:
        state_choices.append((f"📝 {state} (original)", state))
    for state in new_states:
        # Find which round it first appeared in
        first_round = "?"
        for i, rs in enumerate(round_states):
            if state in rs:
                first_round = f"R{i}"
                break
        state_choices.append((f"✨ {state} (added {first_round})", state))
    # Calculate metrics for each round
    def calc_metrics(states_dict):
        # Per-round (state count, avg choices, avg description words).
        if not states_dict:
            return 0, 0, 0
        count = len(states_dict)
        total_choices = sum(len(s.get('choices', [])) for s in states_dict.values())
        total_words = sum(len(s.get('description', '').split()) for s in states_dict.values())
        return count, round(total_choices / count, 1) if count else 0, round(total_words / count, 0) if count else 0
    metrics = [calc_metrics(rs) for rs in round_states]
    metrics_md = f"""## Round-by-Round Metrics

| Round | Focus | States | Avg Choices | Avg Words |
|-------|-------|--------|-------------|-----------|
| **R0** | Starter | {metrics[0][0]} | {metrics[0][1]} | {int(metrics[0][2])} |
| **R1** | Environment | {metrics[1][0]} | {metrics[1][1]} | {int(metrics[1][2])} |
| **R2** | Characters | {metrics[2][0]} | {metrics[2][1]} | {int(metrics[2][2])} |
| **R3** | Choices | {metrics[3][0]} | {metrics[3][1]} | {int(metrics[3][2])} |
| **R4** | Tension | {metrics[4][0]} | {metrics[4][1]} | {int(metrics[4][2])} |

**Legend:** 📝 Original state | ✨ Added in later round

---
*Select a state below to see its progression across all 5 rounds.*
"""
    comparison_data = {
        "rounds": round_states,
        "all_states": sorted(all_state_names)
    }
    # Default to first original state for meaningful comparison
    default_state = original_states[0] if original_states else (list(all_state_names)[0] if all_state_names else None)
    return (
        comparison_data,
        metrics_md,
        gr.update(choices=state_choices, value=default_state),
        f"*{len(original_states)} original, {len(new_states)} added*"
    )
def format_state_view(state_data):
    """Render one state's description plus a numbered choice list as plain text."""
    if not state_data:
        return "(not present)"
    options = state_data.get('choices', [])
    lines = [f"{state_data.get('description', 'N/A')}\n", f"Choices ({len(options)}):"]
    lines.extend(f" {index}. {option}" for index, option in enumerate(options, 1))
    return "\n".join(lines).strip()

def show_state_comparison(state_name, comparison_data):
    """Show one state across all 5 demo rounds.

    Returns five formatted state views (R0..R4) plus a markdown progression
    summary with word/choice counts per round.
    """
    if not state_name or not comparison_data or not comparison_data.get("rounds"):
        placeholder = "(select a state)"
        return placeholder, placeholder, placeholder, placeholder, placeholder, "*Select a state to compare*"
    rounds = comparison_data.get("rounds", [{}, {}, {}, {}, {}])
    # One snapshot (possibly None) per round, formatted and measured.
    snapshots = [rs.get(state_name) for rs in rounds]
    views = [format_state_view(snapshot) for snapshot in snapshots]
    word_counts = [
        len(snapshot.get('description', '').split()) if snapshot else 0
        for snapshot in snapshots
    ]
    choice_counts = [
        len(snapshot.get('choices', [])) if snapshot else 0
        for snapshot in snapshots
    ]
    # Earliest round that contains the state at all (None if never present).
    first_appearance = next((i for i, rs in enumerate(rounds) if state_name in rs), None)
    if first_appearance is None:
        summary = f"**`{state_name}`** - State not found in any round"
    elif first_appearance == 0:
        summary = f"""**`{state_name}`** - Original state, evolved through all rounds

| Metric | R0 | R1 | R2 | R3 | R4 |
|--------|----|----|----|----|----|
| Words | {word_counts[0]} | {word_counts[1]} | {word_counts[2]} | {word_counts[3]} | {word_counts[4]} |
| Choices | {choice_counts[0]} | {choice_counts[1]} | {choice_counts[2]} | {choice_counts[3]} | {choice_counts[4]} |
"""
    else:
        summary = f"""**`{state_name}`** - Added in Round {first_appearance}

| Metric | R0 | R1 | R2 | R3 | R4 |
|--------|----|----|----|----|----|
| Words | {word_counts[0] or '-'} | {word_counts[1] or '-'} | {word_counts[2] or '-'} | {word_counts[3] or '-'} | {word_counts[4] or '-'} |
| Choices | {choice_counts[0] or '-'} | {choice_counts[1] or '-'} | {choice_counts[2] or '-'} | {choice_counts[3] or '-'} | {choice_counts[4] or '-'} |
"""
    return views[0], views[1], views[2], views[3], views[4], summary
fn=load_demo_comparison_data, outputs=[demo_comparison_data, demo_metrics_display, state_selector, state_status] ) state_selector.change( fn=show_state_comparison, inputs=[state_selector, demo_comparison_data], outputs=[round0_view, round1_view, round2_view, round3_view, round4_view, state_changes_display] ) # Workflow step buttons - auto-set focus area for btn, focus in [ (wf_branch_1, "choices"), (wf_branch_2, "choices"), (wf_branch_3, "detail"), (wf_branch_4, "characters"), (wf_detail_1, "detail"), (wf_detail_2, "environment"), (wf_detail_3, "choices"), (wf_detail_4, "tension"), (wf_nonbranch_1, "detail"), (wf_nonbranch_2, "characters"), (wf_nonbranch_3, "environment"), (wf_nonbranch_4, "tension"), (wf_mystery_1, "environment"), (wf_mystery_2, "characters"), (wf_mystery_3, "choices"), (wf_mystery_4, "tension"), ]: btn.click(fn=lambda f=focus: f, outputs=[loop_focus_area])