| """Resources Hub Tab. | |
| This module provides links to resources, engine documentation, and tutorials. | |
| """ | |
| import gradio as gr | |
| # Import test media HTML from relatively_constant_variables | |
| try: | |
| from relatively_constant_variables import TestmedialoadinHTML | |
| except ImportError: | |
| TestmedialoadinHTML = '<div>Test media loading HTML not available</div>' | |
| def create_resources_hub_tab(): | |
| """Create the Resources Hub tab.""" | |
| with gr.Tab("Resources Hub"): | |
| gr.Markdown("## Consolidated Resources for Game Development") | |
| gr.Markdown("All documentation, tools, tutorials, and references organized by category.") | |
| with gr.Tab("Engine & Mechanics"): | |
| gr.HTML("There is an issue with the loading of the choices above 4 - only 3 load max it seems") | |
| gr.HTML("Placeholder for explanations of Player and Game Session") | |
| with gr.Tab("Endless Commerce support"): | |
| gr.HTML("Need to be able to support this type of code for upgrades - https://www.decisionproblem.com/paperclips/index2.html - https://www.reddit.com/r/incremental_games/comments/rc7ks7/the_unique_storytelling_of_universal_paperclips/ <br>https://huggingface.co/spaces/osanseviero/TheMLGame/blob/main/main.js") | |
| with gr.Tab("Current '1D Engine' Defects"): | |
| gr.HTML("To test the config idea I (with llm assistance) had to make an 'engine' that was based around the config - so there are many potholes ") | |
| gr.HTML("All realtime events - Text still needs realtime as well") | |
| with gr.Tab("Inventory and Skill Support"): | |
| gr.HTML("Each decision affects Skills or inventory") | |
| with gr.Tab("NPC Support"): | |
| gr.HTML("Shared timeline that the player interfere with") | |
| with gr.Tab("Economics Support"): | |
| gr.HTML("Style Idea for a Basic Idea - Endless Economy (Tiny Tower as well) - Paperclip maximiser and inspirations - https://huggingface.co/spaces/osanseviero/TheMLGame") | |
| with gr.Tab("Time Support"): | |
| gr.HTML("No urgency / patience mechanics") | |
| with gr.Tab("LLM play testing"): | |
| gr.Markdown("*Full LLM playtesting is now in the **Test** tab at root level.*") | |
| with gr.Tab("Real World & Robotics Controllers"): | |
| gr.Markdown("### Real World Game Extensions") | |
| gr.HTML("Side Quests can be to build with a real world 3D printer - eg. Map or speculating how uncharted territories are. Or Star wars view pinpointer (rey)") | |
| gr.HTML("3D printed trophies or game items as real items") | |
| gr.HTML("Smart Watch and Phone - Notifications to watch and eg. character messages, authorisation requests, in game internet - Cloud computing / SBC or controller project ideas") | |
| gr.HTML("Some Image to 3D options (As Position Refernece) - https://huggingface.co/spaces/FrozenBurning/3DTopia-XL") | |
| gr.HTML("Sites to use (Majority Free / Free Tier) - https://stephaneginier.com/sculptgl/ (Sculpting Editor No Login) | https://www.tinkercad.com/ (Editor) | https://app.vectary.com/ (Editor) | https://clara.io/ (Editor and huge models library) | Sketchfab (Model Library) | https://www.figuro.io/Home/Welcome (Editor and some tutorials) | Spline (Editor and Presentation) | https://www.selfcad.com/ (Editor)") | |
| gr.Markdown("### LLM/Robotics as Custom Controllers") | |
| gr.HTML("https://www.reddit.com/r/singularity/comments/1fm7fup/ihmc_and_boardwalk_robotics_show_their_humanoid/") | |
| gr.HTML("Controls changed the scope of the game eg. mouse vs keyboard vs console controller vs remote vs touch screen <br>LLM can be vision/surveilance based controler (eg. MGS/GTA camera gauged by an actual camera in real life) or it can be a companion (offline/off console game progrssion ideas)") | |
| gr.HTML("https://github.com/Shaka-Labs/ACT $250 imitation learning/teleoperation - eg. a win loss result alert / NPC 'scout' telling you go or stay") | |
| gr.HTML("https://huggingface.co/posts/thomwolf/809364796644704") | |
| gr.HTML("Robotics - https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/ https://huggingface.co/lerobot https://github.com/tonyzhaozh/aloha https://github.com/Shaka-Labs/ACT https://github.com/OpenTeleVision/TeleVision https://www.stereolabs.com/ ") | |
| gr.HTML("https://www.reddit.com/r/singularity/comments/1f88z58/the_first_ever_agent_civilization_1000_truly/") | |
| with gr.Tab("Existing Game Developemnt Resources"): | |
| gr.HTML("https://enginesdatabase.com/") | |
| gr.HTML("https://develop.games/#nav-tools-engine ") | |
| with gr.Tab("Other Considerations"): | |
| with gr.Tab("General"): | |
| gr.HTML("https://www.reddit.com/r/singularity/comments/1fiugew/wonderworld_a_novel_framework_for_interactive_3d/") | |
| gr.HTML("https://huggingface.co/docs/hub/api - daily papers is an endpoint so you can turn paper abstract into games with the help of LLM") | |
| gr.HTML("Experiment for https://huggingface.co/spaces/ysharma/open-interpreter/blob/main/app.py inplementation with gradio client api") | |
| gr.HTML("https://huggingface.co/spaces/HuggingFaceTB/SmolLM-135M-Instruct-WebGPU") | |
| gr.HTML("Useful Spaces and links: https://huggingface.co/spaces/artificialguybr/Stable-Audio-Open-Zero https://huggingface.co/spaces/stabilityai/TripoSR https://huggingface.co/spaces/wangfuyun/AnimateLCM-SVD https://huggingface.co/spaces/multimodalart/face-to-all https://huggingface.co/spaces/facebook/MusicGen https://huggingface.co/spaces/Doubiiu/tooncrafter") | |
| gr.HTML("langchain docs as awareness for alot of the integration use cases and providers that are possible - https://python.langchain.com/v0.2/docs/integrations/tools/") | |
| gr.HTML("https://huggingface.co/spaces/linoyts/scribble-sdxl-flash as map planner") | |
| gr.HTML("---------------------------------------Gameplay Ideas-------------------------------") | |
| gr.HTML("https://huggingface.co/spaces/Lin-Chen/ShareCaptioner-Video - game use example police questions a event with multiple eye witnesses needs to give as close to the caption description to win") | |
| with gr.Tab("State management through huggingface?"): | |
| gr.HTML("Huggingface as the login provider? - https://huggingface.co/spaces/Wauplin/gradio-user-history/tree/main https://huggingface.co/spaces/AP123/IllusionDiffusion https://huggingface.co/docs/hub/en/spaces-oauth https://huggingface.co/docs/hub/en/oauth, persistent storage - https://huggingface.co/docs/hub/en/spaces-storage") | |
| with gr.Tab("Finetuning options"): | |
| gr.HTML("https://docs.mistral.ai/guides/finetuning/ <br>https://openpipe.ai/blog/fine-tuning-best-practices-chapter-2-models") | |
| gr.HTML("Unsloth and Colab? - https://github.com/unslothai/unsloth https://huggingface.co/unsloth <br>Mistral Nemo Base - https://huggingface.co/unsloth/Mistral-Nemo-Base-2407 - https://colab.research.google.com/drive/17d3U-CAIwzmbDRqbZ9NnpHxCkmXB6LZ0?usp=sharing <br>Llama 3 8B https://huggingface.co/unsloth/llama-3-8b-Instruct-bnb-4bit") | |
| gr.HTML("Price - https://openpipe.ai/pricing") | |
| with gr.Tab("Backend and/or Hosting?"): | |
| gr.HTML("Deployemnt options - https://huggingface.co/SpacesExamples", "https://huggingface.co/templates") | |
| gr.HTML("Prototyping and freemium <br>free api <br>HF Pro subscription") | |
| gr.HTML("GPU (Data privacy) = No Rate limits? - https://replicate.com/pricing, https://lambdalabs.com/service/gpu-cloud https://huggingface.co/pricing#endpoints https://tensordock.com/cloud-gpus", "https://massedcompute.com/home/pricing/" ) | |
| gr.HTML("Speed - Groq, SambaNova, https://www.etched.com/announcing-etched ") | |
| gr.HTML("Price - Coding - https://aider.chat/docs/leaderboards/ - https://www.deepseek.com/ 0.3 per million - is this per token or chinese character as that means converting code to chinese if possible can save api cost?") | |
| gr.HTML("Llama 3.1 405B - https://ai.meta.com/blog/meta-llama-3-1/ https://replicate.com/meta/meta-llama-3.1-405b-instruct https://fireworks.ai/pricing https://www.ibm.com/products/watsonx-ai/foundation-models") | |
| gr.HTML("Covered by Anythingllm - https://github.com/Mintplex-Labs/anything-llm : https://repocloud.io/details/?app_id=276, https://render.com/pricing, https://docs.railway.app/reference/pricing/free-trial, https://repocloud.io/pricing, https://elest.io/pricing ") | |
| with gr.Tab("HF Spaces Build Options"): | |
| gr.Markdown("### HuggingFace Spaces Build Configuration") | |
| gr.Markdown("Options for configuring builds, handling timeouts, and managing heavy dependencies.") | |
| with gr.Accordion("SDK Options", open=True): | |
| gr.Markdown(""" | |
| **1. Gradio SDK** (Simple, default) | |
| ```yaml | |
| sdk: gradio | |
| python_version: 3.10 | |
| ``` | |
| - Uses `requirements.txt` for dependencies | |
| - Build timeout: ~30 min default | |
| - Simple but limited control | |
| **2. Docker SDK** (Full control) | |
| ```yaml | |
| sdk: docker | |
| app_port: 7860 | |
| ``` | |
| - Full `Dockerfile` control | |
| - Can use pre-built images | |
| - Better layer caching | |
| """) | |
| with gr.Accordion("Timeout & Build Settings", open=True): | |
| gr.Markdown(""" | |
| **Extend Startup Timeout** (in README.md YAML): | |
| ```yaml | |
| startup_duration_timeout: 1h | |
| ``` | |
| - Default: 30 minutes | |
| - Max: varies by plan | |
| **Build Variables**: | |
| ```yaml | |
| env: | |
| - MY_VAR=value | |
| ``` | |
| Passed as build-args to Docker. | |
| **Secrets**: Set in Settings tab, expose in Dockerfile with `RUN --mount=type=secret` | |
| """) | |
| with gr.Accordion("Heavy Dependencies (like dlib)", open=True): | |
| gr.Markdown(""" | |
| | Approach | Pros | Cons | | |
| |----------|------|------| | |
| | **Docker + pre-built wheel** | Fast build | Need to find/host wheel | | |
| | **Runtime install** | No build timeout | Slow first run | | |
| | **API fallback** | No deps needed | External service reliability | | |
| | **Duplicate Space** | Pre-built available | Maintenance overhead | | |
| **Example: Pre-built dlib in Dockerfile**: | |
| ```dockerfile | |
| FROM python:3.10 | |
| RUN pip install dlib --find-links https://example.com/dlib-wheel.whl | |
| ``` | |
| **Runtime Install Pattern**: | |
| ```python | |
| @spaces.GPU | |
| def my_function(): | |
| import subprocess | |
| subprocess.run(["pip", "install", "heavy-package"]) | |
| # Then use it... | |
| ``` | |
| """) | |
| with gr.Accordion("Useful Links", open=False): | |
| gr.HTML(""" | |
| <a href="https://huggingface.co/docs/hub/en/spaces-sdks-docker" target="_blank">Docker Spaces Docs</a><br> | |
| <a href="https://huggingface.co/docs/hub/en/spaces-config-reference" target="_blank">Spaces Config Reference</a><br> | |
| <a href="https://huggingface.co/docs/hub/en/spaces-overview" target="_blank">Spaces Overview</a><br> | |
| <a href="https://huggingface.co/SpacesExamples" target="_blank">Spaces Examples</a><br> | |
| <a href="https://huggingface.co/templates" target="_blank">Space Templates</a> | |
| """) | |
| with gr.Tab("Some Interesting Git Repos"): | |
| gr.HTML("https://github.com/NVIDIA/Megatron-LM https://github.com/OpenGVLab/EfficientQAT https://github.com/evintunador/minLlama3/blob/main/model.py https://github.com/evintunador/micro-GPT-sandbox") | |
| with gr.Tab("Old Ideas"): | |
| gr.HTML("""<div style="width: 100%; text-align: center">Main ideas for this space is (June 2024) (Custom component planning?):</div> | |
| <div style="display: flex; justify-content: center; margin-bottom: 20px; align-items: center;"> | |
| <div style="width: 20%; text-align: center">We can generate almost any media data and more </div> | |
| <div style="width: 20%; text-align: center">A program exist around data </div> | |
| <div style="width: 20%; text-align: center">Time moves in a straight so all considerations are flattend by the nature of time </div> | |
| <div style="width: 20%; text-align: center">llms good at short questions </div> | |
| <div style="width: 20%; text-align: center">HF + Gradio allows for api use so this my prototype tool for tool use test</div> | |
| </div>""") | |
| with gr.Tab("Licensing"): | |
| gr.HTML("Need to find the press release to see license eg. https://blackforestlabs.ai/announcing-black-forest-labs/") | |
            with gr.Tab("Links to go over when free"):
                gr.HTML("https://www.reddit.com/r/singularity/comments/1ecuu8j/you_can_now_use_ai_for_3d_model_creation/")
| with gr.Tab("Audio Resources"): | |
| gr.Markdown("### Audio Generation & Sound Design Resources") | |
| gr.Markdown("Resources for music, sound effects, and voice generation for games.") | |
| with gr.Accordion("AI Audio Generation", open=True): | |
| gr.HTML(""" | |
| <b>HuggingFace Spaces:</b><br> | |
| <a href="https://huggingface.co/spaces/artificialguybr/Stable-Audio-Open-Zero" target="_blank">Stable Audio</a> - Sound effects & ambient<br> | |
| <a href="https://huggingface.co/spaces/facebook/MusicGen" target="_blank">MusicGen</a> - Music generation<br> | |
| <a href="https://huggingface.co/spaces/hexgrad/Kokoro-TTS" target="_blank">Kokoro-82M</a> - Fast, natural voice synthesis<br> | |
| <a href="https://huggingface.co/spaces/Supertone/supertonic-2" target="_blank">Supertonic-2</a> - High-quality expressive TTS<br> | |
| <a href="https://huggingface.co/spaces/zai-org/GLM-TTS" target="_blank">GLM-TTS</a> - Multilingual TTS with voice cloning<br> | |
| <a href="https://huggingface.co/spaces/fishaudio/fish-speech-1" target="_blank">Fish Speech</a> - Voice cloning<br> | |
| <a href="https://huggingface.co/spaces/suno/bark" target="_blank">Bark</a> - Text to speech with emotion<br> | |
| <br><b>Talking Portraits:</b><br> | |
| <a href="https://huggingface.co/spaces/fffiloni/tts-hallo-talking-portrait" target="_blank">Hallo</a> - Talking portrait animation<br> | |
| <a href="https://huggingface.co/spaces/KwaiVGI/LivePortrait" target="_blank">LivePortrait</a> - Portrait animation | |
| """) | |
| with gr.Accordion("3rd Party Audio Tools", open=False): | |
| gr.HTML(""" | |
| <b>Music Generation:</b><br> | |
| <a href="https://suno.com/" target="_blank">Suno</a> - AI music creation<br> | |
| <a href="https://www.udio.com/" target="_blank">Udio</a> - AI music generation<br> | |
| <br><b>Sound Libraries:</b><br> | |
| <a href="https://freesound.org/" target="_blank">Freesound</a> - Free sound effects<br> | |
| <a href="https://www.zapsplat.com/" target="_blank">ZapSplat</a> - Free SFX library<br> | |
| <a href="https://sonniss.com/gameaudiogdc" target="_blank">Sonniss GDC</a> - Game audio bundles | |
| """) | |
| with gr.Accordion("Audio Categories for Games", open=False): | |
| gr.Markdown(""" | |
| **Music Types:** | |
| - Background/Ambient - Sets mood, loops seamlessly | |
| - Interactive - Changes based on gameplay | |
| - Cutscene - Narrative-driven, linear | |
| - Menu - UI navigation, branded | |
| **Sound Effects:** | |
| - Environmental - Weather, nature, machinery | |
| - Character - Footsteps, voice, actions | |
| - Action - Combat, items, abilities | |
| - UI - Clicks, notifications, feedback | |
| **Speech:** | |
| - Dialogue - Character conversations | |
| - Narration - Story exposition | |
| - Voiceover - Instructions, tutorials | |
| """) | |
| with gr.Accordion("Music Generation Methods (Pre vs Post 2023)", open=True): | |
| gr.Markdown(""" | |
| **Comparison of AI Music Generation Methods:** | |
| | Method | Era | VRAM | Duration | Quality | Open Source | | |
| |--------|-----|------|----------|---------|-------------| | |
| | **Jukebox** | 2020 | 16GB+ | Slow (hours) | High | Yes | | |
| | **MusicVAE** | 2018 | 4-8GB | Fast | Low-Med | Yes | | |
| | **MuseNet** | 2019 | API only | Fast | Medium | No | | |
| | **Riffusion** | 2022 | 6-8GB | Fast | Medium | Yes | | |
| | **MusicGen** | 2023 | 8-16GB | Medium | High | Yes | | |
| | **Stable Audio** | 2023 | 8-12GB | Medium | High | Partial | | |
| | **AudioCraft** | 2023 | 8-16GB | Medium | High | Yes | | |
| | **MusicLM** | 2023 | API only | Fast | Very High | No | | |
| | **Suno v3** | 2024 | API only | Fast | Very High | No | | |
| | **Udio** | 2024 | API only | Fast | Very High | No | | |
| --- | |
| **Pre-2023 Methods (GPU Efficient, Lower Quality):** | |
| *Jukebox (OpenAI, 2020)* | |
| - VQ-VAE based, generates raw audio with vocals | |
| - Can continue/extend existing songs | |
| - ~16GB+ VRAM, extremely slow (hours per minute of audio) | |
| - Quality: Good but often "dreamy"/artifacts | |
| - Good for: Experimental, background ambience | |
| *MusicVAE (Magenta, 2018)* | |
| - MIDI-based, learns latent space of melodies | |
| - Fast, controllable interpolation between styles | |
| - ~4-8GB VRAM, real-time capable | |
| - Quality: Basic MIDI, needs good synths | |
| - Good for: Procedural game music, variations | |
| *Riffusion (2022)* | |
| - Fine-tuned Stable Diffusion on spectrograms | |
| - Novel approach: image diffusion for audio | |
| - ~6-8GB VRAM (same as SD 1.5) | |
| - Quality: Medium, 5-second clips | |
| - Good for: Quick sound effects, loops | |
| *MuseNet (OpenAI, 2019)* | |
| - Transformer-based MIDI generation | |
| - Multi-instrument, various styles | |
| - API only (no local) | |
| - Good for: Classical/jazz style compositions | |
| --- | |
| **Post-2023 Methods (Higher Quality, More Resources):** | |
| *MusicGen (Meta, 2023)* | |
| - Transformer + EnCodec neural codec | |
| - Text-to-music with melody conditioning | |
| - Small: 300M (~4GB), Medium: 1.5B (~8GB), Large: 3.3B (~16GB) | |
| - Quality: High, coherent structure | |
| - Good for: Game soundtracks, ambient music | |
| - **Runs on ZeroGPU with small/medium models** | |
| *Stable Audio (Stability AI, 2023)* | |
| - Latent diffusion for audio | |
| - Long-form generation (up to 90s) | |
| - ~8-12GB VRAM | |
| - Quality: High, especially for SFX | |
| - Good for: Sound effects, ambient, loops | |
| - **Runs on ZeroGPU** | |
| *AudioCraft/MusicGen-Stereo (Meta, 2023)* | |
| - Stereo output, better quality | |
| - Same architecture as MusicGen | |
| - ~10-16GB VRAM for stereo | |
| - Good for: Production-ready game audio | |
| *Suno v3/v4 (2024)* | |
| - Full songs with vocals and lyrics | |
| - Extremely high quality, human-like | |
| - API only, commercial service | |
| - Good for: Trailer music, title themes | |
| - Limitation: API costs, usage rights | |
| *Udio (2024)* | |
| - Competing with Suno, similar quality | |
| - Better at certain genres | |
| - API only, commercial service | |
| - Good for: Professional game soundtracks | |
| --- | |
| **Recommendations for ZeroGPU/HF Spaces:** | |
| | Use Case | Recommendation | VRAM | | |
| |----------|---------------|------| | |
| | Quick SFX | Stable Audio Open | ~8GB | | |
| | Background music | MusicGen Small | ~4GB | | |
| | Higher quality music | MusicGen Medium | ~8GB | | |
| | Production soundtracks | Suno/Udio API | N/A | | |
| | Procedural variations | MusicVAE | ~4GB | | |
| **Code Example for MusicGen on ZeroGPU:** | |
| ```python | |
| @spaces.GPU(duration=120) | |
| def generate_music(prompt, duration=10): | |
| from audiocraft.models import MusicGen | |
| model = MusicGen.get_pretrained('facebook/musicgen-small') | |
| model.set_generation_params(duration=duration) | |
| wav = model.generate([prompt]) | |
| return wav[0].cpu().numpy() | |
| ``` | |
| **HuggingFace Spaces for Music:** | |
| - [MusicGen](https://huggingface.co/spaces/facebook/MusicGen) | |
| - [Stable Audio](https://huggingface.co/spaces/artificialguybr/Stable-Audio-Open-Zero) | |
| - [AudioCraft](https://huggingface.co/spaces/facebook/audiocraft) | |
| - [Riffusion](https://huggingface.co/spaces/riffusion/riffusion-playground) | |
| """) | |
| with gr.Tab("GPU & Video Generation"): | |
| gr.Markdown("### ZeroGPU & Video Generation Reference") | |
| gr.Markdown("Technical specifications and methods for GPU-based generation on HuggingFace Spaces.") | |
| with gr.Accordion("ZeroGPU Specifications & Limitations", open=True): | |
| gr.Markdown(""" | |
| **Hardware:** NVIDIA H200 GPU with ~70GB VRAM (shared/virtualized) | |
| **Default Timeout:** 60 seconds per `@spaces.GPU` decorated function call | |
| **Extending Timeout:** | |
| ```python | |
| @spaces.GPU(duration=180) # 3 minutes max recommended for video | |
| def my_video_function(): | |
| ... | |
| ``` | |
| **Key Constraints:** | |
| | Resource | Limit | Notes | | |
| |----------|-------|-------| | |
| | VRAM | ~70GB shared | May vary based on concurrent users | | |
| | Default timeout | 60s | Extendable with duration parameter | | |
| | Max timeout | ~300s | Longer tasks may be killed | | |
| | Concurrent jobs | Limited | Queue system recommended | | |
| **Common "GPU Task Aborted" Causes:** | |
| 1. **Timeout exceeded** - Task took longer than duration limit | |
| 2. **CUDA operations outside @spaces.GPU** - All GPU code must be inside decorated functions | |
| 3. **Memory pressure** - Too much VRAM used, try CPU offload | |
| **Best Practices for ZeroGPU:** | |
| ```python | |
| # Enable CPU offload to reduce VRAM usage | |
| pipe.enable_model_cpu_offload() | |
| # Enable VAE tiling for large images/videos | |
| if hasattr(pipe, 'vae'): | |
| pipe.vae.enable_tiling() | |
| # Use lower settings for video generation | |
| # - Fewer steps (4-8 instead of 25+) | |
| # - Shorter duration (2-3 seconds) | |
| # - Lower resolution (512x320) | |
| ``` | |
| **Working Examples on ZeroGPU:** | |
| - GPT-OSS 20B: 512 tokens generation works | |
| - Falcon-H1R 7B: 1000 tokens generation works | |
| - Video models: Need low settings (4 steps, 2s, 512x320) | |
| """) | |
| with gr.Accordion("Talking Head Video Generation Methods", open=True): | |
| gr.Markdown(""" | |
| **Comparison of Audio-Driven Talking Head Methods (2024-2026):** | |
| | Method | VRAM | Speed | Quality | Best For | | |
| |--------|------|-------|---------|----------| | |
| | **Wav2Lip** | 4-6GB | Fast | Medium | Lip sync only, any face | | |
| | **FOMM** | 4-6GB | Fast | Medium | Simple motion transfer | | |
| | **SadTalker** | 6-8GB | Medium | Good | Full head motion + lip sync | | |
| | **LivePortrait** | 10-12GB | Medium | High | Expression transfer | | |
| | **Hallo** | 20-24GB+ | Slow | Very High | High quality portraits | | |
| | **EDTalk** | 8-12GB | Medium | High | Efficient, good balance | | |
| | **EMO** | 24GB+ | Slow | Very High | Emotional expressions | | |
| **Old Methods (GPU Efficient, Lower Quality):** | |
| *Wav2Lip (2020)* | |
| - Only modifies lip region, keeps rest of face static | |
| - Works with any face video/image | |
| - ~4-6GB VRAM, real-time capable | |
| - Good for: Quick lip sync, game NPCs | |
| *First Order Motion Model (FOMM, 2019)* | |
| - Transfers motion from driving video to source image | |
| - Simple architecture, fast inference | |
| - ~4-6GB VRAM | |
| - Good for: Simple animations, avatars | |
| *SadTalker (2023)* | |
| - Generates 3D motion coefficients from audio | |
| - Full head movement + lip sync | |
| - ~6-8GB VRAM, slower than Wav2Lip | |
| - Good for: Realistic NPCs with head motion | |
| **New Methods (High Quality, GPU Hungry):** | |
| *LivePortrait (2024)* | |
| - Stitching and retargeting for expression control | |
| - High quality output, controllable | |
| - ~10-12GB VRAM | |
| - Good for: High quality cutscenes | |
| *Hallo (2024)* | |
| - Hierarchical audio-driven synthesis | |
| - Best quality for portrait animation | |
| - ~20-24GB VRAM, slow | |
| - Good for: Hero characters, cinematics | |
| *EDTalk (2024)* | |
| - Efficient design, competitive quality | |
| - Better VRAM/quality tradeoff | |
| - ~8-12GB VRAM | |
| - Good for: Production use with limited GPU | |
| **Recommendations for ZeroGPU:** | |
| 1. **Wav2Lip or SadTalker** - If you need it to run on Spaces | |
| 2. **API-based Hallo** - Use existing HF Space via API for quality | |
| 3. **LivePortrait** - Good balance if you have 12GB available | |
| 4. **EDTalk** - Best new method that might fit ZeroGPU constraints | |
| **HuggingFace Spaces for Talking Heads:** | |
| - [Hallo](https://huggingface.co/spaces/fffiloni/tts-hallo-talking-portrait) | |
| - [LivePortrait](https://huggingface.co/spaces/KwaiVGI/LivePortrait) | |
| - [SadTalker](https://huggingface.co/spaces/vinthony/SadTalker) | |
| """) | |
| with gr.Accordion("Video Generation Models on ZeroGPU", open=False): | |
| gr.Markdown(""" | |
| **Tested Video Models & Settings:** | |
| | Model | Type | Recommended Settings | Notes | | |
| |-------|------|---------------------|-------| | |
| | LTX-Video-0.9.7-distilled | T2V | 4 steps, 2s, 512x320 | Fast, works well | | |
| | HunyuanVideo | T2V | 4 steps, 2s, 512x320 | Needs CPU offload | | |
| | Wan2.2-T2V-1.3B | T2V | 4 steps, 2s, 480x320 | Smaller model | | |
| | Wan2.2-I2V-14B | I2V | 4 steps, 2s | Use with CPU offload | | |
| **Pipeline Classes (diffusers):** | |
| ```python | |
| from diffusers import ( | |
| LTXPipeline, # NOT LTXConditionPipeline | |
| HunyuanVideoPipeline, | |
| WanPipeline, # For T2V | |
| WanImageToVideoPipeline # For I2V | |
| ) | |
| ``` | |
| **Model IDs:** | |
| - LTX: `Lightricks/LTX-Video-0.9.7-distilled` | |
| - Hunyuan: `hunyuanvideo-community/HunyuanVideo` | |
| - Wan T2V: `Wan-AI/Wan2.2-T2V-1.3B-Diffusers` | |
| - Wan I2V: `Wan-AI/Wan2.2-I2V-14B-480P-Diffusers` | |
| """) | |
| with gr.Tab("Asset loading test"): | |
| gr.HTML("SDXL (linoyts/scribble-sdxl-flash), SVD and Stable Audio used for the test assets (For commercial use need a licence) <br>testmedia/") | |
| with gr.Row(): | |
| gr.Image(value="testmedia/Flash scribble SDXL - random squiggles as roads.webp") | |
| gr.Video(value="testmedia/SVD - random squiggles as roads video 004484.mp4") | |
| gr.Audio(value="testmedia/Stable Audio - Raindrops, output.wav") | |
| gr.HTML(TestmedialoadinHTML) # imported from relatively_constant_variables | |