File size: 16,386 Bytes
28cc9a7
 
 
d861cf8
28cc9a7
bb38035
28cc9a7
1b5d55e
28cc9a7
 
 
dcb2d8b
28cc9a7
5260f7d
28cc9a7
 
 
5260f7d
28cc9a7
 
fc8839e
28cc9a7
 
 
44d2e01
28cc9a7
44d2e01
28cc9a7
 
44d2e01
 
28cc9a7
44d2e01
28cc9a7
44d2e01
28cc9a7
 
1b5d55e
dcb2d8b
c5a017f
28cc9a7
1b5d55e
28cc9a7
 
 
 
 
0b36562
 
28cc9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b36562
44d2e01
 
28cc9a7
 
 
0b36562
28cc9a7
 
 
 
 
 
 
 
 
 
44d2e01
28cc9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
44d2e01
28cc9a7
 
 
 
 
 
 
 
44d2e01
 
28cc9a7
 
 
44d2e01
1b5d55e
28cc9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b36562
1b5d55e
 
 
 
 
 
 
 
 
44d2e01
fc8839e
1b5d55e
 
ea2bb14
28cc9a7
 
44d2e01
28cc9a7
 
 
 
1b5d55e
 
 
 
 
 
44d2e01
1b5d55e
 
 
28cc9a7
1b5d55e
28cc9a7
1b5d55e
0b36562
28cc9a7
 
 
 
44d2e01
28cc9a7
 
 
 
 
 
1b5d55e
28cc9a7
8e13fb4
28cc9a7
 
 
1b5d55e
28cc9a7
 
1b5d55e
 
44d2e01
1b5d55e
28cc9a7
 
 
44d2e01
28cc9a7
 
 
 
 
44d2e01
 
28cc9a7
1b5d55e
44d2e01
28cc9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
1b5d55e
44d2e01
28cc9a7
44d2e01
28cc9a7
0b36562
28cc9a7
44d2e01
 
28cc9a7
44d2e01
0b36562
 
 
28cc9a7
 
 
0b36562
28cc9a7
 
 
0b36562
 
28cc9a7
 
1b5d55e
 
28cc9a7
 
657a63a
28cc9a7
 
 
 
 
febc658
 
28cc9a7
 
 
 
febc658
28cc9a7
bb38035
 
28cc9a7
 
 
1b5d55e
28cc9a7
1b5d55e
 
28cc9a7
 
 
1b5d55e
28cc9a7
1b5d55e
 
28cc9a7
1b5d55e
ea2bb14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
##############################################################################
#                                app.py                                      #
##############################################################################
# model_repo_id: Hugging Face Hub repository the pipeline weights are loaded from
#                (also used to build the model-card link in the UI).
# model_name:    human-readable name shown in the page title and link text.
model_repo_id, model_name = "Freepik/F-Lite-Texture", "F Lite Texture"

from dotenv   import load_dotenv
import gradio as gr
import numpy   as np
import random, os, logging, google.generativeai as genai, spaces, torch

from f_lite import FLitePipeline
from f_lite.pipeline import APGConfig

# ────────────────────────────────────────────────────────────────────────────
# diffusers helper (required trick – makes diffusers recognize the DiT class)
# ────────────────────────────────────────────────────────────────────────────
from diffusers.pipelines.pipeline_loading_utils import LOADABLE_CLASSES, ALL_IMPORTABLE_CLASSES
# Register the DiT class with diffusers' loader tables under both module
# paths. The two LOADABLE_CLASSES keys intentionally point at the same
# mapping object, exactly as a chained assignment would.
LOADABLE_CLASSES["f_lite"] = {"DiT": ["save_pretrained", "from_pretrained"]}
LOADABLE_CLASSES["f_lite.model"] = LOADABLE_CLASSES["f_lite"]
ALL_IMPORTABLE_CLASSES["DiT"] = ["save_pretrained", "from_pretrained"]

# ────────────────────────────────────────────────────────────────────────────
# Environment setup / model loading
# ────────────────────────────────────────────────────────────────────────────
# Pull variables from a local .env file (if any) and enable INFO-level logging.
load_dotenv()
logging.basicConfig(level=logging.INFO)

# Gemini is optional: prompt enrichment is only enabled when an API key is set.
gemini_available = bool(os.getenv("GEMINI_API_KEY"))
if gemini_available:
    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
else:
    logging.warning("GEMINI_API_KEY not found – prompt enrichment disabled.")

# Use the GPU with bfloat16 when CUDA is present, otherwise CPU with float32.
device, torch_dtype = (
    ("cuda", torch.bfloat16) if torch.cuda.is_available() else ("cpu", torch.float32)
)

# Load the pipeline once at import time and move it to the chosen device.
pipe = FLitePipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
pipe.to(device)

# Enable the VAE's slicing and tiling modes.
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()

# ────────────────────────────────────────────────────────────────────────────
# Default values
# ────────────────────────────────────────────────────────────────────────────
# Largest seed offered in the UI: the maximum of a signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max
# Upper bound, in pixels, for the custom width/height sliders.
MAX_IMAGE_SIZE = 1600


def _preset(width, height):
    """Return one resolution preset whose label is derived from its size.

    Building the label from the numbers keeps it in sync with the dimensions
    and guarantees a real multiplication sign (the hand-written labels had
    been mangled into mojibake).
    """
    return {"width": width, "height": height, "label": f"{width}×{height}"}


# Preset resolutions grouped by orientation; iteration order defines the
# order in which they appear in the dropdown.
RESOLUTIONS = {
    "horizontal": [
        _preset(1344, 896),
        _preset(1152, 768),
        _preset(960, 640),
        _preset(1600, 896),
    ],
    "vertical": [
        _preset(896, 1344),
        _preset(768, 1152),
        _preset(640, 960),
        _preset(896, 1600),
    ],
    "square": [
        _preset(1216, 1216),
        _preset(1024, 1024),
    ],
}

# Fallback used when no (or an unparsable) preset is selected.
DEFAULT_RESOLUTION = _preset(1024, 1024)

# ────────────────────────────────────────────────────────────────────────────
# Build the resolution dropdown options
# ────────────────────────────────────────────────────────────────────────────
# Flatten RESOLUTIONS into [label, value] rows for the dropdown. Each category
# contributes a header row (value None) followed by its indented presets,
# whose value encodes "category:width:height".
resolution_options = []
for category, presets in RESOLUTIONS.items():
    header_row = [category.capitalize(), None]
    preset_rows = [
        ["  " + preset["label"], f"{category}:{preset['width']}:{preset['height']}"]
        for preset in presets
    ]
    resolution_options += [header_row, *preset_rows]

# ────────────────────────────────────────────────────────────────────────────
# Prompt enrichment (Gemini)
# ────────────────────────────────────────────────────────────────────────────
def enrich_prompt_with_gemini(prompt: str, max_tokens: int = 1024):
    """Expand *prompt* into a richer text-to-image prompt using Gemini.

    Args:
        prompt: The user's original prompt.
        max_tokens: Value passed as ``max_output_tokens`` to the model.

    Returns:
        A ``(enriched_text, error_message)`` pair. On success the second item
        is ``None``; when Gemini is unavailable or any exception occurs the
        first item is ``None`` and the second describes the problem, so the
        caller can fall back to the original prompt.
    """
    try:
        if not gemini_available:
            return None, "Gemini unavailable."

        gemini = genai.GenerativeModel("gemini-1.5-flash")
        instruction_parts = (
            "You are an exceptional prompt enhancer for text-to-image generation.\n",
            "Rewrite the following prompt so it becomes richly detailed, cinematic, and vivid.\n",
            "Return ONE descriptive paragraph only.\n\n",
            f"Original prompt: {prompt}\n\nEnhanced prompt:",
        )
        settings = {"max_output_tokens": max_tokens, "temperature": 1}
        response = gemini.generate_content(
            "".join(instruction_parts),
            generation_config=settings,
        )
        enriched = response.text.strip()
    except Exception as exc:
        # Best effort by design: log the failure and report it to the caller
        # instead of aborting image generation.
        failure = f"Gemini error: {exc}"
        logging.error(failure)
        return None, failure
    return enriched, None

# ────────────────────────────────────────────────────────────────────────────
# Resolution update
# ────────────────────────────────────────────────────────────────────────────
def update_resolution(sel: str):
    """Translate a dropdown value into a ``(width, height)`` pair.

    Args:
        sel: A ``"category:width:height"`` string, or a falsy value (the
            category header rows carry ``None``).

    Returns:
        The parsed integers, or the default resolution when *sel* is falsy,
        does not have exactly three ``:``-separated fields, or its size
        fields are not integers.
    """
    if sel:
        try:
            _category, width_text, height_text = sel.split(":")
            return int(width_text), int(height_text)
        except ValueError:
            # Wrong field count or non-numeric size: use the default below.
            pass
    return DEFAULT_RESOLUTION["width"], DEFAULT_RESOLUTION["height"]

# ────────────────────────────────────────────────────────────────────────────
# 10 example prompts
# ────────────────────────────────────────────────────────────────────────────
# Prompt texts offered as clickable examples in the UI.
_EXAMPLE_PROMPTS = (
    "An ultra-detailed macro photograph of a dew-covered rainbow beetle perched on a spiralling fern unfurling at dawn, back-lit by golden sunrise, bokeh background, 200-mm lens, f/2.8, vivid colors, cinematic lighting",
    "A retro-futuristic cityscape at night inspired by Syd Mead: neon-drenched streets reflect glistening rain, flying cars leave light-trail arcs between towering holographic billboards, 35 mm film grain, wide-angle perspective",
    "An elegant 18th-century ballroom rendered in photorealistic 8K, crystal chandeliers scattering prismatic light across polished marble floors, dancers in flowing silk gowns twirl mid-motion, captured like a long-exposure still",
    "A serene Japanese onsen nestled in a snowy mountain valley, steam rising into crisp twilight air, red lanterns glowing softly, snow-laden pines framing the scene, shot on medium-format analog with natural film tones",
    "Hyper-real illustration of an astronaut in a translucent spacesuit tending a floating bonsai inside a zero-g greenhouse aboard an orbital station, earthrise through panoramic windows, bioluminescent plants provide teal ambience",
    "A majestic white Arabian horse galloping across a mirror-like lake at sunset, droplets frozen mid-air, warm rim light outlining powerful muscles, captured at 1/4000 s with sweeping motion-blur background",
    "An ancient library hollowed inside a colossal redwood tree, spiral root staircases, glowing fireflies as lamps, shafts of emerald light pierce stained-leaf windows, ultra-detailed fantasy matte painting",
    "A haute-couture portrait: model in a gown composed of iridescent butterfly wings, dramatic chiaroscuro, deep-blue velvet backdrop, shot on Hasselblad with razor-sharp eye focus, 120 MP clarity",
    "Cyberpunk samurai duo beneath torrential neon rain, reflective katanas crackling with violet energy, holographic kanji drifting, cinematic anamorphic lens flares, gritty atmosphere",
    "A whimsical steampunk airship festival above a Victorian harbor: brass dirigibles with floral patterns, cog-shaped fireworks burst at golden hour, painterly style reminiscent of Miyazaki",
)

# Each example row is [prompt, None]; the second slot pairs with the hidden
# enrichment checkbox in the UI.
examples = [[prompt_text, None] for prompt_text in _EXAMPLE_PROMPTS]

# The first example prompt doubles as the default prompt.
DEFAULT_PROMPT = examples[0][0]

# ────────────────────────────────────────────────────────────────────────────
# Inference function
# ────────────────────────────────────────────────────────────────────────────
@spaces.GPU(duration=120)
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    use_prompt_enrichment,
    enable_apg,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for *prompt* and build the related UI updates.

    Args:
        prompt: The user's prompt.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the torch generator (replaced when *randomize_seed*).
        randomize_seed: When truthy, draw a fresh seed in ``[0, MAX_SEED]``.
        width, height: Requested image size.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Step count forwarded to the pipeline.
        use_prompt_enrichment: When truthy and Gemini is configured, the
            prompt is first expanded with Gemini.
        enable_apg: Passed to ``APGConfig(enabled=...)``.
        progress: Gradio progress object; not referenced in the body.

    Returns:
        ``(image, seed, accordion_update, enriched_text_update, error_update)``
        in the order expected by the UI's output components.
    """
    enriched_prompt = enrich_err = None
    if use_prompt_enrichment and gemini_available:
        enriched_prompt, enrich_err = enrich_prompt_with_gemini(prompt)

    # Fall back to the raw prompt whenever enrichment yielded nothing usable.
    generation_prompt = enriched_prompt or prompt

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)

    pipeline_kwargs = {
        "prompt": generation_prompt,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "width": width,
        "height": height,
        "generator": generator,
        "apg_config": APGConfig(enabled=enable_apg),
    }
    image = pipe(**pipeline_kwargs).images[0]

    # The accordion is shown when there is either an enriched prompt or an
    # error to report; an enriched prompt takes precedence over an error.
    has_enriched = bool(enriched_prompt)
    has_error = (not has_enriched) and bool(enrich_err)

    accordion_update = gr.update(visible=has_enriched or has_error)
    text_update = gr.update(value=enriched_prompt if has_enriched else "")
    error_update = gr.update(visible=has_error, value=enrich_err if has_error else "")

    return image, seed, accordion_update, text_update, error_update

# ────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ────────────────────────────────────────────────────────────────────────────
# Custom CSS: centers the main column (max width 1024px) and tightens the
# spacing of the prompt row.
css = """
#col-container {margin:0 auto; max-width:1024px;}
.prompt-row > .gr-form{gap:0.5rem !important; align-items:center;}
"""

# Build the whole UI. Components are laid out in the order they are created,
# so statement order inside this block matters.
with gr.Blocks(css=css, theme="ParityError/Interstellar") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"# {model_name} Text-to-Image Demo")

        # ── Input section ──
        with gr.Row(elem_classes="prompt-row"):
            prompt_box = gr.Text(
                value=DEFAULT_PROMPT,              # default prompt
                label="Prompt", show_label=False,
                max_lines=1, placeholder="Enter your prompt",
                container=False, scale=6,
            )
            use_prompt_enrichment = gr.Checkbox(
                label="Enrich", value=True, visible=False  # hidden, defaults to True
            )
            run_button = gr.Button("Run", scale=1, variant="primary", min_width=100)

        # ── Result image ── (initial value: image1.webp)
        result_img = gr.Image(
            value="image1.webp",                 # show the image at this relative path
            label="Result", show_label=False
        )

        # Accordion for displaying the enriched prompt (hidden until infer
        # returns an update that makes it visible)
        enrich_acc = gr.Accordion("Enriched Prompt", open=False, visible=False)
        with enrich_acc:
            enrich_txt   = gr.Textbox(label="Enriched Prompt", interactive=False, lines=8)
            enrich_error = gr.Textbox(label="Error", visible=False, interactive=False)

        # ── Advanced settings ──
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt", max_lines=1, placeholder="Enter a negative prompt"
            )
            with gr.Tabs():
                with gr.TabItem("Preset Resolutions"):
                    resolution_dd = gr.Dropdown(
                        label="Resolution",
                        choices=resolution_options,
                        value="horizontal:1600:896",   # largest preset
                        type="value"
                    )
                with gr.TabItem("Custom Resolution"):
                    # Slider defaults match the preset selected above (1600 x 896).
                    with gr.Row():
                        width_sl = gr.Slider(
                            label="Width", minimum=256, maximum=MAX_IMAGE_SIZE,
                            step=32, value=1600
                        )
                        height_sl = gr.Slider(
                            label="Height", minimum=256, maximum=MAX_IMAGE_SIZE,
                            step=32, value=896
                        )

            seed_sl = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
            random_seed = gr.Checkbox(label="Randomize seed", value=False)

            with gr.Row():
                guidance_sl = gr.Slider(label="Guidance scale", minimum=0, maximum=15, step=0.1, value=6)
                enable_apg  = gr.Checkbox(label="Enable APG", value=True)
                steps_sl    = gr.Slider(label="Number of inference steps", minimum=1, maximum=50, step=1, value=30)

        # ── Example prompt list ──
        def set_example(example, _):
            """Return the example text unchanged plus an update that unchecks enrichment."""
            # NOTE(review): confirm gr.Examples actually invokes `fn` with the
            # current arguments (no run_on_click / cache_examples is passed).
            return example, gr.update(value=False)  # turn enrichment off
        # Example labels longer than this many characters are truncated with "...".
        max_len = 180
        gr.Examples(
            examples=examples,
            inputs=[prompt_box, use_prompt_enrichment],
            outputs=[prompt_box, use_prompt_enrichment],
            fn=set_example,
            example_labels=[ex[0][:max_len] + "..." if len(ex[0]) > max_len else ex[0] for ex in examples]
        )

        gr.Markdown(f"[{model_name} Model Card and Weights](https://huggingface.co/{model_repo_id})")

    # ── Wire up interactions ──
    # Choosing a preset copies its width/height into the custom sliders.
    resolution_dd.change(fn=update_resolution, inputs=resolution_dd, outputs=[width_sl, height_sl])

    # Both the Run button and submitting the prompt box trigger inference.
    # The input/output lists follow infer's parameter and return order.
    gr.on(
        triggers=[run_button.click, prompt_box.submit],
        fn=infer,
        inputs=[
            prompt_box, negative_prompt, seed_sl, random_seed,
            width_sl, height_sl, guidance_sl, steps_sl,
            use_prompt_enrichment, enable_apg
        ],
        outputs=[result_img, seed_sl, enrich_acc, enrich_txt, enrich_error],
    )

# ────────────────────────────────────────────────────────────────────────────
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()