from pathlib import Path
from typing import Any, cast

import gradio as gr
import spaces
import torch
from finegrain_toolbox.flux import Model
from finegrain_toolbox.flux.prompt import prompt_with_embeds
from finegrain_toolbox.processors import product_placement
from gradio_image_annotation import image_annotator
from huggingface_hub import hf_hub_download
from PIL import Image
from safetensors.torch import load_file

# initialize on CPU then move to GPU (Zero GPU)
DEVICE_CPU = torch.device("cpu")
DTYPE = torch.bfloat16

model = Model.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", device=DEVICE_CPU, dtype=DTYPE)

# download the placement LoRA and the precomputed prompt embeddings from the Hub
lora_path = Path(
    hf_hub_download(
        repo_id="finegrain/finegrain-product-placement-lora",
        filename="finegrain-placement-v1-rank8.safetensors",
    )
)
prompt_path = Path(
    hf_hub_download(
        repo_id="finegrain/finegrain-product-placement-lora",
        filename="addinbox-prompt.safetensors",
    )
)

# the prompt ships as precomputed CLIP and T5 embeddings for "Add this in the box"
prompt_st = load_file(prompt_path, device="cpu")
prompt = prompt_with_embeds(
    text="Add this in the box",
    clip_prompt_embeds=prompt_st["clip"],
    t5_prompt_embeds=prompt_st["t5"],
)

# fuse the LoRA into the transformer weights, then drop the adapter
model.transformer.load_lora_adapter(lora_path, adapter_name="placement")
model.transformer.fuse_lora()
model.transformer.unload_lora()

DEVICE = torch.device("cuda")
model = model.to(device=DEVICE, dtype=DTYPE)
prompt = prompt.to(device=DEVICE, dtype=DTYPE)


def on_change(scene: dict[str, Any] | None, reference: Image.Image | None) -> tuple[dict[str, Any], str]:
    # enable the run button only once a single box is drawn and a reference image is set
    bbox_str = ""
    if scene is not None and isinstance(scene.get("boxes"), list) and len(scene.get("boxes", [])) == 1:
        box = scene["boxes"][0]
        bbox_str = f"({box['xmin']}, {box['ymin']}, {box['xmax']}, {box['ymax']})"
    return (gr.update(interactive=reference is not None and bbox_str != ""), bbox_str)


@spaces.GPU(duration=120)
def _process(
    scene: dict[str, Any],
    reference: Image.Image,
    seed: int = 1234,
) -> tuple[tuple[Image.Image, Image.Image], Image.Image, Image.Image]:
    # the annotator returns the scene image plus exactly one bounding box
    assert isinstance(scene_image := scene["image"], Image.Image)
    assert isinstance(boxes := scene["boxes"], list)
    assert len(boxes) == 1
    assert isinstance(box := boxes[0], dict)
    bbox = tuple(box[k] for k in ["xmin", "ymin", "xmax", "ymax"])

    result = product_placement.process(
        model=model,
        scene=scene_image,
        reference=reference,
        bbox=bbox,
        prompt=prompt,
        seed=seed,
        max_short_size=1024,
        max_long_size=2048,
    )

    output = result.output
    before_after = (scene_image.resize(output.size), output)
    return (before_after, result.reference, result.scene)


def process(
    scene: dict[str, Any],
    reference: Image.Image,
    seed: int = 1234,
) -> tuple[tuple[Image.Image, Image.Image], Image.Image, Image.Image]:
    # a cutout must have at least one transparent pixel: reject fully opaque alpha channels
    assert reference.mode == "RGBA"
    extrema = cast(tuple[tuple[int, int], ...], reference.getextrema())
    if extrema[reference.mode.index("A")][0] == 255:
        raise gr.Error("The reference must be a cutout.", duration=5)
    return _process(scene, reference, seed)


TITLE = """
# Finegrain Product Placement LoRA 🧪

An experiment to extend Flux Kontext with product placement capabilities.
The LoRA was trained using EditNet, our before / after image editing dataset.

Just draw a box to set where the subject should be blended, and at what size.

*The reference must be a cutout, i.e. have a transparent background.*
If you do not have a cutout available, you can create one [here](https://huggingface.co/spaces/finegrain/finegrain-object-cutter).

[Model Card](https://huggingface.co/finegrain/finegrain-product-placement-lora) |
[Blog Post](https://blog.finegrain.ai/posts/product-placement-flux-lora-experiment/) |
[EditNet](https://finegrain.ai/editnet)

🌟 If you like this Space, follow [Finegrain](https://huggingface.co/finegrain) on Hugging Face for more cool free tools!
"""

with gr.Blocks() as demo:
    gr.Markdown(TITLE)
    with gr.Row():
        with gr.Column():
            scene = image_annotator(
                label="Scene",
                image_type="pil",
                disable_edit_boxes=True,
                show_download_button=False,
                show_share_button=False,
                single_box=True,
                image_mode="RGB",
            )
            reference = gr.Image(
                label="Product Reference",
                visible=True,
                interactive=True,
                type="pil",
                image_mode="RGBA",
            )
            with gr.Accordion("Options", open=False):
                seed = gr.Slider(
                    minimum=0,
                    maximum=10_000,
                    value=1234,
                    step=1,
                    label="Seed",
                )
            with gr.Row():
                run_btn = gr.ClearButton(value="Blend", interactive=False)
        with gr.Column():
            output_image = gr.ImageSlider(label="Output Image", show_fullscreen_button=False)
            with gr.Accordion("Debug", open=False):
                output_textbox = gr.Textbox(label="Bounding Box", interactive=False)
                output_reference = gr.Image(
                    label="Reference",
                    visible=True,
                    interactive=False,
                    type="pil",
                    image_mode="RGB",
                )
                output_scene = gr.Image(
                    label="Scene",
                    visible=True,
                    interactive=False,
                    type="pil",
                    image_mode="RGB",
                )

    run_btn.add(output_image)

    # Watch for changes (scene and reference),
    # i.e. the user must select a box in the scene and upload a reference image.
    scene.change(fn=on_change, inputs=[scene, reference], outputs=[run_btn, output_textbox])
    reference.change(fn=on_change, inputs=[scene, reference], outputs=[run_btn, output_textbox])

    run_btn.click(
        fn=process,
        inputs=[scene, reference, seed],
        outputs=[output_image, output_reference, output_scene],
    )

    examples = [
        [
            {
                "image": "examples/sunglasses/scene.jpg",
                "boxes": [{"xmin": 164, "ymin": 89, "xmax": 379, "ymax": 204}],
            },
            "examples/sunglasses/reference.webp",
        ],
        [
            {
                "image": "examples/kitchen/scene.webp",
                "boxes": [{"xmin": 165, "ymin": 765, "xmax": 332, "ymax": 883}],
            },
            "examples/kitchen/reference.webp",
        ],
        [
            {
                "image": "examples/glass/scene.webp",
                "boxes": [{"xmin": 389, "ymin": 509, "xmax": 611, "ymax": 1088}],
            },
            "examples/glass/reference.webp",
        ],
        [
            {
                "image": "examples/chair/scene.webp",
                "boxes": [{"xmin": 366, "ymin": 389, "xmax": 623, "ymax": 728}],
            },
            "examples/chair/reference.webp",
        ],
        [
            {
                "image": "examples/lantern/scene.webp",
                "boxes": [{"xmin": 497, "ymin": 690, "xmax": 618, "ymax": 873}],
            },
            "examples/lantern/reference.webp",
        ],
    ]

    ex = gr.Examples(
        examples=examples,
        inputs=[scene, reference],
        outputs=[output_image, output_reference, output_scene],
        fn=process,
        cache_examples=True,
        cache_mode="eager",
    )

demo.launch(show_api=False, ssr_mode=False)