Add Adaptive Strength Diffusion
- app.py +2 -11
- pipeline_objectclear.py +13 -2
app.py CHANGED

@@ -186,7 +186,7 @@ pipe = ObjectClearPipeline.from_pretrained_with_custom_modules(
 pipe.to(device)
 
 @spaces.GPU
-def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps, strength
+def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps
 ):
     generator = torch.Generator(device="cuda").manual_seed(seed)
     image_np = image_state["origin_image"]
@@ -219,7 +219,6 @@ def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed,
         mask_image=mask,
         generator=generator,
         num_inference_steps=num_inference_steps,
-        strength=strength,
         guidance_scale=guidance_scale,
         height=h,
         width=w,
@@ -432,13 +431,6 @@ with gr.Blocks(css=custom_css) as demo:
             )
 
             with gr.Accordion('ObjectClear Settings', open=True):
-                strength = gr.Radio(
-                    choices=[0.99, 1.0],
-                    value=0.99,
-                    label="Strength",
-                    info="0.99 better preserves the background and color; use 1.0 if object/shadow is not fully removed (default: 0.99)"
-                )
-
                 guidance_scale = gr.Slider(
                     minimum=1, maximum=10, step=0.5, value=2.5,
                     label="Guidance Scale",
@@ -517,8 +509,7 @@ with gr.Blocks(css=custom_css) as demo:
            mask_dropdown,
            guidance_scale,
            seed,
-           num_inference_steps,
-           strength
+           num_inference_steps
        ],
        outputs=[
            output_image_component, output_compare_image_component
pipeline_objectclear.py CHANGED

@@ -1352,7 +1352,7 @@ class ObjectClearPipeline(
         height: Optional[int] = None,
         width: Optional[int] = None,
         padding_mask_crop: Optional[int] = None,
-        strength: float = 0
+        strength: float = 1.0,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
         sigmas: List[float] = None,
@@ -1426,7 +1426,7 @@ class ObjectClearPipeline(
                 on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
                 resizing to the original image size for inpainting. This is useful when the masked area is small while
                 the image is large and contain information irrelevant for inpainting, such as background.
-            strength (`float`, *optional*, defaults to 0
+            strength (`float`, *optional*, defaults to 1.0):
                 Conceptually, indicates how much to transform the masked portion of the reference `image`. Must be
                 between 0 and 1. `image` will be used as a starting point, adding more noise to it the larger the
                 `strength`. The number of denoising steps depends on the amount of noise initially added. When
@@ -1914,6 +1914,17 @@ class ObjectClearPipeline(
                 # progressive attention mask blending
                 fuse_index = 5
                 if self.config.apply_attention_guided_fusion:
+                    if i == 0:
+                        init_latents_proper = image_latents
+                        init_mask = mask[0:1]
+
+                        noise_timestep = timesteps[i + 1]
+                        init_latents_proper = self.scheduler.add_noise(
+                            init_latents_proper, noise, torch.tensor([noise_timestep])
+                        )
+
+                        latents = (1 - init_mask) * init_latents_proper + init_mask * latents
+
                     if i == len(timesteps) - 1:
                         attn_key, attn_map = next(iter(self.cross_attention_scores.items()))
                         attn_map = self.resize_attn_map_divide2(attn_map, mask, fuse_index)