LChambon committed on
Commit
ee7c477
·
1 Parent(s): cc9bf9f
Files changed (2) hide show
  1. Dockerfile +36 -0
  2. app.py +33 -13
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base image with Python 3.10
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y \
9
+ git \
10
+ git-lfs \
11
+ ffmpeg \
12
+ libsm6 \
13
+ libxext6 \
14
+ cmake \
15
+ rsync \
16
+ libgl1 \
17
+ && rm -rf /var/lib/apt/lists/* \
18
+ && git lfs install
19
+
20
+ # Upgrade pip
21
+ RUN pip install --no-cache-dir --upgrade pip
22
+
23
+ # Copy requirements.txt
24
+ COPY requirements.txt /tmp/requirements.txt
25
+
26
+ # Install Python dependencies
27
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt
28
+
29
+ # Copy the Space code
30
+ COPY . /app
31
+
32
+ # Expose the default HuggingFace Space port
33
+ EXPOSE 7860
34
+
35
+ # Launch command for a Gradio/Streamlit app
36
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -57,9 +57,15 @@ def resize_with_aspect_ratio(img, max_size, patch_size):
57
 
58
 
59
  @torch.no_grad()
60
- def process_image(image, model_name, output_resolution):
61
  """Process image with selected model and resolution"""
62
  try:
 
 
 
 
 
 
63
  # Load the backbone using vit_wrapper
64
  backbone = PretrainedViTWrapper(model_name, norm=True).to(device)
65
  backbone.eval()
@@ -135,15 +141,19 @@ def process_image(image, model_name, output_resolution):
135
  return None
136
 
137
 
138
- # Popular vision models for the dropdown (from vit_wrapper.py)
139
- POPULAR_MODELS = [
140
- "vit_base_patch16_dinov3.lvd1689m",
141
- "radio_v2.5-b",
142
- "vit_base_patch14_reg4_dinov2",
143
- "vit_base_patch14_dinov2.lvd142m",
144
- "vit_base_patch16_224.dino",
145
- "vit_base_patch16_siglip_512.v2_webli",
146
- ]
 
 
 
 
147
 
148
  # Create Gradio interface
149
  with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
@@ -186,10 +196,16 @@ with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
186
  gr.Markdown("### ⚙️ Model Settings")
187
 
188
  model_dropdown = gr.Dropdown(
189
- choices=POPULAR_MODELS,
190
- value=POPULAR_MODELS[0],
191
  label="🤖 Vision Foundation Model",
192
  )
 
 
 
 
 
 
193
 
194
  resolution_slider = gr.Slider(
195
  minimum=64,
@@ -221,7 +237,11 @@ with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
221
  """
222
  )
223
 
224
- process_btn.click(fn=process_image, inputs=[image_input, model_dropdown, resolution_slider], outputs=output_image)
 
 
 
 
225
 
226
  gr.Markdown(
227
  """
 
57
 
58
 
59
  @torch.no_grad()
60
+ def process_image(image, model_selection, custom_model, output_resolution):
61
  """Process image with selected model and resolution"""
62
  try:
63
+ # Determine which model to use
64
+ if custom_model.strip():
65
+ model_name = custom_model.strip()
66
+ else:
67
+ model_name = MODEL_MAPPING.get(model_selection, model_selection)
68
+
69
  # Load the backbone using vit_wrapper
70
  backbone = PretrainedViTWrapper(model_name, norm=True).to(device)
71
  backbone.eval()
 
141
  return None
142
 
143
 
144
+ # Popular vision models with friendly names
145
+ MODEL_MAPPING = {
146
+ "DINOv3-B": "vit_base_patch16_dinov3.lvd1689m",
147
+ "RADIOv2.5-B": "radio_v2.5-b",
148
+ "DINOv2-B": "vit_base_patch14_dinov2.lvd142m",
149
+ "DINOv2-R-B": "vit_base_patch14_reg4_dinov2",
150
+ "DINO-B": "vit_base_patch16_224.dino",
151
+ "SigLIP2-B": "vit_base_patch16_siglip_512.v2_webli",
152
+ "PE-Core-B": "vit_pe_core_base_patch16_224.fb",
153
+ "CLIP-B": "vit_base_patch16_clip_224.openai",
154
+ }
155
+
156
+ FRIENDLY_MODEL_NAMES = list(MODEL_MAPPING.keys())
157
 
158
  # Create Gradio interface
159
  with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
 
196
  gr.Markdown("### ⚙️ Model Settings")
197
 
198
  model_dropdown = gr.Dropdown(
199
+ choices=FRIENDLY_MODEL_NAMES,
200
+ value=FRIENDLY_MODEL_NAMES[0],
201
  label="🤖 Vision Foundation Model",
202
  )
203
+
204
+ custom_model_input = gr.Textbox(
205
+ label="✍️ Or Use Custom Model (timm reference name)",
206
+ placeholder="e.g., vit_large_patch14_dinov2.lvd142m",
207
+ value="",
208
+ )
209
 
210
  resolution_slider = gr.Slider(
211
  minimum=64,
 
237
  """
238
  )
239
 
240
+ process_btn.click(
241
+ fn=process_image,
242
+ inputs=[image_input, model_dropdown, custom_model_input, resolution_slider],
243
+ outputs=output_image,
244
+ )
245
 
246
  gr.Markdown(
247
  """