LChambon committed on
Commit
ee7c477
·
1 Parent(s): cc9bf9f
Files changed (2) hide show
  1. Dockerfile +36 -0
  2. app.py +33 -13
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base image with Python 3.10
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y \
9
+ git \
10
+ git-lfs \
11
+ ffmpeg \
12
+ libsm6 \
13
+ libxext6 \
14
+ cmake \
15
+ rsync \
16
+ libgl1 \
17
+ && rm -rf /var/lib/apt/lists/* \
18
+ && git lfs install
19
+
20
+ # Upgrade pip
21
+ RUN pip install --no-cache-dir --upgrade pip
22
+
23
+ # Copy requirements.txt
24
+ COPY requirements.txt /tmp/requirements.txt
25
+
26
+ # Install Python dependencies
27
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt
28
+
29
+ # Copy the Space code
30
+ COPY . /app
31
+
32
+ # Expose the default HuggingFace Space port
33
+ EXPOSE 7860
34
+
35
+ # Launch command for a Gradio/Streamlit app
36
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -57,9 +57,15 @@ def resize_with_aspect_ratio(img, max_size, patch_size):
57
 
58
 
59
  @torch.no_grad()
60
- def process_image(image, model_name, output_resolution):
61
  """Process image with selected model and resolution"""
62
  try:
 
 
 
 
 
 
63
  # Load the backbone using vit_wrapper
64
  backbone = PretrainedViTWrapper(model_name, norm=True).to(device)
65
  backbone.eval()
@@ -135,15 +141,19 @@ def process_image(image, model_name, output_resolution):
135
  return None
136
 
137
 
138
- # Popular vision models for the dropdown (from vit_wrapper.py)
139
- POPULAR_MODELS = [
140
- "vit_base_patch16_dinov3.lvd1689m",
141
- "radio_v2.5-b",
142
- "vit_base_patch14_reg4_dinov2",
143
- "vit_base_patch14_dinov2.lvd142m",
144
- "vit_base_patch16_224.dino",
145
- "vit_base_patch16_siglip_512.v2_webli",
146
- ]
 
 
 
 
147
 
148
  # Create Gradio interface
149
  with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
@@ -186,10 +196,16 @@ with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
186
  gr.Markdown("### ⚙️ Model Settings")
187
 
188
  model_dropdown = gr.Dropdown(
189
- choices=POPULAR_MODELS,
190
- value=POPULAR_MODELS[0],
191
  label="🤖 Vision Foundation Model",
192
  )
 
 
 
 
 
 
193
 
194
  resolution_slider = gr.Slider(
195
  minimum=64,
@@ -221,7 +237,11 @@ with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
221
  """
222
  )
223
 
224
- process_btn.click(fn=process_image, inputs=[image_input, model_dropdown, resolution_slider], outputs=output_image)
 
 
 
 
225
 
226
  gr.Markdown(
227
  """
 
57
 
58
 
59
  @torch.no_grad()
60
+ def process_image(image, model_selection, custom_model, output_resolution):
61
  """Process image with selected model and resolution"""
62
  try:
63
+ # Determine which model to use
64
+ if custom_model.strip():
65
+ model_name = custom_model.strip()
66
+ else:
67
+ model_name = MODEL_MAPPING.get(model_selection, model_selection)
68
+
69
  # Load the backbone using vit_wrapper
70
  backbone = PretrainedViTWrapper(model_name, norm=True).to(device)
71
  backbone.eval()
 
141
  return None
142
 
143
 
144
+ # Popular vision models with friendly names
145
+ MODEL_MAPPING = {
146
+ "DINOv3-B": "vit_base_patch16_dinov3.lvd1689m",
147
+ "RADIOv2.5-B": "radio_v2.5-b",
148
+ "DINOv2-B": "vit_base_patch14_dinov2.lvd142m",
149
+ "DINOv2-R-B": "vit_base_patch14_reg4_dinov2",
150
+ "DINO-B": "vit_base_patch16_224.dino",
151
+ "SigLIP2-B": "vit_base_patch16_siglip_512.v2_webli",
152
+ "PE-Core-B": "vit_pe_core_base_patch16_224.fb",
153
+ "CLIP-B": "vit_base_patch16_clip_224.openai",
154
+ }
155
+
156
+ FRIENDLY_MODEL_NAMES = list(MODEL_MAPPING.keys())
157
 
158
  # Create Gradio interface
159
  with gr.Blocks(title="NAF: Zero-Shot Feature Upsampling") as demo:
 
196
  gr.Markdown("### ⚙️ Model Settings")
197
 
198
  model_dropdown = gr.Dropdown(
199
+ choices=FRIENDLY_MODEL_NAMES,
200
+ value=FRIENDLY_MODEL_NAMES[0],
201
  label="🤖 Vision Foundation Model",
202
  )
203
+
204
+ custom_model_input = gr.Textbox(
205
+ label="✍️ Or Use Custom Model (timm reference name)",
206
+ placeholder="e.g., vit_large_patch14_dinov2.lvd142m",
207
+ value="",
208
+ )
209
 
210
  resolution_slider = gr.Slider(
211
  minimum=64,
 
237
  """
238
  )
239
 
240
+ process_btn.click(
241
+ fn=process_image,
242
+ inputs=[image_input, model_dropdown, custom_model_input, resolution_slider],
243
+ outputs=output_image,
244
+ )
245
 
246
  gr.Markdown(
247
  """