Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import cv2 | |
| import numpy as np | |
| import base64 | |
| import requests | |
| import json | |
| import time | |
| import threading | |
| from PIL import Image | |
| import io | |
class CameraProcessor:
    """Encode camera frames and send them to a chat-completion endpoint.

    A frame is JPEG-encoded, wrapped in a base64 ``data:`` URL and POSTed,
    together with a text instruction, to ``{base_url}/v1/chat/completions``.
    The request methods report failures as human-readable strings instead of
    raising, so their result can be displayed directly.
    """

    MAX_TOKENS = 100      # "max_tokens" value sent with every request
    JPEG_QUALITY = 80     # quality passed to cv2.IMWRITE_JPEG_QUALITY
    REQUEST_TIMEOUT = 10  # seconds, passed to requests.post(timeout=...)

    def __init__(self):
        # Flag flipped by toggle_processing(); no method of this class reads it.
        self.is_processing = False
        # Placeholders for background processing; no method here uses them.
        self.processing_thread = None
        self.stop_event = threading.Event()

    def encode_image_to_base64(self, image):
        """Convert a numpy image array to a JPEG ``data:`` URL.

        Args:
            image: Image array treated as RGB, or ``None``.

        Returns:
            ``"data:image/jpeg;base64,..."`` on success, ``None`` when
            ``image`` is ``None`` or the JPEG encoder reports failure.
        """
        if image is None:
            return None
        # The input is treated as RGB; OpenCV's encoder works on BGR.
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        encoded_ok, buffer = cv2.imencode(
            ".jpg", image_bgr, [cv2.IMWRITE_JPEG_QUALITY, self.JPEG_QUALITY]
        )
        # imencode signals failure through its first return value; without
        # this check an unusable buffer would be base64-encoded and sent.
        if not encoded_ok:
            return None
        image_base64 = base64.b64encode(buffer).decode("utf-8")
        return f"data:image/jpeg;base64,{image_base64}"

    @staticmethod
    def _chat_completions_url(base_url):
        """Return the chat-completions endpoint for ``base_url``.

        Surrounding whitespace and trailing slashes are dropped so that
        ``"http://host:8080/"`` does not become ``"http://host:8080//v1/..."``.
        """
        return f"{base_url.strip().rstrip('/')}/v1/chat/completions"

    def _build_payload(self, instruction, image_base64_url):
        """Return the JSON body: one user message with a text and an image part."""
        return {
            "max_tokens": self.MAX_TOKENS,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_base64_url},
                        },
                    ],
                }
            ],
        }

    def _post_chat_completion(self, instruction, image_base64_url, base_url):
        """POST the request (blocking) and return the ``requests`` response."""
        return requests.post(
            self._chat_completions_url(base_url),
            headers={"Content-Type": "application/json"},
            json=self._build_payload(instruction, image_base64_url),
            timeout=self.REQUEST_TIMEOUT,
        )

    async def send_chat_completion_request(self, instruction, image_base64_url, base_url):
        """Send the chat-completion request without blocking the event loop.

        Args:
            instruction: Text prompt sent alongside the image.
            image_base64_url: Image as a ``data:`` URL.
            base_url: API base URL; ``/v1/chat/completions`` is appended.

        Returns:
            The first choice's message content, ``"Server error: ..."`` for a
            non-OK HTTP status, or ``"Error: ..."`` for any other failure.
        """
        import asyncio  # local import so this block does not rely on file-level imports

        try:
            # requests.post blocks; run it in the default executor so other
            # coroutines keep running while the HTTP call is in flight.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                self._post_chat_completion,
                instruction,
                image_base64_url,
                base_url,
            )
            if not response.ok:
                return f"Server error: {response.status_code} - {response.text}"
            data = response.json()
            return data["choices"][0]["message"]["content"]
        except Exception as e:
            return f"Error: {str(e)}"

    def process_frame(self, instruction, image, base_url):
        """Encode one frame, send it with ``instruction`` and return the reply.

        Args:
            instruction: Text prompt sent alongside the image.
            image: Image array treated as RGB, or ``None``.
            base_url: API base URL; ``/v1/chat/completions`` is appended.

        Returns:
            The model's reply text, or a message describing what went wrong
            (no image, encoding failure, HTTP error, or any other exception).
        """
        print(f"DEBUG: process_frame called with base_url: {base_url}")
        if image is None:
            print("DEBUG: No image captured")
            return "No image captured"

        # Encoding can raise (e.g. on an unexpected array layout); report it
        # as a message like every other failure instead of propagating.
        try:
            image_base64 = self.encode_image_to_base64(image)
        except Exception as e:
            print(f"DEBUG: Image encoding raised: {e}")
            image_base64 = None
        if not image_base64:
            print("DEBUG: Failed to encode image")
            return "Failed to encode image"

        try:
            print(f"DEBUG: Sending request to {self._chat_completions_url(base_url)}")
            print("DEBUG: Making HTTP request...")
            response = self._post_chat_completion(instruction, image_base64, base_url)
            print(f"DEBUG: Response status: {response.status_code}")
            if not response.ok:
                error_msg = f"Server error: {response.status_code} - {response.text}"
                print(f"DEBUG: {error_msg}")
                return error_msg
            data = response.json()
            result = data["choices"][0]["message"]["content"]
            print(f"DEBUG: Success - got response: {result}")
            return result
        except Exception as e:
            error_msg = f"Error: {str(e)}"
            print(f"DEBUG: Exception occurred: {error_msg}")
            return error_msg


# Module-level processor instance used by the callbacks below
processor = CameraProcessor()
def process_image(instruction, image, base_url):
    """Validate the UI inputs and run one frame through the shared processor.

    Args:
        instruction: Prompt text from the instruction textbox (may be empty
            or ``None``).
        image: Captured frame, or ``None`` when nothing was captured.
        base_url: API base URL from the textbox (may be empty or ``None``).

    Returns:
        The text produced by ``processor.process_frame`` or a message telling
        the user which input is missing.
    """
    print(f"DEBUG: process_image called - is_processing: {processor.is_processing}")
    print(f"DEBUG: instruction: '{instruction}'")
    print(f"DEBUG: base_url: '{base_url}'")
    print(f"DEBUG: image is None: {image is None}")
    print(f"DEBUG: image type: {type(image)}")

    # Always return something so it is visible that the callback ran.
    if image is None:
        print("DEBUG: No image from webcam")
        return "No image from webcam - check camera permissions or try a different browser"

    # processor.is_processing is deliberately not checked here: this entry
    # point is triggered manually and must work without a "Start" toggle.

    # `or ""` keeps a None input from crashing on .strip().
    if not (instruction or "").strip():
        print("DEBUG: No instruction provided")
        return "Please enter an instruction"

    # Forward the trimmed URL: validating with .strip() but sending the
    # padded original would build a malformed request URL.
    cleaned_base_url = (base_url or "").strip()
    if not cleaned_base_url:
        print("DEBUG: No base URL provided")
        return "Please enter a base URL"

    print("DEBUG: Calling process_frame")
    result = processor.process_frame(instruction, image, cleaned_base_url)
    print(f"DEBUG: process_frame result: {result}")
    return result
def toggle_processing():
    """Flip the shared processor's ``is_processing`` flag.

    Returns:
        A ``(button_label, status_message)`` pair describing the new state:
        ``("Stop", "Processing started...")`` when processing is now on,
        ``("Start", "Processing stopped.")`` when it is now off.
    """
    now_active = not processor.is_processing
    processor.is_processing = now_active
    print(f"DEBUG: Processing toggled to: {processor.is_processing}")
    if not now_active:
        return "Start", "Processing stopped."
    return "Stop", "Processing started..."
def update_stream_interval(interval):
    """Return a Gradio update that sets ``stream_every`` to ``interval``."""
    stream_settings = {"stream_every": interval}
    return gr.update(**stream_settings)
def test_api_connection(base_url):
    """Check whether the API server answers on its ``/health`` endpoint.

    Args:
        base_url: API base URL (may be empty or ``None``); surrounding
            whitespace and trailing slashes are ignored.

    Returns:
        ``"API accessible: <status code>"`` when any HTTP response arrives,
        ``"Please enter a base URL"`` when no URL was given, or
        ``"API connection failed: <reason>"`` when the request fails.
    """
    # Normalise first so "http://host:8080/" does not become ".../​/health"
    # and a None/blank value is reported clearly instead of as a URL error.
    cleaned_base_url = (base_url or "").strip().rstrip("/")
    if not cleaned_base_url:
        return "Please enter a base URL"
    try:
        response = requests.get(f"{cleaned_base_url}/health", timeout=5)
    except Exception as e:
        return f"API connection failed: {str(e)}"
    # Any response counts as reachable; the status code is shown as-is.
    return f"API accessible: {response.status_code}"
# Build the Gradio UI.
# NOTE(review): the original indentation was lost, so the nesting of rows and
# columns below is a reconstruction - confirm against the intended layout.
with gr.Blocks(title="Camera Interaction App", theme=gr.themes.Soft()) as interface:
    gr.Markdown("# Camera Interaction App")
    gr.Markdown("**Note:** Make sure to grant camera permissions in your browser!")

    with gr.Row():
        # Webcam capture
        video_input = gr.Image(
            label="Camera Feed - Click to capture",
            sources=["webcam"],
            height=360,
            width=480,
        )

        with gr.Column():
            # Where the chat-completion API lives
            base_url_input = gr.Textbox(
                value="http://localhost:8080",
                label="Base API URL",
                placeholder="Enter API base URL",
            )

            # Prompt sent together with the captured frame
            instruction_input = gr.Textbox(
                value="What do you see?",
                label="Instruction",
                placeholder="Enter your instruction",
                lines=2,
            )

            # Read-only box showing the reply; starts with usage steps
            response_output = gr.Textbox(
                value="1. Grant camera permissions\n2. Capture a photo\n3. Click Process Image",
                label="Response",
                lines=3,
                interactive=False,
            )

            with gr.Row():
                # Sends the captured frame for processing
                process_button = gr.Button("Process Image", variant="primary")
                # Checks that the API server is reachable
                test_button = gr.Button("Test API Connection", variant="secondary")

    with gr.Row():
        test_output = gr.Textbox(label="Connection Test", interactive=False)

    # Wire the buttons to their callbacks
    process_inputs = [instruction_input, video_input, base_url_input]
    process_button.click(
        fn=process_image,
        inputs=process_inputs,
        outputs=response_output,
    )
    test_button.click(
        fn=test_api_connection,
        inputs=base_url_input,
        outputs=test_output,
    )
if __name__ == "__main__":
    import os  # local import so this block does not rely on file-level imports

    # A hard-coded "localhost" binds only to the loopback interface, which
    # makes the app unreachable from outside a container. Honour Gradio's
    # GRADIO_SERVER_NAME / GRADIO_SERVER_PORT environment variables when they
    # are set, and keep the original localhost:7860 as the fallback.
    # NOTE(review): presumably related to the hosted "Runtime error" - confirm.
    interface.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "localhost"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", 7860)),
        share=False,
        debug=True,
    )