Spaces:
Running
Running
Additional screen as input
#3
by
oliveregger
- opened
No description provided.
oliveregger
changed pull request title from
Update src/components/WelcomeScreen.tsx
to Additional screen as input
I tried to push my changes to the PR, but I get the following error:
remote: -------------------------------------------------------------------------
remote: You are not authorized to push to some of the references:
remote: - refs/heads/pr/3: forbidden
remote: -------------------------------------------------------------------------
I have attached the patch for your consideration, in case you want to include the screen as an input source.
From 17d71b0b5f1bac5d790460460171b7bf27d05a67 Mon Sep 17 00:00:00 2001
From: oliveregger <oliver.egger@ahdis.ch>
Date: Thu, 4 Dec 2025 11:32:37 +0100
Subject: [PATCH] add screen as input in addition to camera
---
.gitignore | 67 +++++++++++++++++
src/App.tsx | 6 +-
src/components/CaptioningView.tsx | 4 +-
src/components/WebcamCapture.tsx | 4 +-
src/components/WebcamPermissionDialog.tsx | 91 ++++++++++++++++++-----
src/components/WelcomeScreen.tsx | 2 +-
6 files changed, 152 insertions(+), 22 deletions(-)
create mode 100644 .gitignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e00f300
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,67 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+# Dependencies
+node_modules
+.pnp
+.pnp.js
+
+# Build outputs
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
+
+# Environment variables
+.env
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+# Testing
+coverage
+*.lcov
+.nyc_output
+
+# Temporary files
+*.tmp
+*.temp
+.cache
+
+# OS files
+Thumbs.db
+.DS_Store
+
+# TypeScript
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Vite
+.vite
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
diff --git a/src/App.tsx b/src/App.tsx
index 430de21..bff17d3 100644
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -9,10 +9,12 @@ export default function App() {
const [appState, setAppState] = useState<AppState>("requesting-permission");
const [webcamStream, setWebcamStream] = useState<MediaStream | null>(null);
const [isVideoReady, setIsVideoReady] = useState(false);
+ const [sourceType, setSourceType] = useState<"camera" | "screen">("camera");
const videoRef = useRef<HTMLVideoElement | null>(null);
- const handlePermissionGranted = useCallback((stream: MediaStream) => {
+ const handlePermissionGranted = useCallback((stream: MediaStream, source: "camera" | "screen") => {
setWebcamStream(stream);
+ setSourceType(source);
setAppState("welcome");
}, []);
@@ -109,7 +111,7 @@ export default function App() {
<LoadingScreen onComplete={handleLoadingComplete} />
)}
- {appState === "captioning" && <CaptioningView videoRef={videoRef} />}
+ {appState === "captioning" && <CaptioningView videoRef={videoRef} sourceType={sourceType} />}
</div>
);
}
diff --git a/src/components/CaptioningView.tsx b/src/components/CaptioningView.tsx
index 2c58085..a0c77bb 100644
--- a/src/components/CaptioningView.tsx
+++ b/src/components/CaptioningView.tsx
@@ -7,6 +7,7 @@ import { PROMPTS, TIMING } from "../constants";
interface CaptioningViewProps {
videoRef: React.RefObject<HTMLVideoElement | null>;
+ sourceType: "camera" | "screen";
}
function useCaptioningLoop(
@@ -94,7 +95,7 @@ function useCaptioningLoop(
}, [isRunning, isLoaded, runInference, promptRef, videoRef]);
}
-export default function CaptioningView({ videoRef }: CaptioningViewProps) {
+export default function CaptioningView({ videoRef, sourceType }: CaptioningViewProps) {
const { imageSize, setImageSize } = useVLMContext();
const [caption, setCaption] = useState<string>("");
const [isLoopRunning, setIsLoopRunning] = useState<boolean>(true);
@@ -177,6 +178,7 @@ export default function CaptioningView({ videoRef }: CaptioningViewProps) {
error={error}
imageSize={imageSize}
onImageSizeChange={setImageSize}
+ sourceType={sourceType}
/>
{/* Prompt Input - Bottom Left */}
diff --git a/src/components/WebcamCapture.tsx b/src/components/WebcamCapture.tsx
index 2f70b43..a982dd0 100644
--- a/src/components/WebcamCapture.tsx
+++ b/src/components/WebcamCapture.tsx
@@ -7,6 +7,7 @@ interface WebcamCaptureProps {
error?: string | null;
imageSize?: number;
onImageSizeChange?: (size: number) => void;
+ sourceType: "camera" | "screen";
}
export default function WebcamCapture({
@@ -15,6 +16,7 @@ export default function WebcamCapture({
error,
imageSize,
onImageSizeChange,
+ sourceType,
}: WebcamCaptureProps) {
const hasError = Boolean(error);
@@ -26,7 +28,7 @@ export default function WebcamCapture({
}
: isRunning
? {
- text: "LIVE FEED",
+ text: sourceType === "screen" ? "SCREEN CAPTURE" : "LIVE FEED",
color: "bg-[var(--mistral-orange)] animate-pulse",
border: "border-[var(--mistral-orange)]",
}
diff --git a/src/components/WebcamPermissionDialog.tsx b/src/components/WebcamPermissionDialog.tsx
index dae8bc0..0aa5d03 100644
--- a/src/components/WebcamPermissionDialog.tsx
+++ b/src/components/WebcamPermissionDialog.tsx
@@ -17,13 +17,15 @@ const VIDEO_CONSTRAINTS = {
},
};
+type SourceType = "camera" | "screen";
+
interface ErrorInfo {
type: (typeof ERROR_TYPES)[keyof typeof ERROR_TYPES];
message: string;
}
interface WebcamPermissionDialogProps {
- onPermissionGranted: (stream: MediaStream) => void;
+ onPermissionGranted: (stream: MediaStream, sourceType: SourceType) => void;
}
export default function WebcamPermissionDialog({
@@ -31,6 +33,7 @@ export default function WebcamPermissionDialog({
}: WebcamPermissionDialogProps) {
const [isRequesting, setIsRequesting] = useState(false);
const [error, setError] = useState<ErrorInfo | null>(null);
+ const [selectedSource, setSelectedSource] = useState<SourceType | null>(null);
const [mounted, setMounted] = useState(false);
useEffect(() => setMounted(true), []);
@@ -91,31 +94,40 @@ export default function WebcamPermissionDialog({
};
};
- const requestWebcamAccess = useCallback(async () => {
+ const requestAccess = useCallback(async (sourceType: SourceType) => {
setIsRequesting(true);
setError(null);
+ setSelectedSource(sourceType);
try {
- if (!navigator.mediaDevices?.getUserMedia) {
- throw new Error("NOT_SUPPORTED");
+ let stream: MediaStream;
+
+ if (sourceType === "camera") {
+ if (!navigator.mediaDevices?.getUserMedia) {
+ throw new Error("NOT_SUPPORTED");
+ }
+ stream = await navigator.mediaDevices.getUserMedia(VIDEO_CONSTRAINTS);
+ } else {
+ // Screen capture
+ if (!navigator.mediaDevices?.getDisplayMedia) {
+ throw new Error("NOT_SUPPORTED");
+ }
+ stream = await navigator.mediaDevices.getDisplayMedia({
+ video: true,
+ audio: false,
+ } as DisplayMediaStreamOptions);
}
- const stream =
- await navigator.mediaDevices.getUserMedia(VIDEO_CONSTRAINTS);
- onPermissionGranted(stream);
+ onPermissionGranted(stream, sourceType);
} catch (err) {
const errorInfo = getErrorInfo(err);
setError(errorInfo);
- console.error("Error accessing webcam:", err, errorInfo);
+ console.error(`Error accessing ${sourceType}:`, err, errorInfo);
} finally {
setIsRequesting(false);
}
}, [onPermissionGranted]);
- useEffect(() => {
- requestWebcamAccess();
- }, [requestWebcamAccess]);
-
const troubleshootingData = useMemo(
() => ({
[ERROR_TYPES.HTTPS]: {
@@ -209,15 +221,15 @@ export default function WebcamPermissionDialog({
};
const getTitle = () => {
- if (isRequesting) return "Initialize Camera";
+ if (isRequesting) return selectedSource === "screen" ? "Initialize Screen Capture" : "Initialize Camera";
if (error) return "Connection Failed";
- return "Permission Required";
+ return "Select Video Source";
};
const getDescription = () => {
- if (isRequesting) return "Requesting access to video input device...";
+ if (isRequesting) return "Requesting access to video source...";
if (error) return error.message;
- return "Ministral WebGPU requires local camera access for real-time inference.";
+ return "Choose your video source for real-time visual inference.";
};
return (
@@ -324,12 +336,57 @@ export default function WebcamPermissionDialog({
</p>
</div>
+ {/* Source Selection Buttons */}
+ {!isRequesting && !error && (
+ <div className="flex flex-col gap-3">
+ <Button
+ onClick={() => requestAccess("camera")}
+ className="w-full px-6 py-4 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)] flex items-center justify-center gap-3"
+ >
+ <svg
+ className="w-6 h-6"
+ fill="none"
+ viewBox="0 0 24 24"
+ stroke="currentColor"
+ strokeWidth={2}
+ >
+ <path
+ strokeLinecap="round"
+ strokeLinejoin="round"
+ d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z"
+ />
+ </svg>
+ Use Camera
+ </Button>
+
+ <Button
+ onClick={() => requestAccess("screen")}
+ className="w-full px-6 py-4 bg-gray-700 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-gray-800 flex items-center justify-center gap-3"
+ >
+ <svg
+ className="w-6 h-6"
+ fill="none"
+ viewBox="0 0 24 24"
+ stroke="currentColor"
+ strokeWidth={2}
+ >
+ <path
+ strokeLinecap="round"
+ strokeLinejoin="round"
+ d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z"
+ />
+ </svg>
+ Capture Screen
+ </Button>
+ </div>
+ )}
+
{/* Error Actions */}
{error && (
<div className="animate-enter">
<div className="flex justify-center mb-6">
<Button
- onClick={requestWebcamAccess}
+ onClick={() => requestAccess(selectedSource || "camera")}
disabled={isRequesting}
className="px-8 py-3 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)]"
>
diff --git a/src/components/WelcomeScreen.tsx b/src/components/WelcomeScreen.tsx
index d69ec7e..4c58392 100644
--- a/src/components/WelcomeScreen.tsx
+++ b/src/components/WelcomeScreen.tsx
@@ -131,7 +131,7 @@ export default function WelcomeScreen({ onStart }: WelcomeScreenProps) {
Private & Local
</h4>
<p className="text-gray-600 leading-relaxed">
- Your video feed is processed locally and never sent to a
+ Your video source (camera or screen) is processed locally and never sent to a
server, powered by
<a href="https://github.com/huggingface/transformers.js">
<span className="font-medium underline">
--
2.50.1 (Apple Git-155)
That's a great idea, and we will indeed try to make the demo evolve in that direction of supporting multiple sources.
I was rate-limited ... see https://huggingface.co/spaces/oliveregger/Ministral_3B_WebGPU where I applied the above patch. Great work you have done with that example, thanks a lot!