Sofia Casadei committed
Commit · f100c8a
Parent(s): 65dd135
up

Files changed:
- main.py +3 -3
- static/client.js +19 -11
- static/index-screen.html +1 -1
main.py
CHANGED
@@ -150,17 +150,17 @@ stream = Stream(
         # If, after the user started speaking, there is a chunk with less than speech_threshold seconds of speech, the user stopped speaking. (default 0.1)
         speech_threshold=0.1,
         # Max duration of speech chunks before the handler is triggered, even if a pause is not detected by the VAD model. (default -inf)
-        max_continuous_speech_s=
+        max_continuous_speech_s=15
     ),
     model_options=SileroVadOptions(
         # Threshold for what is considered speech (default 0.5)
         threshold=0.5,
         # Final speech chunks shorter min_speech_duration_ms are thrown out (default 250)
-        min_speech_duration_ms=
+        min_speech_duration_ms=250,
         # Max duration of speech chunks, longer will be split at the timestamp of the last silence that lasts more than 100ms (if any) or just before max_speech_duration_s (default float('inf')) (used internally in the VAD algorithm to split the audio that's passed to the algorithm)
         max_speech_duration_s=5,
         # Wait for ms at the end of each speech chunk before separating it (default 2000)
-        min_silence_duration_ms=
+        min_silence_duration_ms=200,
         # Chunk size for VAD model. Can be 512, 1024, 1536 for 16k s.r. (default 1024)
         window_size_samples=1024,
         # Final speech chunks are padded by speech_pad_ms each side (default 400)
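Note: the three values set here interact, so a minimal sketch of the trigger decision described by the comments above may help. It is written as standalone TypeScript for illustration, not fastrtc's actual Python implementation; the function, its inputs, and the way the silence wait is folded into the pause check are assumptions made for the example, while the constants are the values from this commit.

// Illustration only: mirrors the behaviour described by the comments in the
// hunk above, not fastrtc's code. Names and inputs are hypothetical.
interface ChunkState {
    speechInChunkS: number;      // seconds of detected speech in the latest chunk
    continuousSpeechS: number;   // total speech since the user started talking
    trailingSilenceMs: number;   // silence measured after the last speech frame
}

const SPEECH_THRESHOLD_S = 0.1;       // speech_threshold
const MAX_CONTINUOUS_SPEECH_S = 15;   // max_continuous_speech_s (set in this commit)
const MIN_SILENCE_DURATION_MS = 200;  // min_silence_duration_ms (was the 2000 ms default)

function shouldTriggerHandler(state: ChunkState): boolean {
    // "If, after the user started speaking, there is a chunk with less than
    // speech_threshold seconds of speech, the user stopped speaking."
    const pauseDetected =
        state.speechInChunkS < SPEECH_THRESHOLD_S &&
        state.trailingSilenceMs >= MIN_SILENCE_DURATION_MS;

    // "Max duration of speech chunks before the handler is triggered, even if
    // a pause is not detected by the VAD model."
    const hitHardCap = state.continuousSpeechS >= MAX_CONTINUOUS_SPEECH_S;

    return pauseDetected || hitHardCap;
}

// A 16 s monologue with no pause still flushes because of the 15 s cap.
console.log(shouldTriggerHandler({ speechInChunkS: 0.5, continuousSpeechS: 16, trailingSilenceMs: 0 }));   // true
// A short chunk followed by 300 ms of silence flushes via the pause path.
console.log(shouldTriggerHandler({ speechInChunkS: 0.05, continuousSpeechS: 3, trailingSilenceMs: 300 })); // true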
static/client.js
CHANGED
@@ -136,7 +136,7 @@ document.addEventListener('DOMContentLoaded', function() {
     const connectionTimeout = setTimeout(() => {
         if (peerConnection && peerConnection.connectionState !== 'connected') {
             showError('Connection timeout. Please check your network and try again.');
-            stop(); //
+            stop(true); // Restart connection on timeout
         }
     }, 15000);
 
@@ -371,7 +371,7 @@ document.addEventListener('DOMContentLoaded', function() {
     }
 
     // Stop recording and clean up resources
-    function stop() {
+    function stop(restartOnTimeout = false) {
         console.log('Stopping recording...');
         // Stop audio visualization
         if (animationFrame) {
@@ -411,22 +411,30 @@ document.addEventListener('DOMContentLoaded', function() {
         // Update button display
         updateButtonState();
 
-        //
-        if (
-        //
-
-
-
-        // Just end current paragraph
-        if (currentParagraph) {
-            currentParagraph.classList.remove('current');
+        // Skip transcript clearing if restarting on timeout
+        if (!restartOnTimeout) {
+            // Ask about clearing transcript
+            if (window.confirm('Clear transcript?')) {
+                // Clear all transcript text
+                transcriptDiv.innerHTML = '';
                 currentParagraph = null;
+            } else {
+                // Just end current paragraph
+                if (currentParagraph) {
+                    currentParagraph.classList.remove('current');
+                    currentParagraph = null;
+                }
             }
         }
 
         // Reset timestamp
         lastUpdateTime = Date.now();
         console.log('Recording stopped');
+
+        // Restart connection if requested
+        if (restartOnTimeout) {
+            setupWebRTC();
+        }
     }
 
     // Clean up resources when page is closed
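Note: the client change threads one flag through teardown: the connection timeout now calls stop(true), which skips the "Clear transcript?" prompt and re-dials at the end, while a manual stop still behaves as before. A condensed sketch of that control flow follows, with the WebRTC and DOM details stubbed out; setupWebRTC is the app's existing function, while confirmClear, clearTranscript, and endCurrentParagraph are hypothetical stand-ins for this example.

// Condensed sketch of the restart-on-timeout flow; only the branching mirrors the diff.
function stop(restartOnTimeout = false): void {
    // ... stop tracks, close the peer connection, update the button state ...

    if (!restartOnTimeout) {
        // Only prompt about the transcript when the user stopped on purpose.
        if (confirmClear()) {
            clearTranscript();       // stands in for transcriptDiv.innerHTML = ''
        } else {
            endCurrentParagraph();   // stands in for the currentParagraph handling
        }
    }

    // ... reset timestamps, log ...

    if (restartOnTimeout) {
        setupWebRTC();               // re-dial without touching the transcript
    }
}

// The connection timeout is the only caller that passes true:
function onConnectionTimeout(): void {
    console.error('Connection timeout. Please check your network and try again.');
    stop(true);
}

// Stubs so the sketch type-checks on its own.
function setupWebRTC(): void {}
function confirmClear(): boolean { return false; }
function clearTranscript(): void {}
function endCurrentParagraph(): void {}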
static/index-screen.html
CHANGED
@@ -54,7 +54,7 @@
     background: transparent; /* Transparent background (no highlighting) */
     border-radius: 0; /* No rounded corners */
     line-height: 1.6; /* Increases line spacing for readability */
-    font-size:
+    font-size: 10rem; /* rem means relative to the root font size */
     font-weight: 500; /* 500 = medium weight, 700 = bold */
     max-width: 98%; /* Full width within container */
     white-space: normal; /* Allows text to wrap normally */
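Note: as the new comment says, rem is relative to the root element's font size, so the rendered size of the transcript text depends on the html font-size. A quick worked example of that arithmetic, assuming the common browser default of 16px (nothing in this diff sets the root size, so that default is an assumption):

// rem -> px arithmetic for the transcript text, assuming a 16px root font size.
const ROOT_FONT_SIZE_PX = 16;

function remToPx(rem: number, rootPx: number = ROOT_FONT_SIZE_PX): number {
    return rem * rootPx;
}

console.log(remToPx(10));     // 160 -> 10rem renders at 160px at the default root size
console.log(remToPx(10, 20)); // 200 -> scales up if the root font size is raised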