Spaces:

Hyathi
/

hotdub

Paused

App Files Files Community

Aman Sharma committed on Dec 28, 2023

Commit

4a7791f

1 Parent(s): c4d0a47

chore : Add new files and update metadata for Hotdub Real Time Translation

Browse files

Files changed (13) hide show

README.md +59 -6
streaming-react-app/index.html +20 -11
streaming-react-app/public/android-chrome-192x192.png +0 -0
streaming-react-app/public/apple-touch-icon.png +0 -0
streaming-react-app/public/favicon-16x16.png +0 -0
streaming-react-app/public/favicon-32x32.png +0 -0
streaming-react-app/public/favicon.ico +0 -0
streaming-react-app/public/logo.png +0 -0
streaming-react-app/public/site.webmanifest +1 -0
streaming-react-app/src/StreamingInterface.tsx +13 -30
streaming-react-app/src/react-xr/XRConfig.tsx +2 -2
streaming-react-app/src/react-xr/XRDialog.tsx +2 -2
streaming-react-app/src/types/StreamingTypes.ts +0 -2

README.md CHANGED Viewed

@@ -1,11 +1,64 @@
 ---
-title: Hotdub
-emoji: 🌍
-colorFrom: gray
-colorTo: pink
 sdk: docker
 pinned: false
-license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Hotdub Real time translation
+emoji: 📞
+colorFrom: blue
+colorTo: yellow
 sdk: docker
 pinned: false
+suggested_hardware: t4-small
+models:
+    - hotdub-translation
 ---
+# Hotdub Real time translation demo
+## Running locally
+### Install backend seamless_server dependencies
+> [!NOTE] Please note: we _do not_ recommend running the model on CPU. CPU inference will be slow and introduce noticeable delays in the simultaneous translation.
+> [!NOTE] The example below is for PyTorch stable (2.1.1) and variant cu118. Check [here](https://pytorch.org/get-started/locally/) to find the torch/torchaudio command for your variant. Check [here](https://github.com/facebookresearch/fairseq2#variants) to find the fairseq2 command for your variant.
+If running for the first time, create conda environment and install the desired torch version. Then install the rest of the requirements:
+```
+cd seamless_server
+conda create --yes --name smlss_server python=3.8 libsndfile==1.0.31
+conda activate smlss_server
+conda install --yes pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
+pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.1/cu118
+pip install -r requirements.txt
+```
+### Install frontend streaming-react-app dependencies
+```
+conda install -c conda-forge nodejs
+cd streaming-react-app
+npm install --global yarn
+yarn
+yarn build  # this will create the dist/ folder
+```
+### Running the server
+The server can be run locally with uvicorn using the commands below. Run the server in dev mode:
+```
+cd seamless_server
+uvicorn app_pubsub:app --reload --host localhost
+```
+Run the server in prod mode:
+```
+cd seamless_server
+uvicorn app_pubsub:app --host 0.0.0.0
+```
+To enable additional logging from uvicorn pass `--log-level debug` or `--log-level trace`.
+### Debugging
+If you enable "Server Debug Flag" when starting streaming from the client, extensive debug logging is enabled and audio files are saved in the /debug folder.

streaming-react-app/index.html CHANGED Viewed

@@ -1,13 +1,22 @@
 <!DOCTYPE html>
 <html lang="en">
-  <head>
-    <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="/src/assets/seamless.svg" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Seamless Translation</title>
-  </head>
-  <body>
-    <div id="root"></div>
-    <script type="module" src="/src/main.tsx"></script>
-  </body>
-</html>

 <!DOCTYPE html>
 <html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <link rel="icon" type="image/svg+xml" href="/favicon.ico" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
+  <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
+  <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
+  <link rel="manifest" href="/site.webmanifest">
+  <title>Hotdub Real Time Translation</title>
+  <meta name="description"
+    content="Hotdub: Your gateway to instant understanding through cutting-edge real-time translation technology." />
+</head>
+<body>
+  <div id="root"></div>
+  <script type="module" src="/src/main.tsx"></script>
+</body>
+</html>

streaming-react-app/public/android-chrome-192x192.png ADDED Viewed

streaming-react-app/public/apple-touch-icon.png ADDED Viewed

streaming-react-app/public/favicon-16x16.png ADDED Viewed

streaming-react-app/public/favicon-32x32.png ADDED Viewed

streaming-react-app/public/favicon.ico ADDED Viewed

streaming-react-app/public/logo.png ADDED Viewed

streaming-react-app/public/site.webmanifest ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}

streaming-react-app/src/StreamingInterface.tsx CHANGED Viewed

@@ -6,7 +6,6 @@ import FormControl from '@mui/material/FormControl';
 import Select, {SelectChangeEvent} from '@mui/material/Select';
 import MenuItem from '@mui/material/MenuItem';
 import Stack from '@mui/material/Stack';
-import seamlessLogoUrl from './assets/seamless.svg';
 import {
   AgentCapabilities,
   BaseResponse,
@@ -731,6 +730,9 @@ export default function StreamingInterface() {
     />
   );
   return (
     <div className="app-wrapper-sra">
       <Box
@@ -741,40 +743,27 @@ export default function StreamingInterface() {
           <div className="top-section-sra horizontal-padding-sra">
             <div className="header-container-sra">
               <img
-                src={seamlessLogoUrl}
                 className="header-icon-sra"
-                alt="Seamless Translation Logo"
                 height={24}
                 width={24}
               />
               <div>
                 <Typography variant="h1" sx={{color: '#65676B'}}>
-                  Seamless Translation
                 </Typography>
               </div>
             </div>
             <div className="header-container-sra">
               <div>
-                <Typography variant="body2" sx={{color: '#65676B'}}>
-                  Welcome! This space is limited to one speaker at a time.
-                  If using the live HF space, sharing room code to listeners on another
-                  IP address may not work because it's running on different replicas.
-                  Use headphones if you are both speaker and listener to prevent feedback.
-                  <br/>
-                  If max speakers reached, please duplicate the space <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/spaces/facebook/seamless-streaming?duplicate=true">here</a>.
-                  In your duplicated space, join a room as speaker or listener (or both),
-                  and share the room code to invite listeners.
-                  <br/>
-                  Check out the seamless_communication <a target="_blank" rel="noopener noreferrer" href="https://github.com/facebookresearch/seamless_communication/tree/main">README</a> for more information.
-                  <br/>
-                  SeamlessStreaming model is a research model and is not released
-                  for production deployment. It is important to use a microphone with
-                  noise cancellation (for e.g. a smartphone), otherwise you may see model hallucination on noises.
-                  It works best if you pause every couple of sentences, or you may wish adjust the VAD threshold
-                  in the model config. The real-time performance will degrade
-                  if you try streaming multiple speakers at the same time.
-                </Typography>
               </div>
             </div>
             <Stack spacing="22px" direction="column">
@@ -842,7 +831,7 @@ export default function StreamingInterface() {
                         value={model ?? ''}>
                         {agentsCapabilities.map((agent) => (
                           <MenuItem value={agent.name} key={agent.name}>
-                            {agent.name}
                           </MenuItem>
                         ))}
                       </Select>
@@ -1148,12 +1137,6 @@ export default function StreamingInterface() {
                 </>
               )}
             </Stack>
-            {isListener && !isSpeaker && (
-              <Box sx={{marginBottom: 1, marginTop: 2}}>
-                {xrDialogComponent}
-              </Box>
-            )}
           </div>
           {debugParam && roomID != null && <DebugSection />}

 import Select, {SelectChangeEvent} from '@mui/material/Select';
 import MenuItem from '@mui/material/MenuItem';
 import Stack from '@mui/material/Stack';
 import {
   AgentCapabilities,
   BaseResponse,
     />
   );
   return (
     <div className="app-wrapper-sra">
       <Box
           <div className="top-section-sra horizontal-padding-sra">
             <div className="header-container-sra">
               <img
+                src={"./logo.png"}
                 className="header-icon-sra"
+                alt="Hotdub Translation Logo"
                 height={24}
                 width={24}
               />
               <div>
                 <Typography variant="h1" sx={{color: '#65676B'}}>
+                  Hotdub Real Time Translation
                 </Typography>
               </div>
             </div>
             <div className="header-container-sra">
               <div>
+                  <Typography variant="body2" sx={{ color: '#65676B' }}>
+                    Welcome to our translation hub! Please note that this space is limited to one speaker at a time. If using the live translation space, sharing the room code to listeners on another IP address may not work due to different replicas. Use headphones if you are both the speaker and listener to prevent feedback.
+                    <br />
+                    <br />
+                    Our Real-Time Translation model is a research model and is not released for production deployment. Please use a microphone with noise cancellation (e.g., a smartphone) to avoid model hallucination on noises. It works best if you pause every couple of sentences, or you may wish to adjust the VAD threshold in the model config. Real-time performance may degrade if you try streaming multiple speakers simultaneously.
+                  </Typography>
               </div>
             </div>
             <Stack spacing="22px" direction="column">
                         value={model ?? ''}>
                         {agentsCapabilities.map((agent) => (
                           <MenuItem value={agent.name} key={agent.name}>
+                            Hotdub Real Time Translation
                           </MenuItem>
                         ))}
                       </Select>
                 </>
               )}
             </Stack>
           </div>
           {debugParam && roomID != null && <DebugSection />}

streaming-react-app/src/react-xr/XRConfig.tsx CHANGED Viewed

@@ -248,7 +248,7 @@ function IntroPanel({started, setStarted}) {
         ]}
         position={[xCoordinate, -0.1, -0.5]}>
         <ThreeMeshUIText
-          content="FAIR Seamless Streaming Demo"
           fontColor={BLACK}
         />
       </block>
@@ -263,7 +263,7 @@ function IntroPanel({started, setStarted}) {
         position={[xCoordinate, -0.15, -0.5001]}>
         <ThreeMeshUIText
           fontColor={BLACK}
-          content="Welcome to the Seamless team streaming demo experience! In this demo, you would experience AI powered text and audio translation in real time."
         />
       </block>
       <block

         ]}
         position={[xCoordinate, -0.1, -0.5]}>
         <ThreeMeshUIText
+          content="FAIR Hotdub Real time translation Demo"
           fontColor={BLACK}
         />
       </block>
         position={[xCoordinate, -0.15, -0.5001]}>
         <ThreeMeshUIText
           fontColor={BLACK}
+          content="Welcome to the Hotdub team streaming demo experience! In this demo, you would experience AI powered text and audio translation in real time."
         />
       </block>
       <block

streaming-react-app/src/react-xr/XRDialog.tsx CHANGED Viewed

@@ -45,7 +45,7 @@ function XRContent(props: XRConfigProps) {
       dividers
       className="xr-dialog-container xr-dialog-text-center">
       <Typography gutterBottom>
-        Welcome to the Seamless team streaming demo experience! In this demo you
         will experience AI powered text and audio translation in real time.
       </Typography>
       <div ref={canvasRef} className="xr-dialog-canvas-container" />
@@ -70,7 +70,7 @@ export default function XRDialog(props: XRConfigProps) {
       {isDialogOpen && (
         <Dialog onClose={() => setIsDialogOpen(false)} open={true}>
           <DialogTitle sx={{m: 0, p: 2}} className="xr-dialog-text-center">
-            FAIR Seamless Streaming Demo
           </DialogTitle>
           <IconButton
             aria-label="close"

       dividers
       className="xr-dialog-container xr-dialog-text-center">
       <Typography gutterBottom>
+        Welcome to the Hotdub team streaming demo experience! In this demo you
         will experience AI powered text and audio translation in real time.
       </Typography>
       <div ref={canvasRef} className="xr-dialog-canvas-container" />
       {isDialogOpen && (
         <Dialog onClose={() => setIsDialogOpen(false)} open={true}>
           <DialogTitle sx={{m: 0, p: 2}} className="xr-dialog-text-center">
+            Hotdub Real time translation Demo
           </DialogTitle>
           <IconButton
             aria-label="close"

streaming-react-app/src/types/StreamingTypes.ts CHANGED Viewed

@@ -38,8 +38,6 @@ export const SUPPORTED_OUTPUT_MODES: Array<{
   value: (typeof SUPPORTED_OUTPUT_MODE_VALUES)[number];
   label: string;
 }> = [
-    { value: 's2s&t', label: 'Text & Speech' },
-    { value: 's2t', label: 'Text' },
     { value: 's2s', label: 'Speech' },
   ];

   value: (typeof SUPPORTED_OUTPUT_MODE_VALUES)[number];
   label: string;
 }> = [
     { value: 's2s', label: 'Speech' },
   ];