Aman Sharma committed on
Commit
4a7791f
·
1 Parent(s): c4d0a47

chore : Add new files and update metadata for Hotdub Real Time Translation

Browse files
README.md CHANGED
@@ -1,11 +1,64 @@
1
  ---
2
- title: Hotdub
3
- emoji: 🌍
4
- colorFrom: gray
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
8
- license: mit
 
 
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Hotdub Real time translation
3
+ emoji: 📞
4
+ colorFrom: blue
5
+ colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
+ suggested_hardware: t4-small
9
+ models:
10
+ - hotdub-translation
11
  ---
12
 
13
+ # Hotdub Real time translation demo
14
+
15
+ ## Running locally
16
+
17
+ ### Install backend seamless_server dependencies
18
+
19
+ > [!NOTE] Please note: we _do not_ recommend running the model on CPU. CPU inference will be slow and introduce noticeable delays in the simultaneous translation.
20
+
21
+ > [!NOTE] The example below is for PyTorch stable (2.1.1) and variant cu118. Check [here](https://pytorch.org/get-started/locally/) to find the torch/torchaudio command for your variant. Check [here](https://github.com/facebookresearch/fairseq2#variants) to find the fairseq2 command for your variant.
22
+
23
+ If running for the first time, create a conda environment and install the desired torch version. Then install the rest of the requirements:
24
+
25
+ ```
26
+ cd seamless_server
27
+ conda create --yes --name smlss_server python=3.8 libsndfile==1.0.31
28
+ conda activate smlss_server
29
+ conda install --yes pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
30
+ pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.1/cu118
31
+ pip install -r requirements.txt
32
+ ```
33
+
34
+ ### Install frontend streaming-react-app dependencies
35
+
36
+ ```
37
+ conda install -c conda-forge nodejs
38
+ cd streaming-react-app
39
+ npm install --global yarn
40
+ yarn
41
+ yarn build # this will create the dist/ folder
42
+ ```
43
+
44
+ ### Running the server
45
+
46
+ The server can be run locally with uvicorn, as shown below. Run the server in dev mode:
47
+
48
+ ```
49
+ cd seamless_server
50
+ uvicorn app_pubsub:app --reload --host localhost
51
+ ```
52
+
53
+ Run the server in prod mode:
54
+
55
+ ```
56
+ cd seamless_server
57
+ uvicorn app_pubsub:app --host 0.0.0.0
58
+ ```
59
+
60
+ To enable additional logging from uvicorn, pass `--log-level debug` or `--log-level trace`.
61
+
62
+ ### Debugging
63
+
64
+ If you enable "Server Debug Flag" when starting streaming from the client, extensive debug logging is turned on and audio files are saved in the /debug folder.
streaming-react-app/index.html CHANGED
@@ -1,13 +1,22 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <link rel="icon" type="image/svg+xml" href="/src/assets/seamless.svg" />
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
- <title>Seamless Translation</title>
8
- </head>
9
- <body>
10
- <div id="root"></div>
11
- <script type="module" src="/src/main.tsx"></script>
12
- </body>
13
- </html>
 
 
 
 
 
 
 
 
 
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8" />
6
+ <link rel="icon" type="image/svg+xml" href="/favicon.ico" />
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
8
+ <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
9
+ <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
10
+ <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
11
+ <link rel="manifest" href="/site.webmanifest">
12
+ <title>Hotdub Real Time Translation</title>
13
+ <meta name="description"
14
+ content="Hotdub: Your gateway to instant understanding through cutting-edge real-time translation technology." />
15
+ </head>
16
+
17
+ <body>
18
+ <div id="root"></div>
19
+ <script type="module" src="/src/main.tsx"></script>
20
+ </body>
21
+
22
+ </html>
streaming-react-app/public/android-chrome-192x192.png ADDED
streaming-react-app/public/apple-touch-icon.png ADDED
streaming-react-app/public/favicon-16x16.png ADDED
streaming-react-app/public/favicon-32x32.png ADDED
streaming-react-app/public/favicon.ico ADDED
streaming-react-app/public/logo.png ADDED
streaming-react-app/public/site.webmanifest ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}
streaming-react-app/src/StreamingInterface.tsx CHANGED
@@ -6,7 +6,6 @@ import FormControl from '@mui/material/FormControl';
6
  import Select, {SelectChangeEvent} from '@mui/material/Select';
7
  import MenuItem from '@mui/material/MenuItem';
8
  import Stack from '@mui/material/Stack';
9
- import seamlessLogoUrl from './assets/seamless.svg';
10
  import {
11
  AgentCapabilities,
12
  BaseResponse,
@@ -731,6 +730,9 @@ export default function StreamingInterface() {
731
  />
732
  );
733
 
 
 
 
734
  return (
735
  <div className="app-wrapper-sra">
736
  <Box
@@ -741,40 +743,27 @@ export default function StreamingInterface() {
741
  <div className="top-section-sra horizontal-padding-sra">
742
  <div className="header-container-sra">
743
  <img
744
- src={seamlessLogoUrl}
745
  className="header-icon-sra"
746
- alt="Seamless Translation Logo"
747
  height={24}
748
  width={24}
749
  />
750
 
751
  <div>
752
  <Typography variant="h1" sx={{color: '#65676B'}}>
753
- Seamless Translation
754
  </Typography>
755
  </div>
756
  </div>
757
  <div className="header-container-sra">
758
  <div>
759
- <Typography variant="body2" sx={{color: '#65676B'}}>
760
- Welcome! This space is limited to one speaker at a time.
761
- If using the live HF space, sharing room code to listeners on another
762
- IP address may not work because it's running on different replicas.
763
- Use headphones if you are both speaker and listener to prevent feedback.
764
- <br/>
765
- If max speakers reached, please duplicate the space <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/spaces/facebook/seamless-streaming?duplicate=true">here</a>.
766
- In your duplicated space, join a room as speaker or listener (or both),
767
- and share the room code to invite listeners.
768
- <br/>
769
- Check out the seamless_communication <a target="_blank" rel="noopener noreferrer" href="https://github.com/facebookresearch/seamless_communication/tree/main">README</a> for more information.
770
- <br/>
771
- SeamlessStreaming model is a research model and is not released
772
- for production deployment. It is important to use a microphone with
773
- noise cancellation (for e.g. a smartphone), otherwise you may see model hallucination on noises.
774
- It works best if you pause every couple of sentences, or you may wish adjust the VAD threshold
775
- in the model config. The real-time performance will degrade
776
- if you try streaming multiple speakers at the same time.
777
- </Typography>
778
  </div>
779
  </div>
780
  <Stack spacing="22px" direction="column">
@@ -842,7 +831,7 @@ export default function StreamingInterface() {
842
  value={model ?? ''}>
843
  {agentsCapabilities.map((agent) => (
844
  <MenuItem value={agent.name} key={agent.name}>
845
- {agent.name}
846
  </MenuItem>
847
  ))}
848
  </Select>
@@ -1148,12 +1137,6 @@ export default function StreamingInterface() {
1148
  </>
1149
  )}
1150
  </Stack>
1151
-
1152
- {isListener && !isSpeaker && (
1153
- <Box sx={{marginBottom: 1, marginTop: 2}}>
1154
- {xrDialogComponent}
1155
- </Box>
1156
- )}
1157
  </div>
1158
 
1159
  {debugParam && roomID != null && <DebugSection />}
 
6
  import Select, {SelectChangeEvent} from '@mui/material/Select';
7
  import MenuItem from '@mui/material/MenuItem';
8
  import Stack from '@mui/material/Stack';
 
9
  import {
10
  AgentCapabilities,
11
  BaseResponse,
 
730
  />
731
  );
732
 
733
+
734
+
735
+
736
  return (
737
  <div className="app-wrapper-sra">
738
  <Box
 
743
  <div className="top-section-sra horizontal-padding-sra">
744
  <div className="header-container-sra">
745
  <img
746
+ src={"./logo.png"}
747
  className="header-icon-sra"
748
+ alt="Hotdub Translation Logo"
749
  height={24}
750
  width={24}
751
  />
752
 
753
  <div>
754
  <Typography variant="h1" sx={{color: '#65676B'}}>
755
+ Hotdub Real Time Translation
756
  </Typography>
757
  </div>
758
  </div>
759
  <div className="header-container-sra">
760
  <div>
761
+ <Typography variant="body2" sx={{ color: '#65676B' }}>
762
+ Welcome to our translation hub! Please note that this space is limited to one speaker at a time. If using the live translation space, sharing the room code to listeners on another IP address may not work due to different replicas. Use headphones if you are both the speaker and listener to prevent feedback.
763
+ <br />
764
+ <br />
765
+ Our Real-Time Translation model is a research model and is not released for production deployment. Please use a microphone with noise cancellation (e.g., a smartphone) to avoid model hallucination on noises. It works best if you pause every couple of sentences, or you may wish to adjust the VAD threshold in the model config. Real-time performance may degrade if you try streaming multiple speakers simultaneously.
766
+ </Typography>
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  </div>
768
  </div>
769
  <Stack spacing="22px" direction="column">
 
831
  value={model ?? ''}>
832
  {agentsCapabilities.map((agent) => (
833
  <MenuItem value={agent.name} key={agent.name}>
834
+ Hotdub Real Time Translation
835
  </MenuItem>
836
  ))}
837
  </Select>
 
1137
  </>
1138
  )}
1139
  </Stack>
 
 
 
 
 
 
1140
  </div>
1141
 
1142
  {debugParam && roomID != null && <DebugSection />}
streaming-react-app/src/react-xr/XRConfig.tsx CHANGED
@@ -248,7 +248,7 @@ function IntroPanel({started, setStarted}) {
248
  ]}
249
  position={[xCoordinate, -0.1, -0.5]}>
250
  <ThreeMeshUIText
251
- content="FAIR Seamless Streaming Demo"
252
  fontColor={BLACK}
253
  />
254
  </block>
@@ -263,7 +263,7 @@ function IntroPanel({started, setStarted}) {
263
  position={[xCoordinate, -0.15, -0.5001]}>
264
  <ThreeMeshUIText
265
  fontColor={BLACK}
266
- content="Welcome to the Seamless team streaming demo experience! In this demo, you would experience AI powered text and audio translation in real time."
267
  />
268
  </block>
269
  <block
 
248
  ]}
249
  position={[xCoordinate, -0.1, -0.5]}>
250
  <ThreeMeshUIText
251
+ content="FAIR Hotdub Real time translation Demo"
252
  fontColor={BLACK}
253
  />
254
  </block>
 
263
  position={[xCoordinate, -0.15, -0.5001]}>
264
  <ThreeMeshUIText
265
  fontColor={BLACK}
266
+ content="Welcome to the Hotdub team streaming demo experience! In this demo, you would experience AI powered text and audio translation in real time."
267
  />
268
  </block>
269
  <block
streaming-react-app/src/react-xr/XRDialog.tsx CHANGED
@@ -45,7 +45,7 @@ function XRContent(props: XRConfigProps) {
45
  dividers
46
  className="xr-dialog-container xr-dialog-text-center">
47
  <Typography gutterBottom>
48
- Welcome to the Seamless team streaming demo experience! In this demo you
49
  will experience AI powered text and audio translation in real time.
50
  </Typography>
51
  <div ref={canvasRef} className="xr-dialog-canvas-container" />
@@ -70,7 +70,7 @@ export default function XRDialog(props: XRConfigProps) {
70
  {isDialogOpen && (
71
  <Dialog onClose={() => setIsDialogOpen(false)} open={true}>
72
  <DialogTitle sx={{m: 0, p: 2}} className="xr-dialog-text-center">
73
- FAIR Seamless Streaming Demo
74
  </DialogTitle>
75
  <IconButton
76
  aria-label="close"
 
45
  dividers
46
  className="xr-dialog-container xr-dialog-text-center">
47
  <Typography gutterBottom>
48
+ Welcome to the Hotdub team streaming demo experience! In this demo you
49
  will experience AI powered text and audio translation in real time.
50
  </Typography>
51
  <div ref={canvasRef} className="xr-dialog-canvas-container" />
 
70
  {isDialogOpen && (
71
  <Dialog onClose={() => setIsDialogOpen(false)} open={true}>
72
  <DialogTitle sx={{m: 0, p: 2}} className="xr-dialog-text-center">
73
+ Hotdub Real time translation Demo
74
  </DialogTitle>
75
  <IconButton
76
  aria-label="close"
streaming-react-app/src/types/StreamingTypes.ts CHANGED
@@ -38,8 +38,6 @@ export const SUPPORTED_OUTPUT_MODES: Array<{
38
  value: (typeof SUPPORTED_OUTPUT_MODE_VALUES)[number];
39
  label: string;
40
  }> = [
41
- { value: 's2s&t', label: 'Text & Speech' },
42
- { value: 's2t', label: 'Text' },
43
  { value: 's2s', label: 'Speech' },
44
  ];
45
 
 
38
  value: (typeof SUPPORTED_OUTPUT_MODE_VALUES)[number];
39
  label: string;
40
  }> = [
 
 
41
  { value: 's2s', label: 'Speech' },
42
  ];
43