Aman Sharma
committed on
Commit
·
4a7791f
1
Parent(s):
c4d0a47
chore : Add new files and update metadata for Hotdub Real Time Translation
Browse files- README.md +59 -6
- streaming-react-app/index.html +20 -11
- streaming-react-app/public/android-chrome-192x192.png +0 -0
- streaming-react-app/public/apple-touch-icon.png +0 -0
- streaming-react-app/public/favicon-16x16.png +0 -0
- streaming-react-app/public/favicon-32x32.png +0 -0
- streaming-react-app/public/favicon.ico +0 -0
- streaming-react-app/public/logo.png +0 -0
- streaming-react-app/public/site.webmanifest +1 -0
- streaming-react-app/src/StreamingInterface.tsx +13 -30
- streaming-react-app/src/react-xr/XRConfig.tsx +2 -2
- streaming-react-app/src/react-xr/XRDialog.tsx +2 -2
- streaming-react-app/src/types/StreamingTypes.ts +0 -2
README.md
CHANGED
@@ -1,11 +1,64 @@
|
|
1 |
---
|
2 |
-
title: Hotdub
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
-
|
|
|
|
|
9 |
---
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Hotdub Real time translation
|
3 |
+
emoji: 📞
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: yellow
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
+
suggested_hardware: t4-small
|
9 |
+
models:
|
10 |
+
- hotdub-translation
|
11 |
---
|
12 |
|
13 |
+
# Hotdub Real time translation demo
|
14 |
+
|
15 |
+
## Running locally
|
16 |
+
|
17 |
+
### Install backend seamless_server dependencies
|
18 |
+
|
19 |
+
> [!NOTE] Please note: we _do not_ recommend running the model on CPU. CPU inference will be slow and introduce noticeable delays in the simultaneous translation.
|
20 |
+
|
21 |
+
> [!NOTE] The example below is for PyTorch stable (2.1.1) and variant cu118. Check [here](https://pytorch.org/get-started/locally/) to find the torch/torchaudio command for your variant. Check [here](https://github.com/facebookresearch/fairseq2#variants) to find the fairseq2 command for your variant.
|
22 |
+
|
23 |
+
If running for the first time, create a conda environment and install the desired torch version. Then install the rest of the requirements:
|
24 |
+
|
25 |
+
```
|
26 |
+
cd seamless_server
|
27 |
+
conda create --yes --name smlss_server python=3.8 libsndfile==1.0.31
|
28 |
+
conda activate smlss_server
|
29 |
+
conda install --yes pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
|
30 |
+
pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.1/cu118
|
31 |
+
pip install -r requirements.txt
|
32 |
+
```
|
33 |
+
|
34 |
+
### Install frontend streaming-react-app dependencies
|
35 |
+
|
36 |
+
```
|
37 |
+
conda install -c conda-forge nodejs
|
38 |
+
cd streaming-react-app
|
39 |
+
npm install --global yarn
|
40 |
+
yarn
|
41 |
+
yarn build # this will create the dist/ folder
|
42 |
+
```
|
43 |
+
|
44 |
+
### Running the server
|
45 |
+
|
46 |
+
The server can be run locally with uvicorn, as shown below. Run the server in dev mode:
|
47 |
+
|
48 |
+
```
|
49 |
+
cd seamless_server
|
50 |
+
uvicorn app_pubsub:app --reload --host localhost
|
51 |
+
```
|
52 |
+
|
53 |
+
Run the server in prod mode:
|
54 |
+
|
55 |
+
```
|
56 |
+
cd seamless_server
|
57 |
+
uvicorn app_pubsub:app --host 0.0.0.0
|
58 |
+
```
|
59 |
+
|
60 |
+
To enable additional logging from uvicorn pass `--log-level debug` or `--log-level trace`.
|
61 |
+
|
62 |
+
### Debugging
|
63 |
+
|
64 |
+
If you enable "Server Debug Flag" when starting streaming from the client, the server enables extensive debug logging and saves audio files in the /debug folder.
|
streaming-react-app/index.html
CHANGED
@@ -1,13 +1,22 @@
|
|
1 |
<!DOCTYPE html>
|
2 |
<html lang="en">
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
<
|
10 |
-
|
11 |
-
|
12 |
-
</
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
<!DOCTYPE html>
|
2 |
<html lang="en">
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<meta charset="UTF-8" />
|
6 |
+
<link rel="icon" type="image/svg+xml" href="/favicon.ico" />
|
7 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
8 |
+
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
|
9 |
+
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
|
10 |
+
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
|
11 |
+
<link rel="manifest" href="/site.webmanifest">
|
12 |
+
<title>Hotdub Real Time Translation</title>
|
13 |
+
<meta name="description"
|
14 |
+
content="Hotdub: Your gateway to instant understanding through cutting-edge real-time translation technology." />
|
15 |
+
</head>
|
16 |
+
|
17 |
+
<body>
|
18 |
+
<div id="root"></div>
|
19 |
+
<script type="module" src="/src/main.tsx"></script>
|
20 |
+
</body>
|
21 |
+
|
22 |
+
</html>
|
streaming-react-app/public/android-chrome-192x192.png
ADDED
![]() |
streaming-react-app/public/apple-touch-icon.png
ADDED
![]() |
streaming-react-app/public/favicon-16x16.png
ADDED
![]() |
streaming-react-app/public/favicon-32x32.png
ADDED
![]() |
streaming-react-app/public/favicon.ico
ADDED
|
streaming-react-app/public/logo.png
ADDED
![]() |
streaming-react-app/public/site.webmanifest
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}
|
streaming-react-app/src/StreamingInterface.tsx
CHANGED
@@ -6,7 +6,6 @@ import FormControl from '@mui/material/FormControl';
|
|
6 |
import Select, {SelectChangeEvent} from '@mui/material/Select';
|
7 |
import MenuItem from '@mui/material/MenuItem';
|
8 |
import Stack from '@mui/material/Stack';
|
9 |
-
import seamlessLogoUrl from './assets/seamless.svg';
|
10 |
import {
|
11 |
AgentCapabilities,
|
12 |
BaseResponse,
|
@@ -731,6 +730,9 @@ export default function StreamingInterface() {
|
|
731 |
/>
|
732 |
);
|
733 |
|
|
|
|
|
|
|
734 |
return (
|
735 |
<div className="app-wrapper-sra">
|
736 |
<Box
|
@@ -741,40 +743,27 @@ export default function StreamingInterface() {
|
|
741 |
<div className="top-section-sra horizontal-padding-sra">
|
742 |
<div className="header-container-sra">
|
743 |
<img
|
744 |
-
src={
|
745 |
className="header-icon-sra"
|
746 |
-
alt="
|
747 |
height={24}
|
748 |
width={24}
|
749 |
/>
|
750 |
|
751 |
<div>
|
752 |
<Typography variant="h1" sx={{color: '#65676B'}}>
|
753 |
-
|
754 |
</Typography>
|
755 |
</div>
|
756 |
</div>
|
757 |
<div className="header-container-sra">
|
758 |
<div>
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
765 |
-
If max speakers reached, please duplicate the space <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/spaces/facebook/seamless-streaming?duplicate=true">here</a>.
|
766 |
-
In your duplicated space, join a room as speaker or listener (or both),
|
767 |
-
and share the room code to invite listeners.
|
768 |
-
<br/>
|
769 |
-
Check out the seamless_communication <a target="_blank" rel="noopener noreferrer" href="https://github.com/facebookresearch/seamless_communication/tree/main">README</a> for more information.
|
770 |
-
<br/>
|
771 |
-
SeamlessStreaming model is a research model and is not released
|
772 |
-
for production deployment. It is important to use a microphone with
|
773 |
-
noise cancellation (for e.g. a smartphone), otherwise you may see model hallucination on noises.
|
774 |
-
It works best if you pause every couple of sentences, or you may wish adjust the VAD threshold
|
775 |
-
in the model config. The real-time performance will degrade
|
776 |
-
if you try streaming multiple speakers at the same time.
|
777 |
-
</Typography>
|
778 |
</div>
|
779 |
</div>
|
780 |
<Stack spacing="22px" direction="column">
|
@@ -842,7 +831,7 @@ export default function StreamingInterface() {
|
|
842 |
value={model ?? ''}>
|
843 |
{agentsCapabilities.map((agent) => (
|
844 |
<MenuItem value={agent.name} key={agent.name}>
|
845 |
-
|
846 |
</MenuItem>
|
847 |
))}
|
848 |
</Select>
|
@@ -1148,12 +1137,6 @@ export default function StreamingInterface() {
|
|
1148 |
</>
|
1149 |
)}
|
1150 |
</Stack>
|
1151 |
-
|
1152 |
-
{isListener && !isSpeaker && (
|
1153 |
-
<Box sx={{marginBottom: 1, marginTop: 2}}>
|
1154 |
-
{xrDialogComponent}
|
1155 |
-
</Box>
|
1156 |
-
)}
|
1157 |
</div>
|
1158 |
|
1159 |
{debugParam && roomID != null && <DebugSection />}
|
|
|
6 |
import Select, {SelectChangeEvent} from '@mui/material/Select';
|
7 |
import MenuItem from '@mui/material/MenuItem';
|
8 |
import Stack from '@mui/material/Stack';
|
|
|
9 |
import {
|
10 |
AgentCapabilities,
|
11 |
BaseResponse,
|
|
|
730 |
/>
|
731 |
);
|
732 |
|
733 |
+
|
734 |
+
|
735 |
+
|
736 |
return (
|
737 |
<div className="app-wrapper-sra">
|
738 |
<Box
|
|
|
743 |
<div className="top-section-sra horizontal-padding-sra">
|
744 |
<div className="header-container-sra">
|
745 |
<img
|
746 |
+
src={"./logo.png"}
|
747 |
className="header-icon-sra"
|
748 |
+
alt="Hotdub Translation Logo"
|
749 |
height={24}
|
750 |
width={24}
|
751 |
/>
|
752 |
|
753 |
<div>
|
754 |
<Typography variant="h1" sx={{color: '#65676B'}}>
|
755 |
+
Hotdub Real Time Translation
|
756 |
</Typography>
|
757 |
</div>
|
758 |
</div>
|
759 |
<div className="header-container-sra">
|
760 |
<div>
|
761 |
+
<Typography variant="body2" sx={{ color: '#65676B' }}>
|
762 |
+
Welcome to our translation hub! Please note that this space is limited to one speaker at a time. If using the live translation space, sharing the room code to listeners on another IP address may not work due to different replicas. Use headphones if you are both the speaker and listener to prevent feedback.
|
763 |
+
<br />
|
764 |
+
<br />
|
765 |
+
Our Real-Time Translation model is a research model and is not released for production deployment. Please use a microphone with noise cancellation (e.g., a smartphone) to avoid model hallucination on noises. It works best if you pause every couple of sentences, or you may wish to adjust the VAD threshold in the model config. Real-time performance may degrade if you try streaming multiple speakers simultaneously.
|
766 |
+
</Typography>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
767 |
</div>
|
768 |
</div>
|
769 |
<Stack spacing="22px" direction="column">
|
|
|
831 |
value={model ?? ''}>
|
832 |
{agentsCapabilities.map((agent) => (
|
833 |
<MenuItem value={agent.name} key={agent.name}>
|
834 |
+
Hotdub Real Time Translation
|
835 |
</MenuItem>
|
836 |
))}
|
837 |
</Select>
|
|
|
1137 |
</>
|
1138 |
)}
|
1139 |
</Stack>
|
|
|
|
|
|
|
|
|
|
|
|
|
1140 |
</div>
|
1141 |
|
1142 |
{debugParam && roomID != null && <DebugSection />}
|
streaming-react-app/src/react-xr/XRConfig.tsx
CHANGED
@@ -248,7 +248,7 @@ function IntroPanel({started, setStarted}) {
|
|
248 |
]}
|
249 |
position={[xCoordinate, -0.1, -0.5]}>
|
250 |
<ThreeMeshUIText
|
251 |
-
content="FAIR
|
252 |
fontColor={BLACK}
|
253 |
/>
|
254 |
</block>
|
@@ -263,7 +263,7 @@ function IntroPanel({started, setStarted}) {
|
|
263 |
position={[xCoordinate, -0.15, -0.5001]}>
|
264 |
<ThreeMeshUIText
|
265 |
fontColor={BLACK}
|
266 |
-
content="Welcome to the
|
267 |
/>
|
268 |
</block>
|
269 |
<block
|
|
|
248 |
]}
|
249 |
position={[xCoordinate, -0.1, -0.5]}>
|
250 |
<ThreeMeshUIText
|
251 |
+
content="FAIR Hotdub Real time translation Demo"
|
252 |
fontColor={BLACK}
|
253 |
/>
|
254 |
</block>
|
|
|
263 |
position={[xCoordinate, -0.15, -0.5001]}>
|
264 |
<ThreeMeshUIText
|
265 |
fontColor={BLACK}
|
266 |
+
content="Welcome to the Hotdub team streaming demo experience! In this demo, you would experience AI powered text and audio translation in real time."
|
267 |
/>
|
268 |
</block>
|
269 |
<block
|
streaming-react-app/src/react-xr/XRDialog.tsx
CHANGED
@@ -45,7 +45,7 @@ function XRContent(props: XRConfigProps) {
|
|
45 |
dividers
|
46 |
className="xr-dialog-container xr-dialog-text-center">
|
47 |
<Typography gutterBottom>
|
48 |
-
Welcome to the
|
49 |
will experience AI powered text and audio translation in real time.
|
50 |
</Typography>
|
51 |
<div ref={canvasRef} className="xr-dialog-canvas-container" />
|
@@ -70,7 +70,7 @@ export default function XRDialog(props: XRConfigProps) {
|
|
70 |
{isDialogOpen && (
|
71 |
<Dialog onClose={() => setIsDialogOpen(false)} open={true}>
|
72 |
<DialogTitle sx={{m: 0, p: 2}} className="xr-dialog-text-center">
|
73 |
-
|
74 |
</DialogTitle>
|
75 |
<IconButton
|
76 |
aria-label="close"
|
|
|
45 |
dividers
|
46 |
className="xr-dialog-container xr-dialog-text-center">
|
47 |
<Typography gutterBottom>
|
48 |
+
Welcome to the Hotdub team streaming demo experience! In this demo you
|
49 |
will experience AI powered text and audio translation in real time.
|
50 |
</Typography>
|
51 |
<div ref={canvasRef} className="xr-dialog-canvas-container" />
|
|
|
70 |
{isDialogOpen && (
|
71 |
<Dialog onClose={() => setIsDialogOpen(false)} open={true}>
|
72 |
<DialogTitle sx={{m: 0, p: 2}} className="xr-dialog-text-center">
|
73 |
+
Hotdub Real time translation Demo
|
74 |
</DialogTitle>
|
75 |
<IconButton
|
76 |
aria-label="close"
|
streaming-react-app/src/types/StreamingTypes.ts
CHANGED
@@ -38,8 +38,6 @@ export const SUPPORTED_OUTPUT_MODES: Array<{
|
|
38 |
value: (typeof SUPPORTED_OUTPUT_MODE_VALUES)[number];
|
39 |
label: string;
|
40 |
}> = [
|
41 |
-
{ value: 's2s&t', label: 'Text & Speech' },
|
42 |
-
{ value: 's2t', label: 'Text' },
|
43 |
{ value: 's2s', label: 'Speech' },
|
44 |
];
|
45 |
|
|
|
38 |
value: (typeof SUPPORTED_OUTPUT_MODE_VALUES)[number];
|
39 |
label: string;
|
40 |
}> = [
|
|
|
|
|
41 |
{ value: 's2s', label: 'Speech' },
|
42 |
];
|
43 |
|