Spaces:
Configuration error
Configuration error
Fedir Zadniprovskyi
committed on
Commit
·
12ab49b
1
Parent(s):
487d997
docs: add js example
Browse files- examples/javascript/index.js +159 -0
examples/javascript/index.js
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26
|
| 3 |
+
*/
|
| 4 |
+
import fs from 'fs';
|
| 5 |
+
import WebSocket from 'ws';
|
| 6 |
+
import fetch from 'node-fetch';
|
| 7 |
+
import FormData from 'form-data';
|
| 8 |
+
import path from 'path';
|
| 9 |
+
import ffmpeg from 'fluent-ffmpeg';
|
| 10 |
+
import dotenv from 'dotenv';
|
| 11 |
+
|
| 12 |
+
// Load environment variables from .env before any configuration is read.
dotenv.config();

// Allow the ffmpeg binary location to be overridden via FFMPEG_PATH.
ffmpeg.setFfmpegPath(process.env.FFMPEG_PATH || '/usr/bin/ffmpeg');
|
| 16 |
+
|
| 17 |
+
/**
 * Transcribe an audio file using the HTTP `/v1/audio/transcriptions` endpoint.
 * Supported file types include wav, mp3, webm, and other types supported by the OpenAI API.
 * I have tested with these three types.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format ('json', 'verbose_json', 'text', 'srt', 'vtt')
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object|string>} Parsed body: an object for JSON formats, raw text otherwise
 * @throws {Error} When the server responds with a non-2xx status
 */
async function transcribeFile(filePath, model, language, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('language', language);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSCRIPTION_API_BASE_URL}/v1/audio/transcriptions`, {
    method: 'POST',
    body: formData,
  });

  // Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }

  // Only the *_json formats return a JSON body; text/srt/vtt come back as plain text.
  const transcription = responseFormat.includes('json')
    ? await response.json()
    : await response.text();
  console.log('Transcription Response:', transcription);
  return transcription;
}
|
| 44 |
+
|
| 45 |
+
/**
 * Translate an audio file using the HTTP `/v1/audio/translations` endpoint.
 * Only English is supported as the translation target.
 * Currently, I am using GLM-4-9b-int8 to translate various voices.
 * I am not sure if the author can add an endpoint for custom API+Key translation.
 * I plan to package my frontend, fast-whisper-server, and vllm+glm-4-9b-int8 into one Docker container for unified deployment.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} responseFormat - Response format ('json', 'verbose_json', 'text', 'srt', 'vtt')
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object|string>} Parsed body: an object for JSON formats, raw text otherwise
 * @throws {Error} When the server responds with a non-2xx status
 */
async function translateFile(filePath, model, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSLATION_API_BASE_URL}/v1/audio/translations`, {
    method: 'POST',
    body: formData,
  });

  // Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
  if (!response.ok) {
    throw new Error(`Translation request failed: ${response.status} ${response.statusText}`);
  }

  // Only the *_json formats return a JSON body; text/srt/vtt come back as plain text.
  const translation = responseFormat.includes('json')
    ? await response.json()
    : await response.text();
  console.log('Translation Response:', translation);
  return translation;
}
|
| 72 |
+
|
| 73 |
+
/**
 * Send audio data over a WebSocket for transcription.
 * Currently, the supported file type for transcription is PCM.
 * I am not sure if other types are supported.
 *
 * @param {string} filePath - Path to the audio file (raw PCM)
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<void>} Resolves when the connection closes; rejects on socket error
 */
async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
  // Derive the ws:// endpoint from the configured base URL instead of a
  // hard-coded IP; fall back to the previous default for compatibility.
  const base = process.env.TRANSCRIPTION_API_BASE_URL || 'http://100.105.162.69:8000';
  const url = new URL('/v1/audio/transcriptions', base);
  url.protocol = url.protocol === 'https:' ? 'wss:' : 'ws:';
  url.searchParams.set('model', model);
  url.searchParams.set('language', language);
  url.searchParams.set('response_format', responseFormat);
  url.searchParams.set('temperature', temperature);

  const ws = new WebSocket(url.toString());

  // Wrap the event-driven API in a Promise so `await` actually waits for the
  // session to finish (the original resolved before any response arrived).
  return new Promise((resolve, reject) => {
    ws.on('open', () => {
      ws.send(fs.readFileSync(filePath));
    });

    ws.on('message', (message) => {
      console.log('WebSocket Response:', JSON.parse(message));
    });

    ws.on('close', () => {
      console.log('WebSocket connection closed');
      resolve();
    });

    ws.on('error', (error) => {
      console.error('WebSocket error:', error);
      reject(error);
    });
  });
}
|
| 106 |
+
|
| 107 |
+
/**
 * Convert an audio file to raw PCM (mono, 16 kHz, signed 16-bit little-endian).
 *
 * @param {string} filePath - Path to the audio file
 * @returns {Promise<string>} Path to the converted PCM file
 * @throws {Error} When ffmpeg fails to convert the file
 */
async function convertToPcm(filePath) {
  // Swap only the final extension. The previous
  // `filePath.replace(path.extname(filePath), '.pcm')` replaced the FIRST
  // occurrence of the extension substring (wrong for paths like
  // './my.webm.dir/x.webm') and prepended '.pcm' for extensionless files.
  const { dir, name } = path.parse(filePath);
  const pcmFilePath = path.format({ dir, name, ext: '.pcm' });

  await new Promise((resolve, reject) => {
    ffmpeg(filePath)
      .audioChannels(1)        // mono
      .audioFrequency(16000)   // 16 kHz sample rate
      .audioCodec('pcm_s16le') // signed 16-bit little-endian samples
      .toFormat('s16le')       // raw (headerless) PCM container
      .on('end', () => {
        console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
        resolve(pcmFilePath);
      })
      .on('error', (error) => {
        console.error(`Error converting audio to PCM: ${error.message}`);
        reject(error);
      })
      .save(pcmFilePath);
  });

  return pcmFilePath;
}
|
| 135 |
+
|
| 136 |
+
/**
 * Demo driver: convert a source file to PCM, then exercise the HTTP
 * transcription and translation endpoints followed by the WebSocket endpoint.
 */
async function main() {
  const model = 'Systran/faster-whisper-large-v3';
  const language = 'en';
  const responseFormat = 'json';
  const temperature = '0';
  const filePath = './path/to/your/audio.webm'; // Replace with the actual file path

  // Step 1: convert the source audio to raw PCM.
  const pcmFilePath = await convertToPcm(filePath);

  // Step 2: HTTP transcription endpoint.
  await transcribeFile(pcmFilePath, model, language, responseFormat, temperature);

  // Step 3: HTTP translation endpoint (English target only).
  await translateFile(pcmFilePath, model, responseFormat, temperature);

  // Step 4: WebSocket transcription endpoint.
  await sendAudioOverWebSocket(pcmFilePath, model, language, responseFormat, temperature);
}
|
| 155 |
+
|
| 156 |
+
// NOTE: ffmpeg version 7 or above is required. The default apt-get install
// only installs version 4.x; Ubuntu 22.04 or above is needed for 7.x.
main().catch((error) => console.error(error));

// Project URL: https://github.com/Gan-Xing/whisper
|