Commit 1538aa3 · committed by ngxson (HF staff) · 1 parent: ba463b3

inference provider ok
front/package-lock.json CHANGED
@@ -10,6 +10,7 @@
     "dependencies": {
       "@gradio/client": "^1.12.0",
       "@huggingface/hub": "^1.0.1",
+      "@huggingface/inference": "^3.3.4",
       "@sec-ant/readable-stream": "^0.6.0",
       "autoprefixer": "^10.4.20",
       "base64-arraybuffer": "^1.0.2",
@@ -967,6 +968,18 @@
         "node": ">=18"
       }
     },
+    "node_modules/@huggingface/inference": {
+      "version": "3.3.4",
+      "resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-3.3.4.tgz",
+      "integrity": "sha512-IMTaZelduC6xywmm124NgpcnFZ1jPjdUNZgEUuigGneOiwnzWVBhrFt35Gz/oOSqHiQB1tTZG29v7oCJNR2Fog==",
+      "license": "MIT",
+      "dependencies": {
+        "@huggingface/tasks": "^0.15.9"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
     "node_modules/@huggingface/tasks": {
       "version": "0.15.9",
       "resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.15.9.tgz",
front/package.json CHANGED
@@ -13,6 +13,7 @@
     "dependencies": {
       "@gradio/client": "^1.12.0",
       "@huggingface/hub": "^1.0.1",
+      "@huggingface/inference": "^3.3.4",
       "@sec-ant/readable-stream": "^0.6.0",
       "autoprefixer": "^10.4.20",
       "base64-arraybuffer": "^1.0.2",
front/src/App.tsx CHANGED
@@ -36,6 +36,7 @@ function App() {
         setScript={setGeneratedScript}
         setBusy={setBusy}
         busy={busy}
+        hfToken={hfToken}
       />

       <PodcastGenerator
front/src/components/AuthCard.tsx CHANGED
@@ -28,7 +28,7 @@ export const AuthCard = ({
     console.log('oauthHandleRedirectIfPresent', res);
     if (res) {
       try {
-        const myself = whoAmI({ accessToken: res.accessToken });
+        const myself = await whoAmI({ accessToken: res.accessToken });
         console.log('myself', myself);
       } catch (e) {
         console.log(e);
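Note: the fix above is the missing `await`. `whoAmI` from `@huggingface/hub` is async, so the old code logged a pending Promise rather than the user's identity. A minimal sketch of the difference (the token value is a placeholder):

import { whoAmI } from '@huggingface/hub';

// Without await: prints "Promise { <pending> }", not the user info.
const pending = whoAmI({ accessToken: 'hf_xxx' });
console.log(pending);

// With await (as in the fixed line): prints the resolved identity object.
const myself = await whoAmI({ accessToken: 'hf_xxx' });
console.log('myself', myself);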
front/src/components/ScriptMaker.tsx CHANGED
@@ -1,8 +1,9 @@
 import { useEffect, useState } from 'react';
 import { CONFIG } from '../config';
 import { getPromptGeneratePodcastScript } from '../utils/prompts';
-import { getSSEStreamAsync } from '../utils/utils';
+//import { getSSEStreamAsync } from '../utils/utils';
 import { EXAMPLES } from '../examples';
+import { HfInference } from '@huggingface/inference';

 interface SplitContent {
   thought: string;
@@ -30,11 +31,19 @@ export const ScriptMaker = ({
   setScript,
   setBusy,
   busy,
+  hfToken,
 }: {
   setScript: (script: string) => void;
   setBusy: (busy: boolean) => void;
   busy: boolean;
+  hfToken: string;
 }) => {
+  const [model, setModel] = useState<string>(CONFIG.inferenceProviderModels[0]);
+  const [customModel, setCustomModel] = useState<string>(
+    CONFIG.inferenceProviderModels[0]
+  );
+  const usingModel = model === 'custom' ? customModel : model;
+
   const [input, setInput] = useState<string>('');
   const [note, setNote] = useState<string>('');
   const [thought, setThought] = useState<string>('');
@@ -49,12 +58,15 @@ export const ScriptMaker = ({
     setThought('');
     try {
       let responseContent = '';
+      /*
       const fetchResponse = await fetch(CONFIG.llmEndpoint, {
         method: 'POST',
         headers: {
           'Content-Type': 'application/json',
+          'Authorization': `Bearer ${hfToken}`,
         },
         body: JSON.stringify({
+          model: usingModel,
           messages: [
             {
               role: 'user',
@@ -63,18 +75,33 @@ export const ScriptMaker = ({
           ],
           temperature: 0.3,
           stream: true,
+          provider: CONFIG.inferenceProvider,
         }),
       });
       if (fetchResponse.status !== 200) {
         const body = await fetchResponse.json();
-        throw new Error(body?.error?.message || 'Unknown error');
+        throw new Error(body?.error?.message || body?.error || 'Unknown error');
       }
       const chunks = getSSEStreamAsync(fetchResponse);
+      */
+      const client = new HfInference(hfToken);
+      const chunks = client.chatCompletionStream({
+        model: usingModel,
+        messages: [
+          {
+            role: 'user',
+            content: getPromptGeneratePodcastScript(input, note),
+          },
+        ],
+        temperature: 0.3,
+        stream: true,
+        provider: CONFIG.inferenceProvider,
+      });
       for await (const chunk of chunks) {
         // const stop = chunk.stop;
-        if (chunk.error) {
-          throw new Error(chunk.error?.message || 'Unknown error');
-        }
+        //if (chunk.error) {
+        //  throw new Error(chunk.error?.message || 'Unknown error');
+        //}
         const addedContent = chunk.choices[0].delta.content;
         responseContent += addedContent;
         const { thought, codeBlock } = splitContent(responseContent);
@@ -85,7 +112,7 @@ export const ScriptMaker = ({
       }
     } catch (error) {
       console.error(error);
-      alert('Failed to generate the script. Please try again.');
+      alert(`ERROR: ${error}`);
     }
     setIsGenerating(false);
   };
@@ -124,6 +151,7 @@ export const ScriptMaker = ({
         onChange={(e) => setInput(e.target.value)}
         disabled={isGenerating || busy}
       ></textarea>
+
       <textarea
         className="textarea textarea-bordered w-full h-24 p-2"
         placeholder="Optional note (the theme, tone, etc)..."
@@ -132,6 +160,29 @@ export const ScriptMaker = ({
         disabled={isGenerating || busy}
       ></textarea>

+      <select
+        className="select select-bordered"
+        value={model}
+        onChange={(e) => setModel(e.target.value)}
+      >
+        {CONFIG.inferenceProviderModels.map((s) => (
+          <option key={s} value={s}>
+            {s}
+          </option>
+        ))}
+        <option value="custom">Custom</option>
+      </select>
+
+      {model === 'custom' && (
+        <input
+          type="text"
+          placeholder="Use a custom model from HF Hub (must be supported by Inference Providers)"
+          className="input input-bordered w-full"
+          value={customModel}
+          onChange={(e) => setCustomModel(e.target.value)}
+        />
+      )}
+
       {thought.length > 0 && (
         <>
           <p>Thought process:</p>
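Note: the component now streams through the `@huggingface/inference` client instead of raw fetch plus SSE parsing. A minimal standalone sketch of that pattern (token, model, and prompt are placeholders; chunks follow the OpenAI-style delta shape, hence `chunk.choices[0].delta.content`):

import { HfInference } from '@huggingface/inference';

// Stream a chat completion through an Inference Provider.
// 'hf_xxx' and the prompt are placeholders.
const client = new HfInference('hf_xxx');
let text = '';
for await (const chunk of client.chatCompletionStream({
  model: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
  messages: [{ role: 'user', content: 'Draft a two-line podcast intro.' }],
  temperature: 0.3,
  provider: 'together',
})) {
  // Each chunk carries an incremental delta of the assistant message.
  text += chunk.choices[0].delta.content ?? '';
}
console.log(text);

Since `chatCompletionStream` always streams, the explicit `stream: true` kept in the component appears redundant but harmless.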
front/src/config.ts CHANGED
@@ -7,8 +7,16 @@ const LLM_ENDPOINT = window.huggingface?.variables?.LLM_ENDPOINT;
 export const CONFIG = {
   llmEndpoint:
     LLM_ENDPOINT ||
-    'https://gnb1thady6h3noiz.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions',
+    'https://router.huggingface.co/hf-inference/v1/chat/completions',
   ttsSpaceId: TTS_SPACE_ID || 'ngxson/kokoro-podcast-backend',
+  inferenceProviderModels: [
+    //'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
+    //'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
+    //'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
+    'deepseek-ai/DeepSeek-R1',
+    'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
+  ],
+  inferenceProvider: 'together',
 };

 console.log({ CONFIG });
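Note: with `inferenceProvider: 'together'`, chat requests are routed through Hugging Face Inference Providers to Together AI instead of the dedicated endpoint that was hard-coded before. Switching providers should only require editing this object; a sketch under that assumption (the alternate provider id and model list are illustrative and must actually be served by that provider):

// Illustrative only: retarget the app at another provider/model.
export const CONFIG = {
  llmEndpoint: 'https://router.huggingface.co/hf-inference/v1/chat/completions',
  ttsSpaceId: 'ngxson/kokoro-podcast-backend',
  inferenceProviderModels: [
    'deepseek-ai/DeepSeek-R1', // must be deployed by the chosen provider
  ],
  inferenceProvider: 'sambanova', // hypothetical alternative provider id
};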
index.html CHANGED
The diff for this file is too large to render. See raw diff