dawood HF Staff committed on
Commit
9d219fb
·
verified ·
1 Parent(s): 09091ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -58,10 +58,12 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
58
  return GeminiHandler()
59
 
60
  async def start_up(self):
 
 
61
  client = genai.Client(
62
- api_key=os.getenv("GEMINI_API_KEY"), http_options={"api_version": "v1alpha"}
63
  )
64
- config = {"response_modalities": ["AUDIO"]}
65
  async with client.aio.live.connect(
66
  model="gemini-2.0-flash-exp",
67
  config=config, # type: ignore
@@ -87,8 +89,8 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
87
  if time.time() - self.last_frame_time > 1:
88
  self.last_frame_time = time.time()
89
  await self.session.send(input=encode_image(frame))
90
- if self.latest_args[1] is not None:
91
- await self.session.send(input=encode_image(self.latest_args[1]))
92
 
93
  async def video_emit(self):
94
  frame = await wait_for_item(self.video_queue, 0.01)
@@ -121,10 +123,16 @@ stream = Stream(
121
  handler=GeminiHandler(),
122
  modality="audio-video",
123
  mode="send-receive",
124
- rtc_configuration=get_cloudflare_turn_credentials_async,
125
  time_limit=180 if get_space() else None,
126
  additional_inputs=[
127
- gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
 
 
 
 
 
 
128
  ],
129
  ui_args={
130
  "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
@@ -135,9 +143,4 @@ stream = Stream(
135
  )
136
 
137
  if __name__ == "__main__":
138
- if (mode := os.getenv("MODE")) == "UI":
139
- stream.ui.launch(server_port=7860)
140
- elif mode == "PHONE":
141
- raise ValueError("Phone mode not supported for this demo")
142
- else:
143
- stream.ui.launch(server_port=7860)
 
58
  return GeminiHandler()
59
 
60
  async def start_up(self):
61
+ await self.wait_for_args()
62
+ api_key = self.latest_args[3]
63
  client = genai.Client(
64
+ api_key=api_key, http_options={"api_version": "v1alpha"}
65
  )
66
+ config = {"response_modalities": ["AUDIO"], "system_instruction": "You are an art history teacher that will describe the artwork passed in as an image to the user. Describe the history and significance of the artwork."}
67
  async with client.aio.live.connect(
68
  model="gemini-2.0-flash-exp",
69
  config=config, # type: ignore
 
89
  if time.time() - self.last_frame_time > 1:
90
  self.last_frame_time = time.time()
91
  await self.session.send(input=encode_image(frame))
92
+ if self.latest_args[2] is not None:
93
+ await self.session.send(input=encode_image(self.latest_args[2]))
94
 
95
  async def video_emit(self):
96
  frame = await wait_for_item(self.video_queue, 0.01)
 
123
  handler=GeminiHandler(),
124
  modality="audio-video",
125
  mode="send-receive",
126
+ rtc_configuration=get_cloudflare_turn_credentials_async if get_space() else None,
127
  time_limit=180 if get_space() else None,
128
  additional_inputs=[
129
+ gr.Markdown(
130
+ "## 🎨 Art History Teacher\n\n"
131
+ "Provide an image of the artwork and Gemini will describe it to you."
132
+ "To get a Gemini API key, please visit the [Gemini API Key](https://console.cloud.google.com/apis/api/generativelanguage.googleapis.com/credentials) page."
133
+ ),
134
+ gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"]),
135
+ gr.Textbox(label="Gemini API Key"),
136
  ],
137
  ui_args={
138
  "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
 
143
  )
144
 
145
  if __name__ == "__main__":
146
+ stream.ui.launch()