Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
import os
|
4 |
+
import gradio as gr
|
5 |
+
import google.generativeai as genai
|
6 |
+
from gtts import gTTS
|
7 |
+
import io
|
8 |
+
from PIL import Image
|
9 |
+
import httpx
|
10 |
+
import base64
|
11 |
+
|
12 |
+
# Configure Google AI API
|
13 |
+
def configure_google_ai():
    """Configure the google-generativeai client with an API key from the environment.

    The key is read from the ``GOOGLE_API_KEY`` environment variable (for
    example, set as a secret of the hosting environment) rather than being
    embedded in the source file, where every reader of the repository could
    copy it.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or contains only whitespace.
    """
    # NOTE: a key was previously committed in this function; treat it as
    # compromised and revoke it in the Google AI console.
    api_key = os.environ.get("GOOGLE_API_KEY", "").strip()

    if not api_key:
        raise ValueError("No API key found. Please provide a valid Google AI API key.")

    genai.configure(api_key=api_key)
|
21 |
+
|
22 |
+
# Image-to-text function using Gemini
|
23 |
+
def img2txt(image):
    """Describe an uploaded image using the 'gemini-exp-1121' Gemini model.

    Args:
        image: A PIL image; anything else is passed through
            ``Image.fromarray`` first.

    Returns:
        The model's description with surrounding whitespace stripped, or the
        fixed message "Error processing image. Please try again." if any step
        after client configuration raises.
    """
    configure_google_ai()

    try:
        vision_model = genai.GenerativeModel('gemini-exp-1121')

        # Normalise the input to a PIL image
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

        # Encode the image as PNG entirely in memory
        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format='PNG')
        png_buffer.seek(0)
        image_part = {
            'mime_type': 'image/png',
            'data': png_buffer.getvalue(),
        }

        # The request is the image part followed by the text instruction
        instruction = "Provide a detailed, creative description of this image. Capture the key elements, mood, and potential narrative elements."
        config = genai.types.GenerationConfig(
            max_output_tokens=200,
            temperature=0.7,
        )
        result = vision_model.generate_content(
            contents=[image_part, instruction],
            generation_config=config,
        )

        return result.text.strip()

    except Exception as e:
        # Failures are reported on stdout and surfaced to the caller as text
        print(f"Error in image description: {e}")
        return "Error processing image. Please try again."
|
61 |
+
|
62 |
+
# Text-to-story generation function using Gemini
|
63 |
+
def txt2story(prompt, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Write a story with the 'gemini-1.5-flash' model from the given parameters.

    Args:
        prompt: Free-text story seed (the caller passes the image description).
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Values interpolated verbatim into the instruction text.

    Returns:
        The generated story with surrounding whitespace stripped, or the fixed
        message "Error generating story. Please try again." if any step after
        client configuration raises.
    """
    configure_google_ai()

    try:
        story_model = genai.GenerativeModel('gemini-1.5-flash')

        # Full instruction text sent to the model
        story_request = f"""You are an experienced and masterful storyteller tasked with crafting an immersive, complex narrative. Create an elaborate and compelling story based on the following detailed specifications:

Story Framework:
- Genre: {genre}
- Setting: {setting} in {continent}
- Narrative Tone: {tone}
- Central Theme: {theme}
- Primary Conflict: {conflict}
- Narrative Twist: {twist}
- Story Ending: {ending} ending

Comprehensive Storytelling Guidelines:
1. Story Structure:
- Develop a multi-layered narrative with rich character development
- Create intricate plot progression with meaningful character arcs
- Integrate deep psychological and emotional dimensions
- Explore nuanced motivations and complex interpersonal dynamics

2. Narrative Depth:
- Provide comprehensive background context
- Develop multiple plot layers and subplots
- Include detailed character histories and motivations
- Demonstrate cause-and-effect relationships between events

3. Thematic Exploration:
- Deeply explore the chosen theme of {theme}
- Use symbolism and metaphorical elements
- Connect character experiences to broader philosophical or existential questions
- Demonstrate subtle and profound insights into human nature

4. Stylistic Elements:
- Use vivid, evocative language
- Create immersive sensory descriptions
- Balance dialogue, internal monologue, and narrative exposition
- Maintain consistent narrative voice

5. Emotional Complexity:
- Portray nuanced emotional landscapes
- Show character growth and transformation
- Create moments of genuine emotional resonance
- Balance tension, conflict, and moments of reflection

Story Prompt Details: {prompt}

Additional Context: Craft a narrative that transcends typical genre constraints. Aim for a story that is not just entertaining, but thought-provoking and emotionally impactful. The story should feel like a complete, self-contained narrative journey with depth, complexity, and meaningful resolution.

Expected Outcome:
- Comprehensive narrative arc
- Fully developed characters
- Profound thematic exploration
- Engaging and immersive storytelling
- Meaningful resolution that resonates with the story's core themes
"""

        sampling = genai.types.GenerationConfig(
            max_output_tokens=1000,
            temperature=1.7,
            top_p=0.9,  # More diverse word selection
            top_k=40,  # Broader vocabulary range
        )
        result = story_model.generate_content(
            contents=story_request,
            generation_config=sampling,
        )

        return result.text.strip()

    except Exception as e:
        # Failures are reported on stdout and surfaced to the caller as text
        print(f"Error generating story: {e}")
        return "Error generating story. Please try again."
|
140 |
+
|
141 |
+
# Text-to-speech function
|
142 |
+
def txt2speech(text):
    """Convert text to English speech and return the path of the MP3 file.

    Each call writes to its own temporary ``.mp3`` file. A single fixed
    filename would be shared by every request, so two users generating audio
    at the same time could overwrite (or be served) each other's file.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated MP3 file, or ``None`` when ``text`` is empty or
        only whitespace (there is nothing to synthesise).
    """
    import tempfile  # local import: only this function needs it

    # Nothing to speak: skip synthesis instead of handing gTTS empty input.
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang='en')

    # Reserve a unique path, close the handle, then let gTTS write to it.
    # delete=False keeps the file on disk so the caller can read it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    tts.save(audio_path)
    return audio_path
|
148 |
+
|
149 |
+
# Main generation function
|
150 |
+
def generate_story(image, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Generate an image description, a story and its audio from an image.

    Runs the three stages in order: describe the image, write a story from
    that description and the chosen options, then convert the story to speech.
    If a stage reports failure, the later stages are skipped so that an error
    message is never used as a story prompt or read aloud.

    Args:
        image: The uploaded image, or ``None`` if nothing was uploaded.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story options forwarded unchanged to ``txt2story``.

    Returns:
        A tuple ``(image_description, story, audio)``. With no image it is
        ``("", "", None)``. When a stage fails, its error message is returned
        in that stage's slot, later text slots are ``""`` and ``audio`` is
        ``None``.
    """
    # Fixed messages img2txt / txt2story return instead of raising on failure
    image_error = "Error processing image. Please try again."
    story_error = "Error generating story. Please try again."

    # Ensure image is provided
    if image is None:
        return "", "", None

    # Generate image description
    image_description = img2txt(image)
    if image_description == image_error:
        # Do not build a story around the error message
        return image_description, "", None

    # Generate story
    story = txt2story(
        prompt=image_description,
        genre=genre,
        setting=setting,
        continent=continent,
        tone=tone,
        theme=theme,
        conflict=conflict,
        twist=twist,
        ending=ending
    )
    if story == story_error:
        # Do not narrate the error message
        return image_description, story, None

    # Generate audio
    audio = txt2speech(story)

    return image_description, story, audio
|
176 |
+
|
177 |
+
# Gradio interface setup
|
178 |
+
def create_gradio_app():
    """Build the Gradio interface for the image-to-story generator.

    The interface takes one image plus eight dropdowns and shows the image
    description, the generated story and the story audio. Each dropdown is
    pre-selected to its first option: without a default, an untouched dropdown
    submits ``None``, which would appear literally in the story prompt
    (e.g. "Genre: None").

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    # (label, choices) per dropdown, in the positional order generate_story
    # expects its arguments after the image.
    dropdown_specs = [
        ("Genre", ["Science Fiction", "Fantasy", "Mystery", "Romance"]),
        ("Setting", ["Future", "Medieval times", "Modern day", "Alternate reality"]),
        ("Continent", ["North America", "Europe", "Asia", "Africa", "Australia"]),
        ("Tone", ["Serious", "Light-hearted", "Humorous", "Dark"]),
        ("Theme", ["Self-discovery", "Redemption", "Love", "Justice"]),
        ("Conflict Type", ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"]),
        ("Mystery/Twist", ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"]),
        ("Ending", ["Happy", "Bittersweet", "Open-ended", "Tragic"]),
    ]
    dropdowns = [
        gr.Dropdown(choices, value=choices[0], label=label)
        for label, choices in dropdown_specs
    ]

    # Create Gradio interface
    demo = gr.Interface(
        fn=generate_story,
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            *dropdowns,
        ],
        outputs=[
            gr.Textbox(label="Image Description"),
            gr.Textbox(label="Generated Story"),
            gr.Audio(label="Story Audio"),
        ],
        title="🎨 Image to Story Generator 📖",
        description="Upload an image and generate a unique story!"
    )

    return demo
|
213 |
+
|
214 |
+
# Launch the app
|
215 |
+
if __name__ == "__main__":
    # Build the interface and start the server only when run as a script,
    # not when this module is imported.
    demo = create_gradio_app()
    # debug=True is passed straight through to Gradio's launch().
    demo.launch(debug=True)
|