MLX_GPT_OSS_120B / mlx-gpt-oss-120b /gpt_oss_120b_demo_final.py
TroglodyteDerivations's picture
Upload 48 files
c28358e verified
raw
history blame
25.2 kB
#!/usr/bin/env python3
"""
Comprehensive GPT-OSS-120B Demonstration with Output Saving
"""
from mlx_lm import load, generate
import logging
import re
import time
import json
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import plotly.graph_objects as go
import plotly.express as px
from collections import Counter
import numpy as np
from typing import List, Dict
import textwrap
import os
from datetime import datetime
# Configure root logging once at import time: INFO and above, each record
# rendered as "<timestamp> - <level> - <message>"; then create this module's logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
class GPTOSSDemo:
    """Run a series of GPT-OSS-120B demonstrations over a lecture transcript.

    On construction the model and tokenizer are loaded through ``mlx_lm.load``
    and a timestamped output directory is created. Each public ``generate_*``,
    ``write_*`` and ``create_*`` method produces one artefact (text, Plotly
    HTML, or a PNG) inside that directory; ``run_comprehensive_demo`` runs
    them all in sequence.

    Attributes:
        model, tokenizer: Objects returned by ``mlx_lm.load``.
        transcript: Lecture text loaded by ``load_data`` ("" until then).
        timestamps, timestamps_2: Parsed JSON timestamp data ({} until loaded).
        output_dir: Directory that receives every saved artefact.
    """

    # NOTE(review): the emoji prefixes in the log/print strings below appear
    # mis-encoded (mojibake). They are runtime strings and are left exactly as
    # found; confirm the intended characters before repairing them.

    def __init__(self, model_path: str = "mlx-community/gpt-oss-120b-MXFP4-Q4"):
        """Load the model and create a fresh, timestamped output directory.

        Args:
            model_path: Identifier passed to ``mlx_lm.load``. Defaults to the
                checkpoint this script was written for.
        """
        logger.info("πŸš€ Loading GPT-OSS-120B...")
        self.model, self.tokenizer = load(model_path)
        logger.info("βœ… Model loaded successfully!")
        self.transcript = ""
        self.timestamps = {}
        self.timestamps_2 = {}
        self.output_dir = f"gpt_oss_output_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        os.makedirs(self.output_dir, exist_ok=True)
        logger.info(f"πŸ“ Output directory created: {self.output_dir}")

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _banner(title: str, char: str = "=", width: int = 80) -> None:
        """Print a blank line followed by *title* framed by two rule lines."""
        print("\n" + char * width)
        print(title)
        print(char * width)

    def _generate_and_save(self, prompt: str, filename: str, max_tokens: int) -> str:
        """Generate a response for *prompt*, print it, save it, and return it."""
        text = self.generate_response(prompt, max_tokens=max_tokens)
        print(text)
        self.save_output(text, filename)
        return text

    @staticmethod
    def _format_hms(seconds) -> str:
        """Format a number of seconds as ``HH:MM:SS`` (fractions truncated)."""
        minutes, secs = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return f"{int(hours):02d}:{int(minutes):02d}:{int(secs):02d}"

    @staticmethod
    def _parse_hms(value):
        """Convert an ``HH:MM:SS`` string to seconds.

        The seconds field may carry a fractional part. Returns ``None`` instead
        of raising when *value* is not a string, does not have exactly three
        colon-separated fields, or a field is not numeric.
        """
        if not isinstance(value, str):
            return None
        fields = value.split(':')
        if len(fields) != 3:
            return None
        try:
            hours = int(fields[0])
            minutes = int(fields[1])
            seconds = float(fields[2])
        except ValueError:
            return None
        total = hours * 3600 + minutes * 60 + seconds
        # Keep integer results as ints so whole-second data plots as before.
        return int(total) if total.is_integer() else total

    @staticmethod
    def _collect_segment_durations(sections) -> tuple:
        """Return ``(indices, durations, labels)`` for sections with positive duration.

        Sections lacking ``start_time``/``end_time``, or whose times cannot be
        parsed, are skipped. Labels are ``"Seg <n>"`` with ``n`` being the
        1-based position of the section in *sections*.
        """
        indices, durations, labels = [], [], []
        for index, section in enumerate(sections):
            if 'start_time' not in section or 'end_time' not in section:
                continue
            start_sec = GPTOSSDemo._parse_hms(section['start_time'])
            end_sec = GPTOSSDemo._parse_hms(section['end_time'])
            if start_sec is None or end_sec is None:
                continue
            duration = end_sec - start_sec
            if duration > 0:  # Only include segments with positive duration
                indices.append(index)
                durations.append(duration)
                labels.append(f"Seg {index + 1}")
        return indices, durations, labels

    def _count_topic_mentions(self, topics) -> dict:
        """Count case-insensitive occurrences of each topic in the transcript.

        Mixed/lower-case topics are counted as substrings (so ``risk`` also
        counts ``risks``). All-uppercase topics such as ``AI`` are treated as
        acronyms and matched as whole words only; a plain substring search for
        ``ai`` would also count words like "said" or "again".
        """
        lowered = self.transcript.lower()
        counts = {}
        for topic in topics:
            pattern = re.escape(topic.lower())
            if topic.isupper():
                pattern = rf"\b{pattern}\b"
            counts[topic] = len(re.findall(pattern, lowered))
        return counts

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------
    def save_output(self, content: str, filename: str):
        """Write *content* as UTF-8 text to *filename* in the output directory.

        Returns:
            The path of the written file.
        """
        filepath = os.path.join(self.output_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)
        logger.info(f"πŸ’Ύ Saved output to: {filepath}")
        return filepath

    def save_plotly_html(self, fig, filename: str):
        """Save a Plotly figure as HTML in the output directory; return its path."""
        filepath = os.path.join(self.output_dir, filename)
        fig.write_html(filepath)
        logger.info(f"πŸ“Š Saved Plotly visualization to: {filepath}")
        return filepath

    def save_matplotlib_figure(self, fig, filename: str):
        """Save a matplotlib figure in the output directory; return its path.

        Args:
            fig: Any object exposing ``savefig`` (a ``Figure`` or ``pyplot``).
            filename: Target file name inside the output directory.
        """
        filepath = os.path.join(self.output_dir, filename)
        fig.savefig(filepath, bbox_inches='tight', dpi=300)
        logger.info(f"πŸ“ˆ Saved matplotlib figure to: {filepath}")
        return filepath

    def load_data(self, transcript_path: str, timestamps_path: str = None, timestamps_2_path: str = None):
        """Load the transcript and optional timestamp JSON files.

        Each loaded input is also copied into the output directory.

        Args:
            transcript_path: Path of the UTF-8 transcript text file.
            timestamps_path: Optional path of the first timestamps JSON file.
            timestamps_2_path: Optional path of the second timestamps JSON file.

        Raises:
            Exception: Any error raised while reading or parsing is logged and
                re-raised unchanged.
        """
        try:
            with open(transcript_path, 'r', encoding='utf-8') as f:
                self.transcript = f.read()
            logger.info(f"βœ… Loaded transcript: {len(self.transcript)} characters")
            # Save transcript
            self.save_output(self.transcript, "original_transcript.txt")

            timestamp_sources = (
                (timestamps_path, "timestamps", "timestamps_1.json"),
                (timestamps_2_path, "timestamps_2", "timestamps_2.json"),
            )
            for path, attr_name, saved_name in timestamp_sources:
                if not path:
                    continue
                with open(path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                setattr(self, attr_name, data)
                logger.info(f"βœ… Loaded {attr_name} data")
                self.save_output(json.dumps(data, indent=2), saved_name)
        except Exception as e:
            logger.error(f"❌ Error loading data: {e}")
            raise

    # ------------------------------------------------------------------
    # Generation
    # ------------------------------------------------------------------
    def extract_final_response(self, response: str) -> str:
        """Extract the final assistant message from raw chat-template output.

        If the text contains ``<|start|>assistant``, everything after its last
        occurrence is taken, the ``<|channel|>`` marker together with the text
        up to the next ``<`` is removed, and remaining ``<...>`` markers are
        stripped. When that yields nothing (or the marker is absent), all
        ``<...>`` markers are stripped from the whole response instead.
        """
        marker = "<|start|>assistant"
        if marker in response:
            tail = response.split(marker)[-1]
            tail = re.sub(r'<\|channel\|>[^<]+', '', tail)
            tail = tail.replace('<|message|>', '').replace('<|end|>', '')
            tail = re.sub(r'<[^>]+>', '', tail).strip()
            if tail:
                return tail
        # Fallback: no (non-empty) final section, so clean the whole response.
        cleaned = re.sub(r'<\|[^>]+\|>', '', response)
        cleaned = re.sub(r'<[^>]+>', '', cleaned)
        return cleaned.strip()

    def generate_response(self, prompt: str, max_tokens: int = 2048) -> str:
        """Generate a cleaned model response for a single user prompt.

        Args:
            prompt: User message content.
            max_tokens: Generation limit forwarded to ``mlx_lm.generate``.

        Returns:
            The extracted response text. On failure the error is logged and a
            message of the form ``"I encountered an error: ..."`` is returned
            instead of raising, so a demo run can continue.
        """
        try:
            messages = [{"role": "user", "content": prompt}]
            formatted_prompt = self.tokenizer.apply_chat_template(
                messages, add_generation_prompt=True
            )
            response = generate(
                self.model,
                self.tokenizer,
                prompt=formatted_prompt,
                max_tokens=max_tokens,
                verbose=False
            )
            return self.extract_final_response(response)
        except Exception as e:
            # Deliberate best-effort: report and keep going; include traceback.
            logger.exception("Generation error: %s", e)
            return f"I encountered an error: {str(e)}"

    def generate_tshirt_prompts(self):
        """Generate and save Flux1-Krea-dev t-shirt prompts plus minimalist variants."""
        self._banner("πŸ‘• FLUX1-KREA-DEV T-SHIRT PROMPTS")
        prompt = f"""Create 3 graphic t-shirt design prompts for Flux1-Krea-dev based on Yuval Noah Harari's lecture
"Storytelling, Human Cooperation, and the Rise of AI" in London on June 11, 2025.
Each prompt should:
1. Include 1-2 powerful words that capture the essence of the lecture
2. Describe a visually striking design that represents the themes
3. Incorporate elements related to storytelling, AI, and human cooperation
4. Be suitable for printing on a t-shirt
Lecture themes: {self.transcript[:3000]}
Create 3 distinct prompts:
PROMPT 1:
Words:
Design:
PROMPT 2:
Words:
Design:
PROMPT 3:
Words:
Design: """
        self._generate_and_save(prompt, "flux1_krea_dev_tshirt_prompts.txt", max_tokens=1024)

        # Generate additional minimalist versions
        self._banner("🎨 MINIMALIST T-SHIRT DESIGNS", char="-", width=40)
        minimalist_prompt = f"""Create 3 minimalist t-shirt design concepts based on Yuval Noah Harari's lecture.
Each should feature only 1-2 words that perfectly capture the essence of the lecture.
Lecture themes: {self.transcript[:2000]}
Design 1: [Word(s)] - [Brief explanation]
Design 2: [Word(s)] - [Brief explanation]
Design 3: [Word(s)] - [Brief explanation]"""
        self._generate_and_save(minimalist_prompt, "minimalist_tshirt_designs.txt", max_tokens=512)

    def generate_summaries(self):
        """Generate summaries at several target word counts and save them.

        Each summary is saved to ``summary_<n>_words.txt``; all of them are
        also concatenated into ``all_summaries.txt``.
        """
        self._banner("πŸ“ MULTI-LENGTH SUMMARIES")
        summary_lengths = [10, 150, 200, 250, 300]
        all_summaries = []
        for length in summary_lengths:
            print(f"\nGenerating {length}-word summary...")
            prompt = f"""Create a precise {length}-word summary of this lecture. Focus on key themes:
storytelling, AI risks/benefits, alignment problem, and human values.
Provide only the final concise summary without any additional commentary or word counting.
Transcript: {self.transcript[:6000]}
{length}-word summary:"""
            summary = self.generate_response(prompt, max_tokens=500)
            # Clean up the summary to remove any analysis or word counting text
            clean_summary = re.sub(r'(analysis|count|words|draft|let\'s|must be exactly).*?summary:', '', summary, flags=re.IGNORECASE | re.DOTALL)
            clean_summary = re.sub(r'now count words.*', '', clean_summary, flags=re.IGNORECASE | re.DOTALL)
            clean_summary = re.sub(r'\d+ words.*', '', clean_summary)
            clean_summary = clean_summary.strip()
            print(f"βœ… {length}-word summary:")
            print("-" * 60)
            print(textwrap.fill(clean_summary, width=70))
            print("-" * 60)
            # Save individual summary
            self.save_output(clean_summary, f"summary_{length}_words.txt")
            all_summaries.append(f"{length}-word summary:\n{clean_summary}\n\n")
            time.sleep(1)
        # Save all summaries in one file
        self.save_output("\n".join(all_summaries), "all_summaries.txt")

    def create_visualizations(self):
        """Create word-frequency, word-cloud and topic-distribution outputs.

        Nothing is produced (a warning is logged) when the transcript contains
        no words of three or more letters, because there is nothing to plot.
        """
        self._banner("πŸ“Š DATA VISUALIZATIONS")
        # Word frequency analysis
        tokens = re.findall(r'\b[a-zA-Z]{3,}\b', self.transcript.lower())
        if not tokens:
            logger.warning("Transcript contains no words; skipping visualizations")
            return
        common_words = Counter(tokens).most_common(500)
        # Create Plotly bar chart and save as HTML
        top_words, top_counts = zip(*common_words)
        fig = px.bar(x=top_words, y=top_counts, title="Top 500 Words in Lecture")
        self.save_plotly_html(fig, "word_frequency.html")
        # Save word frequency data
        freq_data = "\n".join([f"{word}: {count}" for word, count in common_words])
        self.save_output(freq_data, "word_frequency_data.txt")

        # Create word cloud with matplotlib (since Plotly doesn't have word cloud)
        print("\nGenerating word cloud...")
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(self.transcript)
        cloud_fig = plt.figure(figsize=(10, 5))
        try:
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('Word Cloud of Lecture Content')
            self.save_matplotlib_figure(cloud_fig, "word_cloud.png")
        finally:
            # Release the figure even if drawing or saving fails.
            plt.close(cloud_fig)

        # Topic distribution visualization using Plotly
        print("\nGenerating topic analysis...")
        topics = ['AI', 'storytelling', 'ethics', 'risk', 'cooperation', 'trust', 'alignment']
        topic_counts = self._count_topic_mentions(topics)
        fig = px.pie(values=list(topic_counts.values()), names=list(topic_counts.keys()),
                     title='Topic Distribution in Lecture')
        self.save_plotly_html(fig, "topic_distribution.html")
        # Save topic data
        topic_data = "\n".join([f"{topic}: {count}" for topic, count in topic_counts.items()])
        self.save_output(topic_data, "topic_data.txt")

    def generate_debate(self):
        """Generate and save arguments for and against rapid AI development."""
        self._banner("βš–οΈ DEBATE: AI DEVELOPMENT - PROS AND CONS")
        prompt = f"""Based on this lecture, create a structured debate with 5 strong arguments FOR
rapid AI development and 5 strong arguments AGAINST rapid AI development.
Format as two clear sections with compelling points.
Lecture content: {self.transcript[:8000]}
DEBATE STRUCTURE:
ARGUMENTS FOR RAPID AI DEVELOPMENT:
1.
2.
3.
4.
5.
ARGUMENTS AGAINST RAPID AI DEVELOPMENT:
1.
2.
3.
4.
5. """
        self._generate_and_save(prompt, "ai_development_debate.txt", max_tokens=1024)

    def write_article(self):
        """Generate and save a technology-publication style article."""
        self._banner("πŸ“° PROFESSIONAL ARTICLE")
        prompt = f"""Write a comprehensive 500-word article suitable for a technology publication
about this Yuval Harari lecture. Include:
- Key themes discussed
- Importance of the alignment problem
- Societal implications of AI storytelling
- Expert perspectives from the lecture
- Future outlook
Transcript: {self.transcript[:10000]}
ARTICLE:"""
        self._generate_and_save(prompt, "professional_article.txt", max_tokens=1024)

    def write_editorial(self):
        """Generate and save an editorial opinion piece."""
        self._banner("✍️ EDITORIAL OPINION")
        prompt = f"""Write a compelling editorial (400 words) expressing a strong viewpoint about
the issues raised in this lecture. Take a clear stance on AI regulation and development,
supporting your position with evidence from the lecture.
Key lecture points: {self.transcript[:5000]}
EDITORIAL:"""
        self._generate_and_save(prompt, "editorial_opinion.txt", max_tokens=1024)

    def generate_qna(self):
        """Generate and save ten questions with answers about the lecture."""
        self._banner("❓ Q&A SESSION")
        prompt = f"""Create 10 insightful questions about this lecture with detailed answers.
Focus on the most important and controversial aspects.
Lecture content: {self.transcript[:6000]}
QUESTIONS AND ANSWERS:
1. Q:
A:
2. Q:
A:
[Continue for 10 questions]"""
        self._generate_and_save(prompt, "qna_session.txt", max_tokens=4096)

    # ------------------------------------------------------------------
    # Timelines
    # ------------------------------------------------------------------
    def is_whisper_format(self, data):
        """Return True if *data* looks like Whisper output.

        That is: a dict whose ``segments`` value is a non-empty list and whose
        first segment is a dict containing a ``start`` key. Any other shape
        (including non-dict JSON such as a top-level list) yields False.
        """
        if not isinstance(data, dict):
            return False
        segments = data.get('segments')
        if not isinstance(segments, list) or not segments:
            return False
        first = segments[0]
        return isinstance(first, dict) and 'start' in first

    def convert_whisper_to_timeline(self, whisper_data):
        """Convert Whisper-style segments to ``{"sections": [...]}``.

        Each section carries ``start_time``/``end_time`` as ``HH:MM:SS``
        strings, the stripped segment ``text``, and a ``topic`` made of the
        first 50 characters of the text (``"Segment <n>"`` when the text is
        empty). Missing or null start/end values are treated as 0.
        """
        timeline = {"sections": []}
        for position, segment in enumerate(whisper_data.get('segments', []), start=1):
            text = segment.get('text', '').strip()
            # Create a short topic from the text
            topic = text[:50] + "..." if len(text) > 50 else text
            if not topic:
                topic = f"Segment {position}"
            timeline["sections"].append({
                "start_time": self._format_hms(segment.get('start') or 0),
                "end_time": self._format_hms(segment.get('end') or 0),
                "topic": topic,
                "text": text
            })
        return timeline

    def create_timeline_visualization_plotly(self, timestamps_data, title, filename):
        """Plot per-section durations with Plotly and save the chart as HTML.

        A pie chart is produced when *title* contains ``"TIMELINE 1"``,
        otherwise a bar chart. Nothing is written when *timestamps_data* has
        no ``sections`` key or no section has a positive, parseable duration.
        The HTML name is derived from *filename* by replacing ``.txt``.
        """
        if not timestamps_data or 'sections' not in timestamps_data:
            return
        segments, durations, labels = self._collect_segment_durations(
            timestamps_data.get('sections', [])
        )
        if not durations:
            return
        if "TIMELINE 1" in title:
            # Create pie chart for timeline 1
            fig = px.pie(values=durations, names=labels, title=f'{title} - Segment Durations')
            self.save_plotly_html(fig, filename.replace('.txt', '_durations_pie.html'))
        else:
            # Create bar chart for other timelines
            fig = px.bar(x=segments, y=durations, title=f'{title} - Segment Durations',
                         labels={'x': 'Segment Number', 'y': 'Duration (seconds)'})
            fig.update_layout(xaxis=dict(tickvals=segments, ticktext=labels))
            self.save_plotly_html(fig, filename.replace('.txt', '_durations.html'))

    def create_timeline(self, timestamps_data, title, filename):
        """Print and save a text timeline, then save its duration chart.

        Whisper-format input is converted first. Empty input, or input that is
        not a dict, is reported on stdout and skipped.
        """
        if not timestamps_data:
            print(f"No timestamp data available for {title}")
            return
        if not isinstance(timestamps_data, dict):
            # Sections are read with dict access below; other JSON shapes cannot be used.
            print(f"Unsupported timestamp format for {title}")
            return
        print(f"\n⏰ {title}")
        print("=" * 80)
        # Check if data is in Whisper format and convert if needed
        if self.is_whisper_format(timestamps_data):
            print("Detected Whisper format - converting to timeline format")
            timestamps_data = self.convert_whisper_to_timeline(timestamps_data)

        # Build the text timeline in pieces and join once at the end.
        pieces = [f"{title}\n\n"]
        for number, section in enumerate(timestamps_data.get('sections', []), 1):
            start_time = section.get('start_time', '00:00:00')
            topic = section.get('topic', 'Unknown')
            full_text = section.get('text', '')
            pieces.append(f"{number}. {start_time} - {topic}\n")
            if full_text:
                pieces.append(f" Text: {full_text}\n")
            pieces.append("\n")
        timeline_text = "".join(pieces)
        print(timeline_text)
        self.save_output(timeline_text, filename)
        # Create visualization using Plotly
        self.create_timeline_visualization_plotly(timestamps_data, title, filename)

    def create_timelines(self):
        """Create timelines for both loaded timestamp data sets."""
        self._banner("⏰ LECTURE TIMELINES")
        # Create timeline for first timestamp file
        self.create_timeline(self.timestamps, "LECTURE TIMELINE 1", "timeline_1.txt")
        # Create timeline for second timestamp file
        self.create_timeline(self.timestamps_2, "LECTURE TIMELINE 2", "timeline_2.txt")

    # ------------------------------------------------------------------
    # Insights, recommendations, README
    # ------------------------------------------------------------------
    def generate_key_insights(self):
        """Generate and save key insights, plus a radar chart of theme importance.

        The radar chart uses fixed example values defined in this method; it
        is not derived from the generated insights text.
        """
        self._banner("πŸ’‘ KEY INSIGHTS ANALYSIS")
        prompt = f"""Extract the 7 most profound insights from this lecture. For each insight:
1. State the insight clearly
2. Explain its significance
3. Provide supporting evidence from the lecture
4. Rate its importance (1-10)
Lecture: {self.transcript[:8000]}
KEY INSIGHTS:"""
        self._generate_and_save(prompt, "key_insights.txt", max_tokens=1024)
        # Create a radar chart of insight importance using Plotly
        print("\nGenerating insights visualization...")
        categories = ['Storytelling Power', 'AI Risks', 'Alignment Challenge',
                      'Ethical Frameworks', 'Human Cooperation', 'Trust Issues', 'Future Implications']
        values = [8, 9, 9, 7, 8, 8, 9]  # Example values
        # Create radar chart with Plotly
        fig = go.Figure(data=go.Scatterpolar(
            r=values,
            theta=categories,
            fill='toself'
        ))
        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 10])),
            title="Importance of Lecture Themes"
        )
        self.save_plotly_html(fig, "insights_radar_chart.html")

    def generate_recommendations(self):
        """Generate and save policy, company, personal and global recommendations."""
        self._banner("πŸ“‹ POLICY AND PERSONAL RECOMMENDATIONS")
        prompt = f"""Based on this lecture, create:
1. 5 policy recommendations for governments
2. 5 recommendations for AI companies
3. 5 personal actions individuals can take
4. 3 global cooperation initiatives needed
Lecture content: {self.transcript[:7000]}
RECOMMENDATIONS:"""
        self._generate_and_save(prompt, "recommendations.txt", max_tokens=4096)

    def create_readme(self):
        """Write ``README.md`` describing the generated artefacts."""
        readme_content = f"""# GPT-OSS-120B Analysis Output
## Analysis of Yuval Noah Harari Lecture on AI and Humanity
### Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
### Contents:
1. **Original Transcript** - The complete lecture transcript
2. **Multi-Length Summaries** - Summaries of various lengths (10-300 words)
3. **Data Visualizations** - Word frequency, word cloud, and topic distribution
4. **AI Development Debate** - Pros and cons of rapid AI development
5. **Professional Article** - Technology publication-style article
6. **Editorial Opinion** - Strong viewpoint on AI regulation
7. **Q&A Session** - 10 insightful questions with detailed answers
8. **Lecture Timelines** - Text and visual timelines of the lecture structure
9. **Key Insights** - 7 profound insights with significance and ratings
10. **Recommendations** - Policy, personal, and global cooperation recommendations
11. **T-Shirt Designs** - Flux1-Krea-dev graphic t-shirt prompts
### Visualization Files:
- HTML files: Interactive Plotly visualizations
- PNG files: Static images (word cloud)
### Model Information:
- Model: GPT-OSS-120B (4-bit quantized)
- Parameters: 120 billion
- Hardware: Apple M3 Ultra with 512GB RAM
### Analysis Themes:
- Storytelling as human differentiator
- AI risks and benefits
- Alignment problem
- Ethical frameworks
- Human cooperation and trust
- Future implications of AI
"""
        self.save_output(readme_content, "README.md")

    def run_comprehensive_demo(
        self,
        transcript_path: str = "yuval_harari_lecture_transcript.txt",
        timestamps_path: str = "yuval_harari_lecture_timestamps.json",
        timestamps_2_path: str = "yuval_harari_lecture_timestamps_2.json",
    ):
        """Load the inputs, run every demonstration, and list the saved files.

        A failure in one demonstration is logged (with the step name and
        traceback) and the run continues with the next one. Errors raised
        while loading the input files are not suppressed.

        Args:
            transcript_path: Transcript text file to analyse.
            timestamps_path: First timestamps JSON file.
            timestamps_2_path: Second timestamps JSON file.
        """
        print("πŸš€ Starting Comprehensive GPT-OSS-120B Demonstration")
        print("πŸ’Ύ Model: 120B parameters, 4-bit quantized")
        print("πŸ“š Analyzing: Yuval Noah Harari Lecture on AI and Humanity")
        print("=" * 80)
        # Load data
        self.load_data(transcript_path, timestamps_path, timestamps_2_path)
        # Run all demonstrations
        demonstrations = [
            self.generate_summaries,
            self.create_visualizations,
            self.generate_debate,
            self.write_article,
            self.write_editorial,
            self.generate_qna,
            self.create_timelines,
            self.generate_key_insights,
            self.generate_recommendations,
            self.generate_tshirt_prompts,
            self.create_readme,
        ]
        for step in demonstrations:
            try:
                step()
                time.sleep(2)
            except Exception as e:
                # Best-effort run: record which step failed and move on.
                logger.exception("Error in demonstration %s: %s", step.__name__, e)
                continue
        print(f"\nπŸŽ‰ All outputs saved to: {self.output_dir}")
        print("πŸ“‹ Contents:")
        for name in sorted(os.listdir(self.output_dir)):
            print(f" - {name}")
def main():
    """Script entry point: load the model and run every demonstration."""
    demo = GPTOSSDemo()
    demo.run_comprehensive_demo()


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()