import gradio as gr
import os
import json
import time
from typing import List, Dict, Optional, Tuple
import logging

# Import the existing app components
from app.models import ResearchGoal, ContextMemory
from app.agents import SupervisorAgent
from app.utils import logger, is_huggingface_space, get_deployment_environment, filter_free_models
from app.tools.arxiv_search import ArxivSearchTool
import requests

# Global state for the Gradio app
global_context = ContextMemory()
supervisor = SupervisorAgent()
current_research_goal: Optional[ResearchGoal] = None
available_models: List[str] = []

# Configure logging for Gradio
logging.basicConfig(level=logging.INFO)


def fetch_available_models():
    """Fetch available models from OpenRouter with environment-based filtering."""
    global available_models

    # Detect deployment environment
    deployment_env = get_deployment_environment()
    is_hf_spaces = is_huggingface_space()

    logger.info(f"Detected deployment environment: {deployment_env}")
    logger.info(f"Is Hugging Face Spaces: {is_hf_spaces}")

    try:
        response = requests.get("https://openrouter.ai/api/v1/models", timeout=10)
        response.raise_for_status()
        models_data = response.json().get("data", [])

        # Extract all model IDs
        all_models = sorted([model.get("id") for model in models_data if model.get("id")])

        # Create filtered free models list
        free_models = filter_free_models(all_models)

        # Apply filtering based on environment
        if is_hf_spaces:
            # Use only free models for Hugging Face Spaces
            available_models = free_models
            logger.info(f"Hugging Face Spaces: Filtered to {len(available_models)} free models")
        else:
            # Use all models in local/development environment
            available_models = all_models
            logger.info(f"Local/Development: Using all {len(available_models)} models")
    except Exception as e:
        logger.error(f"Failed to fetch models from OpenRouter: {e}")
        # Fallback to safe defaults
        if is_hf_spaces:
            # Use a known free model as fallback
            available_models = ["google/gemini-2.0-flash-001:free"]
        else:
            available_models = ["google/gemini-2.0-flash-001"]

    return available_models


def get_deployment_status():
    """Get deployment status information."""
    deployment_env = get_deployment_environment()
    is_hf_spaces = is_huggingface_space()

    if is_hf_spaces:
        status = f"🚀 Running in {deployment_env} | Models filtered for cost control ({len(available_models)} available)"
        color = "orange"
    else:
        status = f"💻 Running in {deployment_env} | All models available ({len(available_models)} total)"
        color = "blue"

    return status, color


def set_research_goal(
    description: str,
    llm_model: Optional[str] = None,
    num_hypotheses: int = 3,
    generation_temperature: float = 0.7,
    reflection_temperature: float = 0.5,
    elo_k_factor: int = 32,
    top_k_hypotheses: int = 2
) -> Tuple[str, str]:
    """Set the research goal and initialize the system."""
    global current_research_goal, global_context

    if not description.strip():
        return "❌ Error: Please enter a research goal.", ""

    try:
        # Create research goal with settings
        current_research_goal = ResearchGoal(
            description=description.strip(),
            constraints={},
            llm_model=llm_model if llm_model and llm_model != "-- Select Model --" else None,
            num_hypotheses=num_hypotheses,
            generation_temperature=generation_temperature,
            reflection_temperature=reflection_temperature,
            elo_k_factor=elo_k_factor,
            top_k_hypotheses=top_k_hypotheses
        )

        # Reset context
        global_context = ContextMemory()

        logger.info(f"Research goal set: {description}")
        logger.info(f"Settings: model={current_research_goal.llm_model}, num={current_research_goal.num_hypotheses}")

        status_msg = (
            f"✅ Research goal set successfully!\n\n"
            f"**Goal:** {description}\n"
            f"**Model:** {current_research_goal.llm_model or 'Default'}\n"
            f"**Hypotheses per cycle:** {num_hypotheses}"
        )
        return status_msg, "Ready to run first cycle. Click 'Run Cycle' to begin."
    except Exception as e:
        error_msg = f"❌ Error setting research goal: {str(e)}"
        logger.error(error_msg)
        return error_msg, ""


def run_cycle() -> Tuple[str, str, str]:
    """Run a single research cycle with detailed step logging for debugging."""
    import datetime
    global current_research_goal, global_context, supervisor

    if not current_research_goal:
        return "❌ Error: No research goal set. Please set a research goal first.", "", ""

    # Prepare log file
    log_dir = "results"
    os.makedirs(log_dir, exist_ok=True)
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    log_file = os.path.join(log_dir, f"app_log_{timestamp}.txt")
    with open(log_file, "w") as f:
        f.write(f"LOGGING FOR THIS GOAL: {current_research_goal.description}\n")
        f.write("--- Endpoint /run_cycle START ---\n")

    try:
        iteration = global_context.iteration_number + 1
        logger.info(f"Running cycle {iteration}")

        # Run the cycle
        cycle_details = supervisor.run_cycle(current_research_goal, global_context)

        # Log all steps and hypotheses
        steps = cycle_details.get("steps", {})
        with open(log_file, "a") as f:
            for step_name, step_data in steps.items():
                hypos = step_data.get("hypotheses", [])
                f.write(f"Step: {step_name} | {len(hypos)} hypotheses\n")
                for h in hypos:
                    f.write(f" - ID: {h.get('id')} | Title: {h.get('title')} | Elo: {h.get('elo_score', 'N/A')}\n")

        # Format results for display (also logs final rankings)
        results_html = format_cycle_results(cycle_details, log_file=log_file)

        # Get references
        references_html = get_references_html(cycle_details)

        # Status message
        status_msg = f"✅ Cycle {iteration} completed successfully! Log: {log_file}"

        return status_msg, results_html, references_html
    except Exception as e:
        error_msg = f"❌ Error during cycle execution: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return error_msg, "", ""


def format_cycle_results(cycle_details: Dict, log_file: str = None) -> str:
    """Format cycle results as HTML with expandable sections. Optionally log final rankings to log_file."""
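    # The supervisor returns a "steps" mapping keyed by step name (generation,
    # reflection, ranking, evolution, proximity, meta_review); each branch below
    # renders one of those steps.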
    html = f"<h2>Cycle {global_context.iteration_number} Results</h2>"

    steps = cycle_details.get("steps", {})
    for step_name, step_data in steps.items():
        if not isinstance(step_data, dict):
            continue

        html += f"<details open><summary><strong>{step_name.replace('_', ' ').title()}</strong></summary>"

        if step_name == 'generation':
            hypotheses = step_data.get('hypotheses', [])
            html += f"<p><strong>Generated {len(hypotheses)} new hypotheses:</strong></p>"
            for i, hypo in enumerate(hypotheses):
                html += f"""
                <div style="margin: 8px 0; padding: 8px; border: 1px solid #dee2e6; border-radius: 4px;">
                    <strong>{i + 1}.</strong> {hypo.get('text', 'No description')}
                </div>
                """

        elif step_name == 'reflection':
            hypotheses = step_data.get('hypotheses', [])
            html += f"<p><strong>Reviewed {len(hypotheses)} hypotheses:</strong></p>"
            for hypo in hypotheses:
                html += f"""
                <div style="margin: 8px 0; padding: 8px; border: 1px solid #dee2e6; border-radius: 4px;">
                    Novelty: {hypo.get('novelty_review', 'Not assessed')} | Feasibility: {hypo.get('feasibility_review', 'Not assessed')}
                    {f"<br>Comments: {hypo.get('comments', 'No comments')}" if hypo.get('comments') else ""}
                </div>
                """

        elif step_name == 'ranking':
            hypotheses = step_data.get('hypotheses', [])
            html += f"<p><strong>Ranking results ({len(hypotheses)} hypotheses):</strong></p>"
            html += "<ol>"
            for hypo in hypotheses:
                html += f"<li>ID: {hypo.get('id', 'Unknown')} | Elo: {hypo.get('elo_score', 'N/A')}</li>"
            html += "</ol>"

        elif step_name == 'evolution':
            hypotheses = step_data.get('hypotheses', [])
            html += f"<p><strong>Evolved {len(hypotheses)} new hypotheses by combining top performers:</strong></p>"
            for hypo in hypotheses:
                html += f"""
                <div style="margin: 8px 0; padding: 8px; border: 1px solid #dee2e6; border-radius: 4px;">
                    {hypo.get('text', 'No description')}
                </div>
                """
        elif step_name == 'proximity':
            adjacency_graph = step_data.get('adjacency_graph', {})
            html += "<p><strong>Similarity Analysis:</strong></p>"
            if adjacency_graph:
                num_hypotheses = len(adjacency_graph)
                html += f"<p>Analyzed relationships between {num_hypotheses} hypotheses<br>"

                # Calculate and display average similarity
                all_similarities = []
                for hypo_id, connections in adjacency_graph.items():
                    for conn in connections:
                        all_similarities.append(conn.get('similarity', 0))
                if all_similarities:
                    avg_sim = sum(all_similarities) / len(all_similarities)
                    html += f"Average similarity: {avg_sim:.3f}<br>"
                    html += f"Total connections analyzed: {len(all_similarities)}</p>"

                # Show top similar pairs
                similarity_pairs = []
                for hypo_id, connections in adjacency_graph.items():
                    for conn in connections:
                        similarity_pairs.append((hypo_id, conn.get('other_id'), conn.get('similarity', 0)))
                # Sort by similarity and show top 5
                similarity_pairs.sort(key=lambda x: x[2], reverse=True)
                if similarity_pairs:
                    html += "<p><strong>Most similar pairs:</strong></p><ul>"
                    for id_a, id_b, sim in similarity_pairs[:5]:
                        html += f"<li>{id_a} ↔ {id_b}: {sim:.3f}</li>"
                    html += "</ul>"
            else:
                html += "<p>No proximity data available.</p>"

        elif step_name == 'meta_review':
            # Debug: log the actual meta_review data structure
            import sys
            print("DEBUG: meta_review step_data =", step_data, file=sys.stderr)
            assert isinstance(step_data, dict), "meta_review step_data is not a dict"
            # Accept both direct dict or nested under 'meta_review'
            if "meta_review" in step_data and isinstance(step_data["meta_review"], dict):
                meta_review = step_data["meta_review"]
            else:
                meta_review = step_data
            assert "meta_review_critique" in meta_review, f"meta_review_critique missing in meta_review: {meta_review}"
            assert "research_overview" in meta_review, f"research_overview missing in meta_review: {meta_review}"

            # Critique section
            if meta_review.get('meta_review_critique'):
                html += "<p><strong>Meta-review critique:</strong></p>"
                html += f"<p>{meta_review.get('meta_review_critique')}</p>"

            # Research overview section
            if meta_review.get('research_overview'):
                html += "<p><strong>Research overview:</strong></p>"
                html += f"<p>{meta_review.get('research_overview')}</p>"

            # Hypotheses carried on the meta-review step, if any
            for hypo in step_data.get('hypotheses', []):
                html += f"<p>ID: {hypo.get('id', 'Unknown')} | Elo Score: {hypo.get('elo_score', 0):.2f}<br>"
                html += f"Description: {hypo.get('text', 'No description')}<br>"
                html += f"Novelty: {hypo.get('novelty_review', 'Not assessed')} | Feasibility: {hypo.get('feasibility_review', 'Not assessed')}</p>"

        # Per-step duration, when the supervisor reports one
        if 'duration' in step_data:
            html += f"<p><em>Duration: {step_data['duration']:.2f}s</em></p>"

        html += "</details>"

    # Final Rankings section: prefer the ranking step's hypotheses, otherwise
    # fall back to the latest step that carries any hypotheses.
    final_step = None
    final_hypotheses = []
    if isinstance(steps.get('ranking'), dict) and steps['ranking'].get('hypotheses'):
        final_step = 'ranking'
        final_hypotheses = steps['ranking']['hypotheses']
    else:
        for name, data in steps.items():
            if isinstance(data, dict) and data.get('hypotheses'):
                final_step = name
                final_hypotheses = data['hypotheses']

    html += "<h3>Final Rankings</h3>"
    if final_hypotheses:
        if final_step != 'ranking':
            html += '<p style="color: orange;">Warning: No ranking step found. Showing hypotheses from the latest available step ("{}"). These may not be ranked.</p>'.format(final_step)

        # Log final rankings if log_file is provided
        if log_file:
            with open(log_file, "a") as f:
                f.write(f"--- Final Rankings Section (step: {final_step}) ---\n")
                for i, hypo in enumerate(final_hypotheses[:10]):
                    f.write(f" #{i+1}: ID: {hypo.get('id')} | Title: {hypo.get('title')} | Elo: {hypo.get('elo_score', 'N/A')}\n")

        for i, hypo in enumerate(final_hypotheses[:10]):  # Show top 10
            # Rank colors: top 3 green, next 3 teal, remainder gray
            rank_color = "#28a745" if i < 3 else "#17a2b8" if i < 6 else "#6c757d"
            html += f"""
            <div style="margin: 8px 0; padding: 8px; border-left: 4px solid {rank_color};">
                <strong>#{i + 1}</strong> ID: {hypo.get('id', 'Unknown')} | Elo Score: {hypo.get('elo_score', 0):.2f}<br>
                Description: {hypo.get('text', 'No description')}<br>
                Novelty: {hypo.get('novelty_review', 'Not assessed')} | Feasibility: {hypo.get('feasibility_review', 'Not assessed')}
            </div>
            """
    else:
        html += "<p>No hypotheses available for final ranking. This may indicate an error in the workflow.</p>"

    return html
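

# The references panel is populated by get_references_html below, which queries
# arXiv (via the project's ArxivSearchTool) using the current research goal's
# description as the search query.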
def get_references_html(cycle_details: Dict) -> str:
    """Fetch arXiv papers related to the current research goal and format them as HTML."""
    try:
        if current_research_goal:
            # NOTE: assumes ArxivSearchTool exposes a search(query, max_results=...)
            # method returning a list of paper dicts.
            arxiv_tool = ArxivSearchTool()
            papers = arxiv_tool.search(current_research_goal.description, max_results=5)

            if papers:
                html = "<h3>Related arXiv Papers</h3>"
                for paper in papers:
                    html += f"""
                    <div style="margin: 10px 0; padding: 10px; border: 1px solid #dee2e6; border-radius: 4px;">
                        <strong>{paper.get('title', 'Untitled')}</strong><br>
                        Authors: {', '.join(paper.get('authors', [])[:5])}<br>
                        arXiv ID: {paper.get('arxiv_id', 'Unknown')} | Published: {paper.get('published', 'Unknown')}<br>
                        Abstract: {paper.get('abstract', 'No abstract')[:300]}...
                    </div>
                    """
                return html
            else:
                return "<p>No related arXiv papers found.</p>"
        else:
            return "<p>No research goal set for reference search.</p>"
    except Exception as e:
        logger.error(f"Error fetching references: {e}")
        return f"<p>Error loading references: {str(e)}</p>"


def create_gradio_interface():
    """Create the Gradio interface."""
    # Fetch models on startup
    fetch_available_models()

    # Get deployment status
    status_text, status_color = get_deployment_status()

    with gr.Blocks(
        title="Open AI Co-Scientist - Hypothesis Evolution System",
        theme=gr.themes.Soft(),
        css="""
        .status-box { padding: 10px; border-radius: 8px; margin-bottom: 20px; font-weight: bold; }
        .orange { background-color: #fff3cd; border: 1px solid #ffeaa7; }
        .blue { background-color: #d1ecf1; border: 1px solid #bee5eb; }
        """
    ) as demo:
        # Header
        gr.Markdown("# 🔬 Open AI Co-Scientist - Hypothesis Evolution System")
        gr.Markdown("Generate, review, rank, and evolve research hypotheses using AI agents.")

        # Deployment status
        gr.HTML(f'<div class="status-box {status_color}">{status_text}</div>')

        # Research goal and settings (defaults mirror set_research_goal's signature)
        with gr.Row():
            with gr.Column():
                research_goal_input = gr.Textbox(
                    label="Research Goal",
                    placeholder="Describe the research goal you want to explore...",
                    lines=3
                )
                model_dropdown = gr.Dropdown(
                    choices=["-- Select Model --"] + available_models,
                    value="-- Select Model --",
                    label="LLM Model"
                )
                with gr.Row():
                    num_hypotheses = gr.Slider(1, 10, value=3, step=1, label="Hypotheses per cycle")
                    top_k_hypotheses = gr.Slider(1, 5, value=2, step=1, label="Top-K hypotheses to evolve")
                with gr.Row():
                    generation_temp = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Generation temperature")
                    reflection_temp = gr.Slider(0.0, 1.5, value=0.5, step=0.05, label="Reflection temperature")
                elo_k_factor = gr.Slider(8, 64, value=32, step=1, label="Elo K-factor")
                run_cycle_btn = gr.Button("Run Cycle", variant="primary")

        # Status section
        with gr.Row():
            with gr.Column():
                status_output = gr.Markdown()

        # Results section
        with gr.Row():
            with gr.Column():
                results_output = gr.HTML(
                    label="Results",
                    value="<p>Results will appear here after running cycles.</p>"
                )

        # References section
        with gr.Row():
            with gr.Column():
                references_output = gr.HTML(
                    label="References",
                    value="<p>Related research papers will appear here.</p>"
                )

        # Event handler: single button sets research goal and runs cycle
        def run_full_cycle(
            research_goal, llm_model, num_hypotheses, generation_temp,
            reflection_temp, elo_k_factor, top_k_hypotheses
        ):
            # Set research goal
            status_msg, _ = set_research_goal(
                research_goal, llm_model, num_hypotheses, generation_temp,
                reflection_temp, elo_k_factor, top_k_hypotheses
            )
            # Run cycle
            status, results, references = run_cycle()
            # Combine status messages
            return f"{status_msg}\n\n{status}", results, references

        run_cycle_btn.click(
            fn=run_full_cycle,
            inputs=[
                research_goal_input, model_dropdown, num_hypotheses,
                generation_temp, reflection_temp, elo_k_factor, top_k_hypotheses
            ],
            outputs=[status_output, results_output, references_output]
        )

        # Example inputs
        gr.Examples(
            examples=[
                ["Develop new methods for increasing the efficiency of solar panels"],
                ["Create novel approaches to treat Alzheimer's disease"],
                ["Design sustainable materials for construction"],
                ["Improve machine learning model interpretability"],
                ["Develop new quantum computing algorithms"]
            ],
            inputs=[research_goal_input],
            label="Example Research Goals"
        )

        # GitHub icon and link at the bottom
        gr.HTML(
            '''
            '''
        )

    return demo


if __name__ == "__main__":
    # Check for API key
    if not os.getenv("OPENROUTER_API_KEY"):
        print("⚠️ Warning: OPENROUTER_API_KEY environment variable not set.")
        print("The app will start but may not function properly without an API key.")

    # Create and launch the Gradio app
    demo = create_gradio_interface()

    # Launch with appropriate settings for HF Spaces
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )