Chunhua Liao committed on
Commit 9a12a1d · 1 Parent(s): 208f2ab

Refactor: Move application code into 'app' package

Files changed (9)
  1. README.md +5 -2
  2. app/__init__.py +1 -0
  3. app/agents.py +414 -0
  4. app/api.py +315 -0
  5. app/config.py +37 -0
  6. app/main.py +40 -0
  7. app/models.py +90 -0
  8. app/utils.py +174 -0
  9. main.py +0 -1174
README.md CHANGED
@@ -48,9 +48,12 @@ Original code was generated by o3-mini-high
    ```
    3. **Run the Application:**
    ```bash
-   python main.py
+   # Run using Uvicorn, specifying the app location within the package
+   uvicorn app.api:app --host 0.0.0.0 --port 8000
    ```
-   (Alternatively, if you want hot-reloading during development: `uvicorn main:app --host 0.0.0.0 --port 8000 --reload`)
+   (Alternatively, if you want hot-reloading during development: `uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload`)
+
+   (You can also potentially run `python -m app.main` if the `app/main.py` is set up correctly for module execution, but the `uvicorn` command is more standard for FastAPI.)
    4. **Access the Web Interface:**
    Open a web browser and go to `http://localhost:8000`. (Note: The server log may show `http://0.0.0.0:8000`, which means the server is listening on all network interfaces. However, you should use `localhost` in your browser to access the server from your local machine. You cannot directly type `0.0.0.0` into your browser's address bar.)
    5. **Enter Research Goal:**
app/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # This file makes the 'app' directory a Python package.
app/agents.py ADDED
@@ -0,0 +1,414 @@
1
+ import random
2
+ import math
3
+ import json
4
+ from typing import List, Dict
5
+
6
+ # Import necessary components from other modules
7
+ from .models import Hypothesis, ResearchGoal, ContextMemory
8
+ from .utils import (
9
+ logger, # Use the logger configured in utils
10
+ call_llm,
11
+ generate_unique_id,
12
+ similarity_score,
13
+ generate_visjs_data
14
+ )
15
+ from .config import config
16
+
17
+ # --- Agent-Specific LLM Calls (Moved from main.py/utils.py for better cohesion) ---
18
+
19
+ def call_llm_for_generation(prompt: str, num_hypotheses: int = 3) -> List[Dict]:
20
+ """Calls LLM for generating hypotheses, handling JSON parsing."""
21
+ logger.info("LLM generation called with prompt: %s, num_hypotheses: %d", prompt, num_hypotheses)
22
+ full_prompt = prompt + "\n\nPlease return the response as a JSON array of objects, where each object has a 'title' and 'text' key."
23
+
24
+ response = call_llm(full_prompt, temperature=config.get("step_temperatures", {}).get("generation", 0.7))
25
+ logger.info("LLM generation response: %s", response)
26
+
27
+ if response.startswith("Error:"):
28
+ logger.error(f"LLM generation call failed: {response}")
29
+ return [{"title": "Error", "text": response}]
30
+
31
+ try:
32
+ response = response.strip()
33
+ if response.startswith("```json"):
34
+ response = response[7:]
35
+ if response.endswith("```"):
36
+ response = response[:-3]
37
+ response = response.strip()
38
+
39
+ hypotheses_data = json.loads(response)
40
+
41
+ if not isinstance(hypotheses_data, list) or not all(isinstance(h, dict) and "title" in h and "text" in h for h in hypotheses_data):
42
+ error_message = "Invalid JSON format: Expected a list of objects with 'title' and 'text' keys."
43
+ raise ValueError(error_message)
44
+ logger.info("Parsed generated hypotheses: %s", hypotheses_data)
45
+ return hypotheses_data
46
+ except (json.JSONDecodeError, ValueError) as e:
47
+ logger.error("Could not parse LLM generation response as JSON: %s", response, exc_info=True)
48
+ return [{"title": "Error", "text": f"Could not parse LLM response: {e}"}]
49
+
50
+ def call_llm_for_reflection(hypothesis_text: str) -> Dict:
51
+ """Calls LLM for reviewing a hypothesis, handling JSON parsing."""
52
+ prompt = (
53
+ f"Review the following hypothesis and provide a novelty assessment (HIGH, MEDIUM, or LOW), "
54
+ f"a feasibility assessment (HIGH, MEDIUM, or LOW), a comment, and a list of references (PMIDs) in JSON format:\n\n"
55
+ f"Hypothesis: {hypothesis_text}\n\n"
56
+ f"Return the response as a JSON object with the following keys: 'novelty_review', 'feasibility_review', 'comment', 'references'."
57
+ )
58
+ response = call_llm(prompt, temperature=config.get("step_temperatures", {}).get("reflection", 0.5)) # Example: different temp
59
+ logger.info("LLM reflection response for hypothesis: %s", response)
60
+
61
+ if response.startswith("Error:"):
62
+ logger.error(f"LLM reflection call failed: {response}")
63
+ return {"novelty_review": "ERROR", "feasibility_review": "ERROR", "comment": response, "references": []}
64
+
65
+ # Default values
66
+ review_data = {
67
+ "novelty_review": "MEDIUM",
68
+ "feasibility_review": "MEDIUM",
69
+ "comment": "Could not parse LLM response.",
70
+ "references": [],
71
+ }
72
+
73
+ try:
74
+ response = response.strip()
75
+ if response.startswith("```json"):
76
+ response = response[7:]
77
+ if response.endswith("```"):
78
+ response = response[:-3]
79
+ response = response.strip()
80
+
81
+ parsed_data = json.loads(response)
82
+
83
+ # Update defaults with parsed data, performing basic validation
84
+ novelty = parsed_data.get("novelty_review", "MEDIUM").upper()
85
+ if novelty in ["HIGH", "MEDIUM", "LOW"]:
86
+ review_data["novelty_review"] = novelty
87
+ else:
88
+ logger.warning("Invalid novelty review value received: %s", novelty)
89
+
90
+ feasibility = parsed_data.get("feasibility_review", "MEDIUM").upper()
91
+ if feasibility in ["HIGH", "MEDIUM", "LOW"]:
92
+ review_data["feasibility_review"] = feasibility
93
+ else:
94
+ logger.warning("Invalid feasibility review value received: %s", feasibility)
95
+
96
+ review_data["comment"] = parsed_data.get("comment", "No comment provided.")
97
+ review_data["references"] = parsed_data.get("references", [])
98
+ if not isinstance(review_data["references"], list):
99
+ logger.warning("Invalid references format received: %s", review_data["references"])
100
+ review_data["references"] = []
101
+
102
+
103
+ except (json.JSONDecodeError, AttributeError, KeyError) as e:
104
+ logger.warning("Error parsing LLM reflection response: %s", response, exc_info=True)
105
+ review_data["comment"] = f"Could not parse LLM response: {e}" # Update comment with error
106
+
107
+ logger.info("Parsed reflection data: %s", review_data)
108
+ return review_data
109
+
110
+
111
+ # --- Ranking Helpers (Moved from main.py) ---
112
+
113
+ def run_pairwise_debate(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
114
+ """Compares two hypotheses based on novelty and feasibility scores."""
115
+ def score(h: Hypothesis) -> int:
116
+ mapping = {"HIGH": 3, "MEDIUM": 2, "LOW": 1, None: 0, "ERROR": 0} # Handle ERROR case
117
+ score_novelty = mapping.get(h.novelty_review, 0) if isinstance(h.novelty_review, str) else 0
118
+ score_feasibility = mapping.get(h.feasibility_review, 0) if isinstance(h.feasibility_review, str) else 0
119
+ return score_novelty + score_feasibility
120
+
121
+ scoreA = score(hypoA)
122
+ scoreB = score(hypoB)
123
+
124
+ if scoreA > scoreB:
125
+ winner = hypoA
126
+ elif scoreB > scoreA:
127
+ winner = hypoB
128
+ else:
129
+ winner = random.choice([hypoA, hypoB]) # Tie-breaker
130
+
131
+ logger.info("Debate: %s (score %d) vs %s (score %d) => Winner: %s",
132
+ hypoA.hypothesis_id, scoreA, hypoB.hypothesis_id, scoreB, winner.hypothesis_id)
133
+ return winner
134
+
135
+ def update_elo(winner: Hypothesis, loser: Hypothesis):
136
+ """Updates Elo scores after a comparison."""
137
+ k_factor = config.get("elo_k_factor", 32)
138
+ ratingA = winner.elo_score
139
+ ratingB = loser.elo_score
140
+ expectedA = 1 / (1 + math.pow(10, (ratingB - ratingA) / 400))
141
+ expectedB = 1 - expectedA # Or 1 / (1 + math.pow(10, (ratingA - ratingB) / 400))
142
+ winner.elo_score = ratingA + k_factor * (1 - expectedA)
143
+ loser.elo_score = ratingB + k_factor * (0 - expectedB) # Loser's score update
144
+ logger.info("Updated Elo: Winner %s -> %.2f, Loser %s -> %.2f",
145
+ winner.hypothesis_id, winner.elo_score, loser.hypothesis_id, loser.elo_score)
146
+
147
+ # --- Evolution Helper (Moved from main.py) ---
148
+
149
+ def combine_hypotheses(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
150
+ """Combines two hypotheses into a new one."""
151
+ new_id = generate_unique_id("E") # Use utility function
152
+ combined_title = f"Combined: {hypoA.title} & {hypoB.title}"
153
+ # Consider a more sophisticated combination prompt/logic if needed
154
+ combined_text = f"Combination of:\n1. {hypoA.text}\n2. {hypoB.text}"
155
+ logger.info("Combining hypotheses %s and %s into %s", hypoA.hypothesis_id, hypoB.hypothesis_id, new_id)
156
+ new_hypothesis = Hypothesis(new_id, combined_title, combined_text)
157
+ new_hypothesis.parent_ids = [hypoA.hypothesis_id, hypoB.hypothesis_id]
158
+ return new_hypothesis
159
+
160
+
161
+ ###############################################################################
162
+ # Agent Implementations
163
+ ###############################################################################
164
+
165
+ class GenerationAgent:
166
+ def generate_new_hypotheses(self, research_goal: ResearchGoal, context: ContextMemory) -> List[Hypothesis]:
167
+ """Generates new hypotheses using LLM."""
168
+ num_to_generate = config.get("num_hypotheses", 3)
169
+ prompt = (
170
+ f"Research Goal: {research_goal.description}\n"
171
+ f"Constraints: {research_goal.constraints}\n"
172
+ f"Existing Hypothesis IDs: {list(context.hypotheses.keys())}\n" # Provide context
173
+ f"Please propose {num_to_generate} novel and feasible hypotheses with rationale, avoiding duplication with existing IDs.\n"
174
+ )
175
+ raw_output = call_llm_for_generation(prompt, num_hypotheses=num_to_generate)
176
+ new_hypos = []
177
+ for idea in raw_output:
178
+ # Check for error response from LLM call
179
+ if idea["title"] == "Error":
180
+ logger.error("Skipping hypothesis generation due to LLM error: %s", idea["text"])
181
+ continue # Skip this one, maybe add placeholder?
182
+
183
+ hypo_id = generate_unique_id("G")
184
+ # Ensure ID is unique within the current context
185
+ while hypo_id in context.hypotheses:
186
+ hypo_id = generate_unique_id("G")
187
+ h = Hypothesis(hypo_id, idea["title"], idea["text"])
188
+ logger.info("Generated hypothesis: %s", h.to_dict())
189
+ new_hypos.append(h)
190
+ return new_hypos
191
+
192
+ class ReflectionAgent:
193
+ def review_hypotheses(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
194
+ """Reviews hypotheses using LLM."""
195
+ for h in hypotheses:
196
+ # Avoid re-reviewing if already reviewed (optional optimization)
197
+ # if h.novelty_review is not None and h.feasibility_review is not None:
198
+ # continue
199
+ result = call_llm_for_reflection(h.text)
200
+ h.novelty_review = result["novelty_review"]
201
+ h.feasibility_review = result["feasibility_review"]
202
+ # Append comment only if it's not the default error message
203
+ if result["comment"] != "Could not parse LLM response.":
204
+ h.review_comments.append(result["comment"])
205
+ # Only extend references if the list is not empty
206
+ if result["references"]:
207
+ h.references.extend(result["references"])
208
+ logger.info("Reviewed hypothesis: %s, Novelty: %s, Feasibility: %s", h.hypothesis_id, h.novelty_review, h.feasibility_review)
209
+
210
+ class RankingAgent:
211
+ def run_tournament(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
212
+ """Runs a pairwise tournament to rank hypotheses."""
213
+ if len(hypotheses) < 2:
214
+ logger.info("Not enough hypotheses to run a tournament.")
215
+ return
216
+
217
+ active_hypotheses = [h for h in hypotheses if h.is_active]
218
+ if len(active_hypotheses) < 2:
219
+ logger.info("Not enough *active* hypotheses to run a tournament.")
220
+ return
221
+
222
+ random.shuffle(active_hypotheses) # Shuffle only active ones
223
+
224
+ # Simple round-robin: each active hypothesis debates every other active one once
225
+ pairs = []
226
+ for i in range(len(active_hypotheses)):
227
+ for j in range(i + 1, len(active_hypotheses)):
228
+ pairs.append((active_hypotheses[i], active_hypotheses[j]))
229
+
230
+ logger.info(f"Running tournament with {len(pairs)} pairs.")
231
+ for hA, hB in pairs:
232
+ winner = run_pairwise_debate(hA, hB)
233
+ loser = hB if winner == hA else hA
234
+ update_elo(winner, loser)
235
+ # Record result in context (consider if this needs iteration info)
236
+ context.tournament_results.append({
237
+ "iteration": context.iteration_number, # Add iteration number
238
+ "winner": winner.hypothesis_id,
239
+ "loser": loser.hypothesis_id,
240
+ "winner_score_after": winner.elo_score,
241
+ "loser_score_after": loser.elo_score
242
+ })
243
+
244
+ class EvolutionAgent:
245
+ def evolve_hypotheses(self, context: ContextMemory) -> List[Hypothesis]:
246
+ """Evolves hypotheses by combining top candidates."""
247
+ top_k = config.get("top_k_hypotheses", 2)
248
+ active = context.get_active_hypotheses()
249
+ if len(active) < 2:
250
+ logger.info("Not enough active hypotheses to perform evolution.")
251
+ return []
252
+
253
+ sorted_by_elo = sorted(active, key=lambda h: h.elo_score, reverse=True)
254
+ top_candidates = sorted_by_elo[:top_k]
255
+
256
+ new_hypotheses = []
257
+ # Combine the top two for now, could be extended
258
+ if len(top_candidates) >= 2:
259
+ # Optional: Add check to prevent combining very similar hypotheses
260
+ # sim = similarity_score(top_candidates[0].text, top_candidates[1].text)
261
+ # if sim < 0.8: # Example threshold
262
+ new_h = combine_hypotheses(top_candidates[0], top_candidates[1])
263
+ logger.info("Evolved hypothesis created: %s from parents %s", new_h.hypothesis_id, new_h.parent_ids)
264
+ new_hypotheses.append(new_h)
265
+ # else:
266
+ # logger.info("Skipping evolution: Top 2 hypotheses are too similar (score: %.2f)", sim)
267
+
268
+ return new_hypotheses
269
+
270
+ class ProximityAgent:
271
+ def build_proximity_graph(self, context: ContextMemory) -> Dict:
272
+ """Builds proximity graph data based on hypothesis similarity."""
273
+ active_hypotheses = context.get_active_hypotheses()
274
+ adjacency = {}
275
+ if not active_hypotheses:
276
+ logger.info("No active hypotheses to build proximity graph.")
277
+ return {"adjacency_graph": {}, "nodes_str": "", "edges_str": ""}
278
+
279
+ for i in range(len(active_hypotheses)):
280
+ hypo_i = active_hypotheses[i]
281
+ adjacency[hypo_i.hypothesis_id] = []
282
+ for j in range(len(active_hypotheses)):
283
+ if i == j:
284
+ continue
285
+ hypo_j = active_hypotheses[j]
286
+ if hypo_i.text and hypo_j.text:
287
+ sim = similarity_score(hypo_i.text, hypo_j.text)
288
+ adjacency[hypo_i.hypothesis_id].append({
289
+ "other_id": hypo_j.hypothesis_id,
290
+ "similarity": sim
291
+ })
292
+ else:
293
+ logger.warning(f"Skipping similarity for {hypo_i.hypothesis_id} or {hypo_j.hypothesis_id} due to empty text.")
294
+
295
+ visjs_data = generate_visjs_data(adjacency) # Use utility function
296
+ logger.info("Built proximity graph adjacency with %d nodes.", len(active_hypotheses))
297
+ return {
298
+ "adjacency_graph": adjacency,
299
+ "nodes_str": visjs_data["nodes_str"],
300
+ "edges_str": visjs_data["edges_str"]
301
+ }
302
+
303
+ class MetaReviewAgent:
304
+ def summarize_and_feedback(self, context: ContextMemory, adjacency: Dict) -> Dict:
305
+ """Summarizes research state and provides feedback."""
306
+ active_hypotheses = context.get_active_hypotheses()
307
+ if not active_hypotheses:
308
+ return {"meta_review_critique": ["No active hypotheses."], "research_overview": {"top_ranked_hypotheses": [], "suggested_next_steps": []}}
309
+
310
+ comment_summary = set()
311
+ for h in active_hypotheses:
312
+ # Example critique based on reviews
313
+ if h.novelty_review == "LOW":
314
+ comment_summary.add("Some ideas lack novelty.")
315
+ if h.feasibility_review == "LOW":
316
+ comment_summary.add("Some ideas may have low feasibility.")
317
+ # Could add critiques based on adjacency graph (e.g., clusters, outliers)
318
+
319
+ best_hypotheses = sorted(active_hypotheses, key=lambda h: h.elo_score, reverse=True)[:3]
320
+ logger.info("Top hypotheses for meta-review: %s", [h.hypothesis_id for h in best_hypotheses])
321
+
322
+ # Example suggested next steps
323
+ next_steps = [
324
+ "Refine top hypotheses based on review comments.",
325
+ "Consider exploring areas with fewer, less connected hypotheses (if any).",
326
+ "Seek external expert feedback on top candidates."
327
+ ]
328
+ if not comment_summary:
329
+ comment_summary.add("Overall hypothesis quality seems reasonable based on automated review.")
330
+
331
+
332
+ overview = {
333
+ "meta_review_critique": list(comment_summary),
334
+ "research_overview": {
335
+ "top_ranked_hypotheses": [h.to_dict() for h in best_hypotheses], # Use to_dict for serialization
336
+ "suggested_next_steps": next_steps
337
+ }
338
+ }
339
+ context.meta_review_feedback.append(overview) # Store feedback in context
340
+ logger.info("Meta-review complete: %s", overview)
341
+ return overview
342
+
343
+ class SupervisorAgent:
344
+ """Orchestrates the AI Co-Scientist workflow."""
345
+ def __init__(self):
346
+ self.generation_agent = GenerationAgent()
347
+ self.reflection_agent = ReflectionAgent()
348
+ self.ranking_agent = RankingAgent()
349
+ self.evolution_agent = EvolutionAgent()
350
+ self.proximity_agent = ProximityAgent()
351
+ self.meta_review_agent = MetaReviewAgent()
352
+
353
+ def run_cycle(self, research_goal: ResearchGoal, context: ContextMemory) -> Dict:
354
+ """Runs a single cycle of hypothesis generation and refinement."""
355
+ logger.info("--- Starting Cycle %d ---", context.iteration_number + 1)
356
+ cycle_details = {"iteration": context.iteration_number + 1, "steps": {}, "meta_review": {}}
357
+
358
+ # 1. Generation
359
+ logger.info("Step 1: Generation")
360
+ new_hypotheses = self.generation_agent.generate_new_hypotheses(research_goal, context)
361
+ for nh in new_hypotheses:
362
+ context.add_hypothesis(nh) # Add to central context
363
+ cycle_details["steps"]["generation"] = {"hypotheses": [h.to_dict() for h in new_hypotheses]}
364
+
365
+ # Get all active hypotheses for subsequent steps
366
+ active_hypos = context.get_active_hypotheses()
367
+
368
+ # 2. Reflection
369
+ logger.info("Step 2: Reflection")
370
+ self.reflection_agent.review_hypotheses(active_hypos, context) # Review all active hypotheses
371
+ cycle_details["steps"]["reflection"] = {"hypotheses": [h.to_dict() for h in active_hypos]} # Log state after review
372
+
373
+ # 3. Ranking (Tournament 1)
374
+ logger.info("Step 3: Ranking 1")
375
+ self.ranking_agent.run_tournament(active_hypos, context)
376
+ # Log state after ranking (Elo scores updated)
377
+ cycle_details["steps"]["ranking1"] = {"hypotheses": [h.to_dict() for h in active_hypos]}
378
+
379
+ # 4. Evolution
380
+ logger.info("Step 4: Evolution")
381
+ evolved_hypotheses = self.evolution_agent.evolve_hypotheses(context)
382
+ if evolved_hypotheses:
383
+ for eh in evolved_hypotheses:
384
+ context.add_hypothesis(eh)
385
+ # 4a. Review newly evolved hypotheses immediately
386
+ logger.info("Step 4a: Reviewing Evolved Hypotheses")
387
+ self.reflection_agent.review_hypotheses(evolved_hypotheses, context)
388
+ # Update active list for next steps
389
+ active_hypos = context.get_active_hypotheses()
390
+ cycle_details["steps"]["evolution"] = {"hypotheses": [h.to_dict() for h in evolved_hypotheses]}
391
+
392
+ # 5. Ranking (Tournament 2 - includes evolved)
393
+ logger.info("Step 5: Ranking 2")
394
+ self.ranking_agent.run_tournament(active_hypos, context)
395
+ cycle_details["steps"]["ranking2"] = {"hypotheses": [h.to_dict() for h in active_hypos]}
396
+
397
+ # 6. Proximity Analysis
398
+ logger.info("Step 6: Proximity Analysis")
399
+ proximity_result = self.proximity_agent.build_proximity_graph(context) # Pass context
400
+ cycle_details["steps"]["proximity"] = {
401
+ "adjacency_graph": proximity_result["adjacency_graph"],
402
+ "nodes_str": proximity_result["nodes_str"],
403
+ "edges_str": proximity_result["edges_str"]
404
+ }
405
+
406
+ # 7. Meta-review
407
+ logger.info("Step 7: Meta-Review")
408
+ overview = self.meta_review_agent.summarize_and_feedback(context, proximity_result["adjacency_graph"])
409
+ cycle_details["meta_review"] = overview
410
+
411
+ # Increment iteration number at the end of the cycle
412
+ context.iteration_number += 1
413
+ logger.info("--- Cycle %d Complete ---", context.iteration_number)
414
+ return cycle_details
app/api.py ADDED
@@ -0,0 +1,315 @@
1
+ import datetime
2
+ from typing import List, Optional, Dict
3
+
4
+ from fastapi import FastAPI, HTTPException, responses
5
+ from fastapi.staticfiles import StaticFiles
6
+
7
+ # Import components from other modules in the package
8
+ from .models import (
9
+ ContextMemory, ResearchGoal, ResearchGoalRequest,
10
+ HypothesisResponse, Hypothesis # Hypothesis needed by ContextMemory
11
+ )
12
+ from .agents import SupervisorAgent
13
+ from .utils import logger # Use the configured logger
14
+ # from .config import config # Config might be needed if endpoints use it directly
15
+
16
+ ###############################################################################
17
+ # FastAPI Application Setup
18
+ ###############################################################################
19
+
20
+ app = FastAPI(title="AI Co-Scientist System", version="1.0")
21
+
22
+ # --- Global State (Consider alternatives for production) ---
23
+ # These globals make the app stateful, which can be problematic for scaling.
24
+ # For simple cases or demos, it might be acceptable.
25
+ # Alternatives: Dependency Injection with classes, external storage (DB, Redis).
26
+ global_context = ContextMemory()
27
+ supervisor = SupervisorAgent()
28
+ current_research_goal: Optional[ResearchGoal] = None
29
+
30
+ # --- Static Files ---
31
+ # Assuming a 'static' directory exists at the project root
32
+ # If it should be inside 'app', adjust the path: StaticFiles(directory="app/static")
33
+ try:
34
+ app.mount("/static", StaticFiles(directory="static"), name="static")
35
+ logger.info("Mounted static files directory.")
36
+ except RuntimeError as e:
37
+ logger.warning(f"Could not mount static directory (may not exist): {e}")
38
+
39
+
40
+ ###############################################################################
41
+ # API Endpoints
42
+ ###############################################################################
43
+
44
+ @app.post("/research_goal", response_model=dict)
45
+ def set_research_goal(goal: ResearchGoalRequest):
46
+ """Sets the research goal and resets the context."""
47
+ global current_research_goal, global_context
48
+ logger.info(f"Received new research goal: {goal.description}")
49
+ current_research_goal = ResearchGoal(goal.description, goal.constraints)
50
+ # Reset context for the new goal
51
+ global_context = ContextMemory()
52
+ logger.info("Global context reset for new research goal.")
53
+ # Note: Logger setup per request might be better handled via middleware or dependency
54
+ # timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
55
+ # log_filename = f"log_{timestamp}.txt" # This will create many log files
56
+ # setup_logger(log_filename) # Consider if logger needs reconfiguration per goal
57
+
58
+ return {"message": "Research goal successfully set. Ready to run cycles."}
59
+
60
+ @app.post("/run_cycle", response_model=Dict) # Return type might be more specific, e.g., CycleResponse
61
+ def run_cycle_endpoint():
62
+ """Runs a single cycle of the AI Co-Scientist workflow."""
63
+ global current_research_goal, global_context, supervisor
64
+ if not current_research_goal:
65
+ logger.error("Run cycle called before setting research goal.")
66
+ raise HTTPException(status_code=400, detail="No research goal set. Please POST to /research_goal first.")
67
+
68
+ logger.info(f"Running cycle {global_context.iteration_number + 1} for goal: {current_research_goal.description}")
69
+ try:
70
+ # The supervisor agent now handles the full cycle logic
71
+ cycle_details = supervisor.run_cycle(current_research_goal, global_context)
72
+ logger.info(f"Cycle {global_context.iteration_number} complete.") # Iteration number was incremented in run_cycle
73
+ return cycle_details
74
+ except Exception as e:
75
+ logger.error(f"Error during cycle execution: {e}", exc_info=True)
76
+ raise HTTPException(status_code=500, detail=f"An internal error occurred during cycle execution: {e}")
77
+
78
+
79
+ @app.get("/hypotheses", response_model=List[HypothesisResponse])
80
+ def list_hypotheses_endpoint():
81
+ """Retrieves a list of all currently active hypotheses."""
82
+ global global_context
83
+ active_hypotheses = global_context.get_active_hypotheses()
84
+ logger.info(f"Retrieving {len(active_hypotheses)} active hypotheses.")
85
+ # Convert Hypothesis objects to dicts using .to_dict() before creating HypothesisResponse
86
+ # Pydantic should handle the conversion if the fields match, but explicit is safer
87
+ return [HypothesisResponse(**h.to_dict()) for h in active_hypotheses]
88
+
89
+ @app.get("/")
90
+ async def root_endpoint():
91
+ """Serves the main HTML page."""
92
+ logger.debug("Serving root HTML page.")
93
+ # HTML content remains largely the same, ensure JS function names match
94
+ return responses.HTMLResponse(content="""
95
+ <!DOCTYPE html>
96
+ <html>
97
+ <head>
98
+ <title>AI Co-Scientist</title>
99
+ <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
100
+ <style>
101
+ body { font-family: sans-serif; margin: 20px; }
102
+ textarea { width: 90%; }
103
+ button { margin-top: 10px; padding: 8px 15px; }
104
+ #results { margin-top: 20px; border-top: 1px solid #eee; padding-top: 20px; }
105
+ #errors { color: red; margin-top: 10px; }
106
+ h2, h3, h4, h5 { margin-top: 1.5em; }
107
+ ul { padding-left: 20px; }
108
+ li { margin-bottom: 10px; }
109
+ #mynetwork {
110
+ width: 100%;
111
+ height: 500px; /* Explicit height */
112
+ border: 1px solid lightgray;
113
+ margin-bottom: 10px;
114
+ }
115
+ .graph-explanation p {
116
+ margin-top: 0;
117
+ margin-bottom: 20px;
118
+ font-size: 0.9em;
119
+ color: #555;
120
+ }
121
+ </style>
122
+ </head>
123
+ <body>
124
+ <h1>Welcome to the AI Co-Scientist System</h1>
125
+ <p>Set your research goal and run cycles to generate hypotheses.</p>
126
+
127
+ <label for="researchGoal">Research Goal:</label><br>
128
+ <textarea id="researchGoal" name="researchGoal" rows="4" cols="50"></textarea><br><br>
129
+ <button onclick="submitResearchGoal()">Submit Research Goal</button>
130
+ <button onclick="runCycle()">Run Next Cycle</button> <!-- Added manual run button -->
131
+
132
+ <h2>Results</h2>
133
+ <div id="results"><p>Submit a research goal to begin.</p></div>
134
+
135
+ <h2>Errors</h2>
136
+ <div id="errors"></div>
137
+
138
+ <script>
139
+ let currentIteration = 0; // Keep track of the iteration
140
+
141
+ async function submitResearchGoal() {
142
+ const researchGoal = document.getElementById('researchGoal').value;
143
+ if (!researchGoal.trim()) {
144
+ document.getElementById('errors').innerHTML = '<p>Please enter a research goal.</p>';
145
+ return;
146
+ }
147
+ document.getElementById('results').innerHTML = '<p>Setting research goal...</p>';
148
+ document.getElementById('errors').innerHTML = '';
149
+ currentIteration = 0; // Reset iteration count
150
+
151
+ try {
152
+ const response = await fetch('/research_goal', {
153
+ method: 'POST',
154
+ headers: {'Content-Type': 'application/json'},
155
+ body: JSON.stringify({ description: researchGoal })
156
+ });
157
+
158
+ if (!response.ok) {
159
+ const errorData = await response.json();
160
+ throw new Error(errorData.detail || `HTTP error! status: ${response.status}`);
161
+ }
162
+
163
+ const data = await response.json();
164
+ document.getElementById('results').innerHTML = `<p>${data.message}</p><p>Running first cycle...</p>`;
165
+ runCycle(); // Automatically run the first cycle
166
+ } catch (error) {
167
+ console.error('Error submitting research goal:', error);
168
+ document.getElementById('errors').innerHTML = `<p>Error: ${error.message}</p>`;
169
+ document.getElementById('results').innerHTML = ''; // Clear results area on error
170
+ }
171
+ }
172
+
173
+ async function runCycle() {
174
+ document.getElementById('errors').innerHTML = ''; // Clear previous errors
175
+ const resultsDiv = document.getElementById('results');
176
+ // Append status message if it's not the first auto-run
177
+ if (currentIteration > 0 || !resultsDiv.innerHTML.includes("Running first cycle")) {
178
+ resultsDiv.innerHTML += `<p>Running cycle ${currentIteration + 1}...</p>`;
179
+ }
180
+
181
+
182
+ try {
183
+ const response = await fetch('/run_cycle', { method: 'POST' });
184
+
185
+ if (!response.ok) {
186
+ const errorData = await response.json();
187
+ throw new Error(errorData.detail || `HTTP error! status: ${response.status}`);
188
+ }
189
+
190
+ const data = await response.json();
191
+ currentIteration = data.iteration; // Update iteration count
192
+
193
+ let resultsHTML = `<h3>Iteration: ${data.iteration}</h3>`;
194
+ let graphData = null; // To store graph data for initialization later
195
+
196
+ const stepExplanations = { /* ... explanations ... */ }; // Keep explanations if desired
197
+
198
+ for (const stepName in data.steps) {
199
+ if (data.steps.hasOwnProperty(stepName)) {
200
+ const step = data.steps[stepName];
201
+ resultsHTML += `<h4>Step: ${stepName}</h4>`;
202
+ // Add explanation if available
203
+ // if (stepExplanations[stepName]) { resultsHTML += `<p>${stepExplanations[stepName]}</p>`; }
204
+
205
+ if (step.hypotheses && step.hypotheses.length > 0) {
206
+ resultsHTML += `<h5>Hypotheses:</h5><ul>`;
207
+ // Sort hypotheses by Elo score descending for display
208
+ step.hypotheses.sort((a, b) => b.elo_score - a.elo_score).forEach(hypo => {
209
+ resultsHTML += \`<li>
210
+ <strong>\${hypo.title}</strong> (ID: \${hypo.id}, Elo: \${hypo.elo_score.toFixed(2)})<br>\`;
211
+ if (hypo.parent_ids && hypo.parent_ids.length > 0) {
212
+ resultsHTML += \`<em>Parents: \${hypo.parent_ids.join(', ')}</em><br>\`;
213
+ }
214
+ resultsHTML += \`<p>\${hypo.text}</p>\`;
215
+ if (hypo.novelty_review) { resultsHTML += \`<p>Novelty: \${hypo.novelty_review}</p>\`; }
216
+ if (hypo.feasibility_review){ resultsHTML += \`<p>Feasibility: \${hypo.feasibility_review}</p>\`; }
217
+ // Add comments and references if needed
218
+ resultsHTML += \`</li>\`;
219
+ });
220
+ resultsHTML += `</ul>`;
221
+ } else if (step.hypotheses) {
222
+ resultsHTML += `<p>No hypotheses generated or active in this step.</p>`;
223
+ }
224
+
225
+ // Handle graph data specifically from the 'proximity' step
226
+ if (stepName === "proximity" && step.nodes_str && step.edges_str) {
227
+ resultsHTML += \`<h5>Hypothesis Similarity Graph:</h5>\`;
228
+ resultsHTML += \`<div id="mynetwork"></div>\`; // Container for the graph
229
+ resultsHTML += \`<div class="graph-explanation"><p>
230
+ <b>How to read:</b> Nodes are hypotheses. Edges show similarity > 0.2.
231
+ </p></div>\`;
232
+ // Store data for initialization after HTML is rendered
233
+ graphData = { nodesStr: step.nodes_str, edgesStr: step.edges_str };
234
+ } else if (stepName === "proximity" && step.adjacency_graph) {
235
+ resultsHTML += \`<p>Adjacency Graph (raw): \${JSON.stringify(step.adjacency_graph)}</p>\`;
236
+ }
237
+ }
238
+ }
239
+
240
+ // Display meta-review
241
+ if (data.meta_review) {
242
+ resultsHTML += `<h4>Meta-Review:</h4>`;
243
+ if (data.meta_review.meta_review_critique && data.meta_review.meta_review_critique.length > 0) {
244
+ resultsHTML += `<h5>Critique:</h5><ul>\${data.meta_review.meta_review_critique.map(item => \`<li>\${item}</li>\`).join('')}</ul>`;
245
+ }
246
+ if (data.meta_review.research_overview && data.meta_review.research_overview.suggested_next_steps.length > 0) {
247
+ resultsHTML += `<h5>Suggested Next Steps:</h5><ul>\${data.meta_review.research_overview.suggested_next_steps.map(item => \`<li>\${item}</li>\`).join('')}</ul>`;
248
+ }
249
+ }
250
+
251
+ // Update the results div content
252
+ resultsDiv.innerHTML = resultsHTML;
253
+
254
+ // Initialize the graph *after* its container is in the DOM
255
+ if (graphData) {
256
+ initializeGraph(graphData.nodesStr, graphData.edgesStr);
257
+ }
258
+
259
+ } catch (error) {
260
+ console.error('Error running cycle:', error);
261
+ document.getElementById('errors').innerHTML = `<p>Error during cycle ${currentIteration + 1}: ${error.message}</p>`;
262
+ // Optionally clear or update resultsDiv on error
263
+ resultsDiv.innerHTML += `<p>Cycle failed. See errors above.</p>`;
264
+ }
265
+ }
266
+
267
+ // Function to initialize the Vis.js graph (remains the same)
268
+ function initializeGraph(nodesStr, edgesStr) {
269
+ // Check if vis is loaded
270
+ if (typeof vis === 'undefined') {
271
+ console.error("Vis.js library not loaded!");
272
+ document.getElementById('errors').innerHTML += '<p>Error: Vis.js library failed to load.</p>';
273
+ return;
274
+ }
275
+ const container = document.getElementById('mynetwork');
276
+ if (!container) {
277
+ console.error("Graph container #mynetwork not found in DOM!");
278
+ return; // Don't proceed if container doesn't exist
279
+ }
280
+
281
+ try {
282
+ // Use Function constructor for safe parsing of JS object strings
283
+ const nodesArray = nodesStr ? new Function(\`return [\${nodesStr}]\`)() : [];
284
+ const edgesArray = edgesStr ? new Function(\`return [\${edgesStr}]\`)() : [];
285
+
286
+ var nodes = new vis.DataSet(nodesArray);
287
+ var edges = new vis.DataSet(edgesArray);
288
+
289
+ var data = { nodes: nodes, edges: edges };
290
+ var options = { /* ... vis options ... */
291
+ edges: {
292
+ smooth: { enabled: true, type: "dynamic" },
293
+ font: { size: 12, align: 'middle' }
294
+ },
295
+ nodes: {
296
+ shape: 'circle',
297
+ font: { size: 14 }
298
+ },
299
+ physics: {
300
+ stabilization: true,
301
+ barnesHut: { gravitationalConstant: -2000, centralGravity: 0.3, springLength: 150, springConstant: 0.04 }
302
+ }
303
+ };
304
+ var network = new vis.Network(container, data, options);
305
+ } catch (e) {
306
+ console.error("Error initializing Vis.js graph:", e);
307
+ document.getElementById('errors').innerHTML += `<p>Error initializing graph: ${e.message}</p>`;
308
+ // Optionally clear the graph container on error
309
+ container.innerHTML = '<p style="color:red;">Could not render graph.</p>';
310
+ }
311
+ }
312
+ </script>
313
+ </body>
314
+ </html>
315
+ """)
app/config.py ADDED
@@ -0,0 +1,37 @@
1
+ import yaml
2
+ import logging
3
+ from typing import Dict
4
+
5
+ def load_config(config_path: str = "config.yaml") -> Dict:
6
+ """Loads the configuration from the specified YAML file."""
7
+ try:
8
+ with open(config_path, "r") as f:
9
+ config_data = yaml.safe_load(f)
10
+ if not isinstance(config_data, dict):
11
+ print(f"Error: Configuration file {config_path} did not load as a dictionary.")
12
+ exit(1)
13
+ # Convert logging level string to actual level
14
+ log_level_str = config_data.get("logging_level", "INFO").upper()
15
+ config_data["logging_level"] = getattr(logging, log_level_str, logging.INFO)
16
+ return config_data
17
+ except FileNotFoundError:
18
+ print(f"Error: Configuration file not found at {config_path}")
19
+ exit(1)
20
+ except yaml.YAMLError as e:
21
+ print(f"Error parsing YAML in {config_path}: {e}")
22
+ exit(1)
23
+ except AttributeError as e:
24
+ print(f"Error: Invalid logging level '{log_level_str}' in config file")
25
+ exit(1)
26
+ except KeyError as e:
27
+ print(f"Error: Missing key in config file: {e}")
28
+ exit(1)
29
+ except Exception as e:
30
+ print(f"An unexpected error occurred while loading config: {e}")
31
+ exit(1)
32
+
33
+ # Load configuration at the start when this module is imported
34
+ config = load_config()
35
+
36
+ # Example of accessing config values (optional, for clarity)
37
+ # print(f"LLM Model from config: {config.get('llm_model')}")
app/main.py ADDED
@@ -0,0 +1,40 @@
1
+ import uvicorn
2
+ import os
3
+
4
+ # Import the FastAPI app instance from the api module
5
+ from .api import app
6
+ # Import the config dictionary from the config module
7
+ from .config import config
8
+ # Import the logger from utils (optional, if main needs logging)
9
+ from .utils import logger
10
+
11
+ # Ensure OPENROUTER_API_KEY is set before starting (optional check)
12
+ if not os.getenv("OPENROUTER_API_KEY"):
13
+ logger.warning("OPENROUTER_API_KEY environment variable is not set.")
14
+ # Depending on requirements, you might exit here or let the app handle it.
15
+ # print("Error: OPENROUTER_API_KEY environment variable must be set.")
16
+ # exit(1)
17
+
18
+ if __name__ == "__main__":
19
+ host = config.get("fastapi_host", "0.0.0.0")
20
+ port = config.get("fastapi_port", 8000)
21
+ reload_flag = config.get("uvicorn_reload", False) # Add a config option for reload
22
+
23
+ logger.info(f"Starting Uvicorn server on {host}:{port} (Reload: {reload_flag})")
24
+
25
+ # Note: When running this script directly (python app/main.py),
26
+ # Uvicorn needs the app location string relative to the execution directory.
27
+ # If run from project root: "app.api:app"
28
+ # If run from inside 'app': "api:app"
29
+ # The string "app.api:app" assumes you run `python -m app.main` from the project root,
30
+ # or configure the run environment correctly.
31
+ # A simpler approach for direct execution `python app/main.py` might be needed
32
+ # if relative imports cause issues depending on how it's run.
33
+
34
+ # Let's assume running from project root for now.
35
+ # If issues arise, might need to adjust how uvicorn is called or the project structure.
36
+ uvicorn.run(app, host=host, port=port, reload=reload_flag)
37
+
38
+ # Alternative if running `python app/main.py` directly causes import issues:
39
+ # uvicorn.run("app.api:app", host=host, port=port, reload=reload_flag)
40
+ # This tells uvicorn where to find the app object.
app/models.py ADDED
@@ -0,0 +1,90 @@
1
+ import logging
2
+ from typing import List, Dict, Optional
3
+ from pydantic import BaseModel
4
+
5
+ # Assuming logger is configured elsewhere or passed in if needed within methods
6
+ # If models need logging, consider passing a logger instance during initialization
7
+ # or using a globally accessible logger configured in utils.py or config.py.
8
+ # For simplicity, direct logging calls are removed from models for now.
9
+ # logger = logging.getLogger(__name__) # Example if models needed their own logger
10
+
11
+ ###############################################################################
12
+ # Data Models
13
+ ###############################################################################
14
+
15
+ class Hypothesis:
16
+ def __init__(self, hypothesis_id: str, title: str, text: str):
17
+ self.hypothesis_id = hypothesis_id
18
+ self.title = title
19
+ self.text = text
20
+ self.novelty_review: Optional[str] = None # "HIGH", "MEDIUM", "LOW"
21
+ self.feasibility_review: Optional[str] = None
22
+ self.elo_score: float = 1200.0 # initial Elo score
23
+ self.review_comments: List[str] = []
24
+ self.references: List[str] = []
25
+ self.is_active: bool = True
26
+ self.parent_ids: List[str] = [] # Store IDs of parent hypotheses
27
+
28
+ def to_dict(self) -> dict:
29
+ return {
30
+ "id": self.hypothesis_id,
31
+ "title": self.title,
32
+ "text": self.text,
33
+ "novelty_review": self.novelty_review,
34
+ "feasibility_review": self.feasibility_review,
35
+ "elo_score": self.elo_score,
36
+ "review_comments": self.review_comments,
37
+ "references": self.references,
38
+ "is_active": self.is_active,
39
+ "parent_ids": self.parent_ids, # Include parent IDs
40
+ }
41
+
42
+ class ResearchGoal:
43
+ def __init__(self, description: str, constraints: Dict = None):
44
+ self.description = description
45
+ self.constraints = constraints if constraints else {}
46
+
47
+ class ContextMemory:
48
+ """
49
+ A simple in-memory context storage.
50
+ """
51
+ def __init__(self):
52
+ self.hypotheses: Dict[str, Hypothesis] = {} # key: hypothesis_id
53
+ self.tournament_results: List[Dict] = []
54
+ self.meta_review_feedback: List[Dict] = []
55
+ self.iteration_number: int = 0
56
+
57
+ def add_hypothesis(self, hypothesis: Hypothesis):
58
+ self.hypotheses[hypothesis.hypothesis_id] = hypothesis
59
+ # Consider moving logging out of the model if possible
60
+ # logger.info(f"Added hypothesis {hypothesis.hypothesis_id}")
61
+
62
+ def get_active_hypotheses(self) -> List[Hypothesis]:
63
+ return [h for h in self.hypotheses.values() if h.is_active]
64
+
65
+
66
+ ###############################################################################
67
+ # Pydantic Schemas for API
68
+ ###############################################################################
69
+
70
+ class ResearchGoalRequest(BaseModel):
71
+ description: str
72
+ constraints: Optional[Dict] = {}
73
+
74
+ class HypothesisResponse(BaseModel):
75
+ id: str
76
+ title: str
77
+ text: str
78
+ novelty_review: Optional[str]
79
+ feasibility_review: Optional[str]
80
+ elo_score: float
81
+ review_comments: List[str]
82
+ references: List[str]
83
+ is_active: bool
84
+ # parent_ids: List[str] # Add if needed in API response
85
+
86
+ class OverviewResponse(BaseModel):
87
+ iteration: int
88
+ meta_review_critique: List[str]
89
+ top_hypotheses: List[HypothesisResponse]
90
+ suggested_next_steps: List[str]
app/utils.py ADDED
@@ -0,0 +1,174 @@
1
+ import logging
2
+ import time
3
+ import os
4
+ import random
5
+ import json
6
+ from typing import List, Dict
7
+ import openai
8
+ from openai import OpenAI
9
+ from sentence_transformers import SentenceTransformer
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ import numpy as np
12
+
13
+ # Import config loading function and config object
14
+ from .config import config, load_config
15
+
16
+ # --- Logging Setup ---
17
+ # Configure a root logger or a specific logger for the app
18
+ # Using a basic configuration here, can be enhanced
19
+ logging.basicConfig(level=config.get("logging_level", logging.INFO),
20
+ format="%(asctime)s %(levelname)s %(name)s: %(message)s")
21
+ logger = logging.getLogger("aicoscientist") # Use a specific name for the app logger
22
+
23
+ # Optional: Add file handler based on config (if needed globally)
24
+ # log_filename_base = config.get('log_file_name', 'app')
25
+ # timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
26
+ # file_handler = logging.FileHandler(f"{log_filename_base}_{timestamp}.txt")
27
+ # formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
28
+ # file_handler.setFormatter(formatter)
29
+ # logger.addHandler(file_handler)
30
+
31
+ # --- LLM Interaction ---
32
+ def call_llm(prompt: str, temperature: float = 0.7) -> str:
33
+ """
34
+ Calls an LLM via the OpenRouter API and returns the response. Handles retries.
35
+ """
36
+ client = OpenAI(
37
+ base_url=config.get("openrouter_base_url"),
38
+ api_key=os.getenv("OPENROUTER_API_KEY"),
39
+ )
40
+ llm_model = config.get("llm_model")
41
+ max_retries = config.get("max_retries", 3)
42
+ initial_delay = config.get("initial_retry_delay", 1)
43
+
44
+ if not llm_model:
45
+ logger.error("LLM model not configured in config.yaml")
46
+ return "Error: LLM model not configured."
47
+ if not client.api_key:
48
+ logger.error("OPENROUTER_API_KEY environment variable not set.")
49
+ return "Error: OpenRouter API key not set."
50
+
51
+ last_error_message = "API call failed after multiple retries." # Default error
52
+
53
+ for attempt in range(max_retries):
54
+ try:
55
+ completion = client.chat.completions.create(
56
+ model=llm_model,
57
+ messages=[{"role": "user", "content": prompt}],
58
+ temperature=temperature,
59
+ )
60
+ if completion.choices and len(completion.choices) > 0:
61
+ return completion.choices[0].message.content or "" # Return empty string if content is None
62
+ else:
63
+ logger.error("No choices in the LLM response: %s", completion)
64
+ last_error_message = f"No choices in the response: {completion}"
65
+ # Continue to retry if possible
66
+
67
+ except Exception as e:
68
+ error_str = str(e)
69
+ if "Rate limit exceeded" in error_str:
70
+ logger.warning(f"Rate limit exceeded (attempt {attempt + 1}/{max_retries}): {e}")
71
+ last_error_message = f"Rate limit exceeded: {e}"
72
+ else:
73
+ logger.error(f"API call failed (attempt {attempt + 1}/{max_retries}): {e}")
74
+ last_error_message = f"API call failed: {e}"
75
+
76
+ if attempt < max_retries - 1:
77
+ wait_time = initial_delay * (2 ** attempt)
78
+ logger.info(f"Retrying in {wait_time} seconds...")
79
+ time.sleep(wait_time)
80
+ else:
81
+ logger.error("Max retries reached. Giving up.")
82
+ break # Exit loop after last attempt
83
+
84
+ return f"Error: {last_error_message}" # Return the last recorded error
85
+
86
+
87
+ # --- ID Generation ---
88
+ def generate_unique_id(prefix="H") -> str:
89
+ """Generates a unique identifier string."""
90
+ return f"{prefix}{random.randint(1000, 9999)}"
91
+
92
+
93
+ # --- VIS.JS Graph Data Generation ---
94
+ def generate_visjs_data(adjacency_graph: Dict) -> Dict[str, str]:
95
+ """Generates node and edge data strings for vis.js graph."""
96
+ nodes = []
97
+ edges = []
98
+
99
+ if not isinstance(adjacency_graph, dict):
100
+ logger.error(f"Invalid adjacency_graph type: {type(adjacency_graph)}. Expected dict.")
101
+ return {"nodes_str": "", "edges_str": ""}
102
+
103
+ for node_id, connections in adjacency_graph.items():
104
+ nodes.append(f"{{id: '{node_id}', label: '{node_id}'}}")
105
+ if isinstance(connections, list):
106
+ for connection in connections:
107
+ if isinstance(connection, dict) and 'similarity' in connection and 'other_id' in connection:
108
+ similarity_val = connection.get('similarity')
109
+ if isinstance(similarity_val, (int, float)) and similarity_val > 0.2:
110
+ edges.append(f"{{from: '{node_id}', to: '{connection['other_id']}', label: '{similarity_val:.2f}', arrows: 'to'}}")
111
+ # Optional: Log skipped edges due to low similarity
112
+ # else:
113
+ # logger.debug(f"Skipping edge from {node_id} to {connection['other_id']} due to low/invalid similarity: {similarity_val}")
114
+ else:
115
+ logger.warning(f"Skipping invalid connection format for node {node_id}: {connection}")
116
+ else:
117
+ logger.warning(f"Skipping invalid connections format for node {node_id}: {connections}")
118
+
119
+ nodes_str = ",\n".join(nodes)
120
+ edges_str = ",\n".join(edges)
121
+
122
+ return {
123
+ "nodes_str": nodes_str,
124
+ "edges_str": edges_str
125
+ }
126
+
127
+ # --- Similarity Calculation ---
128
+ _sentence_transformer_model = None
129
+
130
+ def get_sentence_transformer_model():
131
+ """Loads and returns a singleton instance of the sentence transformer model."""
132
+ global _sentence_transformer_model
133
+ if _sentence_transformer_model is None:
134
+ model_name = config.get('sentence_transformer_model', 'all-MiniLM-L6-v2')
135
+ try:
136
+ logger.info(f"Loading sentence transformer model: {model_name}...")
137
+ _sentence_transformer_model = SentenceTransformer(model_name)
138
+ logger.info("Sentence transformer model loaded successfully.")
139
+ except ImportError:
140
+ logger.error("Failed to import sentence_transformers. Please install it: pip install sentence-transformers")
141
+ raise
142
+ except Exception as e:
143
+ logger.error(f"Failed to load sentence transformer model '{model_name}': {e}")
144
+ raise # Re-raise after logging
145
+ return _sentence_transformer_model
146
+
147
+ def similarity_score(textA: str, textB: str) -> float:
148
+ """Calculates cosine similarity between two texts using sentence embeddings."""
149
+ try:
150
+ if not textA or not textB:
151
+ logger.warning("Empty string provided to similarity_score.")
152
+ return 0.0
153
+
154
+ model = get_sentence_transformer_model()
155
+ if model is None: # Check if model loading failed previously
156
+ return 0.0 # Or handle error appropriately
157
+
158
+ embedding_a = model.encode(textA, convert_to_tensor=True)
159
+ embedding_b = model.encode(textB, convert_to_tensor=True)
160
+
161
+ # Ensure embeddings are 2D numpy arrays for cosine_similarity
162
+ embedding_a_np = embedding_a.cpu().numpy().reshape(1, -1)
163
+ embedding_b_np = embedding_b.cpu().numpy().reshape(1, -1)
164
+
165
+ similarity = cosine_similarity(embedding_a_np, embedding_b_np)[0][0]
166
+
167
+ # Clamp the value between 0.0 and 1.0
168
+ similarity = float(np.clip(similarity, 0.0, 1.0))
169
+
170
+ # logger.debug(f"Similarity score: {similarity:.4f}") # Use debug level
171
+ return similarity
172
+ except Exception as e:
173
+ logger.error(f"Error calculating similarity score: {e}", exc_info=True) # Log traceback
174
+ return 0.0 # Return 0 on error instead of 0.5
main.py DELETED
@@ -1,1174 +0,0 @@
1
- # Generated by o3-mini-high
2
- # https://gist.github.com/chunhualiao/f90c48a0bdac24ba686c25c86150cca8
3
- import math
4
- import random
5
- import logging
6
- from typing import List, Dict, Optional
7
- import openai
8
- from openai import OpenAI
9
- import os
10
- import datetime
11
- from fastapi import FastAPI, HTTPException, responses
12
- from fastapi.staticfiles import StaticFiles
13
- from pydantic import BaseModel
14
- import uvicorn
15
- import yaml
16
-
17
- ################################################################################
18
- # Utility Functions
19
- ################################################################################
20
-
21
- import time
22
-
23
- # Configure logging
24
- def load_config(config_path: str) -> Dict:
25
- """Loads the configuration from the specified YAML file."""
26
- try:
27
- with open(config_path, "r") as f:
28
- config = yaml.safe_load(f)
29
- # Convert logging level string to actual level
30
- config["logging_level"] = getattr(logging, config["logging_level"].upper(), logging.INFO)
31
- return config
32
- except FileNotFoundError:
33
- print(f"Error: Configuration file not found at {config_path}")
34
- exit(1)
35
- except yaml.YAMLError as e:
36
- print(f"Error parsing YAML in {config_path}: {e}")
37
- exit(1)
38
- except AttributeError as e:
39
- print("Error: Invalid logging level in config file")
40
- exit(1)
41
- except KeyError as e:
42
- print(f"Error: Missing key in config file: {e}")
43
- exit(1)
44
-
45
-
46
- def setup_logger(log_filename):
47
- logger = logging.getLogger(log_filename) # Create a logger with the filename
48
- logger.setLevel(config["logging_level"])
49
- formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
50
-
51
- # Remove existing handlers to avoid duplicate logs
52
- for handler in logger.handlers[:]:
53
- logger.removeHandler(handler)
54
-
55
- file_handler = logging.FileHandler(f"{config['log_file_name']}_{log_filename}")
56
- file_handler.setFormatter(formatter)
57
- logger.addHandler(file_handler)
58
- return logger
59
-
60
- # Load configuration at the start
61
- config = load_config("config.yaml")
62
-
63
- def call_llm(prompt: str, temperature: float = 0.7) -> str:
64
- """
65
- Calls an LLM via the OpenRouter API and returns the response.
66
-
67
- Args:
68
- prompt (str): The input prompt for the LLM.
69
- temperature (float, optional): The temperature setting for the LLM. Defaults to 0.7.
70
-
71
- Returns:
72
- str: The LLM's response.
73
-
74
- Args:
75
- prompt (str): The input prompt for the LLM.
76
-
77
- Returns:
78
- str: The LLM's response.
79
- """
80
- client = OpenAI(
81
- base_url=config["openrouter_base_url"],
82
- api_key=os.getenv("OPENROUTER_API_KEY"),
83
- )
84
-
85
- try:
86
- completion = client.chat.completions.create(
87
- model=config["llm_model"],
88
- messages=[{"role": "user", "content": prompt}],
89
- temperature=temperature, # Pass temperature to the API call
90
- )
91
- except Exception as e:
92
- retries = config.get("max_retries", 3)
93
- delay = config.get("initial_retry_delay", 1) # seconds
94
-
95
- if "Rate limit exceeded" in str(e):
96
- logger.warning(f"Rate limit exceeded: {e}")
97
- error_message = f"Rate limit exceeded: {e}"
98
- else:
99
- logger.error(f"API call failed with exception: {e}")
100
- error_message = f"API call failed with exception: {e}"
101
-
102
- for attempt in range(retries):
103
- try:
104
- wait_time = delay * (2 ** attempt) # Exponential backoff
105
- logger.info(f"Retrying in {wait_time} seconds (attempt {attempt + 1}/{retries})")
106
- time.sleep(wait_time)
107
- completion = client.chat.completions.create(
108
- model=config["llm_model"],
109
- messages=[{"role": "user", "content": prompt}],
110
- temperature=temperature, # Pass temperature to the API call
111
- )
112
- if completion.choices and len(completion.choices) > 0:
113
- return completion.choices[0].message.content
114
- except Exception as inner_e:
115
- if "Rate limit exceeded" in str(inner_e):
116
- logger.warning(f"Rate limit exceeded (retry attempt {attempt + 1}): {inner_e}")
117
- error_message = f"Rate limit exceeded: {inner_e}"
118
- else:
119
- logger.error(f"API call failed with exception (retry attempt {attempt + 1}): {inner_e}")
120
- error_message = f"API call failed with exception: {inner_e}"
121
-
122
- if attempt == retries - 1:
123
- logger.error("Max retries reached. Giving up.")
124
- return f"API call failed after multiple retries. Error: {error_message}" # Detailed error
125
-
126
- logger.error("Max retries reached without a successful response.")
127
- return f"API call failed after multiple retries. Error: {error_message}" # Detailed error
128
-
129
- # If no exception, you can safely access attributes
130
- if completion.choices and len(completion.choices) > 0:
131
- return completion.choices[0].message.content
132
- else:
133
- logger.error("No choices in the response: %s", completion)
134
- return f"No choices in the response: {completion}"
135
-
136
-
137
-
138
- ###############################################################################
139
- # Data Models and Pydantic Schemas
140
- ###############################################################################
141
-
142
- class Hypothesis:
143
- def __init__(self, hypothesis_id: str, title: str, text: str):
144
- self.hypothesis_id = hypothesis_id
145
- self.title = title
146
- self.text = text
147
- self.novelty_review: Optional[str] = None # "HIGH", "MEDIUM", "LOW"
148
- self.feasibility_review: Optional[str] = None
149
- self.elo_score: float = 1200.0 # initial Elo score
150
- self.review_comments: List[str] = []
151
- self.references: List[str] = []
152
- self.is_active: bool = True
153
- self.parent_ids: List[str] = [] # Store IDs of parent hypotheses
154
-
155
- def to_dict(self) -> dict:
156
- return {
157
- "id": self.hypothesis_id,
158
- "title": self.title,
159
- "text": self.text,
160
- "novelty_review": self.novelty_review,
161
- "feasibility_review": self.feasibility_review,
162
- "elo_score": self.elo_score,
163
- "review_comments": self.review_comments,
164
- "references": self.references,
165
- "is_active": self.is_active,
166
- "parent_ids": self.parent_ids, # Include parent IDs
167
- }
168
-
169
- class ResearchGoal:
170
- def __init__(self, description: str, constraints: Dict = None):
171
- self.description = description
172
- self.constraints = constraints if constraints else {}
173
-
174
- class ContextMemory:
175
- """
176
- A simple in-memory context storage.
177
- """
178
- def __init__(self):
179
- self.hypotheses: Dict[str, Hypothesis] = {} # key: hypothesis_id
180
- self.tournament_results: List[Dict] = []
181
- self.meta_review_feedback: List[Dict] = []
182
- self.iteration_number: int = 0
183
-
184
- def add_hypothesis(self, hypothesis: Hypothesis):
185
- self.hypotheses[hypothesis.hypothesis_id] = hypothesis
186
- logger.info(f"Added hypothesis {hypothesis.hypothesis_id}")
187
-
188
- def get_active_hypotheses(self) -> List[Hypothesis]:
189
- return [h for h in self.hypotheses.values() if h.is_active]
190
-
191
-
192
- # Pydantic schemas for API endpoints.
193
- class ResearchGoalRequest(BaseModel):
194
- description: str
195
- constraints: Optional[Dict] = {}
196
-
197
- class HypothesisResponse(BaseModel):
198
- id: str
199
- title: str
200
- text: str
201
- novelty_review: Optional[str]
202
- feasibility_review: Optional[str]
203
- elo_score: float
204
- review_comments: List[str]
205
- references: List[str]
206
- is_active: bool
207
-
208
- class OverviewResponse(BaseModel):
209
- iteration: int
210
- meta_review_critique: List[str]
211
- top_hypotheses: List[HypothesisResponse]
212
- suggested_next_steps: List[str]
213
-
214
-
215
- ###############################################################################
216
- # Utility Functions (Placeholders for LLM Calls and Similarity Measures)
217
- ###############################################################################
218
-
219
- def generate_unique_id(prefix="H") -> str:
220
- """
221
- Generates a unique identifier string.
222
-
223
- Args:
224
- prefix (str, optional): A prefix for the ID. Defaults to "H".
225
-
226
- Returns:
227
- str: A unique identifier string consisting of the prefix and a random 4-digit number.
228
- """
229
- return f"{prefix}{random.randint(1000, 9999)}"
230
-
231
- import json
232
-
233
- # --- VIS.JS INTEGRATION ---
234
- def generate_visjs_data(adjacency_graph: Dict) -> Dict[str, str]:
235
- """
236
- Generates node and edge data strings for vis.js graph.
237
-
238
- Args:
239
- adjacency_graph (Dict): The adjacency graph data.
240
-
241
- Returns:
242
- Dict[str, str]: A dictionary containing 'nodes_str' and 'edges_str'.
243
- """
244
- nodes = []
245
- edges = []
246
-
247
- # Check if adjacency_graph is a dictionary
248
- if not isinstance(adjacency_graph, dict):
249
- logger.error(f"Invalid adjacency_graph type: {type(adjacency_graph)}. Expected dict.")
250
- return {"nodes_str": "", "edges_str": ""}
251
-
252
- for node_id, connections in adjacency_graph.items():
253
- # Ensure node_id is treated as a string for JS
254
- nodes.append(f"{{id: '{node_id}', label: '{node_id}'}}")
255
- # Check if connections is a list
256
- if isinstance(connections, list):
257
- for connection in connections:
258
- # Check if connection is a dictionary and has 'similarity'
259
- if isinstance(connection, dict) and 'similarity' in connection:
260
- # Ensure similarity is checked correctly
261
- if isinstance(connection.get('similarity'), (int, float)) and connection['similarity'] > 0.2:
262
- # Ensure 'from' and 'to' are strings for JS and 'other_id' exists
263
- if 'other_id' in connection:
264
- edges.append(f"{{from: '{node_id}', to: '{connection['other_id']}', label: '{connection['similarity']:.2f}', arrows: 'to'}}")
265
- else:
266
- logger.warning(f"Skipping edge from {node_id} due to missing 'other_id' in connection: {connection}")
267
- # Log skipped edges due to low similarity or non-numeric similarity
268
- elif not (isinstance(connection.get('similarity'), (int, float)) and connection['similarity'] > 0.2):
269
- logger.debug(f"Skipping edge from {node_id} to {connection.get('other_id', 'N/A')} due to low/invalid similarity: {connection.get('similarity', 'N/A')}")
270
- else:
271
- logger.warning(f"Skipping invalid connection format for node {node_id}: {connection}")
272
- else:
273
- logger.warning(f"Skipping invalid connections format for node {node_id}: {connections}")
274
-
275
-
276
- nodes_str = ",\n".join(nodes)
277
- edges_str = ",\n".join(edges)
278
-
279
- return {
280
- "nodes_str": nodes_str,
281
- "edges_str": edges_str
282
- }
283
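To make the expected input and output shapes concrete, here is an illustrative sketch (the IDs and similarity values are invented):

```python
# Illustrative adjacency graph with one pair of similar hypotheses.
adjacency = {
    "H1": [{"other_id": "H2", "similarity": 0.85}],
    "H2": [{"other_id": "H1", "similarity": 0.85}],
}
data = generate_visjs_data(adjacency)
# data["nodes_str"] -> "{id: 'H1', label: 'H1'},\n{id: 'H2', label: 'H2'}"
# data["edges_str"] -> two directed edges labelled '0.85'; pairs with a
#                      similarity of 0.2 or less would be omitted.
```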
-
284
-
285
- def call_llm_for_generation(prompt: str, num_hypotheses: int = 3) -> List[Dict]:
286
- """
287
- Calls a Large Language Model (LLM) for generating hypotheses.
288
-
289
- Args:
290
- prompt (str): The input prompt for the LLM.
291
- num_hypotheses (int, optional): The number of hypotheses to generate. Defaults to 3.
292
-
293
- Returns:
294
- List[Dict]: A list of dictionaries, each representing a generated hypothesis.
295
- Each dictionary contains "title" and "text" keys.
296
- """
297
- logger.info("LLM generation called with prompt: %s, num_hypotheses: %d", prompt, num_hypotheses)
298
-
299
- # Modify the prompt to request JSON output
300
- prompt += "\n\nPlease return the response as a JSON array of objects, where each object has a 'title' and 'text' key."
301
-
302
- # Call LLM with the appropriate temperature
303
- response = call_llm(prompt, temperature=config["step_temperatures"]["generation"])
304
- logger.info("LLM response: %s", response)
305
-
306
- if "API call failed" in response:
307
- # If the call failed, log it and return the error message
308
- logger.error(f"LLM call failed: {response}")
309
- return [{"title": "Error", "text": response}] # Return error as a hypothesis
310
-
311
- try:
312
- # Remove potential Markdown code block formatting
313
- response = response.strip()
314
- if response.startswith("```json"):
315
- response = response[7:]
316
- if response.endswith("```"):
317
- response = response[:-3]
318
- response = response.strip()
319
-
320
- # Attempt to parse the response as JSON
321
- hypotheses = json.loads(response)
322
- logger.info("Parsed hypotheses: %s", hypotheses)
323
-
324
- # Basic validation: Check if the response is a list and each item has 'title' and 'text'
325
- if not isinstance(hypotheses, list) or not all(isinstance(h, dict) and "title" in h and "text" in h for h in hypotheses):
326
- error_message = "Invalid JSON format: Expected a list of objects with 'title' and 'text' keys."
327
- raise ValueError(error_message)
328
- except (json.JSONDecodeError, ValueError) as e:
329
- logger.error("Could not parse LLM response as JSON: %s", response)
330
- logger.error(f"Error: {e}")
331
- return [{"title": "Error", "text": f"Could not parse LLM response: {e}"}] # Return error as a hypothesis
332
-
333
- return hypotheses
334
-
335
- def call_llm_for_reflection(hypothesis_text: str) -> Dict:
336
- """
337
- Calls a Large Language Model (LLM) for reviewing a hypothesis.
338
-
339
- Args:
340
- hypothesis_text (str): The text of the hypothesis to be reviewed.
341
-
342
- Returns:
343
- Dict: A dictionary containing the review results, including novelty and feasibility
344
- assessments (HIGH, MEDIUM, or LOW), a comment, and a list of references.
345
- """
346
- prompt = (
347
- f"Review the following hypothesis and provide a novelty assessment (HIGH, MEDIUM, or LOW), "
348
- f"a feasibility assessment (HIGH, MEDIUM, or LOW), a comment, and a list of references (PMIDs) in JSON format:\n\n"
349
- f"Hypothesis: {hypothesis_text}\n\n"
350
- f"Return the response as a JSON object with the following keys: 'novelty_review', 'feasibility_review', 'comment', 'references'."
351
-
352
- )
353
- # Call LLM with the appropriate temperature
354
- response = call_llm(prompt, temperature=config["step_temperatures"]["reflection"])
355
- logger.info("LLM reflection for hypothesis: %s, response: %s", hypothesis_text, response)
356
-
357
- if "API call failed" in response:
358
- # If the call failed, log it and return the error message
359
- logger.error(f"LLM call failed: {response}")
360
- return {
361
- "novelty_review": "ERROR",
362
- "feasibility_review": "ERROR",
363
- "comment": response, # Return the error message
364
- "references": [],
365
- }
366
-
367
- # Initialize default values
368
- novelty_review = "MEDIUM"
369
- feasibility_review = "MEDIUM"
370
- comment = "Could not parse LLM response."
371
- references = []
372
-
373
- try:
374
- # Remove potential Markdown code block formatting
375
- response = response.strip()
376
- if response.startswith("```json"):
377
- response = response[7:]
378
- if response.endswith("```"):
379
- response = response[:-3]
380
- response = response.strip()
381
-
382
- # Parse the JSON response
383
- data = json.loads(response)
384
- novelty_review = data.get("novelty_review", "MEDIUM")
385
- feasibility_review = data.get("feasibility_review", "MEDIUM")
386
- comment = data.get("comment", "Could not parse LLM response.")
387
- references = data.get("references", [])
388
-
389
- # Basic validation of review values
390
- if not any(level in novelty_review.upper() for level in ["HIGH", "MEDIUM", "LOW"]):
391
- logger.warning("Invalid novelty review value: %s", novelty_review)
392
- novelty_review = "MEDIUM"
393
- if not any(level in feasibility_review.upper() for level in ["HIGH", "MEDIUM", "LOW"]):
394
- logger.warning("Invalid feasibility review value: %s", feasibility_review)
395
- feasibility_review = "MEDIUM"
396
- if not isinstance(comment, str):
397
- logger.warning("Invalid comment value: %s", comment)
398
- comment = "Could not parse LLM response."
399
-
400
- except (json.JSONDecodeError, AttributeError, KeyError) as e:
401
- logger.warning("Error parsing LLM response: %s", e)
402
- logger.warning("Response: %s", response)
403
- comment = f"Could not parse LLM response: {e}"
404
-
405
- return {
406
- "novelty_review": novelty_review,
407
- "feasibility_review": feasibility_review,
408
- "comment": comment,
409
- "references": references,
410
- }
411
-
412
- def run_pairwise_debate(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
413
- """
414
- Compares two hypotheses based on their novelty and feasibility review scores.
415
-
416
- Args:
417
- hypoA (Hypothesis): The first hypothesis.
418
- hypoB (Hypothesis): The second hypothesis.
419
-
420
- Returns:
421
- Hypothesis: The winning hypothesis. If scores are tied, a winner is chosen randomly.
422
- """
423
- def score(h: Hypothesis) -> int:
424
- """
425
- Calculates a numerical score for a hypothesis based on its novelty and feasibility reviews.
426
-
427
- Args:
428
- h (Hypothesis): The hypothesis to score.
429
-
430
- Returns:
431
- int: The calculated score. HIGH=3, MEDIUM=2, LOW=1, None=0. The score is the sum of
432
- the novelty and feasibility scores.
433
- """
434
- mapping = {"HIGH": 3, "MEDIUM": 2, "LOW": 1, None: 0}
435
- score_novelty = 0
436
- if isinstance(h.novelty_review, str):
437
- score_novelty = mapping.get(h.novelty_review, 0)
438
- else:
439
- logger.error(f"Invalid novelty_review type: {type(h.novelty_review)}, value: {h.novelty_review}")
440
-
441
- score_feasibility = 0
442
- if isinstance(h.feasibility_review, str):
443
- score_feasibility = mapping.get(h.feasibility_review, 0)
444
- else:
445
- logger.error(f"Invalid feasibility_review type: {type(h.feasibility_review)}, value: {h.feasibility_review}")
446
-
447
- return score_novelty + score_feasibility
448
- scoreA = score(hypoA)
449
- scoreB = score(hypoB)
450
- winner = hypoA if scoreA > scoreB else hypoB if scoreB > scoreA else random.choice([hypoA, hypoB])
451
- logger.info("Debate: %s (score %d) vs %s (score %d) => Winner: %s",
452
- hypoA.hypothesis_id, scoreA, hypoB.hypothesis_id, scoreB, winner.hypothesis_id)
453
- return winner
454
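A worked example of the review-based scoring above (HIGH=3, MEDIUM=2, LOW=1; the hypotheses are hypothetical):

```python
# Illustrative only: H1 scores 3 + 2 = 5, H2 scores 2 + 2 = 4, so H1 wins deterministically.
a = Hypothesis("H1", "Idea A", "Text A")
a.novelty_review, a.feasibility_review = "HIGH", "MEDIUM"
b = Hypothesis("H2", "Idea B", "Text B")
b.novelty_review, b.feasibility_review = "MEDIUM", "MEDIUM"
assert run_pairwise_debate(a, b) is a
```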
-
455
- def update_elo(winner: Hypothesis, loser: Hypothesis, k_factor: int = config["elo_k_factor"]):
456
- """
457
- Updates the Elo scores of two hypotheses after a pairwise comparison.
458
-
459
- Args:
460
- winner (Hypothesis): The winning hypothesis.
461
- loser (Hypothesis): The losing hypothesis.
462
- k_factor (int, optional): The K-factor used in the Elo calculation. Defaults to the configured "elo_k_factor" value.
463
-
464
- Returns:
465
- None
466
- """
467
- ratingA = winner.elo_score
468
- ratingB = loser.elo_score
469
- expectedA = 1 / (1 + math.pow(10, (ratingB - ratingA) / 400))
470
- expectedB = 1 - expectedA
471
- winner.elo_score = ratingA + k_factor * (1 - expectedA)
472
- loser.elo_score = ratingB + k_factor * (0 - expectedB)
473
- logger.info("Updated Elo: Winner %s -> %.2f, Loser %s -> %.2f",
474
- winner.hypothesis_id, winner.elo_score, loser.hypothesis_id, loser.elo_score)
475
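A worked example of the Elo update (a sketch, assuming `elo_k_factor` is set to 32 in `config.yaml`):

```python
# Both hypotheses start at the default rating of 1200, so the expected score is 0.5 each.
winner_h = Hypothesis("H1", "Winner", "Text A")
loser_h = Hypothesis("H2", "Loser", "Text B")
update_elo(winner_h, loser_h)
print(winner_h.elo_score)  # 1216.0 = 1200 + 32 * (1 - 0.5)
print(loser_h.elo_score)   # 1184.0 = 1200 + 32 * (0 - 0.5)
```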
-
476
- def combine_hypotheses(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
477
- """
478
- Combines two hypotheses into a new, evolved hypothesis.
479
-
480
- Args:
481
- hypoA (Hypothesis): The first hypothesis.
482
- hypoB (Hypothesis): The second hypothesis.
483
-
484
- Returns:
485
- Hypothesis: A new hypothesis combining the two input hypotheses. The new ID is prefixed with "E".
486
- """
487
- new_id = generate_unique_id("E")
488
- combined_title = f"Combined: {hypoA.title} & {hypoB.title}"
489
- combined_text = f"{hypoA.text}\n\nAdditionally, {hypoB.text}"
490
- logger.info("Combined hypotheses %s and %s into %s", hypoA.hypothesis_id, hypoB.hypothesis_id, new_id)
491
- new_hypothesis = Hypothesis(new_id, combined_title, combined_text)
492
- new_hypothesis.parent_ids = [hypoA.hypothesis_id, hypoB.hypothesis_id] # Store parent IDs
493
- logger.info("New hypothesis parent_ids: %s", new_hypothesis.parent_ids) # Added logging
494
- return new_hypothesis
495
-
496
- # Global variable to store the sentence transformer model
497
- _sentence_transformer_model = None
498
-
499
- def get_sentence_transformer_model():
500
- """
501
- Returns a singleton instance of the sentence transformer model.
502
- Loads the model only once to improve performance.
503
-
504
- Returns:
505
- SentenceTransformer: The sentence transformer model.
506
- """
507
- global _sentence_transformer_model
508
- if _sentence_transformer_model is None:
509
- try:
510
- from sentence_transformers import SentenceTransformer
511
- logger.info("Loading sentence transformer model...")
512
- # Using a smaller model for efficiency, can be replaced with larger models for better accuracy
513
- _sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
514
- logger.info("Sentence transformer model loaded successfully")
515
- except ImportError as e:
516
- logger.error(f"Failed to import sentence_transformers: {e}")
517
- raise
518
- except Exception as e:
519
- logger.error(f"Failed to load sentence transformer model: {e}")
520
- raise
521
- return _sentence_transformer_model
522
-
523
- def similarity_score(textA: str, textB: str) -> float:
524
- """
525
- Calculates a similarity score between two text strings using sentence embeddings
526
- and cosine similarity.
527
-
528
- Args:
529
- textA (str): The first text string.
530
- textB (str): The second text string.
531
-
532
- Returns:
533
- float: A similarity score between 0 and 1 (inclusive), where 1 indicates
534
- identical semantic meaning and 0 indicates completely different meanings.
535
- """
536
- try:
537
- # Handle empty strings
538
- if not textA.strip() or not textB.strip():
539
- logger.warning("Empty string provided to similarity_score")
540
- return 0.0
541
-
542
- # Get the model
543
- model = get_sentence_transformer_model()
544
-
545
- # Generate embeddings
546
- embedding_a = model.encode(textA, convert_to_tensor=True)
547
- embedding_b = model.encode(textB, convert_to_tensor=True)
548
-
549
- # Calculate cosine similarity
550
- from sklearn.metrics.pairwise import cosine_similarity
551
- import numpy as np
552
-
553
- # Convert to numpy arrays if they're tensors
554
- if hasattr(embedding_a, 'cpu') and hasattr(embedding_b, 'cpu'):
555
- embedding_a = embedding_a.cpu().numpy().reshape(1, -1)
556
- embedding_b = embedding_b.cpu().numpy().reshape(1, -1)
557
-
558
- similarity = cosine_similarity(embedding_a, embedding_b)[0][0]
559
-
560
- # Ensure the result is between 0 and 1
561
- similarity = float(max(0.0, min(1.0, similarity)))
562
-
563
- logger.info(f"Similarity score between texts: {similarity:.4f}")
564
- return similarity
565
- except Exception as e:
566
- logger.error(f"Error calculating similarity score: {e}")
567
- # Fallback to a default value in case of error
568
- return 0.5
569
-
570
-
571
- ###############################################################################
572
- # Agent Implementations
573
- ###############################################################################
574
-
575
- class GenerationAgent:
576
- def generate_new_hypotheses(self, research_goal: ResearchGoal, context: ContextMemory) -> List[Hypothesis]:
577
- """
578
- Generates new hypotheses based on the given research goal and context.
579
-
580
- Args:
581
- research_goal (ResearchGoal): The research goal.
582
- context (ContextMemory): The current context memory.
583
-
584
- Returns:
585
- List[Hypothesis]: A list of newly generated hypotheses.
586
- """
587
- prompt = (
588
- f"Research Goal: {research_goal.description}\n"
589
- f"Constraints: {research_goal.constraints}\n"
590
- f"Please propose {config['num_hypotheses']} new hypotheses with rationale.\n"
591
- )
592
- raw_output = call_llm_for_generation(prompt, num_hypotheses=config["num_hypotheses"])
593
- new_hypos = []
594
- for idea in raw_output:
595
- hypo_id = generate_unique_id("G")
596
- h = Hypothesis(hypo_id, idea["title"], idea["text"])
597
- logger.info("Generated hypothesis: %s", h.to_dict())
598
- new_hypos.append(h)
599
- return new_hypos
600
-
601
- class ReflectionAgent:
602
- def review_hypotheses(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
603
- """
604
- Reviews a list of hypotheses, updating their novelty, feasibility, comments, and references.
605
-
606
- Args:
607
- hypotheses (List[Hypothesis]): The list of hypotheses to review.
608
- context (ContextMemory): The current context memory.
609
-
610
- Returns:
611
- None
612
- """
613
- for h in hypotheses:
614
- result = call_llm_for_reflection(h.text)
615
- h.novelty_review = result["novelty_review"]
616
- h.feasibility_review = result["feasibility_review"]
617
- h.review_comments.append(result["comment"])
618
- h.references.extend(result["references"])
619
- logger.info("Reviewed hypothesis: %s, Novelty: %s, Feasibility: %s", h.hypothesis_id, h.novelty_review, h.feasibility_review)
620
-
621
- class RankingAgent:
622
- def run_tournament(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
623
- """
624
- Runs a tournament among the given hypotheses, updating their Elo scores and recording results.
625
-
626
- Args:
627
- hypotheses (List[Hypothesis]): The list of hypotheses to participate in the tournament.
628
- context (ContextMemory): The current context memory.
629
-
630
- Returns:
631
- None
632
- """
633
- random.shuffle(hypotheses)
634
- pairs = []
635
- for i in range(len(hypotheses)):
636
- for j in range(i + 1, len(hypotheses)):
637
- pairs.append((hypotheses[i], hypotheses[j]))
638
- for hA, hB in pairs:
639
- if hA.is_active and hB.is_active:
640
- winner = run_pairwise_debate(hA, hB)
641
- loser = hB if winner == hA else hA
642
- update_elo(winner, loser)
643
- logger.info("Ran pairwise debate between %s and %s. Winner: %s", hA.hypothesis_id, hB.hypothesis_id, winner.hypothesis_id)
644
- context.tournament_results.append({
645
- "winner": winner.hypothesis_id,
646
- "loser": loser.hypothesis_id,
647
- "winner_score": winner.elo_score,
648
- "loser_score": loser.elo_score
649
- })
650
-
651
- class EvolutionAgent:
652
- def evolve_hypotheses(self, top_k: int, context: ContextMemory) -> List[Hypothesis]:
653
- """
654
- Evolves hypotheses by combining the top-k hypotheses based on Elo score.
655
-
656
- Args:
657
- top_k (int): Requested number of top hypotheses to combine. Note that the current implementation reads config["top_k_hypotheses"] rather than this argument.
658
- context (ContextMemory): The current context memory.
659
-
660
- Returns:
661
- List[Hypothesis]: A list of new, evolved hypotheses. Currently, at most one
662
- new hypothesis is generated by combining the top two.
663
- """
664
- active = context.get_active_hypotheses()
665
- sorted_by_elo = sorted(active, key=lambda h: h.elo_score, reverse=True)
666
- top_candidates = sorted_by_elo[:config["top_k_hypotheses"]]
667
- new_hypotheses = []
668
- if len(top_candidates) >= 2:
669
- new_h = combine_hypotheses(top_candidates[0], top_candidates[1])
670
- logger.info("Evolved hypothesis: %s", new_h.to_dict())
671
- logger.info("top_candidates: %s", [h.to_dict() for h in top_candidates]) # Added logging
672
- new_hypotheses.append(new_h)
673
- return new_hypotheses
674
-
675
- class ProximityAgent:
676
- def build_proximity_graph(self, hypotheses: List[Hypothesis], context: ContextMemory) -> Dict:
677
- """
678
- Builds a proximity graph representing the similarity between hypotheses.
679
-
680
- Args:
681
- hypotheses (List[Hypothesis]): The list of hypotheses (currently unused; active hypotheses are taken from the context instead).
682
- context (ContextMemory): The current context memory.
683
-
684
- Returns:
685
- Dict: A dictionary containing:
686
- - "adjacency_graph": An adjacency list representing the proximity graph.
687
- - "nodes_str": JavaScript string for vis.js nodes.
688
- - "edges_str": JavaScript string for vis.js edges.
689
- """
690
- adjacency = {}
691
- # Ensure we only process active hypotheses if needed, or all as currently done
692
- active_hypotheses = context.get_active_hypotheses() # Use context to get active ones
693
-
694
- for i in range(len(active_hypotheses)):
695
- hypo_i = active_hypotheses[i]
696
- adjacency[hypo_i.hypothesis_id] = []
697
- for j in range(len(active_hypotheses)):
698
- if i == j:
699
- continue
700
- hypo_j = active_hypotheses[j]
701
- # Ensure text is not empty before calculating similarity
702
- if hypo_i.text and hypo_j.text:
703
- sim = similarity_score(hypo_i.text, hypo_j.text)
704
- adjacency[hypo_i.hypothesis_id].append({
705
- "other_id": hypo_j.hypothesis_id,
706
- "similarity": sim
707
- })
708
- else:
709
- logger.warning(f"Skipping similarity for hypothesis {hypo_i.hypothesis_id} or {hypo_j.hypothesis_id} due to empty text.")
710
-
711
-
712
- # Generate the data strings for the graph visualization
713
- visjs_data = generate_visjs_data(adjacency)
714
-
715
- logger.info("Built proximity graph adjacency: %s", adjacency)
716
- return {
717
- "adjacency_graph": adjacency,
718
- "nodes_str": visjs_data["nodes_str"],
719
- "edges_str": visjs_data["edges_str"]
720
- }
721
-
722
- class MetaReviewAgent:
723
- def summarize_and_feedback(self, context: ContextMemory, adjacency: Dict) -> Dict:
724
- """
725
- Summarizes the current state of research and provides feedback.
726
-
727
- Args:
728
- context (ContextMemory): The current context memory.
729
- adjacency (Dict): The proximity graph of hypotheses.
730
-
731
- Returns:
732
- Dict: A dictionary containing a meta-review critique and a research overview
733
- (including top-ranked hypotheses and suggested next steps).
734
- """
735
- comment_summary = set()
- for h in context.get_active_hypotheses():
- if h.novelty_review and "LOW" in h.novelty_review.upper():
- comment_summary.add("Some ideas are not very novel.")
- if h.feasibility_review and "LOW" in h.feasibility_review.upper():
- comment_summary.add("Some ideas may be infeasible.")
744
- best_hypotheses = sorted(context.get_active_hypotheses(), key=lambda h: h.elo_score, reverse=True)[:3]
745
- logger.info("Top hypotheses: %s", [h.to_dict() for h in best_hypotheses])
746
-
747
- overview = {
748
- "meta_review_critique": list(comment_summary),
749
- "research_overview": {
750
- "top_ranked_hypotheses": [h.to_dict() for h in best_hypotheses],
751
- "suggested_next_steps": [
752
- "Conduct further in experiments on top hypotheses.",
753
- "Collect domain expert feedback and refine constraints."
754
- ]
755
- }
756
- }
757
- context.meta_review_feedback.append(overview)
758
- logger.info("Meta-review and feedback: %s", overview)
759
- return overview
760
-
761
- class SupervisorAgent:
762
- def __init__(self):
763
- self.generation_agent = GenerationAgent()
764
- self.reflection_agent = ReflectionAgent()
765
- self.ranking_agent = RankingAgent()
766
- self.evolution_agent = EvolutionAgent()
767
- self.proximity_agent = ProximityAgent()
768
- self.meta_review_agent = MetaReviewAgent()
769
-
770
- def run_cycle(self, research_goal: ResearchGoal, context: ContextMemory) -> Dict:
771
- """
772
- Runs a single cycle of the hypothesis generation, review, ranking, and evolution process.
773
-
774
- Args:
775
- research_goal (ResearchGoal): The research goal.
776
- context (ContextMemory): The current context memory.
777
-
778
- Returns:
779
- Dict: A dictionary containing detailed information about each step of the cycle.
780
- """
781
- logger.info("Starting a new cycle, iteration %d", context.iteration_number + 1)
782
-
783
- # Initialize a dictionary to store cycle details
784
- cycle_details = {
785
- "iteration": context.iteration_number + 1,
786
- "steps": {},
787
- "meta_review": {}
788
- }
789
-
790
- # 1. Generation
791
- new_hypotheses = self.generation_agent.generate_new_hypotheses(research_goal, context)
792
- for nh in new_hypotheses:
793
- context.add_hypothesis(nh)
794
- cycle_details["steps"]["generation"] = {
795
- "hypotheses": [h.to_dict() for h in new_hypotheses]
796
- }
797
-
798
- # 2. Reflection
799
- active_hypos = context.get_active_hypotheses()
800
- self.reflection_agent.review_hypotheses(active_hypos, context)
801
- cycle_details["steps"]["reflection"] = {
802
- "hypotheses": [h.to_dict() for h in active_hypos]
803
- }
804
-
805
- # 3. Ranking (Tournament)
806
- active_hypos = context.get_active_hypotheses()
807
- self.ranking_agent.run_tournament(active_hypos, context)
808
- cycle_details["steps"]["ranking1"] = {
809
- "tournament_results": context.tournament_results,
810
- "hypotheses": [h.to_dict() for h in active_hypos]
811
- }
812
-
813
- # 4. Evolution (Improve top ideas)
814
- new_evolved = self.evolution_agent.evolve_hypotheses(top_k=2, context=context)
815
- for nh in new_evolved:
816
- context.add_hypothesis(nh)
817
- if new_evolved:
818
- self.reflection_agent.review_hypotheses(new_evolved, context)
819
- cycle_details["steps"]["evolution"] = {
820
- "hypotheses": [h.to_dict() for h in new_evolved]
821
- }
822
-
823
- # 5. Ranking again
824
- active_hypos = context.get_active_hypotheses()
825
- self.ranking_agent.run_tournament(active_hypos, context)
826
- cycle_details["steps"]["ranking2"] = {
827
- "tournament_results": context.tournament_results,
828
- "hypotheses": [h.to_dict() for h in active_hypos]
829
-
830
- }
831
-
832
- # 6. Proximity Analysis
833
- # Pass active_hypos directly, ProximityAgent now gets active ones from context
834
- proximity_result = self.proximity_agent.build_proximity_graph(active_hypos, context)
835
- cycle_details["steps"]["proximity"] = {
836
- "adjacency_graph": proximity_result["adjacency_graph"],
837
- "nodes_str": proximity_result["nodes_str"],
838
- "edges_str": proximity_result["edges_str"]
839
- }
840
-
841
- # 7. Meta-review
842
- overview = self.meta_review_agent.summarize_and_feedback(context, proximity_result["adjacency_graph"])
843
- cycle_details["meta_review"] = overview
844
- context.iteration_number += 1
845
-
846
- logger.info("Cycle complete, iteration now %d", context.iteration_number)
847
- return cycle_details
848
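A minimal driver sketch for one cycle (the research goal is hypothetical; this triggers real LLM calls, so the OpenRouter key and `config.yaml` must be set up):

```python
# Illustrative only: run a single generate/review/rank/evolve cycle.
goal = ResearchGoal("Repurpose approved drugs for pulmonary fibrosis", constraints={})
memory = ContextMemory()
details = SupervisorAgent().run_cycle(goal, memory)
print(details["iteration"])           # 1 after the first cycle
print(list(details["steps"].keys()))  # generation, reflection, ranking1, ...
```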
-
849
- ###############################################################################
850
- # FastAPI Application
851
- ###############################################################################
852
-
853
- app = FastAPI(title="AI Co-Scientist System", version="1.0")
854
-
855
- # Global context and supervisor (in production, consider persistent storage)
856
- global_context = ContextMemory()
857
- supervisor = SupervisorAgent()
858
- current_research_goal: Optional[ResearchGoal] = None
859
-
860
- app.mount("/static", StaticFiles(directory="static"), name="static")
861
-
862
- @app.post("/research_goal", response_model=dict)
863
- def set_research_goal(goal: ResearchGoalRequest):
864
- """
865
- Sets the research goal for the AI Co-Scientist.
866
-
867
- Args:
868
- goal (ResearchGoalRequest): The research goal, including a description and optional constraints.
869
-
870
- Returns:
871
- dict: A confirmation message.
872
- """
873
- global current_research_goal, global_context, logger
874
- current_research_goal = ResearchGoal(goal.description, goal.constraints)
875
- # Reset context for new research goal
876
- global_context = ContextMemory()
877
-
878
- # Create a new logger for this submission
879
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
880
- log_filename = f"log_{timestamp}.txt"
881
- logger = setup_logger(log_filename)
882
-
883
- logger.info("Research goal set: %s", goal.description)
884
- return {"message": "Research goal successfully set. Please wait for results. This may take a few minutes. Please be patient."}
885
-
886
- @app.post("/run_cycle")
887
- def run_cycle():
888
- """
889
- Runs a single cycle of hypothesis generation, review, ranking, and evolution.
890
-
891
- Raises:
892
- HTTPException: If no research goal has been set.
893
-
894
- Returns:
895
- Dict: A dictionary containing detailed information about each step of the cycle.
896
- """
897
- global current_research_goal, global_context
898
- if not current_research_goal:
899
- raise HTTPException(status_code=400, detail="No research goal set.")
900
- cycle_details = supervisor.run_cycle(current_research_goal, global_context)
901
- logger.info("Run cycle complete. Overview: %s", cycle_details)
902
- return cycle_details
903
-
904
- @app.get("/hypotheses", response_model=List[HypothesisResponse])
905
- def list_hypotheses():
906
- """
907
- Retrieves a list of all currently active hypotheses.
908
-
909
- Returns:
910
- List[HypothesisResponse]: A list of active hypotheses, each including its ID, title, text,
911
- novelty/feasibility reviews, Elo score, comments, references,
912
- and active status.
913
- """
914
- global global_context
915
- return [HypothesisResponse(**h.to_dict()) for h in global_context.get_active_hypotheses()]
916
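The endpoints above can also be exercised programmatically; a hypothetical client sketch (assumes the server is running locally and the third-party `requests` package is installed):

```python
import requests

base = "http://localhost:8000"
requests.post(f"{base}/research_goal",
              json={"description": "Identify new antibiotic targets"})
cycle = requests.post(f"{base}/run_cycle").json()
print(cycle["iteration"])
hypotheses = requests.get(f"{base}/hypotheses").json()
print([h["id"] for h in hypotheses])
```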
-
917
- @app.get("/")
918
- async def root():
919
- """
920
- Root endpoint for the API. Returns an HTML page with a form to input the research goal.
921
- """
922
- return responses.HTMLResponse(content="""
923
- <!DOCTYPE html>
924
- <html>
925
- <head>
926
- <title>AI Co-Scientist</title>
927
- <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
928
- <style>
929
- #mynetwork {
930
- width: 100%;
931
- height: 500px; /* Explicit height for the graph container */
932
- border: 1px solid lightgray;
933
- margin-bottom: 20px; /* Add some space below the graph */
934
- }
935
- .graph-container p { /* Style the explanation text */
936
- margin-top: 5px;
937
- font-size: 0.9em;
938
- color: #555;
939
- }
940
- </style>
941
- </head>
942
- <body>
943
- <h1>Welcome to the AI Co-Scientist System</h1>
944
- <p>Set your research goal and run cycles to generate hypotheses.</p>
945
-
946
- <label for="researchGoal">Research Goal:</label><br>
947
- <textarea id="researchGoal" name="researchGoal" rows="4" cols="50"></textarea><br><br>
948
- <button onclick="submitResearchGoal()">Submit Research Goal</button>
949
-
950
- <h2>Results</h2>
951
- <div id="results"></div>
952
-
953
- <h2>Errors</h2>
954
- <div id="errors" style="color: red;"></div>
955
-
956
- <script>
957
- async function submitResearchGoal() {
958
- const researchGoal = document.getElementById('researchGoal').value;
959
- const response = await fetch('/research_goal', {
960
- method: 'POST',
961
- headers: {
962
- 'Content-Type': 'application/json'
963
- },
964
- body: JSON.stringify({ description: researchGoal })
965
- });
966
-
967
- // Clear previous errors
968
- document.getElementById('errors').innerHTML = '';
969
-
970
- if (!response.ok) {
971
- const errorData = await response.json();
972
- document.getElementById('errors').innerHTML = `<p>Error: ${errorData.detail}</p>`;
973
- return; // Stop execution if there's an error
974
- }
975
-
976
- const data = await response.json();
977
- document.getElementById('results').innerHTML = `<p>${data.message}</p>`;
978
- runCycle(); // Automatically run a cycle after setting the goal
979
- }
980
-
981
- async function runCycle() {
982
- const response = await fetch('/run_cycle', { method: 'POST' });
983
-
984
- // Clear previous errors
985
- document.getElementById('errors').innerHTML = '';
986
-
987
- if (!response.ok) {
988
- const errorData = await response.json();
989
- document.getElementById('errors').innerHTML = `<p>Error: ${errorData.detail}</p>`;
990
- return; // Stop execution if there's an error
991
- }
992
-
993
- const data = await response.json();
994
-
995
- let resultsHTML = `<h3>Iteration: ${data.iteration}</h3>`;
996
-
997
- // Define step explanations
998
- const stepExplanations = {
999
- generation: "Generates new hypotheses based on the research goal and current context.",
1000
- reflection: "Reviews the generated hypotheses for novelty and feasibility.",
1001
- ranking1: "Ranks hypotheses based on a pairwise comparison (tournament).",
1002
- evolution: "Combines the top-ranked hypotheses to create new, evolved hypotheses.",
1003
- ranking2: "Ranks hypotheses again after the evolution step.",
1004
- proximity: "Analyzes the similarity between hypotheses.",
1005
- };
1006
-
1007
- // Display details for each step
1008
- for (const stepName in data.steps) {
1009
- if (data.steps.hasOwnProperty(stepName)) {
1010
- const step = data.steps[stepName];
1011
- resultsHTML += `<h4>Step: ${stepName}</h4>`;
1012
-
1013
- // Add explanation if available
1014
- if (stepExplanations[stepName]) {
1015
- resultsHTML += `<p>${stepExplanations[stepName]}</p>`;
1016
- }
1017
-
1018
- if (step.hypotheses) {
1019
- resultsHTML += `<h5>Hypotheses:</h5><ul>`;
1020
- step.hypotheses.sort((a, b) => b.elo_score - a.elo_score).forEach(hypo => {
1021
- resultsHTML += `<li>
1022
- <strong>${hypo.title}</strong> (ID: ${hypo.id}, Elo: ${hypo.elo_score.toFixed(2)})<br>`;
1023
- if (hypo.parent_ids && hypo.parent_ids.length > 0) {
1024
- resultsHTML += `<em>Parent IDs: ${hypo.parent_ids.join(', ')}</em><br>`;
1025
- }
1026
- resultsHTML += `<p>${hypo.text}</p>`;
1027
- if (hypo.novelty_review) {
1028
- resultsHTML += `<p>Novelty: ${hypo.novelty_review}</p>`;
1029
- }
1030
- if (hypo.feasibility_review){
1031
- resultsHTML += `<p>Feasibility: ${hypo.feasibility_review}</p>`;
1032
- }
1033
-
1034
- if (hypo.review_comments && hypo.review_comments.length > 0) {
1035
- resultsHTML += `<p>Review Comments:</p><ul>`;
1036
- hypo.review_comments.forEach(comment => {
1037
- resultsHTML += `<li>${comment}</li>`;
1038
- });
1039
- resultsHTML += `</ul>`;
1040
- }
1041
- if (hypo.references && hypo.references.length > 0) {
1042
- resultsHTML += `<p>References:</p><ul>`;
1043
- hypo.references.forEach(ref => {
1044
- resultsHTML += `<li>${ref}</li>`;
1045
- });
1046
- resultsHTML += `</ul>`;
1047
- }
1048
- resultsHTML += `</li>`;
1049
-
1050
- });
1051
- resultsHTML += `</ul>`;
1052
- }
1053
- if (stepName.startsWith("ranking") && step.tournament_results){
1054
- resultsHTML += '<h5>Ranking Results</h5>';
1055
- resultsHTML += '<ul>';
1056
- for (let i = 0; i < step.tournament_results.length; i++){
1057
- const result = step.tournament_results[i];
1058
- resultsHTML += `<li>${result.winner} beat ${result.loser}</li>`;
1059
- }
1060
- resultsHTML += '</ul>';
1061
- }
1062
-
1063
- // Handle graph data from proximity step
1064
- if (stepName === "proximity" && step.nodes_str && step.edges_str) {
1065
- resultsHTML += `<h5>Hypothesis Similarity Graph:</h5>`;
1066
- // Add the container div for the graph
1067
- resultsHTML += `<div id="mynetwork"></div>`;
1068
- resultsHTML += `<p>
1069
- <b>How to read the graph:</b><br>
1070
- - Each node (circle) represents a hypothesis.<br>
1071
- - Lines (edges) between nodes indicate a relationship.<br>
1072
- - The number on each edge represents the similarity score between the connected nodes. Higher numbers mean greater similarity. Only similarities above 0.2 are shown.<br>
1073
- </p>`;
1074
- // Store data for later initialization
1075
- graphData = { nodesStr: step.nodes_str, edgesStr: step.edges_str };
1076
- } else if (stepName === "proximity" && step.adjacency_graph) {
1077
- // Fallback if only adjacency graph is available
1078
- resultsHTML += `<p>Adjacency Graph (raw): ${JSON.stringify(step.adjacency_graph)}</p>`;
1079
- }
1080
- }
1081
- }
1082
-
1083
- // Display meta-review information
1084
- if (data.meta_review.meta_review_critique && data.meta_review.meta_review_critique.length > 0) {
1085
- resultsHTML += `<h4>Meta-Review Critique:</h4><ul>`;
1086
- data.meta_review.meta_review_critique.forEach(item => {
1087
- resultsHTML += `<li>${item}</li>`;
1088
- });
1089
- resultsHTML += `</ul>`;
1090
- }
1091
-
1092
- if (data.meta_review.research_overview && data.meta_review.research_overview.suggested_next_steps.length > 0) {
1093
- resultsHTML += `<h4>Suggested Next Steps:</h4><ul>`;
1094
- data.meta_review.research_overview.suggested_next_steps.forEach(item => {
1095
- resultsHTML += `<li>${item}</li>`;
1096
- });
1097
- resultsHTML += `</ul>`;
1098
- }
1099
-
1100
- document.getElementById('results').innerHTML = resultsHTML;
1101
-
1102
- // Initialize the graph if data is available
1103
- if (typeof graphData !== 'undefined' && graphData.nodesStr && graphData.edgesStr) {
1104
- initializeGraph(graphData.nodesStr, graphData.edgesStr);
1105
- }
1106
- }
1107
-
1108
- // Function to initialize the Vis.js graph
1109
- function initializeGraph(nodesStr, edgesStr) {
1110
- try {
1111
- // IMPORTANT: Need to parse the string data into actual JS arrays/objects
1112
- // This assumes the strings are valid JS array content (e.g., "{id: 'H1'}, {id: 'H2'}")
1113
- // We wrap them in [] and use Function constructor for safe evaluation
1114
- const nodesArray = new Function(`return [${nodesStr}]`)();
1115
- const edgesArray = new Function(`return [${edgesStr}]`)();
1116
-
1117
- var nodes = new vis.DataSet(nodesArray);
1118
- var edges = new vis.DataSet(edgesArray);
1119
-
1120
- var container = document.getElementById('mynetwork');
1121
- if (!container) {
1122
- console.error("Graph container #mynetwork not found!");
1123
- return;
1124
- }
1125
- var data = {
1126
- nodes: nodes,
1127
- edges: edges
1128
- };
1129
- var options = {
1130
- edges: {
1131
- smooth: {
1132
- enabled: true,
1133
- type: "dynamic",
1134
- },
1135
- font: {
1136
- size: 12,
1137
- align: 'middle'
1138
- }
1139
- },
1140
- nodes: {
1141
- shape: 'circle',
1142
- font: {
1143
- size: 14
1144
- }
1145
- },
1146
- physics: { // Add physics for better layout
1147
- stabilization: true,
1148
- barnesHut: {
1149
- gravitationalConstant: -2000,
1150
- centralGravity: 0.3,
1151
- springLength: 150,
1152
- springConstant: 0.04,
1153
- }
1154
- }
1155
- };
1156
- var network = new vis.Network(container, data, options);
1157
- } catch (e) {
1158
- console.error("Error initializing Vis.js graph:", e);
1159
- document.getElementById('errors').innerHTML += `<p>Error initializing graph: ${e.message}</p>`;
1160
- }
1161
- }
1162
- </script>
1163
- </body>
1164
- </html>
1165
- """)
1166
-
1167
-
1168
- ###############################################################################
1169
- # Main Entrypoint
1170
- ###############################################################################
1171
-
1172
- if __name__ == "__main__":
1173
- # Run with: uvicorn main:app --host 0.0.0.0 --port 8000
1174
- uvicorn.run("main:app", host=config["fastapi_host"], port=config["fastapi_port"], reload=False)