Chunhua Liao committed on
Commit 9a12a1d · 1 Parent(s): 208f2ab

Refactor: Move application code into 'app' package

Files changed (9)
  1. README.md +5 -2
  2. app/__init__.py +1 -0
  3. app/agents.py +414 -0
  4. app/api.py +315 -0
  5. app/config.py +37 -0
  6. app/main.py +40 -0
  7. app/models.py +90 -0
  8. app/utils.py +174 -0
  9. main.py +0 -1174
README.md CHANGED
@@ -48,9 +48,12 @@ Original code was generated by o3-mini-high
    ```
    3. **Run the Application:**
    ```bash
-   python main.py
+   # Run using Uvicorn, specifying the app location within the package
+   uvicorn app.api:app --host 0.0.0.0 --port 8000
    ```
-   (Alternatively, if you want hot-reloading during development: `uvicorn main:app --host 0.0.0.0 --port 8000 --reload`)
+   (Alternatively, if you want hot-reloading during development: `uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload`)
+
+   (You can also potentially run `python -m app.main` if the `app/main.py` is set up correctly for module execution, but the `uvicorn` command is more standard for FastAPI.)
    4. **Access the Web Interface:**
    Open a web browser and go to `http://localhost:8000`. (Note: The server log may show `http://0.0.0.0:8000`, which means the server is listening on all network interfaces. However, you should use `localhost` in your browser to access the server from your local machine. You cannot directly type `0.0.0.0` into your browser's address bar.)
    5. **Enter Research Goal:**
app/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # This file makes the 'app' directory a Python package.
app/agents.py ADDED
@@ -0,0 +1,414 @@
1
+ import random
2
+ import math
3
+ import json
4
+ from typing import List, Dict
5
+
6
+ # Import necessary components from other modules
7
+ from .models import Hypothesis, ResearchGoal, ContextMemory
8
+ from .utils import (
9
+ logger, # Use the logger configured in utils
10
+ call_llm,
11
+ generate_unique_id,
12
+ similarity_score,
13
+ generate_visjs_data
14
+ )
15
+ from .config import config
16
+
17
+ # --- Agent-Specific LLM Calls (Moved from main.py/utils.py for better cohesion) ---
18
+
19
+ def call_llm_for_generation(prompt: str, num_hypotheses: int = 3) -> List[Dict]:
20
+ """Calls LLM for generating hypotheses, handling JSON parsing."""
21
+ logger.info("LLM generation called with prompt: %s, num_hypotheses: %d", prompt, num_hypotheses)
22
+ full_prompt = prompt + "\n\nPlease return the response as a JSON array of objects, where each object has a 'title' and 'text' key."
23
+
24
+ response = call_llm(full_prompt, temperature=config.get("step_temperatures", {}).get("generation", 0.7))
25
+ logger.info("LLM generation response: %s", response)
26
+
27
+ if response.startswith("Error:"):
28
+ logger.error(f"LLM generation call failed: {response}")
29
+ return [{"title": "Error", "text": response}]
30
+
31
+ try:
32
+ response = response.strip()
33
+ if response.startswith("```json"):
34
+ response = response[7:]
35
+ if response.endswith("```"):
36
+ response = response[:-3]
37
+ response = response.strip()
38
+
39
+ hypotheses_data = json.loads(response)
40
+
41
+ if not isinstance(hypotheses_data, list) or not all(isinstance(h, dict) and "title" in h and "text" in h for h in hypotheses_data):
42
+ error_message = "Invalid JSON format: Expected a list of objects with 'title' and 'text' keys."
43
+ raise ValueError(error_message)
44
+ logger.info("Parsed generated hypotheses: %s", hypotheses_data)
45
+ return hypotheses_data
46
+ except (json.JSONDecodeError, ValueError) as e:
47
+ logger.error("Could not parse LLM generation response as JSON: %s", response, exc_info=True)
48
+ return [{"title": "Error", "text": f"Could not parse LLM response: {e}"}]
49
+
50
+ def call_llm_for_reflection(hypothesis_text: str) -> Dict:
51
+ """Calls LLM for reviewing a hypothesis, handling JSON parsing."""
52
+ prompt = (
53
+ f"Review the following hypothesis and provide a novelty assessment (HIGH, MEDIUM, or LOW), "
54
+ f"a feasibility assessment (HIGH, MEDIUM, or LOW), a comment, and a list of references (PMIDs) in JSON format:\n\n"
55
+ f"Hypothesis: {hypothesis_text}\n\n"
56
+ f"Return the response as a JSON object with the following keys: 'novelty_review', 'feasibility_review', 'comment', 'references'."
57
+ )
58
+ response = call_llm(prompt, temperature=config.get("step_temperatures", {}).get("reflection", 0.5)) # Example: different temp
59
+ logger.info("LLM reflection response for hypothesis: %s", response)
60
+
61
+ if response.startswith("Error:"):
62
+ logger.error(f"LLM reflection call failed: {response}")
63
+ return {"novelty_review": "ERROR", "feasibility_review": "ERROR", "comment": response, "references": []}
64
+
65
+ # Default values
66
+ review_data = {
67
+ "novelty_review": "MEDIUM",
68
+ "feasibility_review": "MEDIUM",
69
+ "comment": "Could not parse LLM response.",
70
+ "references": [],
71
+ }
72
+
73
+ try:
74
+ response = response.strip()
75
+ if response.startswith("```json"):
76
+ response = response[7:]
77
+ if response.endswith("```"):
78
+ response = response[:-3]
79
+ response = response.strip()
80
+
81
+ parsed_data = json.loads(response)
82
+
83
+ # Update defaults with parsed data, performing basic validation
84
+ novelty = parsed_data.get("novelty_review", "MEDIUM").upper()
85
+ if novelty in ["HIGH", "MEDIUM", "LOW"]:
86
+ review_data["novelty_review"] = novelty
87
+ else:
88
+ logger.warning("Invalid novelty review value received: %s", novelty)
89
+
90
+ feasibility = parsed_data.get("feasibility_review", "MEDIUM").upper()
91
+ if feasibility in ["HIGH", "MEDIUM", "LOW"]:
92
+ review_data["feasibility_review"] = feasibility
93
+ else:
94
+ logger.warning("Invalid feasibility review value received: %s", feasibility)
95
+
96
+ review_data["comment"] = parsed_data.get("comment", "No comment provided.")
97
+ review_data["references"] = parsed_data.get("references", [])
98
+ if not isinstance(review_data["references"], list):
99
+ logger.warning("Invalid references format received: %s", review_data["references"])
100
+ review_data["references"] = []
101
+
102
+
103
+ except (json.JSONDecodeError, AttributeError, KeyError) as e:
104
+ logger.warning("Error parsing LLM reflection response: %s", response, exc_info=True)
105
+ review_data["comment"] = f"Could not parse LLM response: {e}" # Update comment with error
106
+
107
+ logger.info("Parsed reflection data: %s", review_data)
108
+ return review_data
109
+
110
+
111
+ # --- Ranking Helpers (Moved from main.py) ---
112
+
113
+ def run_pairwise_debate(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
114
+ """Compares two hypotheses based on novelty and feasibility scores."""
115
+ def score(h: Hypothesis) -> int:
116
+ mapping = {"HIGH": 3, "MEDIUM": 2, "LOW": 1, None: 0, "ERROR": 0} # Handle ERROR case
117
+ score_novelty = mapping.get(h.novelty_review, 0) if isinstance(h.novelty_review, str) else 0
118
+ score_feasibility = mapping.get(h.feasibility_review, 0) if isinstance(h.feasibility_review, str) else 0
119
+ return score_novelty + score_feasibility
120
+
121
+ scoreA = score(hypoA)
122
+ scoreB = score(hypoB)
123
+
124
+ if scoreA > scoreB:
125
+ winner = hypoA
126
+ elif scoreB > scoreA:
127
+ winner = hypoB
128
+ else:
129
+ winner = random.choice([hypoA, hypoB]) # Tie-breaker
130
+
131
+ logger.info("Debate: %s (score %d) vs %s (score %d) => Winner: %s",
132
+ hypoA.hypothesis_id, scoreA, hypoB.hypothesis_id, scoreB, winner.hypothesis_id)
133
+ return winner
134
+
135
+ def update_elo(winner: Hypothesis, loser: Hypothesis):
136
+ """Updates Elo scores after a comparison."""
137
+ k_factor = config.get("elo_k_factor", 32)
138
+ ratingA = winner.elo_score
139
+ ratingB = loser.elo_score
140
+ expectedA = 1 / (1 + math.pow(10, (ratingB - ratingA) / 400))
141
+ expectedB = 1 - expectedA # Or 1 / (1 + math.pow(10, (ratingA - ratingB) / 400))
142
+ winner.elo_score = ratingA + k_factor * (1 - expectedA)
143
+ loser.elo_score = ratingB + k_factor * (0 - expectedB) # Loser's score update
144
+ logger.info("Updated Elo: Winner %s -> %.2f, Loser %s -> %.2f",
145
+ winner.hypothesis_id, winner.elo_score, loser.hypothesis_id, loser.elo_score)
146
+
147
+ # --- Evolution Helper (Moved from main.py) ---
148
+
149
+ def combine_hypotheses(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
150
+ """Combines two hypotheses into a new one."""
151
+ new_id = generate_unique_id("E") # Use utility function
152
+ combined_title = f"Combined: {hypoA.title} & {hypoB.title}"
153
+ # Consider a more sophisticated combination prompt/logic if needed
154
+ combined_text = f"Combination of:\n1. {hypoA.text}\n2. {hypoB.text}"
155
+ logger.info("Combining hypotheses %s and %s into %s", hypoA.hypothesis_id, hypoB.hypothesis_id, new_id)
156
+ new_hypothesis = Hypothesis(new_id, combined_title, combined_text)
157
+ new_hypothesis.parent_ids = [hypoA.hypothesis_id, hypoB.hypothesis_id]
158
+ return new_hypothesis
159
+
160
+
161
+ ###############################################################################
162
+ # Agent Implementations
163
+ ###############################################################################
164
+
165
+ class GenerationAgent:
166
+ def generate_new_hypotheses(self, research_goal: ResearchGoal, context: ContextMemory) -> List[Hypothesis]:
167
+ """Generates new hypotheses using LLM."""
168
+ num_to_generate = config.get("num_hypotheses", 3)
169
+ prompt = (
170
+ f"Research Goal: {research_goal.description}\n"
171
+ f"Constraints: {research_goal.constraints}\n"
172
+ f"Existing Hypothesis IDs: {list(context.hypotheses.keys())}\n" # Provide context
173
+ f"Please propose {num_to_generate} novel and feasible hypotheses with rationale, avoiding duplication with existing IDs.\n"
174
+ )
175
+ raw_output = call_llm_for_generation(prompt, num_hypotheses=num_to_generate)
176
+ new_hypos = []
177
+ for idea in raw_output:
178
+ # Check for error response from LLM call
179
+ if idea["title"] == "Error":
180
+ logger.error("Skipping hypothesis generation due to LLM error: %s", idea["text"])
181
+ continue # Skip this one, maybe add placeholder?
182
+
183
+ hypo_id = generate_unique_id("G")
184
+ # Ensure ID is unique within the current context
185
+ while hypo_id in context.hypotheses:
186
+ hypo_id = generate_unique_id("G")
187
+ h = Hypothesis(hypo_id, idea["title"], idea["text"])
188
+ logger.info("Generated hypothesis: %s", h.to_dict())
189
+ new_hypos.append(h)
190
+ return new_hypos
191
+
192
+ class ReflectionAgent:
193
+ def review_hypotheses(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
194
+ """Reviews hypotheses using LLM."""
195
+ for h in hypotheses:
196
+ # Avoid re-reviewing if already reviewed (optional optimization)
197
+ # if h.novelty_review is not None and h.feasibility_review is not None:
198
+ # continue
199
+ result = call_llm_for_reflection(h.text)
200
+ h.novelty_review = result["novelty_review"]
201
+ h.feasibility_review = result["feasibility_review"]
202
+ # Append comment only if it's not the default error message
203
+ if result["comment"] != "Could not parse LLM response.":
204
+ h.review_comments.append(result["comment"])
205
+ # Only extend references if the list is not empty
206
+ if result["references"]:
207
+ h.references.extend(result["references"])
208
+ logger.info("Reviewed hypothesis: %s, Novelty: %s, Feasibility: %s", h.hypothesis_id, h.novelty_review, h.feasibility_review)
209
+
210
+ class RankingAgent:
211
+ def run_tournament(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
212
+ """Runs a pairwise tournament to rank hypotheses."""
213
+ if len(hypotheses) < 2:
214
+ logger.info("Not enough hypotheses to run a tournament.")
215
+ return
216
+
217
+ active_hypotheses = [h for h in hypotheses if h.is_active]
218
+ if len(active_hypotheses) < 2:
219
+ logger.info("Not enough *active* hypotheses to run a tournament.")
220
+ return
221
+
222
+ random.shuffle(active_hypotheses) # Shuffle only active ones
223
+
224
+ # Simple round-robin: each active hypothesis debates every other active one once
225
+ pairs = []
226
+ for i in range(len(active_hypotheses)):
227
+ for j in range(i + 1, len(active_hypotheses)):
228
+ pairs.append((active_hypotheses[i], active_hypotheses[j]))
229
+
230
+ logger.info(f"Running tournament with {len(pairs)} pairs.")
231
+ for hA, hB in pairs:
232
+ winner = run_pairwise_debate(hA, hB)
233
+ loser = hB if winner == hA else hA
234
+ update_elo(winner, loser)
235
+ # Record result in context (consider if this needs iteration info)
236
+ context.tournament_results.append({
237
+ "iteration": context.iteration_number, # Add iteration number
238
+ "winner": winner.hypothesis_id,
239
+ "loser": loser.hypothesis_id,
240
+ "winner_score_after": winner.elo_score,
241
+ "loser_score_after": loser.elo_score
242
+ })
243
+
244
+ class EvolutionAgent:
245
+ def evolve_hypotheses(self, context: ContextMemory) -> List[Hypothesis]:
246
+ """Evolves hypotheses by combining top candidates."""
247
+ top_k = config.get("top_k_hypotheses", 2)
248
+ active = context.get_active_hypotheses()
249
+ if len(active) < 2:
250
+ logger.info("Not enough active hypotheses to perform evolution.")
251
+ return []
252
+
253
+ sorted_by_elo = sorted(active, key=lambda h: h.elo_score, reverse=True)
254
+ top_candidates = sorted_by_elo[:top_k]
255
+
256
+ new_hypotheses = []
257
+ # Combine the top two for now, could be extended
258
+ if len(top_candidates) >= 2:
259
+ # Optional: Add check to prevent combining very similar hypotheses
260
+ # sim = similarity_score(top_candidates[0].text, top_candidates[1].text)
261
+ # if sim < 0.8: # Example threshold
262
+ new_h = combine_hypotheses(top_candidates[0], top_candidates[1])
263
+ logger.info("Evolved hypothesis created: %s from parents %s", new_h.hypothesis_id, new_h.parent_ids)
264
+ new_hypotheses.append(new_h)
265
+ # else:
266
+ # logger.info("Skipping evolution: Top 2 hypotheses are too similar (score: %.2f)", sim)
267
+
268
+ return new_hypotheses
269
+
270
+ class ProximityAgent:
271
+ def build_proximity_graph(self, context: ContextMemory) -> Dict:
272
+ """Builds proximity graph data based on hypothesis similarity."""
273
+ active_hypotheses = context.get_active_hypotheses()
274
+ adjacency = {}
275
+ if not active_hypotheses:
276
+ logger.info("No active hypotheses to build proximity graph.")
277
+ return {"adjacency_graph": {}, "nodes_str": "", "edges_str": ""}
278
+
279
+ for i in range(len(active_hypotheses)):
280
+ hypo_i = active_hypotheses[i]
281
+ adjacency[hypo_i.hypothesis_id] = []
282
+ for j in range(len(active_hypotheses)):
283
+ if i == j:
284
+ continue
285
+ hypo_j = active_hypotheses[j]
286
+ if hypo_i.text and hypo_j.text:
287
+ sim = similarity_score(hypo_i.text, hypo_j.text)
288
+ adjacency[hypo_i.hypothesis_id].append({
289
+ "other_id": hypo_j.hypothesis_id,
290
+ "similarity": sim
291
+ })
292
+ else:
293
+ logger.warning(f"Skipping similarity for {hypo_i.hypothesis_id} or {hypo_j.hypothesis_id} due to empty text.")
294
+
295
+ visjs_data = generate_visjs_data(adjacency) # Use utility function
296
+ logger.info("Built proximity graph adjacency with %d nodes.", len(active_hypotheses))
297
+ return {
298
+ "adjacency_graph": adjacency,
299
+ "nodes_str": visjs_data["nodes_str"],
300
+ "edges_str": visjs_data["edges_str"]
301
+ }
302
+
303
+ class MetaReviewAgent:
304
+ def summarize_and_feedback(self, context: ContextMemory, adjacency: Dict) -> Dict:
305
+ """Summarizes research state and provides feedback."""
306
+ active_hypotheses = context.get_active_hypotheses()
307
+ if not active_hypotheses:
308
+ return {"meta_review_critique": ["No active hypotheses."], "research_overview": {"top_ranked_hypotheses": [], "suggested_next_steps": []}}
309
+
310
+ comment_summary = set()
311
+ for h in active_hypotheses:
312
+ # Example critique based on reviews
313
+ if h.novelty_review == "LOW":
314
+ comment_summary.add("Some ideas lack novelty.")
315
+ if h.feasibility_review == "LOW":
316
+ comment_summary.add("Some ideas may have low feasibility.")
317
+ # Could add critiques based on adjacency graph (e.g., clusters, outliers)
318
+
319
+ best_hypotheses = sorted(active_hypotheses, key=lambda h: h.elo_score, reverse=True)[:3]
320
+ logger.info("Top hypotheses for meta-review: %s", [h.hypothesis_id for h in best_hypotheses])
321
+
322
+ # Example suggested next steps
323
+ next_steps = [
324
+ "Refine top hypotheses based on review comments.",
325
+ "Consider exploring areas with fewer, less connected hypotheses (if any).",
326
+ "Seek external expert feedback on top candidates."
327
+ ]
328
+ if not comment_summary:
329
+ comment_summary.add("Overall hypothesis quality seems reasonable based on automated review.")
330
+
331
+
332
+ overview = {
333
+ "meta_review_critique": list(comment_summary),
334
+ "research_overview": {
335
+ "top_ranked_hypotheses": [h.to_dict() for h in best_hypotheses], # Use to_dict for serialization
336
+ "suggested_next_steps": next_steps
337
+ }
338
+ }
339
+ context.meta_review_feedback.append(overview) # Store feedback in context
340
+ logger.info("Meta-review complete: %s", overview)
341
+ return overview
342
+
343
+ class SupervisorAgent:
344
+ """Orchestrates the AI Co-Scientist workflow."""
345
+ def __init__(self):
346
+ self.generation_agent = GenerationAgent()
347
+ self.reflection_agent = ReflectionAgent()
348
+ self.ranking_agent = RankingAgent()
349
+ self.evolution_agent = EvolutionAgent()
350
+ self.proximity_agent = ProximityAgent()
351
+ self.meta_review_agent = MetaReviewAgent()
352
+
353
+ def run_cycle(self, research_goal: ResearchGoal, context: ContextMemory) -> Dict:
354
+ """Runs a single cycle of hypothesis generation and refinement."""
355
+ logger.info("--- Starting Cycle %d ---", context.iteration_number + 1)
356
+ cycle_details = {"iteration": context.iteration_number + 1, "steps": {}, "meta_review": {}}
357
+
358
+ # 1. Generation
359
+ logger.info("Step 1: Generation")
360
+ new_hypotheses = self.generation_agent.generate_new_hypotheses(research_goal, context)
361
+ for nh in new_hypotheses:
362
+ context.add_hypothesis(nh) # Add to central context
363
+ cycle_details["steps"]["generation"] = {"hypotheses": [h.to_dict() for h in new_hypotheses]}
364
+
365
+ # Get all active hypotheses for subsequent steps
366
+ active_hypos = context.get_active_hypotheses()
367
+
368
+ # 2. Reflection
369
+ logger.info("Step 2: Reflection")
370
+ self.reflection_agent.review_hypotheses(active_hypos, context) # Review all active hypotheses
371
+ cycle_details["steps"]["reflection"] = {"hypotheses": [h.to_dict() for h in active_hypos]} # Log state after review
372
+
373
+ # 3. Ranking (Tournament 1)
374
+ logger.info("Step 3: Ranking 1")
375
+ self.ranking_agent.run_tournament(active_hypos, context)
376
+ # Log state after ranking (Elo scores updated)
377
+ cycle_details["steps"]["ranking1"] = {"hypotheses": [h.to_dict() for h in active_hypos]}
378
+
379
+ # 4. Evolution
380
+ logger.info("Step 4: Evolution")
381
+ evolved_hypotheses = self.evolution_agent.evolve_hypotheses(context)
382
+ if evolved_hypotheses:
383
+ for eh in evolved_hypotheses:
384
+ context.add_hypothesis(eh)
385
+ # 4a. Review newly evolved hypotheses immediately
386
+ logger.info("Step 4a: Reviewing Evolved Hypotheses")
387
+ self.reflection_agent.review_hypotheses(evolved_hypotheses, context)
388
+ # Update active list for next steps
389
+ active_hypos = context.get_active_hypotheses()
390
+ cycle_details["steps"]["evolution"] = {"hypotheses": [h.to_dict() for h in evolved_hypotheses]}
391
+
392
+ # 5. Ranking (Tournament 2 - includes evolved)
393
+ logger.info("Step 5: Ranking 2")
394
+ self.ranking_agent.run_tournament(active_hypos, context)
395
+ cycle_details["steps"]["ranking2"] = {"hypotheses": [h.to_dict() for h in active_hypos]}
396
+
397
+ # 6. Proximity Analysis
398
+ logger.info("Step 6: Proximity Analysis")
399
+ proximity_result = self.proximity_agent.build_proximity_graph(context) # Pass context
400
+ cycle_details["steps"]["proximity"] = {
401
+ "adjacency_graph": proximity_result["adjacency_graph"],
402
+ "nodes_str": proximity_result["nodes_str"],
403
+ "edges_str": proximity_result["edges_str"]
404
+ }
405
+
406
+ # 7. Meta-review
407
+ logger.info("Step 7: Meta-Review")
408
+ overview = self.meta_review_agent.summarize_and_feedback(context, proximity_result["adjacency_graph"])
409
+ cycle_details["meta_review"] = overview
410
+
411
+ # Increment iteration number at the end of the cycle
412
+ context.iteration_number += 1
413
+ logger.info("--- Cycle %d Complete ---", context.iteration_number)
414
+ return cycle_details
app/api.py ADDED
@@ -0,0 +1,315 @@
1
+ import datetime
2
+ from typing import List, Optional, Dict
3
+
4
+ from fastapi import FastAPI, HTTPException, responses
5
+ from fastapi.staticfiles import StaticFiles
6
+
7
+ # Import components from other modules in the package
8
+ from .models import (
9
+ ContextMemory, ResearchGoal, ResearchGoalRequest,
10
+ HypothesisResponse, Hypothesis # Hypothesis needed by ContextMemory
11
+ )
12
+ from .agents import SupervisorAgent
13
+ from .utils import logger # Use the configured logger
14
+ # from .config import config # Config might be needed if endpoints use it directly
15
+
16
+ ###############################################################################
17
+ # FastAPI Application Setup
18
+ ###############################################################################
19
+
20
+ app = FastAPI(title="AI Co-Scientist System", version="1.0")
21
+
22
+ # --- Global State (Consider alternatives for production) ---
23
+ # These globals make the app stateful, which can be problematic for scaling.
24
+ # For simple cases or demos, it might be acceptable.
25
+ # Alternatives: Dependency Injection with classes, external storage (DB, Redis).
26
+ global_context = ContextMemory()
27
+ supervisor = SupervisorAgent()
28
+ current_research_goal: Optional[ResearchGoal] = None
29
+
30
+ # --- Static Files ---
31
+ # Assuming a 'static' directory exists at the project root
32
+ # If it should be inside 'app', adjust the path: StaticFiles(directory="app/static")
33
+ try:
34
+ app.mount("/static", StaticFiles(directory="static"), name="static")
35
+ logger.info("Mounted static files directory.")
36
+ except RuntimeError as e:
37
+ logger.warning(f"Could not mount static directory (may not exist): {e}")
38
+
39
+
40
+ ###############################################################################
41
+ # API Endpoints
42
+ ###############################################################################
43
+
44
+ @app.post("/research_goal", response_model=dict)
45
+ def set_research_goal(goal: ResearchGoalRequest):
46
+ """Sets the research goal and resets the context."""
47
+ global current_research_goal, global_context
48
+ logger.info(f"Received new research goal: {goal.description}")
49
+ current_research_goal = ResearchGoal(goal.description, goal.constraints)
50
+ # Reset context for the new goal
51
+ global_context = ContextMemory()
52
+ logger.info("Global context reset for new research goal.")
53
+ # Note: Logger setup per request might be better handled via middleware or dependency
54
+ # timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
55
+ # log_filename = f"log_{timestamp}.txt" # This will create many log files
56
+ # setup_logger(log_filename) # Consider if logger needs reconfiguration per goal
57
+
58
+ return {"message": "Research goal successfully set. Ready to run cycles."}
59
+
60
+ @app.post("/run_cycle", response_model=Dict) # Return type might be more specific, e.g., CycleResponse
61
+ def run_cycle_endpoint():
62
+ """Runs a single cycle of the AI Co-Scientist workflow."""
63
+ global current_research_goal, global_context, supervisor
64
+ if not current_research_goal:
65
+ logger.error("Run cycle called before setting research goal.")
66
+ raise HTTPException(status_code=400, detail="No research goal set. Please POST to /research_goal first.")
67
+
68
+ logger.info(f"Running cycle {global_context.iteration_number + 1} for goal: {current_research_goal.description}")
69
+ try:
70
+ # The supervisor agent now handles the full cycle logic
71
+ cycle_details = supervisor.run_cycle(current_research_goal, global_context)
72
+ logger.info(f"Cycle {global_context.iteration_number} complete.") # Iteration number was incremented in run_cycle
73
+ return cycle_details
74
+ except Exception as e:
75
+ logger.error(f"Error during cycle execution: {e}", exc_info=True)
76
+ raise HTTPException(status_code=500, detail=f"An internal error occurred during cycle execution: {e}")
77
+
78
+
79
+ @app.get("/hypotheses", response_model=List[HypothesisResponse])
80
+ def list_hypotheses_endpoint():
81
+ """Retrieves a list of all currently active hypotheses."""
82
+ global global_context
83
+ active_hypotheses = global_context.get_active_hypotheses()
84
+ logger.info(f"Retrieving {len(active_hypotheses)} active hypotheses.")
85
+ # Convert Hypothesis objects to dicts using .to_dict() before creating HypothesisResponse
86
+ # Pydantic should handle the conversion if the fields match, but explicit is safer
87
+ return [HypothesisResponse(**h.to_dict()) for h in active_hypotheses]
88
+
89
+ @app.get("/")
90
+ async def root_endpoint():
91
+ """Serves the main HTML page."""
92
+ logger.debug("Serving root HTML page.")
93
+ # HTML content remains largely the same, ensure JS function names match
94
+ return responses.HTMLResponse(content="""
95
+ <!DOCTYPE html>
96
+ <html>
97
+ <head>
98
+ <title>AI Co-Scientist</title>
99
+ <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
100
+ <style>
101
+ body { font-family: sans-serif; margin: 20px; }
102
+ textarea { width: 90%; }
103
+ button { margin-top: 10px; padding: 8px 15px; }
104
+ #results { margin-top: 20px; border-top: 1px solid #eee; padding-top: 20px; }
105
+ #errors { color: red; margin-top: 10px; }
106
+ h2, h3, h4, h5 { margin-top: 1.5em; }
107
+ ul { padding-left: 20px; }
108
+ li { margin-bottom: 10px; }
109
+ #mynetwork {
110
+ width: 100%;
111
+ height: 500px; /* Explicit height */
112
+ border: 1px solid lightgray;
113
+ margin-bottom: 10px;
114
+ }
115
+ .graph-explanation p {
116
+ margin-top: 0;
117
+ margin-bottom: 20px;
118
+ font-size: 0.9em;
119
+ color: #555;
120
+ }
121
+ </style>
122
+ </head>
123
+ <body>
124
+ <h1>Welcome to the AI Co-Scientist System</h1>
125
+ <p>Set your research goal and run cycles to generate hypotheses.</p>
126
+
127
+ <label for="researchGoal">Research Goal:</label><br>
128
+ <textarea id="researchGoal" name="researchGoal" rows="4" cols="50"></textarea><br><br>
129
+ <button onclick="submitResearchGoal()">Submit Research Goal</button>
130
+ <button onclick="runCycle()">Run Next Cycle</button> <!-- Added manual run button -->
131
+
132
+ <h2>Results</h2>
133
+ <div id="results"><p>Submit a research goal to begin.</p></div>
134
+
135
+ <h2>Errors</h2>
136
+ <div id="errors"></div>
137
+
138
+ <script>
139
+ let currentIteration = 0; // Keep track of the iteration
140
+
141
+ async function submitResearchGoal() {
142
+ const researchGoal = document.getElementById('researchGoal').value;
143
+ if (!researchGoal.trim()) {
144
+ document.getElementById('errors').innerHTML = '<p>Please enter a research goal.</p>';
145
+ return;
146
+ }
147
+ document.getElementById('results').innerHTML = '<p>Setting research goal...</p>';
148
+ document.getElementById('errors').innerHTML = '';
149
+ currentIteration = 0; // Reset iteration count
150
+
151
+ try {
152
+ const response = await fetch('/research_goal', {
153
+ method: 'POST',
154
+ headers: {'Content-Type': 'application/json'},
155
+ body: JSON.stringify({ description: researchGoal })
156
+ });
157
+
158
+ if (!response.ok) {
159
+ const errorData = await response.json();
160
+ throw new Error(errorData.detail || `HTTP error! status: ${response.status}`);
161
+ }
162
+
163
+ const data = await response.json();
164
+ document.getElementById('results').innerHTML = `<p>${data.message}</p><p>Running first cycle...</p>`;
165
+ runCycle(); // Automatically run the first cycle
166
+ } catch (error) {
167
+ console.error('Error submitting research goal:', error);
168
+ document.getElementById('errors').innerHTML = `<p>Error: ${error.message}</p>`;
169
+ document.getElementById('results').innerHTML = ''; // Clear results area on error
170
+ }
171
+ }
172
+
173
+ async function runCycle() {
174
+ document.getElementById('errors').innerHTML = ''; // Clear previous errors
175
+ const resultsDiv = document.getElementById('results');
176
+ // Append status message if it's not the first auto-run
177
+ if (currentIteration > 0 || !resultsDiv.innerHTML.includes("Running first cycle")) {
178
+ resultsDiv.innerHTML += `<p>Running cycle ${currentIteration + 1}...</p>`;
179
+ }
180
+
181
+
182
+ try {
183
+ const response = await fetch('/run_cycle', { method: 'POST' });
184
+
185
+ if (!response.ok) {
186
+ const errorData = await response.json();
187
+ throw new Error(errorData.detail || `HTTP error! status: ${response.status}`);
188
+ }
189
+
190
+ const data = await response.json();
191
+ currentIteration = data.iteration; // Update iteration count
192
+
193
+ let resultsHTML = `<h3>Iteration: ${data.iteration}</h3>`;
194
+ let graphData = null; // To store graph data for initialization later
195
+
196
+ const stepExplanations = { /* ... explanations ... */ }; // Keep explanations if desired
197
+
198
+ for (const stepName in data.steps) {
199
+ if (data.steps.hasOwnProperty(stepName)) {
200
+ const step = data.steps[stepName];
201
+ resultsHTML += `<h4>Step: ${stepName}</h4>`;
202
+ // Add explanation if available
203
+ // if (stepExplanations[stepName]) { resultsHTML += `<p>${stepExplanations[stepName]}</p>`; }
204
+
205
+ if (step.hypotheses && step.hypotheses.length > 0) {
206
+ resultsHTML += `<h5>Hypotheses:</h5><ul>`;
207
+ // Sort hypotheses by Elo score descending for display
208
+ step.hypotheses.sort((a, b) => b.elo_score - a.elo_score).forEach(hypo => {
209
+ resultsHTML += \`<li>
210
+ <strong>\${hypo.title}</strong> (ID: \${hypo.id}, Elo: \${hypo.elo_score.toFixed(2)})<br>\`;
211
+ if (hypo.parent_ids && hypo.parent_ids.length > 0) {
212
+ resultsHTML += \`<em>Parents: \${hypo.parent_ids.join(', ')}</em><br>\`;
213
+ }
214
+ resultsHTML += \`<p>\${hypo.text}</p>\`;
215
+ if (hypo.novelty_review) { resultsHTML += \`<p>Novelty: \${hypo.novelty_review}</p>\`; }
216
+ if (hypo.feasibility_review){ resultsHTML += \`<p>Feasibility: \${hypo.feasibility_review}</p>\`; }
217
+ // Add comments and references if needed
218
+ resultsHTML += \`</li>\`;
219
+ });
220
+ resultsHTML += `</ul>`;
221
+ } else if (step.hypotheses) {
222
+ resultsHTML += `<p>No hypotheses generated or active in this step.</p>`;
223
+ }
224
+
225
+ // Handle graph data specifically from the 'proximity' step
226
+ if (stepName === "proximity" && step.nodes_str && step.edges_str) {
227
+ resultsHTML += \`<h5>Hypothesis Similarity Graph:</h5>\`;
228
+ resultsHTML += \`<div id="mynetwork"></div>\`; // Container for the graph
229
+ resultsHTML += \`<div class="graph-explanation"><p>
230
+ <b>How to read:</b> Nodes are hypotheses. Edges show similarity > 0.2.
231
+ </p></div>\`;
232
+ // Store data for initialization after HTML is rendered
233
+ graphData = { nodesStr: step.nodes_str, edgesStr: step.edges_str };
234
+ } else if (stepName === "proximity" && step.adjacency_graph) {
235
+ resultsHTML += \`<p>Adjacency Graph (raw): \${JSON.stringify(step.adjacency_graph)}</p>\`;
236
+ }
237
+ }
238
+ }
239
+
240
+ // Display meta-review
241
+ if (data.meta_review) {
242
+ resultsHTML += `<h4>Meta-Review:</h4>`;
243
+ if (data.meta_review.meta_review_critique && data.meta_review.meta_review_critique.length > 0) {
244
+ resultsHTML += `<h5>Critique:</h5><ul>\${data.meta_review.meta_review_critique.map(item => \`<li>\${item}</li>\`).join('')}</ul>`;
245
+ }
246
+ if (data.meta_review.research_overview && data.meta_review.research_overview.suggested_next_steps.length > 0) {
247
+ resultsHTML += `<h5>Suggested Next Steps:</h5><ul>\${data.meta_review.research_overview.suggested_next_steps.map(item => \`<li>\${item}</li>\`).join('')}</ul>`;
248
+ }
249
+ }
250
+
251
+ // Update the results div content
252
+ resultsDiv.innerHTML = resultsHTML;
253
+
254
+ // Initialize the graph *after* its container is in the DOM
255
+ if (graphData) {
256
+ initializeGraph(graphData.nodesStr, graphData.edgesStr);
257
+ }
258
+
259
+ } catch (error) {
260
+ console.error('Error running cycle:', error);
261
+ document.getElementById('errors').innerHTML = `<p>Error during cycle ${currentIteration + 1}: ${error.message}</p>`;
262
+ // Optionally clear or update resultsDiv on error
263
+ resultsDiv.innerHTML += `<p>Cycle failed. See errors above.</p>`;
264
+ }
265
+ }
266
+
267
+ // Function to initialize the Vis.js graph (remains the same)
268
+ function initializeGraph(nodesStr, edgesStr) {
269
+ // Check if vis is loaded
270
+ if (typeof vis === 'undefined') {
271
+ console.error("Vis.js library not loaded!");
272
+ document.getElementById('errors').innerHTML += '<p>Error: Vis.js library failed to load.</p>';
273
+ return;
274
+ }
275
+ const container = document.getElementById('mynetwork');
276
+ if (!container) {
277
+ console.error("Graph container #mynetwork not found in DOM!");
278
+ return; // Don't proceed if container doesn't exist
279
+ }
280
+
281
+ try {
282
+ // Use Function constructor for safe parsing of JS object strings
283
+ const nodesArray = nodesStr ? new Function(\`return [\${nodesStr}]\`)() : [];
284
+ const edgesArray = edgesStr ? new Function(\`return [\${edgesStr}]\`)() : [];
285
+
286
+ var nodes = new vis.DataSet(nodesArray);
287
+ var edges = new vis.DataSet(edgesArray);
288
+
289
+ var data = { nodes: nodes, edges: edges };
290
+ var options = { /* ... vis options ... */
291
+ edges: {
292
+ smooth: { enabled: true, type: "dynamic" },
293
+ font: { size: 12, align: 'middle' }
294
+ },
295
+ nodes: {
296
+ shape: 'circle',
297
+ font: { size: 14 }
298
+ },
299
+ physics: {
300
+ stabilization: true,
301
+ barnesHut: { gravitationalConstant: -2000, centralGravity: 0.3, springLength: 150, springConstant: 0.04 }
302
+ }
303
+ };
304
+ var network = new vis.Network(container, data, options);
305
+ } catch (e) {
306
+ console.error("Error initializing Vis.js graph:", e);
307
+ document.getElementById('errors').innerHTML += `<p>Error initializing graph: ${e.message}</p>`;
308
+ // Optionally clear the graph container on error
309
+ container.innerHTML = '<p style="color:red;">Could not render graph.</p>';
310
+ }
311
+ }
312
+ </script>
313
+ </body>
314
+ </html>
315
+ """)
app/config.py ADDED
@@ -0,0 +1,37 @@
1
+ import yaml
2
+ import logging
3
+ from typing import Dict
4
+
5
+ def load_config(config_path: str = "config.yaml") -> Dict:
6
+ """Loads the configuration from the specified YAML file."""
7
+ try:
8
+ with open(config_path, "r") as f:
9
+ config_data = yaml.safe_load(f)
10
+ if not isinstance(config_data, dict):
11
+ print(f"Error: Configuration file {config_path} did not load as a dictionary.")
12
+ exit(1)
13
+ # Convert logging level string to actual level
14
+ log_level_str = config_data.get("logging_level", "INFO").upper()
15
+ config_data["logging_level"] = getattr(logging, log_level_str, logging.INFO)
16
+ return config_data
17
+ except FileNotFoundError:
18
+ print(f"Error: Configuration file not found at {config_path}")
19
+ exit(1)
20
+ except yaml.YAMLError as e:
21
+ print(f"Error parsing YAML in {config_path}: {e}")
22
+ exit(1)
23
+ except AttributeError as e:
24
+ print(f"Error: Invalid logging level '{log_level_str}' in config file")
25
+ exit(1)
26
+ except KeyError as e:
27
+ print(f"Error: Missing key in config file: {e}")
28
+ exit(1)
29
+ except Exception as e:
30
+ print(f"An unexpected error occurred while loading config: {e}")
31
+ exit(1)
32
+
33
+ # Load configuration at the start when this module is imported
34
+ config = load_config()
35
+
36
+ # Example of accessing config values (optional, for clarity)
37
+ # print(f"LLM Model from config: {config.get('llm_model')}")
app/main.py ADDED
@@ -0,0 +1,40 @@
1
+ import uvicorn
2
+ import os
3
+
4
+ # Import the FastAPI app instance from the api module
5
+ from .api import app
6
+ # Import the config dictionary from the config module
7
+ from .config import config
8
+ # Import the logger from utils (optional, if main needs logging)
9
+ from .utils import logger
10
+
11
+ # Ensure OPENROUTER_API_KEY is set before starting (optional check)
12
+ if not os.getenv("OPENROUTER_API_KEY"):
13
+ logger.warning("OPENROUTER_API_KEY environment variable is not set.")
14
+ # Depending on requirements, you might exit here or let the app handle it.
15
+ # print("Error: OPENROUTER_API_KEY environment variable must be set.")
16
+ # exit(1)
17
+
18
+ if __name__ == "__main__":
19
+ host = config.get("fastapi_host", "0.0.0.0")
20
+ port = config.get("fastapi_port", 8000)
21
+ reload_flag = config.get("uvicorn_reload", False) # Add a config option for reload
22
+
23
+ logger.info(f"Starting Uvicorn server on {host}:{port} (Reload: {reload_flag})")
24
+
25
+ # Note: When running this script directly (python app/main.py),
26
+ # Uvicorn needs the app location string relative to the execution directory.
27
+ # If run from project root: "app.api:app"
28
+ # If run from inside 'app': "api:app"
29
+ # The string "app.api:app" assumes you run `python -m app.main` from the project root,
30
+ # or configure the run environment correctly.
31
+ # A simpler approach for direct execution `python app/main.py` might be needed
32
+ # if relative imports cause issues depending on how it's run.
33
+
34
+ # Let's assume running from project root for now.
35
+ # If issues arise, might need to adjust how uvicorn is called or the project structure.
36
+ uvicorn.run(app, host=host, port=port, reload=reload_flag)
37
+
38
+ # Alternative if running `python app/main.py` directly causes import issues:
39
+ # uvicorn.run("app.api:app", host=host, port=port, reload=reload_flag)
40
+ # This tells uvicorn where to find the app object.
app/models.py ADDED
@@ -0,0 +1,90 @@
1
+ import logging
2
+ from typing import List, Dict, Optional
3
+ from pydantic import BaseModel
4
+
5
+ # Assuming logger is configured elsewhere or passed in if needed within methods
6
+ # If models need logging, consider passing a logger instance during initialization
7
+ # or using a globally accessible logger configured in utils.py or config.py.
8
+ # For simplicity, direct logging calls are removed from models for now.
9
+ # logger = logging.getLogger(__name__) # Example if models needed their own logger
10
+
11
+ ###############################################################################
12
+ # Data Models
13
+ ###############################################################################
14
+
15
+ class Hypothesis:
16
+ def __init__(self, hypothesis_id: str, title: str, text: str):
17
+ self.hypothesis_id = hypothesis_id
18
+ self.title = title
19
+ self.text = text
20
+ self.novelty_review: Optional[str] = None # "HIGH", "MEDIUM", "LOW"
21
+ self.feasibility_review: Optional[str] = None
22
+ self.elo_score: float = 1200.0 # initial Elo score
23
+ self.review_comments: List[str] = []
24
+ self.references: List[str] = []
25
+ self.is_active: bool = True
26
+ self.parent_ids: List[str] = [] # Store IDs of parent hypotheses
27
+
28
+ def to_dict(self) -> dict:
29
+ return {
30
+ "id": self.hypothesis_id,
31
+ "title": self.title,
32
+ "text": self.text,
33
+ "novelty_review": self.novelty_review,
34
+ "feasibility_review": self.feasibility_review,
35
+ "elo_score": self.elo_score,
36
+ "review_comments": self.review_comments,
37
+ "references": self.references,
38
+ "is_active": self.is_active,
39
+ "parent_ids": self.parent_ids, # Include parent IDs
40
+ }
41
+
42
+ class ResearchGoal:
43
+ def __init__(self, description: str, constraints: Dict = None):
44
+ self.description = description
45
+ self.constraints = constraints if constraints else {}
46
+
47
+ class ContextMemory:
48
+ """
49
+ A simple in-memory context storage.
50
+ """
51
+ def __init__(self):
52
+ self.hypotheses: Dict[str, Hypothesis] = {} # key: hypothesis_id
53
+ self.tournament_results: List[Dict] = []
54
+ self.meta_review_feedback: List[Dict] = []
55
+ self.iteration_number: int = 0
56
+
57
+ def add_hypothesis(self, hypothesis: Hypothesis):
58
+ self.hypotheses[hypothesis.hypothesis_id] = hypothesis
59
+ # Consider moving logging out of the model if possible
60
+ # logger.info(f"Added hypothesis {hypothesis.hypothesis_id}")
61
+
62
+ def get_active_hypotheses(self) -> List[Hypothesis]:
63
+ return [h for h in self.hypotheses.values() if h.is_active]
64
+
65
+
66
+ ###############################################################################
67
+ # Pydantic Schemas for API
68
+ ###############################################################################
69
+
70
+ class ResearchGoalRequest(BaseModel):
71
+ description: str
72
+ constraints: Optional[Dict] = {}
73
+
74
+ class HypothesisResponse(BaseModel):
75
+ id: str
76
+ title: str
77
+ text: str
78
+ novelty_review: Optional[str]
79
+ feasibility_review: Optional[str]
80
+ elo_score: float
81
+ review_comments: List[str]
82
+ references: List[str]
83
+ is_active: bool
84
+ # parent_ids: List[str] # Add if needed in API response
85
+
86
+ class OverviewResponse(BaseModel):
87
+ iteration: int
88
+ meta_review_critique: List[str]
89
+ top_hypotheses: List[HypothesisResponse]
90
+ suggested_next_steps: List[str]
app/utils.py ADDED
@@ -0,0 +1,174 @@
1
+ import logging
2
+ import time
3
+ import os
4
+ import random
5
+ import json
6
+ from typing import List, Dict
7
+ import openai
8
+ from openai import OpenAI
9
+ from sentence_transformers import SentenceTransformer
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ import numpy as np
12
+
13
+ # Import config loading function and config object
14
+ from .config import config, load_config
15
+
16
+ # --- Logging Setup ---
17
+ # Configure a root logger or a specific logger for the app
18
+ # Using a basic configuration here, can be enhanced
19
+ logging.basicConfig(level=config.get("logging_level", logging.INFO),
20
+ format="%(asctime)s %(levelname)s %(name)s: %(message)s")
21
+ logger = logging.getLogger("aicoscientist") # Use a specific name for the app logger
22
+
23
+ # Optional: Add file handler based on config (if needed globally)
24
+ # log_filename_base = config.get('log_file_name', 'app')
25
+ # timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
26
+ # file_handler = logging.FileHandler(f"{log_filename_base}_{timestamp}.txt")
27
+ # formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
28
+ # file_handler.setFormatter(formatter)
29
+ # logger.addHandler(file_handler)
30
+
31
+ # --- LLM Interaction ---
32
+ def call_llm(prompt: str, temperature: float = 0.7) -> str:
33
+ """
34
+ Calls an LLM via the OpenRouter API and returns the response. Handles retries.
35
+ """
36
+ client = OpenAI(
37
+ base_url=config.get("openrouter_base_url"),
38
+ api_key=os.getenv("OPENROUTER_API_KEY"),
39
+ )
40
+ llm_model = config.get("llm_model")
41
+ max_retries = config.get("max_retries", 3)
42
+ initial_delay = config.get("initial_retry_delay", 1)
43
+
44
+ if not llm_model:
45
+ logger.error("LLM model not configured in config.yaml")
46
+ return "Error: LLM model not configured."
47
+ if not client.api_key:
48
+ logger.error("OPENROUTER_API_KEY environment variable not set.")
49
+ return "Error: OpenRouter API key not set."
50
+
51
+ last_error_message = "API call failed after multiple retries." # Default error
52
+
53
+ for attempt in range(max_retries):
54
+ try:
55
+ completion = client.chat.completions.create(
56
+ model=llm_model,
57
+ messages=[{"role": "user", "content": prompt}],
58
+ temperature=temperature,
59
+ )
60
+ if completion.choices and len(completion.choices) > 0:
61
+ return completion.choices[0].message.content or "" # Return empty string if content is None
62
+ else:
63
+ logger.error("No choices in the LLM response: %s", completion)
64
+ last_error_message = f"No choices in the response: {completion}"
65
+ # Continue to retry if possible
66
+
67
+ except Exception as e:
68
+ error_str = str(e)
69
+ if "Rate limit exceeded" in error_str:
70
+ logger.warning(f"Rate limit exceeded (attempt {attempt + 1}/{max_retries}): {e}")
71
+ last_error_message = f"Rate limit exceeded: {e}"
72
+ else:
73
+ logger.error(f"API call failed (attempt {attempt + 1}/{max_retries}): {e}")
74
+ last_error_message = f"API call failed: {e}"
75
+
76
+ if attempt < max_retries - 1:
77
+ wait_time = initial_delay * (2 ** attempt)
78
+ logger.info(f"Retrying in {wait_time} seconds...")
79
+ time.sleep(wait_time)
80
+ else:
81
+ logger.error("Max retries reached. Giving up.")
82
+ break # Exit loop after last attempt
83
+
84
+ return f"Error: {last_error_message}" # Return the last recorded error
85
+
86
+
87
+ # --- ID Generation ---
88
+ def generate_unique_id(prefix="H") -> str:
89
+ """Generates a unique identifier string."""
90
+ return f"{prefix}{random.randint(1000, 9999)}"
91
+
92
+
93
+ # --- VIS.JS Graph Data Generation ---
94
+ def generate_visjs_data(adjacency_graph: Dict) -> Dict[str, str]:
95
+ """Generates node and edge data strings for vis.js graph."""
96
+ nodes = []
97
+ edges = []
98
+
99
+ if not isinstance(adjacency_graph, dict):
100
+ logger.error(f"Invalid adjacency_graph type: {type(adjacency_graph)}. Expected dict.")
101
+ return {"nodes_str": "", "edges_str": ""}
102
+
103
+ for node_id, connections in adjacency_graph.items():
104
+ nodes.append(f"{{id: '{node_id}', label: '{node_id}'}}")
105
+ if isinstance(connections, list):
106
+ for connection in connections:
107
+ if isinstance(connection, dict) and 'similarity' in connection and 'other_id' in connection:
108
+ similarity_val = connection.get('similarity')
109
+ if isinstance(similarity_val, (int, float)) and similarity_val > 0.2:
110
+ edges.append(f"{{from: '{node_id}', to: '{connection['other_id']}', label: '{similarity_val:.2f}', arrows: 'to'}}")
111
+ # Optional: Log skipped edges due to low similarity
112
+ # else:
113
+ # logger.debug(f"Skipping edge from {node_id} to {connection['other_id']} due to low/invalid similarity: {similarity_val}")
114
+ else:
115
+ logger.warning(f"Skipping invalid connection format for node {node_id}: {connection}")
116
+ else:
117
+ logger.warning(f"Skipping invalid connections format for node {node_id}: {connections}")
118
+
119
+ nodes_str = ",\n".join(nodes)
120
+ edges_str = ",\n".join(edges)
121
+
122
+ return {
123
+ "nodes_str": nodes_str,
124
+ "edges_str": edges_str
125
+ }
126
+
127
+ # --- Similarity Calculation ---
128
+ _sentence_transformer_model = None
129
+
130
+ def get_sentence_transformer_model():
131
+ """Loads and returns a singleton instance of the sentence transformer model."""
132
+ global _sentence_transformer_model
133
+ if _sentence_transformer_model is None:
134
+ model_name = config.get('sentence_transformer_model', 'all-MiniLM-L6-v2')
135
+ try:
136
+ logger.info(f"Loading sentence transformer model: {model_name}...")
137
+ _sentence_transformer_model = SentenceTransformer(model_name)
138
+ logger.info("Sentence transformer model loaded successfully.")
139
+ except ImportError:
140
+ logger.error("Failed to import sentence_transformers. Please install it: pip install sentence-transformers")
141
+ raise
142
+ except Exception as e:
143
+ logger.error(f"Failed to load sentence transformer model '{model_name}': {e}")
144
+ raise # Re-raise after logging
145
+ return _sentence_transformer_model
146
+
147
+ def similarity_score(textA: str, textB: str) -> float:
148
+ """Calculates cosine similarity between two texts using sentence embeddings."""
149
+ try:
150
+ if not textA or not textB:
151
+ logger.warning("Empty string provided to similarity_score.")
152
+ return 0.0
153
+
154
+ model = get_sentence_transformer_model()
155
+ if model is None: # Check if model loading failed previously
156
+ return 0.0 # Or handle error appropriately
157
+
158
+ embedding_a = model.encode(textA, convert_to_tensor=True)
159
+ embedding_b = model.encode(textB, convert_to_tensor=True)
160
+
161
+ # Ensure embeddings are 2D numpy arrays for cosine_similarity
162
+ embedding_a_np = embedding_a.cpu().numpy().reshape(1, -1)
163
+ embedding_b_np = embedding_b.cpu().numpy().reshape(1, -1)
164
+
165
+ similarity = cosine_similarity(embedding_a_np, embedding_b_np)[0][0]
166
+
167
+ # Clamp the value between 0.0 and 1.0
168
+ similarity = float(np.clip(similarity, 0.0, 1.0))
169
+
170
+ # logger.debug(f"Similarity score: {similarity:.4f}") # Use debug level
171
+ return similarity
172
+ except Exception as e:
173
+ logger.error(f"Error calculating similarity score: {e}", exc_info=True) # Log traceback
174
+ return 0.0 # Return 0 on error instead of 0.5
main.py DELETED
@@ -1,1174 +0,0 @@
1
- # Generated by o3-mini-high
2
- # https://gist.github.com/chunhualiao/f90c48a0bdac24ba686c25c86150cca8
3
- import math
4
- import random
5
- import logging
6
- from typing import List, Dict, Optional
7
- import openai
8
- from openai import OpenAI
9
- import os
10
- import datetime
11
- from fastapi import FastAPI, HTTPException, responses
12
- from fastapi.staticfiles import StaticFiles
13
- from pydantic import BaseModel
14
- import uvicorn
15
- import yaml
16
-
17
- ################################################################################
18
- # Utility Functions
19
- ################################################################################
20
-
21
- import time
22
-
23
- # Configure logging
24
- def load_config(config_path: str) -> Dict:
25
- """Loads the configuration from the specified YAML file."""
26
- try:
27
- with open(config_path, "r") as f:
28
- config = yaml.safe_load(f)
29
- # Convert logging level string to actual level
30
- config["logging_level"] = getattr(logging, config["logging_level"].upper(), logging.INFO)
31
- return config
32
- except FileNotFoundError:
33
- print(f"Error: Configuration file not found at {config_path}")
34
- exit(1)
35
- except yaml.YAMLError as e:
36
- print(f"Error parsing YAML in {config_path}: {e}")
37
- exit(1)
38
- except AttributeError as e:
39
- print("Error: Invalid logging level in config file")
40
- exit(1)
41
- except KeyError as e:
42
- print(f"Error: Missing key in config file: {e}")
43
- exit(1)
44
-
45
-
46
- def setup_logger(log_filename):
47
- logger = logging.getLogger(log_filename) # Create a logger with the filename
48
- logger.setLevel(config["logging_level"])
49
- formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
50
-
51
- # Remove existing handlers to avoid duplicate logs
52
- for handler in logger.handlers[:]:
53
- logger.removeHandler(handler)
54
-
55
- file_handler = logging.FileHandler(f"{config['log_file_name']}_{log_filename}")
56
- file_handler.setFormatter(formatter)
57
- logger.addHandler(file_handler)
58
- return logger
59
-
60
- # Load configuration at the start
61
- config = load_config("config.yaml")
62
-
63
- def call_llm(prompt: str, temperature: float = 0.7) -> str:
64
- """
65
- Calls an LLM via the OpenRouter API and returns the response.
66
-
67
- Args:
68
- prompt (str): The input prompt for the LLM.
69
- temperature (float, optional): The temperature setting for the LLM. Defaults to 0.7.
70
-
71
- Returns:
72
- str: The LLM's response.
73
-
74
- Args:
75
- prompt (str): The input prompt for the LLM.
76
-
77
- Returns:
78
- str: The LLM's response.
79
- """
80
- client = OpenAI(
81
- base_url=config["openrouter_base_url"],
82
- api_key=os.getenv("OPENROUTER_API_KEY"),
83
- )
84
-
85
- try:
86
- completion = client.chat.completions.create(
87
- model=config["llm_model"],
88
- messages=[{"role": "user", "content": prompt}],
89
- temperature=temperature, # Pass temperature to the API call
90
- )
91
- except Exception as e:
92
- retries = config.get("max_retries", 3)
93
- delay = config.get("initial_retry_delay", 1) # seconds
94
-
95
- if "Rate limit exceeded" in str(e):
96
- logger.warning(f"Rate limit exceeded: {e}")
97
- error_message = f"Rate limit exceeded: {e}"
98
- else:
99
- logger.error(f"API call failed with exception: {e}")
100
- error_message = f"API call failed with exception: {e}"
101
-
102
- for attempt in range(retries):
103
- try:
104
- wait_time = delay * (2 ** attempt) # Exponential backoff
105
- logger.info(f"Retrying in {wait_time} seconds (attempt {attempt + 1}/{retries})")
106
- time.sleep(wait_time)
107
- completion = client.chat.completions.create(
108
- model=config["llm_model"],
109
- messages=[{"role": "user", "content": prompt}],
110
- temperature=temperature, # Pass temperature to the API call
111
- )
112
- if completion.choices and len(completion.choices) > 0:
113
- return completion.choices[0].message.content
114
- except Exception as inner_e:
115
- if "Rate limit exceeded" in str(inner_e):
116
- logger.warning(f"Rate limit exceeded (retry attempt {attempt + 1}): {inner_e}")
117
- error_message = f"Rate limit exceeded: {inner_e}"
118
- else:
119
- logger.error(f"API call failed with exception (retry attempt {attempt + 1}): {inner_e}")
120
- error_message = f"API call failed with exception: {inner_e}"
121
-
122
- if attempt == retries - 1:
123
- logger.error("Max retries reached. Giving up.")
124
- return f"API call failed after multiple retries. Error: {error_message}" # Detailed error
125
-
126
- logger.error("Max retries reached without a successful response.")
127
- return f"API call failed after multiple retries. Error: {error_message}" # Detailed error
128
-
129
- # If no exception, you can safely access attributes
130
- if completion.choices and len(completion.choices) > 0:
131
- return completion.choices[0].message.content
132
- else:
133
- logger.error("No choices in the response: %s", completion)
134
- return f"No choices in the response: {completion}"
135
-
136
-
137
-
138
- ###############################################################################
139
- # Data Models and Pydantic Schemas
140
- ###############################################################################
141
-
142
- class Hypothesis:
143
- def __init__(self, hypothesis_id: str, title: str, text: str):
144
- self.hypothesis_id = hypothesis_id
145
- self.title = title
146
- self.text = text
147
- self.novelty_review: Optional[str] = None # "HIGH", "MEDIUM", "LOW"
148
- self.feasibility_review: Optional[str] = None
149
- self.elo_score: float = 1200.0 # initial Elo score
150
- self.review_comments: List[str] = []
151
- self.references: List[str] = []
152
- self.is_active: bool = True
153
- self.parent_ids: List[str] = [] # Store IDs of parent hypotheses
154
-
155
- def to_dict(self) -> dict:
156
- return {
157
- "id": self.hypothesis_id,
158
- "title": self.title,
159
- "text": self.text,
160
- "novelty_review": self.novelty_review,
161
- "feasibility_review": self.feasibility_review,
162
- "elo_score": self.elo_score,
163
- "review_comments": self.review_comments,
164
- "references": self.references,
165
- "is_active": self.is_active,
166
- "parent_ids": self.parent_ids, # Include parent IDs
167
- }
168
-
169
- class ResearchGoal:
170
- def __init__(self, description: str, constraints: Dict = None):
171
- self.description = description
172
- self.constraints = constraints if constraints else {}
173
-
174
- class ContextMemory:
175
- """
176
- A simple in-memory context storage.
177
- """
178
- def __init__(self):
179
- self.hypotheses: Dict[str, Hypothesis] = {} # key: hypothesis_id
180
- self.tournament_results: List[Dict] = []
181
- self.meta_review_feedback: List[Dict] = []
182
- self.iteration_number: int = 0
183
-
184
- def add_hypothesis(self, hypothesis: Hypothesis):
185
- self.hypotheses[hypothesis.hypothesis_id] = hypothesis
186
- logger.info(f"Added hypothesis {hypothesis.hypothesis_id}")
187
-
188
- def get_active_hypotheses(self) -> List[Hypothesis]:
189
- return [h for h in self.hypotheses.values() if h.is_active]
190
-
191
-
192
- # Pydantic schemas for API endpoints.
193
- class ResearchGoalRequest(BaseModel):
194
- description: str
195
- constraints: Optional[Dict] = {}
196
-
197
- class HypothesisResponse(BaseModel):
198
- id: str
199
- title: str
200
- text: str
201
- novelty_review: Optional[str]
202
- feasibility_review: Optional[str]
203
- elo_score: float
204
- review_comments: List[str]
205
- references: List[str]
206
- is_active: bool
207
-
208
- class OverviewResponse(BaseModel):
209
- iteration: int
210
- meta_review_critique: List[str]
211
- top_hypotheses: List[HypothesisResponse]
212
- suggested_next_steps: List[str]
213
-
214
-
215
- ###############################################################################
216
- # Utility Functions (Placeholders for LLM Calls and Similarity Measures)
217
- ###############################################################################
218
-
219
- def generate_unique_id(prefix="H") -> str:
220
- """
221
- Generates a unique identifier string.
222
-
223
- Args:
224
- prefix (str, optional): A prefix for the ID. Defaults to "H".
225
-
226
- Returns:
227
- str: A unique identifier string consisting of the prefix and a random 4-digit number.
228
- """
229
- return f"{prefix}{random.randint(1000, 9999)}"
230
-
231
- import json
232
-
233
- # --- VIS.JS INTEGRATION ---
234
- def generate_visjs_data(adjacency_graph: Dict) -> Dict[str, str]:
235
- """
236
- Generates node and edge data strings for vis.js graph.
237
-
238
- Args:
239
- adjacency_graph (Dict): The adjacency graph data.
240
-
241
- Returns:
242
- Dict[str, str]: A dictionary containing 'nodes_str' and 'edges_str'.
243
- """
244
- nodes = []
245
- edges = []
246
-
247
- # Check if adjacency_graph is a dictionary
248
- if not isinstance(adjacency_graph, dict):
249
- logger.error(f"Invalid adjacency_graph type: {type(adjacency_graph)}. Expected dict.")
250
- return {"nodes_str": "", "edges_str": ""}
251
-
252
- for node_id, connections in adjacency_graph.items():
253
- # Ensure node_id is treated as a string for JS
254
- nodes.append(f"{{id: '{node_id}', label: '{node_id}'}}")
255
- # Check if connections is a list
256
- if isinstance(connections, list):
257
- for connection in connections:
258
- # Check if connection is a dictionary and has 'similarity'
259
- if isinstance(connection, dict) and 'similarity' in connection:
260
- # Ensure similarity is checked correctly
261
- if isinstance(connection.get('similarity'), (int, float)) and connection['similarity'] > 0.2:
262
- # Ensure 'from' and 'to' are strings for JS and 'other_id' exists
263
- if 'other_id' in connection:
264
- edges.append(f"{{from: '{node_id}', to: '{connection['other_id']}', label: '{connection['similarity']:.2f}', arrows: 'to'}}")
265
- else:
266
- logger.warning(f"Skipping edge from {node_id} due to missing 'other_id' in connection: {connection}")
267
- # Log skipped edges due to low similarity or non-numeric similarity
268
- elif not (isinstance(connection.get('similarity'), (int, float)) and connection['similarity'] > 0.2):
269
- logger.debug(f"Skipping edge from {node_id} to {connection.get('other_id', 'N/A')} due to low/invalid similarity: {connection.get('similarity', 'N/A')}")
270
- else:
271
- logger.warning(f"Skipping invalid connection format for node {node_id}: {connection}")
272
- else:
273
- logger.warning(f"Skipping invalid connections format for node {node_id}: {connections}")
274
-
275
-
276
- nodes_str = ",\n".join(nodes)
277
- edges_str = ",\n".join(edges)
278
-
279
- return {
280
- "nodes_str": nodes_str,
281
- "edges_str": edges_str
282
- }
283
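To make the expected input and output shapes concrete, here is an illustrative sketch (the IDs and similarity values are invented):

```python
# Illustrative adjacency graph with one pair of similar hypotheses.
adjacency = {
    "H1": [{"other_id": "H2", "similarity": 0.85}],
    "H2": [{"other_id": "H1", "similarity": 0.85}],
}
data = generate_visjs_data(adjacency)
# data["nodes_str"] -> "{id: 'H1', label: 'H1'},\n{id: 'H2', label: 'H2'}"
# data["edges_str"] -> two directed edges labelled '0.85'; pairs with a
#                      similarity of 0.2 or less would be omitted.
```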
-
284
-
285
- def call_llm_for_generation(prompt: str, num_hypotheses: int = 3) -> List[Dict]:
286
- """
287
- Calls a Large Language Model (LLM) for generating hypotheses.
288
-
289
- Args:
290
- prompt (str): The input prompt for the LLM.
291
- num_hypotheses (int, optional): The number of hypotheses to generate. Defaults to 3.
292
-
293
- Returns:
294
- List[Dict]: A list of dictionaries, each representing a generated hypothesis.
295
- Each dictionary contains "title" and "text" keys.
296
- """
297
- logger.info("LLM generation called with prompt: %s, num_hypotheses: %d", prompt, num_hypotheses)
298
-
299
- # Modify the prompt to request JSON output
300
- prompt += "\n\nPlease return the response as a JSON array of objects, where each object has a 'title' and 'text' key."
301
-
302
- # Call LLM with the appropriate temperature
303
- response = call_llm(prompt, temperature=config["step_temperatures"]["generation"])
304
- logger.info("LLM response: %s", response)
305
-
306
- if "API call failed" in response:
307
- # If the call failed, log it and return the error message
308
- logger.error(f"LLM call failed: {response}")
309
- return [{"title": "Error", "text": response}] # Return error as a hypothesis
310
-
311
- try:
312
- # Remove potential Markdown code block formatting
313
- response = response.strip()
314
- if response.startswith("```json"):
315
- response = response[7:]
316
- if response.endswith("```"):
317
- response = response[:-3]
318
- response = response.strip()
319
-
320
- # Attempt to parse the response as JSON
321
- hypotheses = json.loads(response)
322
- logger.info("Parsed hypotheses: %s", hypotheses)
323
-
324
- # Basic validation: Check if the response is a list and each item has 'title' and 'text'
325
- if not isinstance(hypotheses, list) or not all(isinstance(h, dict) and "title" in h and "text" in h for h in hypotheses):
326
- error_message = "Invalid JSON format: Expected a list of objects with 'title' and 'text' keys."
327
- raise ValueError(error_message)
328
- except (json.JSONDecodeError, ValueError) as e:
329
- logger.error("Could not parse LLM response as JSON: %s", response)
330
- logger.error(f"Error: {e}")
331
- return [{"title": "Error", "text": f"Could not parse LLM response: {e}"}] # Return error as a hypothesis
332
-
333
- return hypotheses
334
-
335
- def call_llm_for_reflection(hypothesis_text: str) -> Dict:
336
- """
337
- Calls a Large Language Model (LLM) for reviewing a hypothesis.
338
-
339
- Args:
340
- hypothesis_text (str): The text of the hypothesis to be reviewed.
341
-
342
- Returns:
343
- Dict: A dictionary containing the review results, including novelty and feasibility
344
- assessments (HIGH, MEDIUM, or LOW), a comment, and a list of references.
345
- """
346
- prompt = (
347
- f"Review the following hypothesis and provide a novelty assessment (HIGH, MEDIUM, or LOW), "
348
- f"a feasibility assessment (HIGH, MEDIUM, or LOW), a comment, and a list of references (PMIDs) in JSON format:\n\n"
349
- f"Hypothesis: {hypothesis_text}\n\n"
350
- f"Return the response as a JSON object with the following keys: 'novelty_review', 'feasibility_review', 'comment', 'references'."
351
-
352
- )
353
- # Call LLM with the appropriate temperature
354
- response = call_llm(prompt, temperature=config["step_temperatures"]["reflection"])
355
- logger.info("LLM reflection for hypothesis: %s, response: %s", hypothesis_text, response)
356
-
357
- if "API call failed" in response:
358
- # If the call failed, log it and return the error message
359
- logger.error(f"LLM call failed: {response}")
360
- return {
361
- "novelty_review": "ERROR",
362
- "feasibility_review": "ERROR",
363
- "comment": response, # Return the error message
364
- "references": [],
365
- }
366
-
367
- # Initialize default values
368
- novelty_review = "MEDIUM"
369
- feasibility_review = "MEDIUM"
370
- comment = "Could not parse LLM response."
371
- references = []
372
-
373
- try:
374
- # Remove potential Markdown code block formatting
375
- response = response.strip()
376
- if response.startswith("```json"):
377
- response = response[7:]
378
- if response.endswith("```"):
379
- response = response[:-3]
380
- response = response.strip()
381
-
382
- # Parse the JSON response
383
- data = json.loads(response)
384
- novelty_review = data.get("novelty_review", "MEDIUM")
385
- feasibility_review = data.get("feasibility_review", "MEDIUM")
386
- comment = data.get("comment", "Could not parse LLM response.")
387
- references = data.get("references", [])
388
-
389
- # Basic validation of review values
390
- if not any(level in novelty_review.upper() for level in ["HIGH", "MEDIUM", "LOW"]):
391
- logger.warning("Invalid novelty review value: %s", novelty_review)
392
- novelty_review = "MEDIUM"
393
- if not any(level in feasibility_review.upper() for level in ["HIGH", "MEDIUM", "LOW"]):
394
- logger.warning("Invalid feasibility review value: %s", feasibility_review)
395
- feasibility_review = "MEDIUM"
396
- if not isinstance(comment, str):
397
- logger.warning("Invalid comment value: %s", comment)
398
- comment = "Could not parse LLM response."
399
-
400
- except (json.JSONDecodeError, AttributeError, KeyError) as e:
401
- logger.warning("Error parsing LLM response: %s", e)
402
- logger.warning("Response: %s", response)
403
- comment = f"Could not parse LLM response: {e}"
404
-
405
- return {
406
- "novelty_review": novelty_review,
407
- "feasibility_review": feasibility_review,
408
- "comment": comment,
409
- "references": references,
410
- }
411
-
412
- def run_pairwise_debate(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
413
- """
414
- Compares two hypotheses based on their novelty and feasibility review scores.
415
-
416
- Args:
417
- hypoA (Hypothesis): The first hypothesis.
418
- hypoB (Hypothesis): The second hypothesis.
419
-
420
- Returns:
421
- Hypothesis: The winning hypothesis. If scores are tied, a winner is chosen randomly.
422
- """
423
- def score(h: Hypothesis) -> int:
424
- """
425
- Calculates a numerical score for a hypothesis based on its novelty and feasibility reviews.
426
-
427
- Args:
428
- h (Hypothesis): The hypothesis to score.
429
-
430
- Returns:
431
- int: The calculated score. HIGH=3, MEDIUM=2, LOW=1, None=0. The score is the sum of
432
- the novelty and feasibility scores.
433
- """
434
- mapping = {"HIGH": 3, "MEDIUM": 2, "LOW": 1, None: 0}
435
- score_novelty = 0
436
- if isinstance(h.novelty_review, str):
437
- score_novelty = mapping.get(h.novelty_review, 0)
438
- else:
439
- logger.error(f"Invalid novelty_review type: {type(h.novelty_review)}, value: {h.novelty_review}")
440
-
441
- score_feasibility = 0
442
- if isinstance(h.feasibility_review, str):
443
- score_feasibility = mapping.get(h.feasibility_review, 0)
444
- else:
445
- logger.error(f"Invalid feasibility_review type: {type(h.feasibility_review)}, value: {h.feasibility_review}")
446
-
447
- return score_novelty + score_feasibility
448
- scoreA = score(hypoA)
449
- scoreB = score(hypoB)
450
- winner = hypoA if scoreA > scoreB else hypoB if scoreB > scoreA else random.choice([hypoA, hypoB])
451
- logger.info("Debate: %s (score %d) vs %s (score %d) => Winner: %s",
452
- hypoA.hypothesis_id, scoreA, hypoB.hypothesis_id, scoreB, winner.hypothesis_id)
453
- return winner
454
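A worked example of the review-based scoring above (HIGH=3, MEDIUM=2, LOW=1; the hypotheses are hypothetical):

```python
# Illustrative only: H1 scores 3 + 2 = 5, H2 scores 2 + 2 = 4, so H1 wins deterministically.
a = Hypothesis("H1", "Idea A", "Text A")
a.novelty_review, a.feasibility_review = "HIGH", "MEDIUM"
b = Hypothesis("H2", "Idea B", "Text B")
b.novelty_review, b.feasibility_review = "MEDIUM", "MEDIUM"
assert run_pairwise_debate(a, b) is a
```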
-
455
- def update_elo(winner: Hypothesis, loser: Hypothesis, k_factor: int = config["elo_k_factor"]):
456
- """
457
- Updates the Elo scores of two hypotheses after a pairwise comparison.
458
-
459
- Args:
460
- winner (Hypothesis): The winning hypothesis.
461
- loser (Hypothesis): The losing hypothesis.
462
- k_factor (int, optional): The K-factor used in the Elo calculation. Defaults to the configured "elo_k_factor" value.
463
-
464
- Returns:
465
- None
466
- """
467
- ratingA = winner.elo_score
468
- ratingB = loser.elo_score
469
- expectedA = 1 / (1 + math.pow(10, (ratingB - ratingA) / 400))
470
- expectedB = 1 - expectedA
471
- winner.elo_score = ratingA + k_factor * (1 - expectedA)
472
- loser.elo_score = ratingB + k_factor * (0 - expectedB)
473
- logger.info("Updated Elo: Winner %s -> %.2f, Loser %s -> %.2f",
474
- winner.hypothesis_id, winner.elo_score, loser.hypothesis_id, loser.elo_score)
475
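A worked example of the Elo update (a sketch, assuming `elo_k_factor` is set to 32 in `config.yaml`):

```python
# Both hypotheses start at the default rating of 1200, so the expected score is 0.5 each.
winner_h = Hypothesis("H1", "Winner", "Text A")
loser_h = Hypothesis("H2", "Loser", "Text B")
update_elo(winner_h, loser_h)
print(winner_h.elo_score)  # 1216.0 = 1200 + 32 * (1 - 0.5)
print(loser_h.elo_score)   # 1184.0 = 1200 + 32 * (0 - 0.5)
```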
-
476
- def combine_hypotheses(hypoA: Hypothesis, hypoB: Hypothesis) -> Hypothesis:
477
- """
478
- Combines two hypotheses into a new, evolved hypothesis.
479
-
480
- Args:
481
- hypoA (Hypothesis): The first hypothesis.
482
- hypoB (Hypothesis): The second hypothesis.
483
-
484
- Returns:
485
- Hypothesis: A new hypothesis combining the two input hypotheses. The new ID is prefixed with "E".
486
- """
487
- new_id = generate_unique_id("E")
488
- combined_title = f"Combined: {hypoA.title} & {hypoB.title}"
489
- combined_text = f"{hypoA.text}\n\nAdditionally, {hypoB.text}"
490
- logger.info("Combined hypotheses %s and %s into %s", hypoA.hypothesis_id, hypoB.hypothesis_id, new_id)
491
- new_hypothesis = Hypothesis(new_id, combined_title, combined_text)
492
- new_hypothesis.parent_ids = [hypoA.hypothesis_id, hypoB.hypothesis_id] # Store parent IDs
493
- logger.info("New hypothesis parent_ids: %s", new_hypothesis.parent_ids) # Added logging
494
- return new_hypothesis
495
-
496
- # Global variable to store the sentence transformer model
497
- _sentence_transformer_model = None
498
-
499
- def get_sentence_transformer_model():
500
- """
501
- Returns a singleton instance of the sentence transformer model.
502
- Loads the model only once to improve performance.
503
-
504
- Returns:
505
- SentenceTransformer: The sentence transformer model.
506
- """
507
- global _sentence_transformer_model
508
- if _sentence_transformer_model is None:
509
- try:
510
- from sentence_transformers import SentenceTransformer
511
- logger.info("Loading sentence transformer model...")
512
- # Using a smaller model for efficiency, can be replaced with larger models for better accuracy
513
- _sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
514
- logger.info("Sentence transformer model loaded successfully")
515
- except ImportError as e:
516
- logger.error(f"Failed to import sentence_transformers: {e}")
517
- raise
518
- except Exception as e:
519
- logger.error(f"Failed to load sentence transformer model: {e}")
520
- raise
521
- return _sentence_transformer_model
522
-
523
- def similarity_score(textA: str, textB: str) -> float:
524
- """
525
- Calculates a similarity score between two text strings using sentence embeddings
526
- and cosine similarity.
527
-
528
- Args:
529
- textA (str): The first text string.
530
- textB (str): The second text string.
531
-
532
- Returns:
533
- float: A similarity score between 0 and 1 (inclusive), where 1 indicates
534
- identical semantic meaning and 0 indicates completely different meanings.
535
- """
536
- try:
537
- # Handle empty strings
538
- if not textA.strip() or not textB.strip():
539
- logger.warning("Empty string provided to similarity_score")
540
- return 0.0
541
-
542
- # Get the model
543
- model = get_sentence_transformer_model()
544
-
545
- # Generate embeddings
546
- embedding_a = model.encode(textA, convert_to_tensor=True)
547
- embedding_b = model.encode(textB, convert_to_tensor=True)
548
-
549
- # Calculate cosine similarity
550
- from sklearn.metrics.pairwise import cosine_similarity
551
- import numpy as np
552
-
553
- # Convert to numpy arrays if they're tensors
554
- if hasattr(embedding_a, 'cpu') and hasattr(embedding_b, 'cpu'):
555
- embedding_a = embedding_a.cpu().numpy().reshape(1, -1)
556
- embedding_b = embedding_b.cpu().numpy().reshape(1, -1)
557
-
558
- similarity = cosine_similarity(embedding_a, embedding_b)[0][0]
559
-
560
- # Ensure the result is between 0 and 1
561
- similarity = float(max(0.0, min(1.0, similarity)))
562
-
563
- logger.info(f"Similarity score between texts: {similarity:.4f}")
564
- return similarity
565
- except Exception as e:
566
- logger.error(f"Error calculating similarity score: {e}")
567
- # Fallback to a default value in case of error
568
- return 0.5
569
-
570
-
571
- ###############################################################################
572
- # Agent Implementations
573
- ###############################################################################
574
-
575
- class GenerationAgent:
576
- def generate_new_hypotheses(self, research_goal: ResearchGoal, context: ContextMemory) -> List[Hypothesis]:
577
- """
578
- Generates new hypotheses based on the given research goal and context.
579
-
580
- Args:
581
- research_goal (ResearchGoal): The research goal.
582
- context (ContextMemory): The current context memory.
583
-
584
- Returns:
585
- List[Hypothesis]: A list of newly generated hypotheses.
586
- """
587
- prompt = (
588
- f"Research Goal: {research_goal.description}\n"
589
- f"Constraints: {research_goal.constraints}\n"
590
- f"Please propose {config['num_hypotheses']} new hypotheses with rationale.\n"
591
- )
592
- raw_output = call_llm_for_generation(prompt, num_hypotheses=config["num_hypotheses"])
593
- new_hypos = []
594
- for idea in raw_output:
595
- hypo_id = generate_unique_id("G")
596
- h = Hypothesis(hypo_id, idea["title"], idea["text"])
597
- logger.info("Generated hypothesis: %s", h.to_dict())
598
- new_hypos.append(h)
599
- return new_hypos
600
-
601
- class ReflectionAgent:
602
- def review_hypotheses(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
603
- """
604
- Reviews a list of hypotheses, updating their novelty, feasibility, comments, and references.
605
-
606
- Args:
607
- hypotheses (List[Hypothesis]): The list of hypotheses to review.
608
- context (ContextMemory): The current context memory.
609
-
610
- Returns:
611
- None
612
- """
613
- for h in hypotheses:
614
- result = call_llm_for_reflection(h.text)
615
- h.novelty_review = result["novelty_review"]
616
- h.feasibility_review = result["feasibility_review"]
617
- h.review_comments.append(result["comment"])
618
- h.references.extend(result["references"])
619
- logger.info("Reviewed hypothesis: %s, Novelty: %s, Feasibility: %s", h.hypothesis_id, h.novelty_review, h.feasibility_review)
620
-
621
- class RankingAgent:
622
- def run_tournament(self, hypotheses: List[Hypothesis], context: ContextMemory) -> None:
623
- """
624
- Runs a tournament among the given hypotheses, updating their Elo scores and recording results.
625
-
626
- Args:
627
- hypotheses (List[Hypothesis]): The list of hypotheses to participate in the tournament.
628
- context (ContextMemory): The current context memory.
629
-
630
- Returns:
631
- None
632
- """
633
- random.shuffle(hypotheses)
634
- pairs = []
635
- for i in range(len(hypotheses)):
636
- for j in range(i + 1, len(hypotheses)):
637
- pairs.append((hypotheses[i], hypotheses[j]))
638
- for hA, hB in pairs:
639
- if hA.is_active and hB.is_active:
640
- winner = run_pairwise_debate(hA, hB)
641
- loser = hB if winner == hA else hA
642
- update_elo(winner, loser)
643
- logger.info("Ran pairwise debate between %s and %s. Winner: %s", hA.hypothesis_id, hB.hypothesis_id, winner.hypothesis_id)
644
- context.tournament_results.append({
645
- "winner": winner.hypothesis_id,
646
- "loser": loser.hypothesis_id,
647
- "winner_score": winner.elo_score,
648
- "loser_score": loser.elo_score
649
- })
650
-
651
- class EvolutionAgent:
652
- def evolve_hypotheses(self, top_k: int, context: ContextMemory) -> List[Hypothesis]:
653
- """
654
- Evolves hypotheses by combining the top-k hypotheses based on Elo score.
655
-
656
- Args:
657
- top_k (int): Requested number of top hypotheses to combine. Note that the current implementation reads config["top_k_hypotheses"] rather than this argument.
658
- context (ContextMemory): The current context memory.
659
-
660
- Returns:
661
- List[Hypothesis]: A list of new, evolved hypotheses. Currently, at most one
662
- new hypothesis is generated by combining the top two.
663
- """
664
- active = context.get_active_hypotheses()
665
- sorted_by_elo = sorted(active, key=lambda h: h.elo_score, reverse=True)
666
- top_candidates = sorted_by_elo[:config["top_k_hypotheses"]]
667
- new_hypotheses = []
668
- if len(top_candidates) >= 2:
669
- new_h = combine_hypotheses(top_candidates[0], top_candidates[1])
670
- logger.info("Evolved hypothesis: %s", new_h.to_dict())
671
- logger.info("top_candidates: %s", [h.to_dict() for h in top_candidates]) # Added logging
672
- new_hypotheses.append(new_h)
673
- return new_hypotheses
674
-
675
- class ProximityAgent:
676
- def build_proximity_graph(self, hypotheses: List[Hypothesis], context: ContextMemory) -> Dict:
677
- """
678
- Builds a proximity graph representing the similarity between hypotheses.
679
-
680
- Args:
681
- hypotheses (List[Hypothesis]): The list of hypotheses (currently unused; active hypotheses are taken from the context instead).
682
- context (ContextMemory): The current context memory.
683
-
684
- Returns:
685
- Dict: A dictionary containing:
686
- - "adjacency_graph": An adjacency list representing the proximity graph.
687
- - "nodes_str": JavaScript string for vis.js nodes.
688
- - "edges_str": JavaScript string for vis.js edges.
689
- """
690
- adjacency = {}
691
- # Ensure we only process active hypotheses if needed, or all as currently done
692
- active_hypotheses = context.get_active_hypotheses() # Use context to get active ones
693
-
694
- for i in range(len(active_hypotheses)):
695
- hypo_i = active_hypotheses[i]
696
- adjacency[hypo_i.hypothesis_id] = []
697
- for j in range(len(active_hypotheses)):
698
- if i == j:
699
- continue
700
- hypo_j = active_hypotheses[j]
701
- # Ensure text is not empty before calculating similarity
702
- if hypo_i.text and hypo_j.text:
703
- sim = similarity_score(hypo_i.text, hypo_j.text)
704
- adjacency[hypo_i.hypothesis_id].append({
705
- "other_id": hypo_j.hypothesis_id,
706
- "similarity": sim
707
- })
708
- else:
709
- logger.warning(f"Skipping similarity for hypothesis {hypo_i.hypothesis_id} or {hypo_j.hypothesis_id} due to empty text.")
710
-
711
-
712
- # Generate the data strings for the graph visualization
713
- visjs_data = generate_visjs_data(adjacency)
714
-
715
- logger.info("Built proximity graph adjacency: %s", adjacency)
716
- return {
717
- "adjacency_graph": adjacency,
718
- "nodes_str": visjs_data["nodes_str"],
719
- "edges_str": visjs_data["edges_str"]
720
- }
721
-
722
- class MetaReviewAgent:
723
- def summarize_and_feedback(self, context: ContextMemory, adjacency: Dict) -> Dict:
724
- """
725
- Summarizes the current state of research and provides feedback.
726
-
727
- Args:
728
- context (ContextMemory): The current context memory.
729
- adjacency (Dict): The proximity graph of hypotheses.
730
-
731
- Returns:
732
- Dict: A dictionary containing a meta-review critique and a research overview
733
- (including top-ranked hypotheses and suggested next steps).
734
- """
735
- comment_summary = set()
- for h in context.get_active_hypotheses():
- if h.novelty_review and "LOW" in h.novelty_review.upper():
- comment_summary.add("Some ideas are not very novel.")
- if h.feasibility_review and "LOW" in h.feasibility_review.upper():
- comment_summary.add("Some ideas may be infeasible.")
744
- best_hypotheses = sorted(context.get_active_hypotheses(), key=lambda h: h.elo_score, reverse=True)[:3]
745
- logger.info("Top hypotheses: %s", [h.to_dict() for h in best_hypotheses])
746
-
747
- overview = {
748
- "meta_review_critique": list(comment_summary),
749
- "research_overview": {
750
- "top_ranked_hypotheses": [h.to_dict() for h in best_hypotheses],
751
- "suggested_next_steps": [
752
- "Conduct further in experiments on top hypotheses.",
753
- "Collect domain expert feedback and refine constraints."
754
- ]
755
- }
756
- }
757
- context.meta_review_feedback.append(overview)
758
- logger.info("Meta-review and feedback: %s", overview)
759
- return overview
760
-
761
- class SupervisorAgent:
762
- def __init__(self):
763
- self.generation_agent = GenerationAgent()
764
- self.reflection_agent = ReflectionAgent()
765
- self.ranking_agent = RankingAgent()
766
- self.evolution_agent = EvolutionAgent()
767
- self.proximity_agent = ProximityAgent()
768
- self.meta_review_agent = MetaReviewAgent()
769
-
770
- def run_cycle(self, research_goal: ResearchGoal, context: ContextMemory) -> Dict:
771
- """
772
- Runs a single cycle of the hypothesis generation, review, ranking, and evolution process.
773
-
774
- Args:
775
- research_goal (ResearchGoal): The research goal.
776
- context (ContextMemory): The current context memory.
777
-
778
- Returns:
779
- Dict: A dictionary containing detailed information about each step of the cycle.
780
- """
781
- logger.info("Starting a new cycle, iteration %d", context.iteration_number + 1)
782
-
783
- # Initialize a dictionary to store cycle details
784
- cycle_details = {
785
- "iteration": context.iteration_number + 1,
786
- "steps": {},
787
- "meta_review": {}
788
- }
789
-
790
- # 1. Generation
791
- new_hypotheses = self.generation_agent.generate_new_hypotheses(research_goal, context)
792
- for nh in new_hypotheses:
793
- context.add_hypothesis(nh)
794
- cycle_details["steps"]["generation"] = {
795
- "hypotheses": [h.to_dict() for h in new_hypotheses]
796
- }
797
-
798
- # 2. Reflection
799
- active_hypos = context.get_active_hypotheses()
800
- self.reflection_agent.review_hypotheses(active_hypos, context)
801
- cycle_details["steps"]["reflection"] = {
802
- "hypotheses": [h.to_dict() for h in active_hypos]
803
- }
804
-
805
- # 3. Ranking (Tournament)
806
- active_hypos = context.get_active_hypotheses()
807
- self.ranking_agent.run_tournament(active_hypos, context)
808
- cycle_details["steps"]["ranking1"] = {
809
- "tournament_results": context.tournament_results,
810
- "hypotheses": [h.to_dict() for h in active_hypos]
811
- }
812
-
813
- # 4. Evolution (Improve top ideas)
814
- new_evolved = self.evolution_agent.evolve_hypotheses(top_k=2, context=context)
815
- for nh in new_evolved:
816
- context.add_hypothesis(nh)
817
- if new_evolved:
818
- self.reflection_agent.review_hypotheses(new_evolved, context)
819
- cycle_details["steps"]["evolution"] = {
820
- "hypotheses": [h.to_dict() for h in new_evolved]
821
- }
822
-
823
- # 5. Ranking again
824
- active_hypos = context.get_active_hypotheses()
825
- self.ranking_agent.run_tournament(active_hypos, context)
826
- cycle_details["steps"]["ranking2"] = {
827
- "tournament_results": context.tournament_results,
828
- "hypotheses": [h.to_dict() for h in active_hypos]
829
-
830
- }
831
-
832
- # 6. Proximity Analysis
833
- # Pass active_hypos directly, ProximityAgent now gets active ones from context
834
- proximity_result = self.proximity_agent.build_proximity_graph(active_hypos, context)
835
- cycle_details["steps"]["proximity"] = {
836
- "adjacency_graph": proximity_result["adjacency_graph"],
837
- "nodes_str": proximity_result["nodes_str"],
838
- "edges_str": proximity_result["edges_str"]
839
- }
840
-
841
- # 7. Meta-review
842
- overview = self.meta_review_agent.summarize_and_feedback(context, proximity_result["adjacency_graph"])
843
- cycle_details["meta_review"] = overview
844
- context.iteration_number += 1
845
-
846
- logger.info("Cycle complete, iteration now %d", context.iteration_number)
847
- return cycle_details
848
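A minimal driver sketch for one cycle (the research goal is hypothetical; this triggers real LLM calls, so the OpenRouter key and `config.yaml` must be set up):

```python
# Illustrative only: run a single generate/review/rank/evolve cycle.
goal = ResearchGoal("Repurpose approved drugs for pulmonary fibrosis", constraints={})
memory = ContextMemory()
details = SupervisorAgent().run_cycle(goal, memory)
print(details["iteration"])           # 1 after the first cycle
print(list(details["steps"].keys()))  # generation, reflection, ranking1, ...
```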
-
849
- ###############################################################################
850
- # FastAPI Application
851
- ###############################################################################
852
-
853
- app = FastAPI(title="AI Co-Scientist System", version="1.0")
854
-
855
- # Global context and supervisor (in production, consider persistent storage)
856
- global_context = ContextMemory()
857
- supervisor = SupervisorAgent()
858
- current_research_goal: Optional[ResearchGoal] = None
859
-
860
- app.mount("/static", StaticFiles(directory="static"), name="static")
861
-
862
- @app.post("/research_goal", response_model=dict)
863
- def set_research_goal(goal: ResearchGoalRequest):
864
- """
865
- Sets the research goal for the AI Co-Scientist.
866
-
867
- Args:
868
- goal (ResearchGoalRequest): The research goal, including a description and optional constraints.
869
-
870
- Returns:
871
- dict: A confirmation message.
872
- """
873
- global current_research_goal, global_context, logger
874
- current_research_goal = ResearchGoal(goal.description, goal.constraints)
875
- # Reset context for new research goal
876
- global_context = ContextMemory()
877
-
878
- # Create a new logger for this submission
879
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
880
- log_filename = f"log_{timestamp}.txt"
881
- logger = setup_logger(log_filename)
882
-
883
- logger.info("Research goal set: %s", goal.description)
884
- return {"message": "Research goal successfully set. Please wait for results. This may take a few minutes. Please be patient."}
885
-
886
- @app.post("/run_cycle")
887
- def run_cycle():
888
- """
889
- Runs a single cycle of hypothesis generation, review, ranking, and evolution.
890
-
891
- Raises:
892
- HTTPException: If no research goal has been set.
893
-
894
- Returns:
895
- Dict: A dictionary containing detailed information about each step of the cycle.
896
- """
897
- global current_research_goal, global_context
898
- if not current_research_goal:
899
- raise HTTPException(status_code=400, detail="No research goal set.")
900
- cycle_details = supervisor.run_cycle(current_research_goal, global_context)
901
- logger.info("Run cycle complete. Overview: %s", cycle_details)
902
- return cycle_details
903
-
904
- @app.get("/hypotheses", response_model=List[HypothesisResponse])
905
- def list_hypotheses():
906
- """
907
- Retrieves a list of all currently active hypotheses.
908
-
909
- Returns:
910
- List[HypothesisResponse]: A list of active hypotheses, each including its ID, title, text,
911
- novelty/feasibility reviews, Elo score, comments, references,
912
- and active status.
913
- """
914
- global global_context
915
- return [HypothesisResponse(**h.to_dict()) for h in global_context.get_active_hypotheses()]
916
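The endpoints above can also be exercised programmatically; a hypothetical client sketch (assumes the server is running locally and the third-party `requests` package is installed):

```python
import requests

base = "http://localhost:8000"
requests.post(f"{base}/research_goal",
              json={"description": "Identify new antibiotic targets"})
cycle = requests.post(f"{base}/run_cycle").json()
print(cycle["iteration"])
hypotheses = requests.get(f"{base}/hypotheses").json()
print([h["id"] for h in hypotheses])
```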
-
917
- @app.get("/")
918
- async def root():
919
- """
920
- Root endpoint for the API. Returns an HTML page with a form to input the research goal.
921
- """
922
- return responses.HTMLResponse(content="""
923
- <!DOCTYPE html>
924
- <html>
925
- <head>
926
- <title>AI Co-Scientist</title>
927
- <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
928
- <style>
929
- #mynetwork {
930
- width: 100%;
931
- height: 500px; /* Explicit height for the graph container */
932
- border: 1px solid lightgray;
933
- margin-bottom: 20px; /* Add some space below the graph */
934
- }
935
- .graph-container p { /* Style the explanation text */
936
- margin-top: 5px;
937
- font-size: 0.9em;
938
- color: #555;
939
- }
940
- </style>
941
- </head>
942
- <body>
943
- <h1>Welcome to the AI Co-Scientist System</h1>
944
- <p>Set your research goal and run cycles to generate hypotheses.</p>
945
-
946
- <label for="researchGoal">Research Goal:</label><br>
947
- <textarea id="researchGoal" name="researchGoal" rows="4" cols="50"></textarea><br><br>
948
- <button onclick="submitResearchGoal()">Submit Research Goal</button>
949
-
950
- <h2>Results</h2>
951
- <div id="results"></div>
952
-
953
- <h2>Errors</h2>
954
- <div id="errors" style="color: red;"></div>
955
-
956
- <script>
957
- async function submitResearchGoal() {
958
- const researchGoal = document.getElementById('researchGoal').value;
959
- const response = await fetch('/research_goal', {
960
- method: 'POST',
961
- headers: {
962
- 'Content-Type': 'application/json'
963
- },
964
- body: JSON.stringify({ description: researchGoal })
965
- });
966
-
967
- // Clear previous errors
968
- document.getElementById('errors').innerHTML = '';
969
-
970
- if (!response.ok) {
971
- const errorData = await response.json();
972
- document.getElementById('errors').innerHTML = `<p>Error: ${errorData.detail}</p>`;
973
- return; // Stop execution if there's an error
974
- }
975
-
976
- const data = await response.json();
977
- document.getElementById('results').innerHTML = `<p>${data.message}</p>`;
978
- runCycle(); // Automatically run a cycle after setting the goal
979
- }
980
-
981
- async function runCycle() {
982
- const response = await fetch('/run_cycle', { method: 'POST' });
983
-
984
- // Clear previous errors
985
- document.getElementById('errors').innerHTML = '';
986
-
987
- if (!response.ok) {
988
- const errorData = await response.json();
989
- document.getElementById('errors').innerHTML = `<p>Error: ${errorData.detail}</p>`;
990
- return; // Stop execution if there's an error
991
- }
992
-
993
- const data = await response.json();
994
-
995
- let resultsHTML = `<h3>Iteration: ${data.iteration}</h3>`;
996
-
997
- // Define step explanations
998
- const stepExplanations = {
999
- generation: "Generates new hypotheses based on the research goal and current context.",
1000
- reflection: "Reviews the generated hypotheses for novelty and feasibility.",
1001
- ranking1: "Ranks hypotheses based on a pairwise comparison (tournament).",
1002
- evolution: "Combines the top-ranked hypotheses to create new, evolved hypotheses.",
1003
- ranking2: "Ranks hypotheses again after the evolution step.",
1004
- proximity: "Analyzes the similarity between hypotheses.",
1005
- };
1006
-
1007
- // Display details for each step
1008
- for (const stepName in data.steps) {
1009
- if (data.steps.hasOwnProperty(stepName)) {
1010
- const step = data.steps[stepName];
1011
- resultsHTML += `<h4>Step: ${stepName}</h4>`;
1012
-
1013
- // Add explanation if available
1014
- if (stepExplanations[stepName]) {
1015
- resultsHTML += `<p>${stepExplanations[stepName]}</p>`;
1016
- }
1017
-
1018
- if (step.hypotheses) {
1019
- resultsHTML += `<h5>Hypotheses:</h5><ul>`;
1020
- step.hypotheses.sort((a, b) => b.elo_score - a.elo_score).forEach(hypo => {
1021
- resultsHTML += `<li>
1022
- <strong>${hypo.title}</strong> (ID: ${hypo.id}, Elo: ${hypo.elo_score.toFixed(2)})<br>`;
1023
- if (hypo.parent_ids && hypo.parent_ids.length > 0) {
1024
- resultsHTML += `<em>Parent IDs: ${hypo.parent_ids.join(', ')}</em><br>`;
1025
- }
1026
- resultsHTML += `<p>${hypo.text}</p>`;
1027
- if (hypo.novelty_review) {
1028
- resultsHTML += `<p>Novelty: ${hypo.novelty_review}</p>`;
1029
- }
1030
- if (hypo.feasibility_review){
1031
- resultsHTML += `<p>Feasibility: ${hypo.feasibility_review}</p>`;
1032
- }
1033
-
1034
- if (hypo.review_comments && hypo.review_comments.length > 0) {
1035
- resultsHTML += `<p>Review Comments:</p><ul>`;
1036
- hypo.review_comments.forEach(comment => {
1037
- resultsHTML += `<li>${comment}</li>`;
1038
- });
1039
- resultsHTML += `</ul>`;
1040
- }
1041
- if (hypo.references && hypo.references.length > 0) {
1042
- resultsHTML += `<p>References:</p><ul>`;
1043
- hypo.references.forEach(ref => {
1044
- resultsHTML += `<li>${ref}</li>`;
1045
- });
1046
- resultsHTML += `</ul>`;
1047
- }
1048
- resultsHTML += `</li>`;
1049
-
1050
- });
1051
- resultsHTML += `</ul>`;
1052
- }
1053
- if (stepName.startsWith("ranking") && step.tournament_results){
1054
- resultsHTML += '<h5>Ranking Results</h5>';
1055
- resultsHTML += '<ul>';
1056
- for (let i = 0; i < step.tournament_results.length; i++){
1057
- const result = step.tournament_results[i];
1058
- resultsHTML += `<li>${result.winner} beat ${result.loser}</li>`;
1059
- }
1060
- resultsHTML += '</ul>';
1061
- }
1062
-
1063
- // Handle graph data from proximity step
1064
- if (stepName === "proximity" && step.nodes_str && step.edges_str) {
1065
- resultsHTML += `<h5>Hypothesis Similarity Graph:</h5>`;
1066
- // Add the container div for the graph
1067
- resultsHTML += `<div id="mynetwork"></div>`;
1068
- resultsHTML += `<p>
1069
- <b>How to read the graph:</b><br>
1070
- - Each node (circle) represents a hypothesis.<br>
1071
- - Lines (edges) between nodes indicate a relationship.<br>
1072
- - The number on each edge represents the similarity score between the connected nodes. Higher numbers mean greater similarity. Only similarities above 0.2 are shown.<br>
1073
- </p>`;
1074
- // Store data for later initialization
1075
- graphData = { nodesStr: step.nodes_str, edgesStr: step.edges_str };
1076
- } else if (stepName === "proximity" && step.adjacency_graph) {
1077
- // Fallback if only adjacency graph is available
1078
- resultsHTML += `<p>Adjacency Graph (raw): ${JSON.stringify(step.adjacency_graph)}</p>`;
1079
- }
1080
- }
1081
- }
1082
-
1083
- // Display meta-review information
1084
- if (data.meta_review.meta_review_critique && data.meta_review.meta_review_critique.length > 0) {
1085
- resultsHTML += `<h4>Meta-Review Critique:</h4><ul>`;
1086
- data.meta_review.meta_review_critique.forEach(item => {
1087
- resultsHTML += `<li>${item}</li>`;
1088
- });
1089
- resultsHTML += `</ul>`;
1090
- }
1091
-
1092
- if (data.meta_review.research_overview && data.meta_review.research_overview.suggested_next_steps.length > 0) {
1093
- resultsHTML += `<h4>Suggested Next Steps:</h4><ul>`;
1094
- data.meta_review.research_overview.suggested_next_steps.forEach(item => {
1095
- resultsHTML += `<li>${item}</li>`;
1096
- });
1097
- resultsHTML += `</ul>`;
1098
- }
1099
-
1100
- document.getElementById('results').innerHTML = resultsHTML;
1101
-
1102
- // Initialize the graph if data is available
1103
- if (typeof graphData !== 'undefined' && graphData.nodesStr && graphData.edgesStr) {
1104
- initializeGraph(graphData.nodesStr, graphData.edgesStr);
1105
- }
1106
- }
1107
-
1108
- // Function to initialize the Vis.js graph
1109
- function initializeGraph(nodesStr, edgesStr) {
1110
- try {
1111
- // IMPORTANT: Need to parse the string data into actual JS arrays/objects
1112
- // This assumes the strings are valid JS array content (e.g., "{id: 'H1'}, {id: 'H2'}")
1113
- // We wrap them in [] and use Function constructor for safe evaluation
1114
- const nodesArray = new Function(`return [${nodesStr}]`)();
1115
- const edgesArray = new Function(`return [${edgesStr}]`)();
1116
-
1117
- var nodes = new vis.DataSet(nodesArray);
1118
- var edges = new vis.DataSet(edgesArray);
1119
-
1120
- var container = document.getElementById('mynetwork');
1121
- if (!container) {
1122
- console.error("Graph container #mynetwork not found!");
1123
- return;
1124
- }
1125
- var data = {
1126
- nodes: nodes,
1127
- edges: edges
1128
- };
1129
- var options = {
1130
- edges: {
1131
- smooth: {
1132
- enabled: true,
1133
- type: "dynamic",
1134
- },
1135
- font: {
1136
- size: 12,
1137
- align: 'middle'
1138
- }
1139
- },
1140
- nodes: {
1141
- shape: 'circle',
1142
- font: {
1143
- size: 14
1144
- }
1145
- },
1146
- physics: { // Add physics for better layout
1147
- stabilization: true,
1148
- barnesHut: {
1149
- gravitationalConstant: -2000,
1150
- centralGravity: 0.3,
1151
- springLength: 150,
1152
- springConstant: 0.04,
1153
- }
1154
- }
1155
- };
1156
- var network = new vis.Network(container, data, options);
1157
- } catch (e) {
1158
- console.error("Error initializing Vis.js graph:", e);
1159
- document.getElementById('errors').innerHTML += `<p>Error initializing graph: ${e.message}</p>`;
1160
- }
1161
- }
1162
- </script>
1163
- </body>
1164
- </html>
1165
- """)
1166
-
1167
-
1168
- ###############################################################################
1169
- # Main Entrypoint
1170
- ###############################################################################
1171
-
1172
- if __name__ == "__main__":
1173
- # Run with: uvicorn main:app --host 0.0.0.0 --port 8000
1174
- uvicorn.run("main:app", host=config["fastapi_host"], port=config["fastapi_port"], reload=False)