Yiqiao Jin committed
Commit 53709ed · 1 Parent(s): bdafe83

Update demo

README.md CHANGED
@@ -1,3 +1,16 @@
+ ---
+ title: AgentReview
+ emoji: 🎓
+ colorFrom: indigo
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 5.4.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ short_description: EMNLP 2024
+ ---
+
  # AgentReview
 
  Official implementation for the 🔗[EMNLP 2024](https://2024.emnlp.org/) (main) paper: [AgentReview: Exploring Peer Review Dynamics with LLM Agents](https://arxiv.org/abs/2406.12708)
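
The added front matter is Hugging Face Spaces metadata: it tells Spaces to serve the repository with the Gradio SDK (version 5.4.0) and to use `app.py` as the entry point. `app.py` itself is not part of this diff, so the following is only a minimal sketch of what such a Gradio entry point could look like, not the actual demo app:

```python
# Hypothetical minimal app.py for the Space (illustrative only; the real app.py is not shown in this commit).
import gradio as gr

def describe(paper_title: str) -> str:
    # Placeholder logic; the real Space would run the AgentReview review pipeline.
    return f"AgentReview would simulate peer review for: {paper_title}"

with gr.Blocks(title="AgentReview") as demo:
    gr.Markdown("# AgentReview\nLLM-agent simulation of peer review (EMNLP 2024).")
    inp = gr.Textbox(label="Paper title")
    out = gr.Textbox(label="Output")
    inp.submit(describe, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()
```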
agentreview/agent.py CHANGED
@@ -69,8 +69,10 @@ class Player(Agent):
  self.data_dir = kwargs.pop("data_dir", None)
  self.args = args
 
+
  if isinstance(backend, BackendConfig):
      backend_config = backend
+     backend_config['openai_client_type'] = args.openai_client_type
      backend = load_backend(backend_config)
  elif isinstance(backend, IntelligenceBackend):
      backend_config = backend.to_config()
arguments.py → agentreview/arguments.py RENAMED
@@ -26,7 +26,8 @@ def parse_args():
 
 
  parser.add_argument(
-     "--api_version", type=str, default="2023-03-15-preview", help="API version to be used for making requests. Required for Azure OpenAI clients."
+     "--api_version", type=str, default="2023-05-15", help="API version to be used for making requests. Required "
+                                                            "for Azure OpenAI clients."
  )
 
  # Experiment configuration
@@ -54,11 +55,10 @@ def parse_args():
  )
 
  parser.add_argument(
-     "--ignore_missing_metareviews", action="store_true", help="If set, missing metareviews are ignored, allowing the experiment to continue without them."
+     "--overwrite", action="store_true", help="If set, existing results or output files will be overwritten without prompting."
  )
-
  parser.add_argument(
-     "--overwrite", action="store_true", help="If set, existing results or output files will be overwritten without prompting."
+     "--skip_logging", action="store_true", help="If set, we do not log the messages in the console."
  )
 
  parser.add_argument(
agentreview/backends/openai.py CHANGED
@@ -3,17 +3,11 @@ from typing import List
 
  from tenacity import retry, stop_after_attempt, wait_random_exponential
 
- from arguments import parse_args
- from utility.authentication_utils import get_openai_client
+ from agentreview.arguments import parse_args
+ from agentreview.utility.authentication_utils import get_openai_client
  from .base import IntelligenceBackend
  from ..message import SYSTEM_NAME, Message
 
- args = parse_args()
-
- client = get_openai_client(client_type=args.openai_client_type)
-
- OPENAI_CLIENT_TYPE = args.openai_client_type
-
  # Default config follows the OpenAI playground
  DEFAULT_TEMPERATURE = 1.0
  DEFAULT_MAX_TOKENS = 4096
@@ -57,19 +51,22 @@ class OpenAIChat(IntelligenceBackend):
      merge_other_agents_as_one_user=merge_other_agents_as_one_user,
      **kwargs,
  )
-
+ self.client_type = kwargs.get("openai_client_type", None)
+ self.client = get_openai_client(self.client_type)
  self.temperature = temperature
  self.max_tokens = max_tokens
  self.model = model
  self.merge_other_agent_as_user = merge_other_agents_as_one_user
 
+
+
  @retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=60))
  def _get_response(self, messages):
      # Refer to https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/switching-endpoints for how to
      # make API calls
 
-     if OPENAI_CLIENT_TYPE == "openai":
-         completion = client.chat.completions.create(
+     if self.client_type == "openai":
+         completion = self.client.chat.completions.create(
              model=self.model,
              messages=messages,
              temperature=self.temperature,
@@ -77,8 +74,8 @@ class OpenAIChat(IntelligenceBackend):
              stop=STOP,
          )
 
-     elif OPENAI_CLIENT_TYPE == "azure_openai":
-         completion = client.chat.completions.create(
+     elif self.client_type == "azure_openai":
+         completion = self.client.chat.completions.create(
              model=self.model,
              messages=messages,
              temperature=self.temperature,
@@ -90,6 +87,7 @@ class OpenAIChat(IntelligenceBackend):
          raise NotImplementedError
 
      response = completion.choices[0].message.content
+
      response = response.strip()
      return response
 
const.py → agentreview/const.py RENAMED
@@ -10,6 +10,8 @@ PAPER_DECISIONS_ICLR2019 = ["Accept-oral", "Accept-poster", "Reject"]
 
  AREA_CHAIR_TYPES = ['inclusive', 'conformist', 'authoritarian', 'BASELINE']
 
+ GLOBAL_PROMPT = "This is a realistic simulation of academic peer review."
+
  # These are papers that contain potentially sensitive content. GPT-4 refused to generate reviews for these papers.
  FILTERED_PAPER_IDS = {
      "ICLR2020": [],
agentreview/dataset/download_openreview_paper.py CHANGED
@@ -15,14 +15,14 @@ import os
  import time
  import requests
 
- from arguments import parse_args
+ from agentreview.arguments import parse_args
 
  try:
      import openreview
  except ImportError:
      raise ImportError("Please install openreview package using `pip install openreview-py`")
 
- def download_papers():
+ def download_papers(args):
      """Downloads all papers from ICLR 2023 using OpenReview API.
 
      This function authenticates with the OpenReview API using environment
@@ -36,8 +36,6 @@ def download_papers():
      AssertionError: If the conference argument is not for ICLR.
      """
 
-     args = parse_args()
-
      openreview_username = os.environ.get("OPENREVIEW_USERNAME")
      openreview_password = os.environ.get("OPENREVIEW_PASSWORD")
 
@@ -133,4 +131,5 @@ def download_papers():
 
 
  if __name__ == "__main__":
-     download_papers()
+     args = parse_args()
+     download_papers(args)
agentreview/dataset/process_submissions.py CHANGED
@@ -22,9 +22,8 @@ from tqdm import tqdm
 
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
- import const
- from arguments import parse_args
- from utility.utils import print_colored
+ from agentreview.arguments import parse_args
+ from agentreview.utility.utils import print_colored
 
  decision_map = {
      # ICLR 2023
agentreview/environments/paper_review.py CHANGED
@@ -1,11 +1,10 @@
  import json
- import json
  import logging
  import os.path as osp
  from typing import List
 
  from agentreview.environments import Conversation
- from utility.utils import get_rebuttal_dir
+ from agentreview.utility.utils import get_rebuttal_dir
  from .base import TimeStep
  from ..message import Message
  from ..paper_review_message import PaperReviewMessagePool
agentreview/paper_processor.py CHANGED
@@ -148,7 +148,7 @@ def convert_text_into_dict(text: str) -> dict:
 
 
  if __name__ == "__main__":
-     from utility.authentication_utils import read_and_set_openai_key
+     from agentreview.utility.authentication_utils import read_and_set_openai_key
      from agentreview.review import get_lm_review
 
      read_and_set_openai_key()
agentreview/paper_review_arena.py CHANGED
@@ -1,14 +1,11 @@
  import csv
- import glob
  import json
  import logging
- import os
  from typing import Union
 
  from agentreview.arena import Arena, TooManyInvalidActions
  from agentreview.role_descriptions import get_reviewer_description
- from utility.utils import get_next_review_id, get_reviewer_type_from_profile, \
-     get_paper_review_and_rebuttal_dir, format_metareviews
+ from agentreview.utility.utils import format_metareviews
  from .agent import Player
  from .config import ArenaConfig
  from .environments import TimeStep, load_environment
agentreview/paper_review_player.py CHANGED
@@ -56,6 +56,8 @@ class Reviewer(Player):
      global_prompt: str = None,
      **kwargs,
  ):
+     print("kwargs")
+     print(kwargs)
      super().__init__(name, role_desc, backend, global_prompt, **kwargs)
 
  def act(self, observation: List[Message]) -> str:
@@ -94,7 +96,7 @@ class PaperExtractorPlayer(Player):
      Returns:
          str: The action (response) of the player.
      """
-     print("Improve paper loading")
+
      logging.info(f"Loading {self.conference} paper {self.paper_id} ({self.paper_decision}) ...")
 
      loader = PDFReader()
agentreview/paper_review_settings.py CHANGED
@@ -1,3 +1,5 @@
+ from typing import Union
+
  default_reviewer_setting = {
      "is_benign": None,
      "is_knowledgeable": None,
@@ -6,7 +8,7 @@ default_reviewer_setting = {
  }
 
 
- def get_experiment_settings(setting: dict):
+ def get_experiment_settings(paper_id: Union[int, None] = None, paper_decision: Union[str, None] = None, setting: dict = None):
      """
      Generate experiment settings based on provided configurations for area chairs (AC) and reviewers.
 
@@ -19,7 +21,8 @@ def get_experiment_settings(setting: dict):
      """
 
      experiment_setting = {
-         "id": None,
+         "paper_id": paper_id,
+         "paper_decision": paper_decision,
          "players": {
 
              # Paper Extractor is a special player that extracts a paper from the dataset.
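
With this change, `get_experiment_settings` receives the paper ID and its ground-truth decision explicitly instead of a placeholder `"id"` field. A sketch of how the new signature is called elsewhere in this commit (e.g. in demo.py and run_paper_review_cli.py); the concrete values below are illustrative only:

```python
# Illustrative call of the updated get_experiment_settings signature (values are example placeholders).
from agentreview.paper_review_settings import get_experiment_settings

setting = {
    "AC": ["BASELINE"],
    "reviewer": ["malicious", "BASELINE", "BASELINE"],
    "author": ["BASELINE"],
    "global_settings": {
        "provides_numeric_rating": ["reviewer", "ac"],
        "persons_aware_of_authors_identities": [],
    },
}

experiment_setting = get_experiment_settings(
    paper_id=39,                     # hypothetical paper ID
    paper_decision="Accept-poster",  # hypothetical ground-truth decision
    setting=setting,
)
```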
agentreview/role_descriptions.py CHANGED
@@ -5,7 +5,7 @@ import numpy as np
 
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
- import const
+ from agentreview import const
  from agentreview.config import AgentConfig
 
  PLAYER_BACKEND = {
agentreview/ui/cli.py CHANGED
@@ -1,5 +1,4 @@
  import logging
- import logging
  import os
  import os.path as osp
  from typing import Union
@@ -11,8 +10,8 @@ from prompt_toolkit.completion import WordCompleter
  from prompt_toolkit.styles import Style
  from rich.console import Console
 
- from utility.utils import get_rebuttal_dir, load_gpt4_generated_ac_decisions, \
-     save_gpt4_generated_ac_decisions
+ from agentreview.utility.utils import get_rebuttal_dir, load_llm_ac_decisions, \
+     save_llm_ac_decisions
  from ..arena import Arena, TooManyInvalidActions
  from ..backends.human import HumanBackendError
  from ..environments import PaperReview, PaperDecision
@@ -222,7 +221,8 @@ class ArenaCLI:
      # Print the new messages
      for msg in messages:
          message_str = f"[{msg.agent_name}->{msg.visible_to}]: {msg.content}"
-         console.print(color_dict[name_to_color[msg.agent_name]] + message_str + CRStyle.RESET_ALL)
+         if self.args.skip_logging:
+             console.print(color_dict[name_to_color[msg.agent_name]] + message_str + CRStyle.RESET_ALL)
          msg.logged = True
 
      step += 1
@@ -251,7 +251,7 @@ class ArenaCLI:
      self.arena.save_history(path_review_history)
 
  elif env.type_name == "paper_decision":
-     ac_decisions = load_gpt4_generated_ac_decisions(output_dir=args.output_dir,
+     ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
                                           conference=args.conference,
                                           model_name=args.model_name,
                                           ac_scoring_method=args.ac_scoring_method,
@@ -261,9 +261,9 @@ class ArenaCLI:
 
      ac_decisions += [env.ac_decisions]
 
-     save_gpt4_generated_ac_decisions(ac_decisions,
-                                      output_dir=args.output_dir,
-                                      conference=args.conference,
-                                      model_name=args.model_name,
-                                      ac_scoring_method=args.ac_scoring_method,
-                                      experiment_name=args.experiment_name)
+     save_llm_ac_decisions(ac_decisions,
+                           output_dir=args.output_dir,
+                           conference=args.conference,
+                           model_name=args.model_name,
+                           ac_scoring_method=args.ac_scoring_method,
+                           experiment_name=args.experiment_name)
agentreview/utility/__init__.py ADDED
File without changes
{utility → agentreview/utility}/authentication_utils.py RENAMED
@@ -16,6 +16,16 @@ def get_openai_client(client_type: str):
 
      assert client_type in ["azure_openai", "openai"]
 
+     endpoint: str = os.environ['AZURE_ENDPOINT']
+
+     if not endpoint.startswith("https://"):
+         endpoint = f"https://{endpoint}.openai.azure.com"
+
+     os.environ['AZURE_ENDPOINT'] = endpoint
+
+     if not os.environ.get('OPENAI_API_VERSION'):
+         os.environ['OPENAI_API_VERSION'] = "2023-05-15"
+
      if client_type == "openai":
          client = openai.OpenAI(
              api_key=os.environ['OPENAI_API_KEY']
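
The added block normalizes `AZURE_ENDPOINT` (prefixing `https://` and appending `.openai.azure.com` when only a resource name is given) and falls back to API version `2023-05-15` if `OPENAI_API_VERSION` is unset. A minimal usage sketch of the Azure path, assuming only the environment variable names visible in this diff:

```python
# Illustrative use of the Azure path added here; the endpoint value is a placeholder.
import os
from agentreview.utility.authentication_utils import get_openai_client

os.environ["AZURE_ENDPOINT"] = "my-resource"  # normalized to https://my-resource.openai.azure.com
# OPENAI_API_VERSION may be left unset; the function now defaults it to "2023-05-15".
# Any Azure credential variables expected by the rest of get_openai_client are not shown in this diff.

client = get_openai_client(client_type="azure_openai")
```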
{utility → agentreview/utility}/data_utils.py RENAMED
File without changes
agentreview/utility/experiment_utils.py ADDED
@@ -0,0 +1,84 @@
+ import os
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ from agentreview.agent import Player
+ from agentreview.paper_review_player import PaperExtractorPlayer, AreaChair, Reviewer
+ from agentreview.role_descriptions import get_ac_config, get_reviewer_player_config, get_author_config, \
+     get_paper_extractor_config
+
+
+ def initialize_players(experiment_setting: dict, args):
+     paper_id = experiment_setting['paper_id']
+     paper_decision = experiment_setting['paper_decision']
+
+     if args.task == "paper_decision":
+         experiment_setting["players"] = {k: v for k, v in experiment_setting["players"].items() if k.startswith("AC")}
+
+     players = []
+
+     for role, players_list in experiment_setting["players"].items():
+
+         for i, player_config in enumerate(players_list):
+             if role == "AC":
+
+                 # For AC, `env_type` is either "paper_decision" or "paper_review"
+                 player_config = get_ac_config(env_type=args.task,
+                                               scoring_method=args.ac_scoring_method,
+                                               num_papers_per_area_chair=args.num_papers_per_area_chair,
+                                               global_settings=experiment_setting['global_settings'],
+                                               acceptance_rate=args.acceptance_rate,
+                                               **player_config)
+
+                 player_config['model'] = args.model_name
+
+                 player = AreaChair(data_dir=args.data_dir,
+                                    conference=args.conference,
+                                    args=args,
+                                    **player_config)
+
+
+             elif args.task == "paper_review":
+
+
+                 if role == "Paper Extractor":
+
+                     player_config = get_paper_extractor_config(global_settings=experiment_setting['global_settings'])
+
+                     player = PaperExtractorPlayer(data_dir=args.data_dir, paper_id=paper_id,
+                                                   paper_decision=paper_decision,
+                                                   args=args,
+                                                   conference=args.conference, **player_config)
+
+
+
+                 elif role == "Author":
+
+                     # Author requires no behavior customization.
+                     # So we directly use the Player class
+                     player_config = get_author_config()
+                     player = Player(data_dir=args.data_dir,
+                                     conference=args.conference,
+                                     args=args,
+                                     **player_config)
+
+
+
+                 elif role == "Reviewer":
+                     player_config = get_reviewer_player_config(reviewer_index=i + 1,
+                                                                global_settings=experiment_setting['global_settings'],
+                                                                **player_config)
+                     player_config['model'] = args.model_name
+                     player = Reviewer(data_dir=args.data_dir, conference=args.conference, args=args, **player_config)
+
+
+                 else:
+                     raise NotImplementedError(f"Unknown role for paper review (stage 1-4): {role}")
+
+             else:
+                 raise NotImplementedError(f"Unknown role for paper decision (stage 5): {role}")
+
+         players.append(player)
+
+     return players
{utility → agentreview/utility}/general_utils.py RENAMED
File without changes
{utility → agentreview/utility}/metrics_utils.py RENAMED
File without changes
{utility → agentreview/utility}/text_utils.py RENAMED
File without changes
{utility → agentreview/utility}/utils.py RENAMED
@@ -9,8 +9,8 @@ from typing import Union, List, Dict, Tuple
9
  import numpy as np
10
  import pandas as pd
11
 
12
- import const
13
- from utility.general_utils import check_cwd, set_seed
14
 
15
 
16
  def generate_num_papers_to_accept(n, batch_number, shuffle=True):
@@ -36,25 +36,25 @@ def generate_num_papers_to_accept(n, batch_number, shuffle=True):
36
  return array
37
 
38
 
39
- def get_papers_accepted_by_gpt4(gpt4_generated_ac_decisions) -> list:
40
- papers_accepted_by_gpt4 = []
41
 
42
- num_papers = sum([len(batch) for batch in gpt4_generated_ac_decisions])
43
 
44
  if num_papers == 0:
45
  raise ValueError("No papers found in batch")
46
 
47
- num_papers_to_accept = generate_num_papers_to_accept(n=paper_review_config.ACCEPTANCE_RATE * num_papers,
48
- batch_number=len(gpt4_generated_ac_decisions))
49
 
50
- for idx_batch, batch in enumerate(gpt4_generated_ac_decisions):
51
  tups = sorted([(paper_id, rank) for paper_id, rank in batch.items()], key=lambda x: x[1], reverse=False)
52
 
53
  paper_ids = [int(paper_id) for paper_id, rank in tups]
54
 
55
- papers_accepted_by_gpt4 += paper_ids[:num_papers_to_accept[idx_batch]]
56
 
57
- return papers_accepted_by_gpt4
58
 
59
 
60
  def get_paper_decision_mapping(data_dir: str, conference: str, verbose: bool = False):
@@ -151,6 +151,8 @@ def get_rebuttal_dir(output_dir: str,
151
 
152
 
153
  def print_colored(text, color='red'):
 
 
154
  foreground_colors = {
155
  'black': 30,
156
  'red': 31,
@@ -161,7 +163,16 @@ def print_colored(text, color='red'):
161
  'cyan': 36,
162
  'white': 37,
163
  }
164
- print(f"\033[{foreground_colors[color]}m{text}\033[0m")
165
 
166
 
167
  def get_ac_decision_path(output_dir: str, conference: str, model_name: str, ac_scoring_method: str, experiment_name:
@@ -351,71 +362,116 @@ def get_experiment_names(conference: str = "ICLR2023"):
351
  return experiment_names
352
 
353
 
354
- def load_gpt4_generated_ac_decisions_as_array(experiment_name, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
355
- ac_scoring_method = kwargs.pop('ac_scoring_method')
356
- acceptance_rate = kwargs.pop('acceptance_rate')
357
- conference = kwargs.pop('conference')
358
- model_name = kwargs.pop('model_name')
359
- num_papers_per_area_chair = kwargs.pop('num_papers_per_area_chair')
360
 
 
 
 
361
  print("=" * 30)
362
  print(f"Experiment Name: {experiment_name}")
363
 
364
- gpt4_generated_ac_decisions = load_gpt4_generated_ac_decisions(conference=conference,
365
- model_name=model_name,
366
- ac_scoring_method=ac_scoring_method,
367
- experiment_name=experiment_name,
368
- num_papers_per_area_chair=num_papers_per_area_chair)
 
 
 
369
 
370
- paper_ids = sorted([int(paper_id) for batch in gpt4_generated_ac_decisions for paper_id, rank in batch.items()])
371
- # ac_decisions['paper_ids'] = paper_ids
 
372
 
373
  if ac_scoring_method == "ranking":
374
- assert len(paper_ids) == len(set(paper_ids)), (f"Duplicate paper_ids found in the AC decisions. "
375
- f"{Counter(paper_ids)}")
376
 
377
- papers_accepted_by_gpt4 = get_papers_accepted_by_gpt4(gpt4_generated_ac_decisions, acceptance_rate)
378
-
379
- # True means accept, False means reject
380
- decisions_gpt4 = np.array(
381
- [True if paper_id in papers_accepted_by_gpt4 else False for paper_id in paper_ids])
382
 
383
  elif ac_scoring_method == "recommendation":
384
- gpt4_generated_ac_decisions = {int(k): v for batch in gpt4_generated_ac_decisions for k, v in batch.items()}
385
- decisions_gpt4 = np.array(
386
- [True if gpt4_generated_ac_decisions[paper_id].startswith("Accept") else False for paper_id in
387
- paper_ids])
388
-
389
  else:
390
- raise NotImplementedError
391
 
392
- return decisions_gpt4, paper_ids
393
 
394
 
395
- def load_gpt4_generated_ac_decisions(**kwargs) -> List[Dict]:
396
- num_papers_per_area_chair = kwargs.pop('num_papers_per_area_chair')
397
- path = get_ac_decision_path(**kwargs)
398
 
399
  if osp.exists(path):
400
- ac_decision = json.load(open(path, 'r', encoding='utf-8'))
 
401
  print(f"Loaded {len(ac_decision)} batches of existing AC decisions from {path}")
402
-
403
  else:
404
  ac_decision = []
405
  print(f"No existing AC decisions found at {path}")
406
 
407
- ac_decision = [batch for batch in ac_decision if len(batch) > 0]
408
 
409
  for i, batch in enumerate(ac_decision):
410
  if i != len(ac_decision) - 1:
411
- assert len(batch) == num_papers_per_area_chair, (f"Batch {i} has {len(batch)} papers, "
412
- f"but each AC should be assigned"
413
- f" {num_papers_per_area_chair} "
414
- f"unless it is the last batch.")
415
 
416
  return ac_decision
417
 
418
-
419
  def write_to_excel(data, file_path, sheet_name):
420
  """
421
  Write data to an Excel file.
@@ -436,7 +492,7 @@ def write_to_excel(data, file_path, sheet_name):
436
  data.to_excel(writer, sheet_name=sheet_name, index=False)
437
 
438
 
439
- def save_gpt4_generated_ac_decisions(ac_decisions: List[dict], **kwargs):
440
  path = get_ac_decision_path(**kwargs)
441
 
442
  json.dump(ac_decisions, open(path, 'w', encoding='utf-8'), indent=2)
 
9
  import numpy as np
10
  import pandas as pd
11
 
12
+ from agentreview import const
13
+ from agentreview.utility.general_utils import check_cwd, set_seed
14
 
15
 
16
  def generate_num_papers_to_accept(n, batch_number, shuffle=True):
 
36
  return array
37
 
38
 
39
+ def get_papers_accepted_by_llm(llm_ac_decisions, acceptance_rate: float) -> list:
40
+ papers_accepted_by_llm = []
41
 
42
+ num_papers = sum([len(batch) for batch in llm_ac_decisions])
43
 
44
  if num_papers == 0:
45
  raise ValueError("No papers found in batch")
46
 
47
+ num_papers_to_accept = generate_num_papers_to_accept(n=acceptance_rate * num_papers,
48
+ batch_number=len(llm_ac_decisions))
49
 
50
+ for idx_batch, batch in enumerate(llm_ac_decisions):
51
  tups = sorted([(paper_id, rank) for paper_id, rank in batch.items()], key=lambda x: x[1], reverse=False)
52
 
53
  paper_ids = [int(paper_id) for paper_id, rank in tups]
54
 
55
+ papers_accepted_by_llm += paper_ids[:num_papers_to_accept[idx_batch]]
56
 
57
+ return papers_accepted_by_llm
58
 
59
 
60
  def get_paper_decision_mapping(data_dir: str, conference: str, verbose: bool = False):
 
151
 
152
 
153
  def print_colored(text, color='red'):
154
+
155
+ # Dictionary of ANSI color codes for terminal
156
  foreground_colors = {
157
  'black': 30,
158
  'red': 31,
 
163
  'cyan': 36,
164
  'white': 37,
165
  }
166
+ try:
167
+
168
+ # get_ipython is specific to Jupyter and IPython.
169
+ # We use this to decide whether we are running a Jupyter notebook or not.
170
+ get_ipython
171
+ print(text) # Plain text in Jupyter
172
+ except:
173
+ # If not Jupyter, print with color codes
174
+ color_code = foreground_colors.get(color, 31) # Default to red if color not found
175
+ print(f"\033[{color_code}m{text}\033[0m")
176
 
177
 
178
  def get_ac_decision_path(output_dir: str, conference: str, model_name: str, ac_scoring_method: str, experiment_name:
 
362
  return experiment_names
363
 
364
 
365
+ def load_llm_ac_decisions_as_array(
366
+ output_dir: str,
367
+ experiment_name: str,
368
+ ac_scoring_method: str,
369
+ acceptance_rate: float,
370
+ conference: str,
371
+ model_name: str,
372
+ num_papers_per_area_chair: int
373
+ ) -> Tuple[np.ndarray, np.ndarray]:
374
+ """Loads and processes GPT-4 generated area chair (AC) decisions for an experiment.
375
+
376
+ Args:
377
+ experiment_name (str): Name of the experiment.
378
+ ac_scoring_method (str): Method used for AC scoring ('ranking' or 'recommendation').
379
+ acceptance_rate (float): Acceptance rate for the conference.
380
+ conference (str): Name of the conference.
381
+ model_name (str): Model name used to generate AC decisions.
382
+ num_papers_per_area_chair (int): Number of papers assigned to each area chair.
383
+
384
+ Returns:
385
+ Tuple[np.ndarray, np.ndarray]: An array of decisions (True for accept, False for reject)
386
+ and an array of paper IDs in the order processed.
387
 
388
+ Raises:
389
+ NotImplementedError: If `ac_scoring_method` is not 'ranking' or 'recommendation'.
390
+ """
391
  print("=" * 30)
392
  print(f"Experiment Name: {experiment_name}")
393
 
394
+ llm_ac_decisions = load_llm_ac_decisions(
395
+ output_dir=output_dir,
396
+ conference=conference,
397
+ model_name=model_name,
398
+ ac_scoring_method=ac_scoring_method,
399
+ experiment_name=experiment_name,
400
+ num_papers_per_area_chair=num_papers_per_area_chair
401
+ )
402
 
403
+ paper_ids = sorted(
404
+ int(paper_id) for batch in llm_ac_decisions for paper_id in batch
405
+ )
406
 
407
  if ac_scoring_method == "ranking":
408
+ if len(paper_ids) != len(set(paper_ids)):
409
+ raise ValueError(f"Duplicate paper_ids found in the AC decisions: {Counter(paper_ids)}")
410
 
411
+ papers_accepted_by_llm = get_papers_accepted_by_llm(llm_ac_decisions, acceptance_rate)
412
+ decisions_llm = np.array([paper_id in papers_accepted_by_llm for paper_id in paper_ids])
 
 
 
413
 
414
  elif ac_scoring_method == "recommendation":
415
+ llm_ac_decisions_flat = {int(k): v for batch in llm_ac_decisions for k, v in batch.items()}
416
+ decisions_llm = np.array(
417
+ [llm_ac_decisions_flat[paper_id].startswith("Accept") for paper_id in paper_ids]
418
+ )
 
419
  else:
420
+ raise NotImplementedError(f"Scoring method '{ac_scoring_method}' not implemented.")
421
 
422
+ return decisions_llm, np.array(paper_ids)
423
 
424
 
425
+ def load_llm_ac_decisions(
426
+ output_dir: str,
427
+ conference: str,
428
+ model_name: str,
429
+ ac_scoring_method: str,
430
+ experiment_name: str,
431
+ num_papers_per_area_chair: int
432
+ ) -> List[Dict[str, str]]:
433
+ """Loads GPT-4 generated area chair (AC) decisions from a specified path.
434
+
435
+ Args:
436
+ conference (str): Name of the conference.
437
+ model_name (str): Model name used to generate AC decisions.
438
+ ac_scoring_method (str): Method used for AC scoring ('ranking' or 'recommendation').
439
+ experiment_name (str): Name of the experiment.
440
+ num_papers_per_area_chair (int): Number of papers assigned to each area chair.
441
+
442
+ Returns:
443
+ List[Dict[str, str]]: List of batches, where each batch contains paper ID and decision.
444
+
445
+ Raises:
446
+ AssertionError: If a non-final batch has a paper count different from `num_papers_per_area_chair`.
447
+ """
448
+ path = get_ac_decision_path(
449
+ output_dir=output_dir,
450
+ conference=conference,
451
+ model_name=model_name,
452
+ ac_scoring_method=ac_scoring_method,
453
+ experiment_name=experiment_name
454
+ )
455
 
456
  if osp.exists(path):
457
+ with open(path, 'r', encoding='utf-8') as file:
458
+ ac_decision = json.load(file)
459
  print(f"Loaded {len(ac_decision)} batches of existing AC decisions from {path}")
 
460
  else:
461
  ac_decision = []
462
  print(f"No existing AC decisions found at {path}")
463
 
464
+ ac_decision = [batch for batch in ac_decision if batch] # Remove empty batches
465
 
466
  for i, batch in enumerate(ac_decision):
467
  if i != len(ac_decision) - 1:
468
+ if len(batch) != num_papers_per_area_chair:
469
+ raise AssertionError(
470
+ f"Batch {i} has {len(batch)} papers, expected {num_papers_per_area_chair} for non-final batches."
471
+ )
472
 
473
  return ac_decision
474
 
 
475
  def write_to_excel(data, file_path, sheet_name):
476
  """
477
  Write data to an Excel file.
 
492
  data.to_excel(writer, sheet_name=sheet_name, index=False)
493
 
494
 
495
+ def save_llm_ac_decisions(ac_decisions: List[dict], **kwargs):
496
  path = get_ac_decision_path(**kwargs)
497
 
498
  json.dump(ac_decisions, open(path, 'w', encoding='utf-8'), indent=2)
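
The GPT-4-specific helpers are renamed to model-agnostic `load_llm_ac_decisions` / `load_llm_ac_decisions_as_array` / `save_llm_ac_decisions`, and the `*_as_array` variant now takes explicit keyword arguments instead of popping them from `**kwargs`. A sketch of a call with the new signature; the argument values mirror the defaults used in demo.py and are placeholders, not real experiment output:

```python
# Illustrative call of the renamed helper with the new explicit keyword arguments.
from agentreview.utility.utils import load_llm_ac_decisions_as_array

decisions, paper_ids = load_llm_ac_decisions_as_array(
    output_dir="outputs",
    experiment_name="malicious_Rx1_setting",
    ac_scoring_method="ranking",
    acceptance_rate=0.32,
    conference="ICLR2024",
    model_name="gpt-4o",
    num_papers_per_area_chair=10,
)
# `decisions` is a boolean array (True = accept); `paper_ids` lists the IDs in the processed order.
```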
data ADDED
@@ -0,0 +1 @@
+ ../agent4reviews/data
demo.py ADDED
@@ -0,0 +1,217 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # # AgentReview
5
+ #
6
+ #
7
+ #
8
+ # In this tutorial, you will explore customizing the AgentReview experiment.
9
+ #
10
+ # 📑 Venue: EMNLP 2024 (Oral)
11
+ #
12
+ # 🔗 arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
13
+ #
14
+ # 🌐 Website: [https://agentreview.github.io/](https://agentreview.github.io/)
15
+ #
16
+ # ```bibtex
17
+ # @inproceedings{jin2024agentreview,
18
+ # title={AgentReview: Exploring Peer Review Dynamics with LLM Agents},
19
+ # author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
20
+ # booktitle={EMNLP},
21
+ # year={2024}
22
+ # }
23
+ # ```
24
+ #
25
+
26
+ # In[2]:
27
+
28
+
29
+ import os
30
+
31
+ import numpy as np
32
+
33
+ from agentreview import const
34
+
35
+ os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"
36
+
37
+
38
+ # ## Overview
39
+ #
40
+ # AgentReview features a range of customizable variables, such as characteristics of reviewers, authors, area chairs (ACs), as well as the reviewing mechanisms
41
+
42
+ # In[3]:
43
+
44
+
45
+
46
+ # ## Review Pipeline
47
+ #
48
+ # The simulation adopts a structured, 5-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)):
49
+ #
50
+ # * **I. Reviewer Assessment.** Each manuscript is evaluated by three reviewers independently.
51
+ # * **II. Author-Reviewer Discussion.** Authors submit rebuttals to address reviewers' concerns;
52
+ # * **III. Reviewer-AC Discussion.** The AC facilitates discussions among reviewers, prompting updates to their initial assessments.
53
+ # * **IV. Meta-Review Compilation.** The AC synthesizes the discussions into a meta-review.
54
+ # * **V. Paper Decision.** The AC makes the final decision on whether to accept or reject the paper, based on all gathered inputs.
55
+
56
+ # In[2]:
57
+
58
+
59
+
60
+ # In[4]:
61
+
62
+
63
+ import os
64
+
65
+ if os.path.basename(os.getcwd()) == "notebooks":
66
+ os.chdir("..")
67
+ # Change the working directory to AgentReview
68
+ print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")
69
+
70
+
71
+ # In[5]:
72
+
73
+
74
+ from argparse import Namespace
75
+
76
+ args = Namespace(openai_key=None,
77
+ deployment=None,
78
+ openai_client_type='azure_openai',
79
+ endpoint=None,
80
+ api_version='2023-05-15',
81
+ ac_scoring_method='ranking',
82
+ conference='ICLR2024',
83
+ num_reviewers_per_paper=3,
84
+ ignore_missing_metareviews=False,
85
+ overwrite=False,
86
+ num_papers_per_area_chair=10,
87
+ model_name='gpt-4o',
88
+ output_dir='outputs',
89
+ max_num_words=16384,
90
+ visual_dir='outputs/visual',
91
+ device='cuda',
92
+ data_dir='./data', # Directory to all paper PDF
93
+ acceptance_rate=0.32,
94
+ task='paper_review')
95
+
96
+ os.environ['OPENAI_API_VERSION'] = args.api_version
97
+
98
+ # In[13]:
99
+
100
+
101
+ malicious_Rx1_setting = {
102
+ "AC": [
103
+ "BASELINE"
104
+ ],
105
+
106
+ "reviewer": [
107
+ "malicious",
108
+ "BASELINE",
109
+ "BASELINE"
110
+ ],
111
+
112
+ "author": [
113
+ "BASELINE"
114
+ ],
115
+ "global_settings":{
116
+ "provides_numeric_rating": ['reviewer', 'ac'],
117
+ "persons_aware_of_authors_identities": []
118
+ }
119
+ }
120
+
121
+ all_settings = {"malicious_Rx1": malicious_Rx1_setting}
122
+ args.experiment_name = "malicious_Rx1_setting"
123
+
124
+
125
+ #
126
+ # `malicious_Rx1` means 1 reviewer is a malicious reviewer, and the other reviewers are default (i.e. `BASELINE`) reviewers.
127
+ #
128
+ #
129
+
130
+ # ## Reviews
131
+ #
132
+ # Define the review pipeline
133
+
134
+ # In[10]:
135
+
136
+
137
+ from agentreview.environments import PaperReview
138
+
139
+ def review_one_paper(paper_id, setting):
140
+ paper_decision = paper_id2decision[paper_id]
141
+
142
+ experiment_setting = get_experiment_settings(paper_id=paper_id,
143
+ paper_decision=paper_decision,
144
+ setting=setting)
145
+ print(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")
146
+
147
+ players = initialize_players(experiment_setting=experiment_setting, args=args)
148
+
149
+ player_names = [player.name for player in players]
150
+
151
+ env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
152
+ args=args, experiment_setting=experiment_setting)
153
+
154
+ arena = PaperReviewArena(players=players, environment=env, args=args)
155
+ arena.launch_cli(interactive=False)
156
+
157
+
158
+ # In[11]:
159
+
160
+
161
+ import os
162
+ import sys
163
+
164
+ sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview")))
165
+
166
+ from agentreview.paper_review_settings import get_experiment_settings
167
+ from agentreview.paper_review_arena import PaperReviewArena
168
+ from agentreview.utility.experiment_utils import initialize_players
169
+ from agentreview.utility.utils import project_setup, get_paper_decision_mapping
170
+
171
+
172
+ # In[14]:
173
+
174
+
175
+ sampled_paper_ids = [39]
176
+
177
+ paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)
178
+
179
+ for paper_id in sampled_paper_ids:
180
+ review_one_paper(paper_id, malicious_Rx1_setting)
181
+
182
+
183
+
184
+ def run_paper_decision():
185
+ args.task = "paper_decision"
186
+
187
+ # Make sure the same set of papers always go through the same AC no matter which setting we choose
188
+ NUM_PAPERS = len(const.year2paper_ids[args.conference])
189
+ order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)
190
+
191
+
192
+ # Paper IDs we actually used in experiments
193
+ experimental_paper_ids = []
194
+
195
+ # For papers that have not been decided yet, load their metareviews
196
+ metareviews = []
197
+ print("Shuffling paper IDs")
198
+ sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]
199
+
200
+ # Exclude papers that already have AC decisions
201
+ existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
202
+ conference=args.conference,
203
+ model_name=args.model_name,
204
+ ac_scoring_method=args.ac_scoring_method,
205
+ experiment_name=args.experiment_name,
206
+ num_papers_per_area_chair=args.num_papers_per_area_chair)
207
+
208
+ sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
209
+
210
+
211
+
212
+
213
+ # In[ ]:
214
+
215
+
216
+
217
+
notebooks/demo.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -16,4 +16,4 @@ transformers
  tenacity
  openai
  gradio
-
+ jupyter
run_paper_decision_cli.py CHANGED
@@ -6,17 +6,15 @@ import numpy as np
6
 
7
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
8
 
9
- import const
 
10
  from agentreview.experiment_config import all_settings
11
  from agentreview.paper_review_settings import get_experiment_settings
12
- from agentreview.config import AgentConfig
13
  from agentreview.environments import PaperDecision
14
  from agentreview.paper_review_arena import PaperReviewArena
15
- from agentreview.paper_review_player import AreaChair
16
- from arguments import parse_args
17
- from agentreview.role_descriptions import get_ac_config
18
- from utility.utils import project_setup, get_paper_decision_mapping, \
19
- load_metareview, load_gpt4_generated_ac_decisions
20
 
21
  # Set up logging configuration
22
  logging.basicConfig(
@@ -27,6 +25,8 @@ logging.basicConfig(
27
  ]
28
  )
29
 
 
 
30
 
31
  def main(args):
32
  """
@@ -46,18 +46,16 @@ def main(args):
46
  NUM_PAPERS = len(const.year2paper_ids[args.conference])
47
  order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)
48
 
49
- metareviews = []
50
-
51
  # Paper IDs we actually used in experiments
52
  experimental_paper_ids = []
53
 
54
  # For papers that have not been decided yet, load their metareviews
55
-
56
  print("Shuffling paper IDs")
57
  sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]
58
 
59
  # Exclude papers that already have AC decisions
60
- existing_ac_decisions = load_gpt4_generated_ac_decisions(output_dir=args.output_dir,
61
  conference=args.conference,
62
  model_name=args.model_name,
63
  ac_scoring_method=args.ac_scoring_method,
@@ -68,65 +66,62 @@ def main(args):
68
 
69
  sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
70
 
71
- print("TODO: set paper_ids to existing values")
 
72
 
73
- sampled_paper_ids = [396, 729, 816]
74
 
75
  for paper_id in sampled_paper_ids:
76
 
77
- experiment_setting = get_experiment_settings(all_settings[args.experiment_name])
78
-
79
  # Load meta-reviews
80
  metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
81
  experiment_name=args.experiment_name,
82
  model_name=args.model_name, conference=args.conference)
83
 
84
  if metareview is None:
 
 
 
85
 
86
- if args.ignore_missing_metareviews:
87
-
88
- print(f"Metareview for {paper_id} does not exist. This may happen because the conversation is "
89
- f"completely filtered out due to content policy. "
90
- f"Loading the BASELINE metareview...")
91
-
92
- metareview = load_metareview(paper_id=paper_id, experiment_name="BASELINE",
93
- model_name=args.model_name, conference=args.conference)
94
 
95
- else:
96
- raise ValueError(f"Metareview for {paper_id} does not exist")
97
 
98
- metareviews += [metareview]
99
- experimental_paper_ids += [paper_id]
100
 
101
  num_batches = len(experimental_paper_ids) // args.num_papers_per_area_chair
102
 
103
  for batch_index in range(num_batches):
104
- experiment_setting["players"] = {k: v for k, v in experiment_setting["players"].items() if k.startswith("AC")}
105
-
106
- players = []
107
-
108
- for role, players_li in experiment_setting["players"].items():
109
-
110
- for i, player_config in enumerate(players_li):
111
-
112
- # This phase should only contain the Area Chair
113
- if role == "AC":
114
-
115
- player_config = get_ac_config(env_type="paper_decision",
116
- scoring_method=args.ac_scoring_method,
117
- num_papers_per_area_chair=args.num_papers_per_area_chair,
118
- global_settings=experiment_setting['global_settings'],
119
- acceptance_rate=args.acceptance_rate
120
- **player_config)
121
-
122
- player_config = AgentConfig(**player_config)
123
- player_config['model'] = args.model_name
124
- player = AreaChair(**player_config)
125
-
126
- else:
127
- raise NotImplementedError(f"Unknown role: {role}")
128
 
129
- players.append(player)
 
130
 
131
  player_names = [player.name for player in players]
132
 
@@ -141,7 +136,7 @@ def main(args):
141
  metareviews=metareviews,
142
  experiment_setting=experiment_setting, ac_scoring_method=args.ac_scoring_method)
143
 
144
- arena = PaperReviewArena(players=players, environment=env, args=args)
145
  arena.launch_cli(interactive=False)
146
 
147
 
 
6
 
7
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
8
 
9
+ from agentreview import const
10
+ from agentreview.utility.experiment_utils import initialize_players
11
  from agentreview.experiment_config import all_settings
12
  from agentreview.paper_review_settings import get_experiment_settings
 
13
  from agentreview.environments import PaperDecision
14
  from agentreview.paper_review_arena import PaperReviewArena
15
+ from agentreview.arguments import parse_args
16
+ from agentreview.utility.utils import project_setup, get_paper_decision_mapping, \
17
+ load_metareview, load_llm_ac_decisions
 
 
18
 
19
  # Set up logging configuration
20
  logging.basicConfig(
 
25
  ]
26
  )
27
 
28
+ logger = logging.getLogger(__name__)
29
+
30
 
31
  def main(args):
32
  """
 
46
  NUM_PAPERS = len(const.year2paper_ids[args.conference])
47
  order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)
48
 
 
 
49
  # Paper IDs we actually used in experiments
50
  experimental_paper_ids = []
51
 
52
  # For papers that have not been decided yet, load their metareviews
53
+ metareviews = []
54
  print("Shuffling paper IDs")
55
  sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]
56
 
57
  # Exclude papers that already have AC decisions
58
+ existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
59
  conference=args.conference,
60
  model_name=args.model_name,
61
  ac_scoring_method=args.ac_scoring_method,
 
66
 
67
  sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
68
 
69
+ experiment_setting = get_experiment_settings(paper_id=None, paper_decision=None, setting=all_settings[
70
+ args.experiment_name])
71
 
72
+ logger.info(f"Loading metareview!")
73
 
74
  for paper_id in sampled_paper_ids:
75
 
 
 
76
  # Load meta-reviews
77
  metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
78
  experiment_name=args.experiment_name,
79
  model_name=args.model_name, conference=args.conference)
80
 
81
  if metareview is None:
82
+ print(f"Metareview for {paper_id} does not exist. This may happen because the conversation is "
83
+ f"completely filtered out due to content policy. "
84
+ f"Loading the BASELINE metareview...")
85
 
86
+ metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
87
+ experiment_name="BASELINE",
88
+ model_name=args.model_name, conference=args.conference)
89
 
90
+ if metareview is not None:
 
91
 
92
+ metareviews += [metareview]
93
+ experimental_paper_ids += [paper_id]
94
 
95
  num_batches = len(experimental_paper_ids) // args.num_papers_per_area_chair
96
 
97
  for batch_index in range(num_batches):
98
 
99
+ players = initialize_players(experiment_setting=experiment_setting, args=args)
100
+
101
+ # players = []
102
+ #
103
+ # for role, players_li in experiment_setting["players"].items():
104
+ #
105
+ # for i, player_config in enumerate(players_li):
106
+ #
107
+ # # This phase should only contain the Area Chair
108
+ # if role == "AC":
109
+ #
110
+ # player_config = get_ac_config(env_type="paper_decision",
111
+ # scoring_method=args.ac_scoring_method,
112
+ # num_papers_per_area_chair=args.num_papers_per_area_chair,
113
+ # global_settings=experiment_setting['global_settings'],
114
+ # acceptance_rate=args.acceptance_rate
115
+ # ** player_config)
116
+ #
117
+ # # player_config = AgentConfig(**player_config)
118
+ # player_config['model'] = args.model_name
119
+ # player = AreaChair(**player_config)
120
+ #
121
+ # else:
122
+ # raise NotImplementedError(f"Unknown role: {role}")
123
+ #
124
+ # players.append(player)
125
 
126
  player_names = [player.name for player in players]
127
 
 
136
  metareviews=metareviews,
137
  experiment_setting=experiment_setting, ac_scoring_method=args.ac_scoring_method)
138
 
139
+ arena = PaperReviewArena(players=players, environment=env, args=args, global_prompt=const.GLOBAL_PROMPT)
140
  arena.launch_cli(interactive=False)
141
 
142
 
run_paper_review_cli.py CHANGED
@@ -4,18 +4,17 @@ import os
4
  import sys
5
  from argparse import Namespace
6
 
 
7
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
8
 
9
- from arguments import parse_args
 
10
  from agentreview.experiment_config import all_settings
11
- from agentreview.agent import Player
12
  from agentreview.environments import PaperReview
13
  from agentreview.paper_review_settings import get_experiment_settings
14
  from agentreview.paper_review_arena import PaperReviewArena
15
- from agentreview.paper_review_player import PaperExtractorPlayer, AreaChair, Reviewer
16
- from agentreview.role_descriptions import get_ac_config, get_reviewer_player_config, get_author_config, \
17
- get_paper_extractor_config
18
- from utility.utils import project_setup, get_paper_decision_mapping
19
 
20
  # Set up logging configuration
21
  logging.basicConfig(
@@ -53,81 +52,84 @@ def main(args: Namespace):
53
  sampled_paper_ids = [int(os.path.basename(p).split(".pdf")[0]) for p in paper_paths if p.endswith(".pdf")]
54
 
55
  for paper_id in sampled_paper_ids:
56
-
57
- experiment_setting = get_experiment_settings(all_settings[args.experiment_name])
58
-
59
  # Ground-truth decision in the conference.
60
  # We use this to partition the papers into different quality.
61
  paper_decision = paper_id2decision[paper_id]
62
 
63
- logger.info(f"Experiment Started")
64
- logger.info(f"Paper ID: {paper_id} ({paper_decision})")
65
-
66
- player_names, players = [], []
67
-
68
- for role, players_list in experiment_setting["players"].items():
69
-
70
- for i, player_config in enumerate(players_list):
71
- if role == "Paper Extractor":
72
-
73
- player_config = get_paper_extractor_config(global_settings=experiment_setting['global_settings'], )
74
-
75
- player = PaperExtractorPlayer(data_dir=args.data_dir, paper_id=paper_id,
76
- paper_decision=paper_decision,
77
- args=args,
78
- conference=args.conference, **player_config)
79
-
80
- player_names.append(player.name)
81
-
82
-
83
- elif role == "AC":
84
-
85
- player_config = get_ac_config(env_type="paper_review",
86
- scoring_method=args.ac_scoring_method,
87
- num_papers_per_area_chair=args.num_papers_per_area_chair,
88
- global_settings=experiment_setting['global_settings'],
89
- acceptance_rate=args.acceptance_rate,
90
- **player_config)
91
-
92
- player_config['model'] = args.model_name
93
-
94
- player = AreaChair(data_dir=args.data_dir,
95
- conference=args.conference,
96
- args=args,
97
- **player_config)
98
-
99
- player_names.append(player.name)
100
-
101
-
102
- elif role == "Author":
103
-
104
- # Author requires no behavior customization.
105
- # So we directly use the Player class
106
- player_config = get_author_config()
107
- player = Player(data_dir=args.data_dir,
108
- conference=args.conference,
109
- args=args,
110
- **player_config)
111
-
112
- player_names.append(player.name)
113
-
114
- elif role == "Reviewer":
115
- player_config = get_reviewer_player_config(reviewer_index=i + 1,
116
- global_settings=experiment_setting['global_settings'],
117
- **player_config)
118
- player_config['model'] = args.model_name
119
- player = Reviewer(data_dir=args.data_dir, conference=args.conference, **player_config)
120
- player_names.append(player.name)
121
-
122
- else:
123
- raise NotImplementedError(f"Unknown role: {role}")
124
-
125
- players.append(player)
126
 
127
  env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
128
  args=args, experiment_setting=experiment_setting)
129
 
130
- arena = PaperReviewArena(players=players, environment=env, args=args)
131
  arena.launch_cli(interactive=False)
132
 
133
  logger.info("Done!")
 
4
  import sys
5
  from argparse import Namespace
6
 
7
+
8
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
9
 
10
+ from agentreview import const
11
+ from agentreview.arguments import parse_args
12
  from agentreview.experiment_config import all_settings
 
13
  from agentreview.environments import PaperReview
14
  from agentreview.paper_review_settings import get_experiment_settings
15
  from agentreview.paper_review_arena import PaperReviewArena
16
+ from agentreview.utility.experiment_utils import initialize_players
17
+ from agentreview.utility.utils import project_setup, get_paper_decision_mapping
 
 
18
 
19
  # Set up logging configuration
20
  logging.basicConfig(
 
52
  sampled_paper_ids = [int(os.path.basename(p).split(".pdf")[0]) for p in paper_paths if p.endswith(".pdf")]
53
 
54
  for paper_id in sampled_paper_ids:
 
 
 
55
  # Ground-truth decision in the conference.
56
  # We use this to partition the papers into different quality.
57
  paper_decision = paper_id2decision[paper_id]
58
 
59
+ experiment_setting = get_experiment_settings(paper_id=paper_id,
60
+ paper_decision=paper_decision,
61
+ setting=all_settings[args.experiment_name])
62
+
63
+ logger.info(f"Experiment Started!")
64
+ logger.info(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")
65
+
66
+ players = initialize_players(experiment_setting=experiment_setting, args=args)
67
+
68
+ player_names = [player.name for player in players]
69
+
70
+ # for role, players_list in experiment_setting["players"].items():
71
+ #
72
+ # for i, player_config in enumerate(players_list):
73
+ # if role == "Paper Extractor":
74
+ #
75
+ # player_config = get_paper_extractor_config(global_settings=experiment_setting['global_settings'], )
76
+ #
77
+ # player = PaperExtractorPlayer(data_dir=args.data_dir, paper_id=paper_id,
78
+ # paper_decision=paper_decision,
79
+ # args=args,
80
+ # conference=args.conference, **player_config)
81
+ #
82
+ # player_names.append(player.name)
83
+ #
84
+ #
85
+ # elif role == "AC":
86
+ #
87
+ # player_config = get_ac_config(env_type="paper_review",
88
+ # scoring_method=args.ac_scoring_method,
89
+ # num_papers_per_area_chair=args.num_papers_per_area_chair,
90
+ # global_settings=experiment_setting['global_settings'],
91
+ # acceptance_rate=args.acceptance_rate,
92
+ # **player_config)
93
+ #
94
+ # player_config['model'] = args.model_name
95
+ #
96
+ # player = AreaChair(data_dir=args.data_dir,
97
+ # conference=args.conference,
98
+ # args=args,
99
+ # **player_config)
100
+ #
101
+ # player_names.append(player.name)
102
+ #
103
+ #
104
+ # elif role == "Author":
105
+ #
106
+ # # Author requires no behavior customization.
107
+ # # So we directly use the Player class
108
+ # player_config = get_author_config()
109
+ # player = Player(data_dir=args.data_dir,
110
+ # conference=args.conference,
111
+ # args=args,
112
+ # **player_config)
113
+ #
114
+ # player_names.append(player.name)
115
+ #
116
+ # elif role == "Reviewer":
117
+ # player_config = get_reviewer_player_config(reviewer_index=i + 1,
118
+ # global_settings=experiment_setting['global_settings'],
119
+ # **player_config)
120
+ # player_config['model'] = args.model_name
121
+ # player = Reviewer(data_dir=args.data_dir, conference=args.conference, **player_config)
122
+ # player_names.append(player.name)
123
+ #
124
+ # else:
125
+ # raise NotImplementedError(f"Unknown role: {role}")
126
+ #
127
+ # players.append(player)
128
 
129
  env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
130
  args=args, experiment_setting=experiment_setting)
131
 
132
+ arena = PaperReviewArena(players=players, environment=env, args=args, global_prompt=const.GLOBAL_PROMPT)
133
  arena.launch_cli(interactive=False)
134
 
135
  logger.info("Done!")