Spaces:
Running
Running
Yiqiao Jin
committed on
Commit
·
53709ed
1
Parent(s):
bdafe83
Update demo
Browse files- README.md +13 -0
- agentreview/agent.py +2 -0
- arguments.py → agentreview/arguments.py +4 -4
- agentreview/backends/openai.py +11 -13
- const.py → agentreview/const.py +2 -0
- agentreview/dataset/download_openreview_paper.py +4 -5
- agentreview/dataset/process_submissions.py +2 -3
- agentreview/environments/paper_review.py +1 -2
- agentreview/paper_processor.py +1 -1
- agentreview/paper_review_arena.py +1 -4
- agentreview/paper_review_player.py +3 -1
- agentreview/paper_review_settings.py +5 -2
- agentreview/role_descriptions.py +1 -1
- agentreview/ui/cli.py +11 -11
- agentreview/utility/__init__.py +0 -0
- {utility → agentreview/utility}/authentication_utils.py +10 -0
- {utility → agentreview/utility}/data_utils.py +0 -0
- agentreview/utility/experiment_utils.py +84 -0
- {utility → agentreview/utility}/general_utils.py +0 -0
- {utility → agentreview/utility}/metrics_utils.py +0 -0
- {utility → agentreview/utility}/text_utils.py +0 -0
- {utility → agentreview/utility}/utils.py +106 -50
- data +1 -0
- demo.py +217 -0
- notebooks/demo.ipynb +0 -0
- requirements.txt +1 -1
- run_paper_decision_cli.py +48 -53
- run_paper_review_cli.py +75 -73
README.md
CHANGED
@@ -1,3 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# AgentReview
|
2 |
|
3 |
Official implementation for the π[EMNLP 2024](https://2024.emnlp.org/) (main) paper: [AgentReview: Exploring Peer Review Dynamics with LLM Agents](https://arxiv.org/abs/2406.12708)
|
|
|
1 |
+
---
|
2 |
+
title: AgentReview
|
3 |
+
emoji: π
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: pink
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.4.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
+
short_description: EMNLP 2024
|
12 |
+
---
|
13 |
+
|
14 |
# AgentReview
|
15 |
|
16 |
Official implementation for the π[EMNLP 2024](https://2024.emnlp.org/) (main) paper: [AgentReview: Exploring Peer Review Dynamics with LLM Agents](https://arxiv.org/abs/2406.12708)
|
agentreview/agent.py
CHANGED
@@ -69,8 +69,10 @@ class Player(Agent):
|
|
69 |
self.data_dir = kwargs.pop("data_dir", None)
|
70 |
self.args = args
|
71 |
|
|
|
72 |
if isinstance(backend, BackendConfig):
|
73 |
backend_config = backend
|
|
|
74 |
backend = load_backend(backend_config)
|
75 |
elif isinstance(backend, IntelligenceBackend):
|
76 |
backend_config = backend.to_config()
|
|
|
69 |
self.data_dir = kwargs.pop("data_dir", None)
|
70 |
self.args = args
|
71 |
|
72 |
+
|
73 |
if isinstance(backend, BackendConfig):
|
74 |
backend_config = backend
|
75 |
+
backend_config['openai_client_type'] = args.openai_client_type
|
76 |
backend = load_backend(backend_config)
|
77 |
elif isinstance(backend, IntelligenceBackend):
|
78 |
backend_config = backend.to_config()
|
arguments.py → agentreview/arguments.py
RENAMED
@@ -26,7 +26,8 @@ def parse_args():
|
|
26 |
|
27 |
|
28 |
parser.add_argument(
|
29 |
-
"--api_version", type=str, default="2023-
|
|
|
30 |
)
|
31 |
|
32 |
# Experiment configuration
|
@@ -54,11 +55,10 @@ def parse_args():
|
|
54 |
)
|
55 |
|
56 |
parser.add_argument(
|
57 |
-
"--
|
58 |
)
|
59 |
-
|
60 |
parser.add_argument(
|
61 |
-
"--
|
62 |
)
|
63 |
|
64 |
parser.add_argument(
|
|
|
26 |
|
27 |
|
28 |
parser.add_argument(
|
29 |
+
"--api_version", type=str, default="2023-05-15", help="API version to be used for making requests. Required "
|
30 |
+
"for Azure OpenAI clients."
|
31 |
)
|
32 |
|
33 |
# Experiment configuration
|
|
|
55 |
)
|
56 |
|
57 |
parser.add_argument(
|
58 |
+
"--overwrite", action="store_true", help="If set, existing results or output files will be overwritten without prompting."
|
59 |
)
|
|
|
60 |
parser.add_argument(
|
61 |
+
"--skip_logging", action="store_true", help="If set, we do not log the messages in the console."
|
62 |
)
|
63 |
|
64 |
parser.add_argument(
|
agentreview/backends/openai.py
CHANGED
@@ -3,17 +3,11 @@ from typing import List
|
|
3 |
|
4 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
5 |
|
6 |
-
from arguments import parse_args
|
7 |
-
from utility.authentication_utils import get_openai_client
|
8 |
from .base import IntelligenceBackend
|
9 |
from ..message import SYSTEM_NAME, Message
|
10 |
|
11 |
-
args = parse_args()
|
12 |
-
|
13 |
-
client = get_openai_client(client_type=args.openai_client_type)
|
14 |
-
|
15 |
-
OPENAI_CLIENT_TYPE = args.openai_client_type
|
16 |
-
|
17 |
# Default config follows the OpenAI playground
|
18 |
DEFAULT_TEMPERATURE = 1.0
|
19 |
DEFAULT_MAX_TOKENS = 4096
|
@@ -57,19 +51,22 @@ class OpenAIChat(IntelligenceBackend):
|
|
57 |
merge_other_agents_as_one_user=merge_other_agents_as_one_user,
|
58 |
**kwargs,
|
59 |
)
|
60 |
-
|
|
|
61 |
self.temperature = temperature
|
62 |
self.max_tokens = max_tokens
|
63 |
self.model = model
|
64 |
self.merge_other_agent_as_user = merge_other_agents_as_one_user
|
65 |
|
|
|
|
|
66 |
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=60))
|
67 |
def _get_response(self, messages):
|
68 |
# Refer to https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/switching-endpoints for how to
|
69 |
# make API calls
|
70 |
|
71 |
-
if
|
72 |
-
completion = client.chat.completions.create(
|
73 |
model=self.model,
|
74 |
messages=messages,
|
75 |
temperature=self.temperature,
|
@@ -77,8 +74,8 @@ class OpenAIChat(IntelligenceBackend):
|
|
77 |
stop=STOP,
|
78 |
)
|
79 |
|
80 |
-
elif
|
81 |
-
completion = client.chat.completions.create(
|
82 |
model=self.model,
|
83 |
messages=messages,
|
84 |
temperature=self.temperature,
|
@@ -90,6 +87,7 @@ class OpenAIChat(IntelligenceBackend):
|
|
90 |
raise NotImplementedError
|
91 |
|
92 |
response = completion.choices[0].message.content
|
|
|
93 |
response = response.strip()
|
94 |
return response
|
95 |
|
|
|
3 |
|
4 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
5 |
|
6 |
+
from agentreview.arguments import parse_args
|
7 |
+
from agentreview.utility.authentication_utils import get_openai_client
|
8 |
from .base import IntelligenceBackend
|
9 |
from ..message import SYSTEM_NAME, Message
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Default config follows the OpenAI playground
|
12 |
DEFAULT_TEMPERATURE = 1.0
|
13 |
DEFAULT_MAX_TOKENS = 4096
|
|
|
51 |
merge_other_agents_as_one_user=merge_other_agents_as_one_user,
|
52 |
**kwargs,
|
53 |
)
|
54 |
+
self.client_type = kwargs.get("openai_client_type", None)
|
55 |
+
self.client = get_openai_client(self.client_type)
|
56 |
self.temperature = temperature
|
57 |
self.max_tokens = max_tokens
|
58 |
self.model = model
|
59 |
self.merge_other_agent_as_user = merge_other_agents_as_one_user
|
60 |
|
61 |
+
|
62 |
+
|
63 |
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=60))
|
64 |
def _get_response(self, messages):
|
65 |
# Refer to https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/switching-endpoints for how to
|
66 |
# make API calls
|
67 |
|
68 |
+
if self.client_type == "openai":
|
69 |
+
completion = self.client.chat.completions.create(
|
70 |
model=self.model,
|
71 |
messages=messages,
|
72 |
temperature=self.temperature,
|
|
|
74 |
stop=STOP,
|
75 |
)
|
76 |
|
77 |
+
elif self.client_type == "azure_openai":
|
78 |
+
completion = self.client.chat.completions.create(
|
79 |
model=self.model,
|
80 |
messages=messages,
|
81 |
temperature=self.temperature,
|
|
|
87 |
raise NotImplementedError
|
88 |
|
89 |
response = completion.choices[0].message.content
|
90 |
+
|
91 |
response = response.strip()
|
92 |
return response
|
93 |
|
const.py → agentreview/const.py
RENAMED
@@ -10,6 +10,8 @@ PAPER_DECISIONS_ICLR2019 = ["Accept-oral", "Accept-poster", "Reject"]
|
|
10 |
|
11 |
AREA_CHAIR_TYPES = ['inclusive', 'conformist', 'authoritarian', 'BASELINE']
|
12 |
|
|
|
|
|
13 |
# These are papers that contain potentially sensitive content. GPT-4 refused to generate reviews for these papers.
|
14 |
FILTERED_PAPER_IDS = {
|
15 |
"ICLR2020": [],
|
|
|
10 |
|
11 |
AREA_CHAIR_TYPES = ['inclusive', 'conformist', 'authoritarian', 'BASELINE']
|
12 |
|
13 |
+
GLOBAL_PROMPT = "This is a realistic simulation of academic peer review."
|
14 |
+
|
15 |
# These are papers that contain potentially sensitive content. GPT-4 refused to generate reviews for these papers.
|
16 |
FILTERED_PAPER_IDS = {
|
17 |
"ICLR2020": [],
|
agentreview/dataset/download_openreview_paper.py
CHANGED
@@ -15,14 +15,14 @@ import os
|
|
15 |
import time
|
16 |
import requests
|
17 |
|
18 |
-
from arguments import parse_args
|
19 |
|
20 |
try:
|
21 |
import openreview
|
22 |
except ImportError:
|
23 |
raise ImportError("Please install openreview package using `pip install openreview-py`")
|
24 |
|
25 |
-
def download_papers():
|
26 |
"""Downloads all papers from ICLR 2023 using OpenReview API.
|
27 |
|
28 |
This function authenticates with the OpenReview API using environment
|
@@ -36,8 +36,6 @@ def download_papers():
|
|
36 |
AssertionError: If the conference argument is not for ICLR.
|
37 |
"""
|
38 |
|
39 |
-
args = parse_args()
|
40 |
-
|
41 |
openreview_username = os.environ.get("OPENREVIEW_USERNAME")
|
42 |
openreview_password = os.environ.get("OPENREVIEW_PASSWORD")
|
43 |
|
@@ -133,4 +131,5 @@ def download_papers():
|
|
133 |
|
134 |
|
135 |
if __name__ == "__main__":
|
136 |
-
|
|
|
|
15 |
import time
|
16 |
import requests
|
17 |
|
18 |
+
from agentreview.arguments import parse_args
|
19 |
|
20 |
try:
|
21 |
import openreview
|
22 |
except ImportError:
|
23 |
raise ImportError("Please install openreview package using `pip install openreview-py`")
|
24 |
|
25 |
+
def download_papers(args):
|
26 |
"""Downloads all papers from ICLR 2023 using OpenReview API.
|
27 |
|
28 |
This function authenticates with the OpenReview API using environment
|
|
|
36 |
AssertionError: If the conference argument is not for ICLR.
|
37 |
"""
|
38 |
|
|
|
|
|
39 |
openreview_username = os.environ.get("OPENREVIEW_USERNAME")
|
40 |
openreview_password = os.environ.get("OPENREVIEW_PASSWORD")
|
41 |
|
|
|
131 |
|
132 |
|
133 |
if __name__ == "__main__":
|
134 |
+
args = parse_args()
|
135 |
+
download_papers(args)
|
agentreview/dataset/process_submissions.py
CHANGED
@@ -22,9 +22,8 @@ from tqdm import tqdm
|
|
22 |
|
23 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
24 |
|
25 |
-
import
|
26 |
-
from
|
27 |
-
from utility.utils import print_colored
|
28 |
|
29 |
decision_map = {
|
30 |
# ICLR 2023
|
|
|
22 |
|
23 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
24 |
|
25 |
+
from agentreview.arguments import parse_args
|
26 |
+
from agentreview.utility.utils import print_colored
|
|
|
27 |
|
28 |
decision_map = {
|
29 |
# ICLR 2023
|
agentreview/environments/paper_review.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
import json
|
2 |
-
import json
|
3 |
import logging
|
4 |
import os.path as osp
|
5 |
from typing import List
|
6 |
|
7 |
from agentreview.environments import Conversation
|
8 |
-
from utility.utils import get_rebuttal_dir
|
9 |
from .base import TimeStep
|
10 |
from ..message import Message
|
11 |
from ..paper_review_message import PaperReviewMessagePool
|
|
|
1 |
import json
|
|
|
2 |
import logging
|
3 |
import os.path as osp
|
4 |
from typing import List
|
5 |
|
6 |
from agentreview.environments import Conversation
|
7 |
+
from agentreview.utility.utils import get_rebuttal_dir
|
8 |
from .base import TimeStep
|
9 |
from ..message import Message
|
10 |
from ..paper_review_message import PaperReviewMessagePool
|
agentreview/paper_processor.py
CHANGED
@@ -148,7 +148,7 @@ def convert_text_into_dict(text: str) -> dict:
|
|
148 |
|
149 |
|
150 |
if __name__ == "__main__":
|
151 |
-
from utility.authentication_utils import read_and_set_openai_key
|
152 |
from agentreview.review import get_lm_review
|
153 |
|
154 |
read_and_set_openai_key()
|
|
|
148 |
|
149 |
|
150 |
if __name__ == "__main__":
|
151 |
+
from agentreview.utility.authentication_utils import read_and_set_openai_key
|
152 |
from agentreview.review import get_lm_review
|
153 |
|
154 |
read_and_set_openai_key()
|
agentreview/paper_review_arena.py
CHANGED
@@ -1,14 +1,11 @@
|
|
1 |
import csv
|
2 |
-
import glob
|
3 |
import json
|
4 |
import logging
|
5 |
-
import os
|
6 |
from typing import Union
|
7 |
|
8 |
from agentreview.arena import Arena, TooManyInvalidActions
|
9 |
from agentreview.role_descriptions import get_reviewer_description
|
10 |
-
from utility.utils import
|
11 |
-
get_paper_review_and_rebuttal_dir, format_metareviews
|
12 |
from .agent import Player
|
13 |
from .config import ArenaConfig
|
14 |
from .environments import TimeStep, load_environment
|
|
|
1 |
import csv
|
|
|
2 |
import json
|
3 |
import logging
|
|
|
4 |
from typing import Union
|
5 |
|
6 |
from agentreview.arena import Arena, TooManyInvalidActions
|
7 |
from agentreview.role_descriptions import get_reviewer_description
|
8 |
+
from agentreview.utility.utils import format_metareviews
|
|
|
9 |
from .agent import Player
|
10 |
from .config import ArenaConfig
|
11 |
from .environments import TimeStep, load_environment
|
agentreview/paper_review_player.py
CHANGED
@@ -56,6 +56,8 @@ class Reviewer(Player):
|
|
56 |
global_prompt: str = None,
|
57 |
**kwargs,
|
58 |
):
|
|
|
|
|
59 |
super().__init__(name, role_desc, backend, global_prompt, **kwargs)
|
60 |
|
61 |
def act(self, observation: List[Message]) -> str:
|
@@ -94,7 +96,7 @@ class PaperExtractorPlayer(Player):
|
|
94 |
Returns:
|
95 |
str: The action (response) of the player.
|
96 |
"""
|
97 |
-
|
98 |
logging.info(f"Loading {self.conference} paper {self.paper_id} ({self.paper_decision}) ...")
|
99 |
|
100 |
loader = PDFReader()
|
|
|
56 |
global_prompt: str = None,
|
57 |
**kwargs,
|
58 |
):
|
59 |
+
print("kwargs")
|
60 |
+
print(kwargs)
|
61 |
super().__init__(name, role_desc, backend, global_prompt, **kwargs)
|
62 |
|
63 |
def act(self, observation: List[Message]) -> str:
|
|
|
96 |
Returns:
|
97 |
str: The action (response) of the player.
|
98 |
"""
|
99 |
+
|
100 |
logging.info(f"Loading {self.conference} paper {self.paper_id} ({self.paper_decision}) ...")
|
101 |
|
102 |
loader = PDFReader()
|
agentreview/paper_review_settings.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
default_reviewer_setting = {
|
2 |
"is_benign": None,
|
3 |
"is_knowledgeable": None,
|
@@ -6,7 +8,7 @@ default_reviewer_setting = {
|
|
6 |
}
|
7 |
|
8 |
|
9 |
-
def get_experiment_settings(setting: dict):
|
10 |
"""
|
11 |
Generate experiment settings based on provided configurations for area chairs (AC) and reviewers.
|
12 |
|
@@ -19,7 +21,8 @@ def get_experiment_settings(setting: dict):
|
|
19 |
"""
|
20 |
|
21 |
experiment_setting = {
|
22 |
-
"
|
|
|
23 |
"players": {
|
24 |
|
25 |
# Paper Extractor is a special player that extracts a paper from the dataset.
|
|
|
1 |
+
from typing import Union
|
2 |
+
|
3 |
default_reviewer_setting = {
|
4 |
"is_benign": None,
|
5 |
"is_knowledgeable": None,
|
|
|
8 |
}
|
9 |
|
10 |
|
11 |
+
def get_experiment_settings(paper_id: Union[int, None] = None, paper_decision: Union[str, None] = None, setting: dict = None):
|
12 |
"""
|
13 |
Generate experiment settings based on provided configurations for area chairs (AC) and reviewers.
|
14 |
|
|
|
21 |
"""
|
22 |
|
23 |
experiment_setting = {
|
24 |
+
"paper_id": paper_id,
|
25 |
+
"paper_decision": paper_decision,
|
26 |
"players": {
|
27 |
|
28 |
# Paper Extractor is a special player that extracts a paper from the dataset.
|
agentreview/role_descriptions.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
|
|
5 |
|
6 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
7 |
|
8 |
-
import const
|
9 |
from agentreview.config import AgentConfig
|
10 |
|
11 |
PLAYER_BACKEND = {
|
|
|
5 |
|
6 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
7 |
|
8 |
+
from agentreview import const
|
9 |
from agentreview.config import AgentConfig
|
10 |
|
11 |
PLAYER_BACKEND = {
|
agentreview/ui/cli.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import logging
|
2 |
-
import logging
|
3 |
import os
|
4 |
import os.path as osp
|
5 |
from typing import Union
|
@@ -11,8 +10,8 @@ from prompt_toolkit.completion import WordCompleter
|
|
11 |
from prompt_toolkit.styles import Style
|
12 |
from rich.console import Console
|
13 |
|
14 |
-
from utility.utils import get_rebuttal_dir,
|
15 |
-
|
16 |
from ..arena import Arena, TooManyInvalidActions
|
17 |
from ..backends.human import HumanBackendError
|
18 |
from ..environments import PaperReview, PaperDecision
|
@@ -222,7 +221,8 @@ class ArenaCLI:
|
|
222 |
# Print the new messages
|
223 |
for msg in messages:
|
224 |
message_str = f"[{msg.agent_name}->{msg.visible_to}]: {msg.content}"
|
225 |
-
|
|
|
226 |
msg.logged = True
|
227 |
|
228 |
step += 1
|
@@ -251,7 +251,7 @@ class ArenaCLI:
|
|
251 |
self.arena.save_history(path_review_history)
|
252 |
|
253 |
elif env.type_name == "paper_decision":
|
254 |
-
ac_decisions =
|
255 |
conference=args.conference,
|
256 |
model_name=args.model_name,
|
257 |
ac_scoring_method=args.ac_scoring_method,
|
@@ -261,9 +261,9 @@ class ArenaCLI:
|
|
261 |
|
262 |
ac_decisions += [env.ac_decisions]
|
263 |
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
|
|
1 |
import logging
|
|
|
2 |
import os
|
3 |
import os.path as osp
|
4 |
from typing import Union
|
|
|
10 |
from prompt_toolkit.styles import Style
|
11 |
from rich.console import Console
|
12 |
|
13 |
+
from agentreview.utility.utils import get_rebuttal_dir, load_llm_ac_decisions, \
|
14 |
+
save_llm_ac_decisions
|
15 |
from ..arena import Arena, TooManyInvalidActions
|
16 |
from ..backends.human import HumanBackendError
|
17 |
from ..environments import PaperReview, PaperDecision
|
|
|
221 |
# Print the new messages
|
222 |
for msg in messages:
|
223 |
message_str = f"[{msg.agent_name}->{msg.visible_to}]: {msg.content}"
|
224 |
+
if self.args.skip_logging:
|
225 |
+
console.print(color_dict[name_to_color[msg.agent_name]] + message_str + CRStyle.RESET_ALL)
|
226 |
msg.logged = True
|
227 |
|
228 |
step += 1
|
|
|
251 |
self.arena.save_history(path_review_history)
|
252 |
|
253 |
elif env.type_name == "paper_decision":
|
254 |
+
ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
|
255 |
conference=args.conference,
|
256 |
model_name=args.model_name,
|
257 |
ac_scoring_method=args.ac_scoring_method,
|
|
|
261 |
|
262 |
ac_decisions += [env.ac_decisions]
|
263 |
|
264 |
+
save_llm_ac_decisions(ac_decisions,
|
265 |
+
output_dir=args.output_dir,
|
266 |
+
conference=args.conference,
|
267 |
+
model_name=args.model_name,
|
268 |
+
ac_scoring_method=args.ac_scoring_method,
|
269 |
+
experiment_name=args.experiment_name)
|
agentreview/utility/__init__.py
ADDED
File without changes
|
{utility → agentreview/utility}/authentication_utils.py
RENAMED
@@ -16,6 +16,16 @@ def get_openai_client(client_type: str):
|
|
16 |
|
17 |
assert client_type in ["azure_openai", "openai"]
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
if client_type == "openai":
|
20 |
client = openai.OpenAI(
|
21 |
api_key=os.environ['OPENAI_API_KEY']
|
|
|
16 |
|
17 |
assert client_type in ["azure_openai", "openai"]
|
18 |
|
19 |
+
endpoint: str = os.environ['AZURE_ENDPOINT']
|
20 |
+
|
21 |
+
if not endpoint.startswith("https://"):
|
22 |
+
endpoint = f"https://{endpoint}.openai.azure.com"
|
23 |
+
|
24 |
+
os.environ['AZURE_ENDPOINT'] = endpoint
|
25 |
+
|
26 |
+
if not os.environ.get('OPENAI_API_VERSION'):
|
27 |
+
os.environ['OPENAI_API_VERSION'] = "2023-05-15"
|
28 |
+
|
29 |
if client_type == "openai":
|
30 |
client = openai.OpenAI(
|
31 |
api_key=os.environ['OPENAI_API_KEY']
|
{utility → agentreview/utility}/data_utils.py
RENAMED
File without changes
|
agentreview/utility/experiment_utils.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
|
4 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
5 |
+
|
6 |
+
from agentreview.agent import Player
|
7 |
+
from agentreview.paper_review_player import PaperExtractorPlayer, AreaChair, Reviewer
|
8 |
+
from agentreview.role_descriptions import get_ac_config, get_reviewer_player_config, get_author_config, \
|
9 |
+
get_paper_extractor_config
|
10 |
+
|
11 |
+
|
12 |
+
def initialize_players(experiment_setting: dict, args):
|
13 |
+
paper_id = experiment_setting['paper_id']
|
14 |
+
paper_decision = experiment_setting['paper_decision']
|
15 |
+
|
16 |
+
if args.task == "paper_decision":
|
17 |
+
experiment_setting["players"] = {k: v for k, v in experiment_setting["players"].items() if k.startswith("AC")}
|
18 |
+
|
19 |
+
players = []
|
20 |
+
|
21 |
+
for role, players_list in experiment_setting["players"].items():
|
22 |
+
|
23 |
+
for i, player_config in enumerate(players_list):
|
24 |
+
if role == "AC":
|
25 |
+
|
26 |
+
# For AC, `env_type` is either "paper_decision" or "paper_review"
|
27 |
+
player_config = get_ac_config(env_type=args.task,
|
28 |
+
scoring_method=args.ac_scoring_method,
|
29 |
+
num_papers_per_area_chair=args.num_papers_per_area_chair,
|
30 |
+
global_settings=experiment_setting['global_settings'],
|
31 |
+
acceptance_rate=args.acceptance_rate,
|
32 |
+
**player_config)
|
33 |
+
|
34 |
+
player_config['model'] = args.model_name
|
35 |
+
|
36 |
+
player = AreaChair(data_dir=args.data_dir,
|
37 |
+
conference=args.conference,
|
38 |
+
args=args,
|
39 |
+
**player_config)
|
40 |
+
|
41 |
+
|
42 |
+
elif args.task == "paper_review":
|
43 |
+
|
44 |
+
|
45 |
+
if role == "Paper Extractor":
|
46 |
+
|
47 |
+
player_config = get_paper_extractor_config(global_settings=experiment_setting['global_settings'])
|
48 |
+
|
49 |
+
player = PaperExtractorPlayer(data_dir=args.data_dir, paper_id=paper_id,
|
50 |
+
paper_decision=paper_decision,
|
51 |
+
args=args,
|
52 |
+
conference=args.conference, **player_config)
|
53 |
+
|
54 |
+
|
55 |
+
|
56 |
+
elif role == "Author":
|
57 |
+
|
58 |
+
# Author requires no behavior customization.
|
59 |
+
# So we directly use the Player class
|
60 |
+
player_config = get_author_config()
|
61 |
+
player = Player(data_dir=args.data_dir,
|
62 |
+
conference=args.conference,
|
63 |
+
args=args,
|
64 |
+
**player_config)
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
elif role == "Reviewer":
|
69 |
+
player_config = get_reviewer_player_config(reviewer_index=i + 1,
|
70 |
+
global_settings=experiment_setting['global_settings'],
|
71 |
+
**player_config)
|
72 |
+
player_config['model'] = args.model_name
|
73 |
+
player = Reviewer(data_dir=args.data_dir, conference=args.conference, args=args, **player_config)
|
74 |
+
|
75 |
+
|
76 |
+
else:
|
77 |
+
raise NotImplementedError(f"Unknown role for paper review (stage 1-4): {role}")
|
78 |
+
|
79 |
+
else:
|
80 |
+
raise NotImplementedError(f"Unknown role for paper decision (stage 5): {role}")
|
81 |
+
|
82 |
+
players.append(player)
|
83 |
+
|
84 |
+
return players
|
{utility → agentreview/utility}/general_utils.py
RENAMED
File without changes
|
{utility → agentreview/utility}/metrics_utils.py
RENAMED
File without changes
|
{utility → agentreview/utility}/text_utils.py
RENAMED
File without changes
|
{utility → agentreview/utility}/utils.py
RENAMED
@@ -9,8 +9,8 @@ from typing import Union, List, Dict, Tuple
|
|
9 |
import numpy as np
|
10 |
import pandas as pd
|
11 |
|
12 |
-
import const
|
13 |
-
from utility.general_utils import check_cwd, set_seed
|
14 |
|
15 |
|
16 |
def generate_num_papers_to_accept(n, batch_number, shuffle=True):
|
@@ -36,25 +36,25 @@ def generate_num_papers_to_accept(n, batch_number, shuffle=True):
|
|
36 |
return array
|
37 |
|
38 |
|
39 |
-
def
|
40 |
-
|
41 |
|
42 |
-
num_papers = sum([len(batch) for batch in
|
43 |
|
44 |
if num_papers == 0:
|
45 |
raise ValueError("No papers found in batch")
|
46 |
|
47 |
-
num_papers_to_accept = generate_num_papers_to_accept(n=
|
48 |
-
batch_number=len(
|
49 |
|
50 |
-
for idx_batch, batch in enumerate(
|
51 |
tups = sorted([(paper_id, rank) for paper_id, rank in batch.items()], key=lambda x: x[1], reverse=False)
|
52 |
|
53 |
paper_ids = [int(paper_id) for paper_id, rank in tups]
|
54 |
|
55 |
-
|
56 |
|
57 |
-
return
|
58 |
|
59 |
|
60 |
def get_paper_decision_mapping(data_dir: str, conference: str, verbose: bool = False):
|
@@ -151,6 +151,8 @@ def get_rebuttal_dir(output_dir: str,
|
|
151 |
|
152 |
|
153 |
def print_colored(text, color='red'):
|
|
|
|
|
154 |
foreground_colors = {
|
155 |
'black': 30,
|
156 |
'red': 31,
|
@@ -161,7 +163,16 @@ def print_colored(text, color='red'):
|
|
161 |
'cyan': 36,
|
162 |
'white': 37,
|
163 |
}
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
|
167 |
def get_ac_decision_path(output_dir: str, conference: str, model_name: str, ac_scoring_method: str, experiment_name:
|
@@ -351,71 +362,116 @@ def get_experiment_names(conference: str = "ICLR2023"):
|
|
351 |
return experiment_names
|
352 |
|
353 |
|
354 |
-
def
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
|
|
|
|
|
|
|
361 |
print("=" * 30)
|
362 |
print(f"Experiment Name: {experiment_name}")
|
363 |
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
|
|
|
|
|
|
369 |
|
370 |
-
paper_ids = sorted(
|
371 |
-
|
|
|
372 |
|
373 |
if ac_scoring_method == "ranking":
|
374 |
-
|
375 |
-
|
376 |
|
377 |
-
|
378 |
-
|
379 |
-
# True means accept, False means reject
|
380 |
-
decisions_gpt4 = np.array(
|
381 |
-
[True if paper_id in papers_accepted_by_gpt4 else False for paper_id in paper_ids])
|
382 |
|
383 |
elif ac_scoring_method == "recommendation":
|
384 |
-
|
385 |
-
|
386 |
-
[
|
387 |
-
|
388 |
-
|
389 |
else:
|
390 |
-
raise NotImplementedError
|
391 |
|
392 |
-
return
|
393 |
|
394 |
|
395 |
-
def
|
396 |
-
|
397 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
|
399 |
if osp.exists(path):
|
400 |
-
|
|
|
401 |
print(f"Loaded {len(ac_decision)} batches of existing AC decisions from {path}")
|
402 |
-
|
403 |
else:
|
404 |
ac_decision = []
|
405 |
print(f"No existing AC decisions found at {path}")
|
406 |
|
407 |
-
ac_decision = [batch for batch in ac_decision if
|
408 |
|
409 |
for i, batch in enumerate(ac_decision):
|
410 |
if i != len(ac_decision) - 1:
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
|
416 |
return ac_decision
|
417 |
|
418 |
-
|
419 |
def write_to_excel(data, file_path, sheet_name):
|
420 |
"""
|
421 |
Write data to an Excel file.
|
@@ -436,7 +492,7 @@ def write_to_excel(data, file_path, sheet_name):
|
|
436 |
data.to_excel(writer, sheet_name=sheet_name, index=False)
|
437 |
|
438 |
|
439 |
-
def
|
440 |
path = get_ac_decision_path(**kwargs)
|
441 |
|
442 |
json.dump(ac_decisions, open(path, 'w', encoding='utf-8'), indent=2)
|
|
|
9 |
import numpy as np
|
10 |
import pandas as pd
|
11 |
|
12 |
+
from agentreview import const
|
13 |
+
from agentreview.utility.general_utils import check_cwd, set_seed
|
14 |
|
15 |
|
16 |
def generate_num_papers_to_accept(n, batch_number, shuffle=True):
|
|
|
36 |
return array
|
37 |
|
38 |
|
39 |
+
def get_papers_accepted_by_llm(llm_ac_decisions, acceptance_rate: float) -> list:
|
40 |
+
papers_accepted_by_llm = []
|
41 |
|
42 |
+
num_papers = sum([len(batch) for batch in llm_ac_decisions])
|
43 |
|
44 |
if num_papers == 0:
|
45 |
raise ValueError("No papers found in batch")
|
46 |
|
47 |
+
num_papers_to_accept = generate_num_papers_to_accept(n=acceptance_rate * num_papers,
|
48 |
+
batch_number=len(llm_ac_decisions))
|
49 |
|
50 |
+
for idx_batch, batch in enumerate(llm_ac_decisions):
|
51 |
tups = sorted([(paper_id, rank) for paper_id, rank in batch.items()], key=lambda x: x[1], reverse=False)
|
52 |
|
53 |
paper_ids = [int(paper_id) for paper_id, rank in tups]
|
54 |
|
55 |
+
papers_accepted_by_llm += paper_ids[:num_papers_to_accept[idx_batch]]
|
56 |
|
57 |
+
return papers_accepted_by_llm
|
58 |
|
59 |
|
60 |
def get_paper_decision_mapping(data_dir: str, conference: str, verbose: bool = False):
|
|
|
151 |
|
152 |
|
153 |
def print_colored(text, color='red'):
|
154 |
+
|
155 |
+
# Dictionary of ANSI color codes for terminal
|
156 |
foreground_colors = {
|
157 |
'black': 30,
|
158 |
'red': 31,
|
|
|
163 |
'cyan': 36,
|
164 |
'white': 37,
|
165 |
}
|
166 |
+
try:
|
167 |
+
|
168 |
+
# get_ipython is specific to Jupyter and IPython.
|
169 |
+
# We use this to decide whether we are running a Jupyter notebook or not.
|
170 |
+
get_ipython
|
171 |
+
print(text) # Plain text in Jupyter
|
172 |
+
except:
|
173 |
+
# If not Jupyter, print with color codes
|
174 |
+
color_code = foreground_colors.get(color, 31) # Default to red if color not found
|
175 |
+
print(f"\033[{color_code}m{text}\033[0m")
|
176 |
|
177 |
|
178 |
def get_ac_decision_path(output_dir: str, conference: str, model_name: str, ac_scoring_method: str, experiment_name:
|
|
|
362 |
return experiment_names
|
363 |
|
364 |
|
365 |
+
def load_llm_ac_decisions_as_array(
|
366 |
+
output_dir: str,
|
367 |
+
experiment_name: str,
|
368 |
+
ac_scoring_method: str,
|
369 |
+
acceptance_rate: float,
|
370 |
+
conference: str,
|
371 |
+
model_name: str,
|
372 |
+
num_papers_per_area_chair: int
|
373 |
+
) -> Tuple[np.ndarray, np.ndarray]:
|
374 |
+
"""Loads and processes GPT-4 generated area chair (AC) decisions for an experiment.
|
375 |
+
|
376 |
+
Args:
|
377 |
+
experiment_name (str): Name of the experiment.
|
378 |
+
ac_scoring_method (str): Method used for AC scoring ('ranking' or 'recommendation').
|
379 |
+
acceptance_rate (float): Acceptance rate for the conference.
|
380 |
+
conference (str): Name of the conference.
|
381 |
+
model_name (str): Model name used to generate AC decisions.
|
382 |
+
num_papers_per_area_chair (int): Number of papers assigned to each area chair.
|
383 |
+
|
384 |
+
Returns:
|
385 |
+
Tuple[np.ndarray, np.ndarray]: An array of decisions (True for accept, False for reject)
|
386 |
+
and an array of paper IDs in the order processed.
|
387 |
|
388 |
+
Raises:
|
389 |
+
NotImplementedError: If `ac_scoring_method` is not 'ranking' or 'recommendation'.
|
390 |
+
"""
|
391 |
print("=" * 30)
|
392 |
print(f"Experiment Name: {experiment_name}")
|
393 |
|
394 |
+
llm_ac_decisions = load_llm_ac_decisions(
|
395 |
+
output_dir=output_dir,
|
396 |
+
conference=conference,
|
397 |
+
model_name=model_name,
|
398 |
+
ac_scoring_method=ac_scoring_method,
|
399 |
+
experiment_name=experiment_name,
|
400 |
+
num_papers_per_area_chair=num_papers_per_area_chair
|
401 |
+
)
|
402 |
|
403 |
+
paper_ids = sorted(
|
404 |
+
int(paper_id) for batch in llm_ac_decisions for paper_id in batch
|
405 |
+
)
|
406 |
|
407 |
if ac_scoring_method == "ranking":
|
408 |
+
if len(paper_ids) != len(set(paper_ids)):
|
409 |
+
raise ValueError(f"Duplicate paper_ids found in the AC decisions: {Counter(paper_ids)}")
|
410 |
|
411 |
+
papers_accepted_by_llm = get_papers_accepted_by_llm(llm_ac_decisions, acceptance_rate)
|
412 |
+
decisions_llm = np.array([paper_id in papers_accepted_by_llm for paper_id in paper_ids])
|
|
|
|
|
|
|
413 |
|
414 |
elif ac_scoring_method == "recommendation":
|
415 |
+
llm_ac_decisions_flat = {int(k): v for batch in llm_ac_decisions for k, v in batch.items()}
|
416 |
+
decisions_llm = np.array(
|
417 |
+
[llm_ac_decisions_flat[paper_id].startswith("Accept") for paper_id in paper_ids]
|
418 |
+
)
|
|
|
419 |
else:
|
420 |
+
raise NotImplementedError(f"Scoring method '{ac_scoring_method}' not implemented.")
|
421 |
|
422 |
+
return decisions_llm, np.array(paper_ids)
|
423 |
|
424 |
|
425 |
+
def load_llm_ac_decisions(
        output_dir: str,
        conference: str,
        model_name: str,
        ac_scoring_method: str,
        experiment_name: str,
        num_papers_per_area_chair: int
) -> List[Dict[str, str]]:
    """Load previously saved LLM area chair (AC) decisions for one experiment.

    The file location is resolved by ``get_ac_decision_path``. A missing file is
    not an error: it is reported on stdout and treated as "no decisions yet".
    Empty batches are dropped before the batch sizes are validated.

    Args:
        output_dir (str): Output directory, forwarded to ``get_ac_decision_path``.
        conference (str): Name of the conference.
        model_name (str): Model name used to generate AC decisions.
        ac_scoring_method (str): Method used for AC scoring ('ranking' or 'recommendation').
        experiment_name (str): Name of the experiment.
        num_papers_per_area_chair (int): Number of papers assigned to each area chair.

    Returns:
        List[Dict[str, str]]: List of non-empty batches, where each batch contains paper ID and decision.

    Raises:
        AssertionError: If a non-final batch has a paper count different from `num_papers_per_area_chair`.
    """
    path = get_ac_decision_path(
        output_dir=output_dir,
        conference=conference,
        model_name=model_name,
        ac_scoring_method=ac_scoring_method,
        experiment_name=experiment_name
    )

    if osp.exists(path):
        with open(path, 'r', encoding='utf-8') as file:
            ac_decision = json.load(file)
        print(f"Loaded {len(ac_decision)} batches of existing AC decisions from {path}")
    else:
        ac_decision = []
        print(f"No existing AC decisions found at {path}")

    # Remove empty batches so they neither count as the "final" batch nor fail the size check.
    ac_decision = [batch for batch in ac_decision if batch]

    # Only the last batch is allowed to be partially filled; every earlier one must be full.
    for i, batch in enumerate(ac_decision[:-1]):
        if len(batch) != num_papers_per_area_chair:
            raise AssertionError(
                f"Batch {i} has {len(batch)} papers, expected {num_papers_per_area_chair} for non-final batches."
            )

    return ac_decision
|
474 |
|
|
|
475 |
def write_to_excel(data, file_path, sheet_name):
|
476 |
"""
|
477 |
Write data to an Excel file.
|
|
|
492 |
data.to_excel(writer, sheet_name=sheet_name, index=False)
|
493 |
|
494 |
|
495 |
+
def save_llm_ac_decisions(ac_decisions: List[dict], **kwargs):
    """Write AC decisions as indented JSON to the file resolved by ``get_ac_decision_path``.

    Args:
        ac_decisions (List[dict]): Batches of AC decisions to serialize.
        **kwargs: Forwarded unchanged to ``get_ac_decision_path`` to resolve the output path.
    """
    path = get_ac_decision_path(**kwargs)

    # A context manager guarantees the handle is flushed and closed, even if
    # serialization fails part-way through.
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(ac_decisions, file, indent=2)
|
data
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../agent4reviews/data
|
demo.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
# # AgentReview
|
5 |
+
#
|
6 |
+
#
|
7 |
+
#
|
8 |
+
# In this tutorial, you will explore customizing the AgentReview experiment.
|
9 |
+
#
|
10 |
+
# Venue: EMNLP 2024 (Oral)
|
11 |
+
#
|
12 |
+
# arXiv: [https://arxiv.org/abs/2406.12708](https://arxiv.org/abs/2406.12708)
|
13 |
+
#
|
14 |
+
# Website: [https://agentreview.github.io/](https://agentreview.github.io/)
|
15 |
+
#
|
16 |
+
# ```bibtex
|
17 |
+
# @inproceedings{jin2024agentreview,
|
18 |
+
# title={AgentReview: Exploring Peer Review Dynamics with LLM Agents},
|
19 |
+
# author={Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
|
20 |
+
# booktitle={EMNLP},
|
21 |
+
# year={2024}
|
22 |
+
# }
|
23 |
+
# ```
|
24 |
+
#
|
25 |
+
|
26 |
+
# In[2]:
|
27 |
+
|
28 |
+
|
29 |
+
import os
|
30 |
+
|
31 |
+
import numpy as np
|
32 |
+
|
33 |
+
from agentreview import const
|
34 |
+
|
35 |
+
os.environ["OPENAI_API_VERSION"] = "2024-06-01-preview"
|
36 |
+
|
37 |
+
|
38 |
+
# ## Overview
|
39 |
+
#
|
40 |
+
# AgentReview features a range of customizable variables, such as the characteristics of reviewers, authors, and area chairs (ACs), as well as the reviewing mechanisms.
|
41 |
+
|
42 |
+
# In[3]:
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
# ## Review Pipeline
|
47 |
+
#
|
48 |
+
# The simulation adopts a structured, 5-phase pipeline (Section 2 in the [paper](https://arxiv.org/abs/2406.12708)):
|
49 |
+
#
|
50 |
+
# * **I. Reviewer Assessment.** Each manuscript is evaluated by three reviewers independently.
|
51 |
+
# * **II. Author-Reviewer Discussion.** Authors submit rebuttals to address reviewers' concerns.
|
52 |
+
# * **III. Reviewer-AC Discussion.** The AC facilitates discussions among reviewers, prompting updates to their initial assessments.
|
53 |
+
# * **IV. Meta-Review Compilation.** The AC synthesizes the discussions into a meta-review.
|
54 |
+
# * **V. Paper Decision.** The AC makes the final decision on whether to accept or reject the paper, based on all gathered inputs.
|
55 |
+
|
56 |
+
# In[2]:
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
# In[4]:
|
61 |
+
|
62 |
+
|
63 |
+
import os
|
64 |
+
|
65 |
+
if os.path.basename(os.getcwd()) == "notebooks":
|
66 |
+
os.chdir("..")
|
67 |
+
# Change the working directory to AgentReview
|
68 |
+
print(f"Changing the current working directory to {os.path.basename(os.getcwd())}")
|
69 |
+
|
70 |
+
|
71 |
+
# In[5]:
|
72 |
+
|
73 |
+
|
74 |
+
from argparse import Namespace
|
75 |
+
|
76 |
+
args = Namespace(openai_key=None,
|
77 |
+
deployment=None,
|
78 |
+
openai_client_type='azure_openai',
|
79 |
+
endpoint=None,
|
80 |
+
api_version='2023-05-15',
|
81 |
+
ac_scoring_method='ranking',
|
82 |
+
conference='ICLR2024',
|
83 |
+
num_reviewers_per_paper=3,
|
84 |
+
ignore_missing_metareviews=False,
|
85 |
+
overwrite=False,
|
86 |
+
num_papers_per_area_chair=10,
|
87 |
+
model_name='gpt-4o',
|
88 |
+
output_dir='outputs',
|
89 |
+
max_num_words=16384,
|
90 |
+
visual_dir='outputs/visual',
|
91 |
+
device='cuda',
|
92 |
+
data_dir='./data', # Directory to all paper PDF
|
93 |
+
acceptance_rate=0.32,
|
94 |
+
task='paper_review')
|
95 |
+
|
96 |
+
os.environ['OPENAI_API_VERSION'] = args.api_version
|
97 |
+
|
98 |
+
# In[13]:
|
99 |
+
|
100 |
+
|
101 |
+
# Experiment setting with one "malicious" reviewer; every other listed role uses "BASELINE".
malicious_Rx1_setting = {
    "AC": [
        "BASELINE"
    ],

    "reviewer": [
        "malicious",
        "BASELINE",
        "BASELINE"
    ],

    "author": [
        "BASELINE"
    ],
    "global_settings": {
        "provides_numeric_rating": ['reviewer', 'ac'],
        "persons_aware_of_authors_identities": []
    }
}

all_settings = {"malicious_Rx1": malicious_Rx1_setting}
# Keep the experiment name identical to the key registered in `all_settings`,
# so that `all_settings[args.experiment_name]` resolves to this setting.
# NOTE(review): outputs saved under the old name "malicious_Rx1_setting" will not be found — confirm.
args.experiment_name = "malicious_Rx1"
|
123 |
+
|
124 |
+
|
125 |
+
#
|
126 |
+
# `malicious_Rx1` means 1 reviewer is a malicious reviewer, and the other reviewers are default (i.e. `BASELINE`) reviewers.
|
127 |
+
#
|
128 |
+
#
|
129 |
+
|
130 |
+
# ## Reviews
|
131 |
+
#
|
132 |
+
# Define the review pipeline
|
133 |
+
|
134 |
+
# In[10]:
|
135 |
+
|
136 |
+
|
137 |
+
from agentreview.environments import PaperReview
|
138 |
+
|
139 |
+
def review_one_paper(paper_id, setting):
    """Build the players, environment and arena for one paper and launch the arena non-interactively.

    Reads the module-level ``args`` and ``paper_id2decision`` objects.

    Args:
        paper_id: Key into ``paper_id2decision`` identifying the paper to review.
        setting: Setting passed to ``get_experiment_settings`` as ``setting``.
    """
    decision = paper_id2decision[paper_id]
    settings_for_paper = get_experiment_settings(
        paper_id=paper_id,
        paper_decision=decision,
        setting=setting,
    )
    print(f"Paper ID: {paper_id} (Decision in {args.conference}: {decision})")

    participants = initialize_players(experiment_setting=settings_for_paper, args=args)

    environment = PaperReview(
        player_names=[participant.name for participant in participants],
        paper_decision=decision,
        paper_id=paper_id,
        args=args,
        experiment_setting=settings_for_paper,
    )

    review_arena = PaperReviewArena(players=participants, environment=environment, args=args)
    review_arena.launch_cli(interactive=False)
|
156 |
+
|
157 |
+
|
158 |
+
# In[11]:
|
159 |
+
|
160 |
+
|
161 |
+
import os
|
162 |
+
import sys
|
163 |
+
|
164 |
+
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "agentreview")))
|
165 |
+
|
166 |
+
from agentreview.paper_review_settings import get_experiment_settings
|
167 |
+
from agentreview.paper_review_arena import PaperReviewArena
|
168 |
+
from agentreview.utility.experiment_utils import initialize_players
|
169 |
+
from agentreview.utility.utils import project_setup, get_paper_decision_mapping
|
170 |
+
|
171 |
+
|
172 |
+
# In[14]:
|
173 |
+
|
174 |
+
|
175 |
+
sampled_paper_ids = [39]
|
176 |
+
|
177 |
+
paper_id2decision, paper_decision2ids = get_paper_decision_mapping(args.data_dir, args.conference)
|
178 |
+
|
179 |
+
for paper_id in sampled_paper_ids:
|
180 |
+
review_one_paper(paper_id, malicious_Rx1_setting)
|
181 |
+
|
182 |
+
|
183 |
+
|
184 |
+
def run_paper_decision():
    """Select the papers that still need an LLM area chair (AC) decision.

    Sets ``args.task`` to ``"paper_decision"``, shuffles the paper IDs listed in
    ``const.year2paper_ids`` for ``args.conference``, and filters out the IDs that
    already appear in the saved AC decisions for the current experiment.

    NOTE(review): the function currently stops after filtering; the remaining
    paper IDs are neither used further nor returned.
    """
    # Imported locally because this script's module-level imports do not include it.
    from agentreview.utility.utils import load_llm_ac_decisions

    args.task = "paper_decision"

    # Make sure the same set of papers always go through the same AC no matter which setting we choose
    NUM_PAPERS = len(const.year2paper_ids[args.conference])
    order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)

    print("Shuffling paper IDs")
    sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]

    # Exclude papers that already have AC decisions
    existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
                                                  conference=args.conference,
                                                  model_name=args.model_name,
                                                  ac_scoring_method=args.ac_scoring_method,
                                                  experiment_name=args.experiment_name,
                                                  num_papers_per_area_chair=args.num_papers_per_area_chair)

    # `existing_ac_decisions` is a list of batches, so testing a paper ID against the
    # list itself never matches. Flatten the batches into a set of integer IDs first.
    decided_paper_ids = {int(decided_id) for batch in existing_ac_decisions for decided_id in batch}

    sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if int(paper_id) not in decided_paper_ids]
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
+
# In[ ]:
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
|
notebooks/demo.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -16,4 +16,4 @@ transformers
|
|
16 |
tenacity
|
17 |
openai
|
18 |
gradio
|
19 |
-
|
|
|
16 |
tenacity
|
17 |
openai
|
18 |
gradio
|
19 |
+
jupyter
|
run_paper_decision_cli.py
CHANGED
@@ -6,17 +6,15 @@ import numpy as np
|
|
6 |
|
7 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
8 |
|
9 |
-
import const
|
|
|
10 |
from agentreview.experiment_config import all_settings
|
11 |
from agentreview.paper_review_settings import get_experiment_settings
|
12 |
-
from agentreview.config import AgentConfig
|
13 |
from agentreview.environments import PaperDecision
|
14 |
from agentreview.paper_review_arena import PaperReviewArena
|
15 |
-
from agentreview.
|
16 |
-
from
|
17 |
-
|
18 |
-
from utility.utils import project_setup, get_paper_decision_mapping, \
|
19 |
-
load_metareview, load_gpt4_generated_ac_decisions
|
20 |
|
21 |
# Set up logging configuration
|
22 |
logging.basicConfig(
|
@@ -27,6 +25,8 @@ logging.basicConfig(
|
|
27 |
]
|
28 |
)
|
29 |
|
|
|
|
|
30 |
|
31 |
def main(args):
|
32 |
"""
|
@@ -46,18 +46,16 @@ def main(args):
|
|
46 |
NUM_PAPERS = len(const.year2paper_ids[args.conference])
|
47 |
order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)
|
48 |
|
49 |
-
metareviews = []
|
50 |
-
|
51 |
# Paper IDs we actually used in experiments
|
52 |
experimental_paper_ids = []
|
53 |
|
54 |
# For papers that have not been decided yet, load their metareviews
|
55 |
-
|
56 |
print("Shuffling paper IDs")
|
57 |
sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]
|
58 |
|
59 |
# Exclude papers that already have AC decisions
|
60 |
-
existing_ac_decisions =
|
61 |
conference=args.conference,
|
62 |
model_name=args.model_name,
|
63 |
ac_scoring_method=args.ac_scoring_method,
|
@@ -68,65 +66,62 @@ def main(args):
|
|
68 |
|
69 |
sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
|
70 |
|
71 |
-
|
|
|
72 |
|
73 |
-
|
74 |
|
75 |
for paper_id in sampled_paper_ids:
|
76 |
|
77 |
-
experiment_setting = get_experiment_settings(all_settings[args.experiment_name])
|
78 |
-
|
79 |
# Load meta-reviews
|
80 |
metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
|
81 |
experiment_name=args.experiment_name,
|
82 |
model_name=args.model_name, conference=args.conference)
|
83 |
|
84 |
if metareview is None:
|
|
|
|
|
|
|
85 |
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
f"completely filtered out due to content policy. "
|
90 |
-
f"Loading the BASELINE metareview...")
|
91 |
-
|
92 |
-
metareview = load_metareview(paper_id=paper_id, experiment_name="BASELINE",
|
93 |
-
model_name=args.model_name, conference=args.conference)
|
94 |
|
95 |
-
|
96 |
-
raise ValueError(f"Metareview for {paper_id} does not exist")
|
97 |
|
98 |
-
|
99 |
-
|
100 |
|
101 |
num_batches = len(experimental_paper_ids) // args.num_papers_per_area_chair
|
102 |
|
103 |
for batch_index in range(num_batches):
|
104 |
-
experiment_setting["players"] = {k: v for k, v in experiment_setting["players"].items() if k.startswith("AC")}
|
105 |
-
|
106 |
-
players = []
|
107 |
-
|
108 |
-
for role, players_li in experiment_setting["players"].items():
|
109 |
-
|
110 |
-
for i, player_config in enumerate(players_li):
|
111 |
-
|
112 |
-
# This phase should only contain the Area Chair
|
113 |
-
if role == "AC":
|
114 |
-
|
115 |
-
player_config = get_ac_config(env_type="paper_decision",
|
116 |
-
scoring_method=args.ac_scoring_method,
|
117 |
-
num_papers_per_area_chair=args.num_papers_per_area_chair,
|
118 |
-
global_settings=experiment_setting['global_settings'],
|
119 |
-
acceptance_rate=args.acceptance_rate
|
120 |
-
**player_config)
|
121 |
-
|
122 |
-
player_config = AgentConfig(**player_config)
|
123 |
-
player_config['model'] = args.model_name
|
124 |
-
player = AreaChair(**player_config)
|
125 |
-
|
126 |
-
else:
|
127 |
-
raise NotImplementedError(f"Unknown role: {role}")
|
128 |
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
player_names = [player.name for player in players]
|
132 |
|
@@ -141,7 +136,7 @@ def main(args):
|
|
141 |
metareviews=metareviews,
|
142 |
experiment_setting=experiment_setting, ac_scoring_method=args.ac_scoring_method)
|
143 |
|
144 |
-
arena = PaperReviewArena(players=players, environment=env, args=args)
|
145 |
arena.launch_cli(interactive=False)
|
146 |
|
147 |
|
|
|
6 |
|
7 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
8 |
|
9 |
+
from agentreview import const
|
10 |
+
from agentreview.utility.experiment_utils import initialize_players
|
11 |
from agentreview.experiment_config import all_settings
|
12 |
from agentreview.paper_review_settings import get_experiment_settings
|
|
|
13 |
from agentreview.environments import PaperDecision
|
14 |
from agentreview.paper_review_arena import PaperReviewArena
|
15 |
+
from agentreview.arguments import parse_args
|
16 |
+
from agentreview.utility.utils import project_setup, get_paper_decision_mapping, \
|
17 |
+
load_metareview, load_llm_ac_decisions
|
|
|
|
|
18 |
|
19 |
# Set up logging configuration
|
20 |
logging.basicConfig(
|
|
|
25 |
]
|
26 |
)
|
27 |
|
28 |
+
logger = logging.getLogger(__name__)
|
29 |
+
|
30 |
|
31 |
def main(args):
|
32 |
"""
|
|
|
46 |
NUM_PAPERS = len(const.year2paper_ids[args.conference])
|
47 |
order = np.random.choice(range(NUM_PAPERS), size=NUM_PAPERS, replace=False)
|
48 |
|
|
|
|
|
49 |
# Paper IDs we actually used in experiments
|
50 |
experimental_paper_ids = []
|
51 |
|
52 |
# For papers that have not been decided yet, load their metareviews
|
53 |
+
metareviews = []
|
54 |
print("Shuffling paper IDs")
|
55 |
sampled_paper_ids = np.array(const.year2paper_ids[args.conference])[order]
|
56 |
|
57 |
# Exclude papers that already have AC decisions
|
58 |
+
existing_ac_decisions = load_llm_ac_decisions(output_dir=args.output_dir,
|
59 |
conference=args.conference,
|
60 |
model_name=args.model_name,
|
61 |
ac_scoring_method=args.ac_scoring_method,
|
|
|
66 |
|
67 |
sampled_paper_ids = [paper_id for paper_id in sampled_paper_ids if paper_id not in existing_ac_decisions]
|
68 |
|
69 |
+
experiment_setting = get_experiment_settings(paper_id=None, paper_decision=None, setting=all_settings[
|
70 |
+
args.experiment_name])
|
71 |
|
72 |
+
logger.info(f"Loading metareview!")
|
73 |
|
74 |
for paper_id in sampled_paper_ids:
|
75 |
|
|
|
|
|
76 |
# Load meta-reviews
|
77 |
metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
|
78 |
experiment_name=args.experiment_name,
|
79 |
model_name=args.model_name, conference=args.conference)
|
80 |
|
81 |
if metareview is None:
|
82 |
+
print(f"Metareview for {paper_id} does not exist. This may happen because the conversation is "
|
83 |
+
f"completely filtered out due to content policy. "
|
84 |
+
f"Loading the BASELINE metareview...")
|
85 |
|
86 |
+
metareview = load_metareview(output_dir=args.output_dir, paper_id=paper_id,
|
87 |
+
experiment_name="BASELINE",
|
88 |
+
model_name=args.model_name, conference=args.conference)
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
+
if metareview is not None:
|
|
|
91 |
|
92 |
+
metareviews += [metareview]
|
93 |
+
experimental_paper_ids += [paper_id]
|
94 |
|
95 |
num_batches = len(experimental_paper_ids) // args.num_papers_per_area_chair
|
96 |
|
97 |
for batch_index in range(num_batches):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
+
players = initialize_players(experiment_setting=experiment_setting, args=args)
|
100 |
+
|
101 |
+
# players = []
|
102 |
+
#
|
103 |
+
# for role, players_li in experiment_setting["players"].items():
|
104 |
+
#
|
105 |
+
# for i, player_config in enumerate(players_li):
|
106 |
+
#
|
107 |
+
# # This phase should only contain the Area Chair
|
108 |
+
# if role == "AC":
|
109 |
+
#
|
110 |
+
# player_config = get_ac_config(env_type="paper_decision",
|
111 |
+
# scoring_method=args.ac_scoring_method,
|
112 |
+
# num_papers_per_area_chair=args.num_papers_per_area_chair,
|
113 |
+
# global_settings=experiment_setting['global_settings'],
|
114 |
+
# acceptance_rate=args.acceptance_rate
|
115 |
+
# ** player_config)
|
116 |
+
#
|
117 |
+
# # player_config = AgentConfig(**player_config)
|
118 |
+
# player_config['model'] = args.model_name
|
119 |
+
# player = AreaChair(**player_config)
|
120 |
+
#
|
121 |
+
# else:
|
122 |
+
# raise NotImplementedError(f"Unknown role: {role}")
|
123 |
+
#
|
124 |
+
# players.append(player)
|
125 |
|
126 |
player_names = [player.name for player in players]
|
127 |
|
|
|
136 |
metareviews=metareviews,
|
137 |
experiment_setting=experiment_setting, ac_scoring_method=args.ac_scoring_method)
|
138 |
|
139 |
+
arena = PaperReviewArena(players=players, environment=env, args=args, global_prompt=const.GLOBAL_PROMPT)
|
140 |
arena.launch_cli(interactive=False)
|
141 |
|
142 |
|
run_paper_review_cli.py
CHANGED
@@ -4,18 +4,17 @@ import os
|
|
4 |
import sys
|
5 |
from argparse import Namespace
|
6 |
|
|
|
7 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
8 |
|
9 |
-
from
|
|
|
10 |
from agentreview.experiment_config import all_settings
|
11 |
-
from agentreview.agent import Player
|
12 |
from agentreview.environments import PaperReview
|
13 |
from agentreview.paper_review_settings import get_experiment_settings
|
14 |
from agentreview.paper_review_arena import PaperReviewArena
|
15 |
-
from agentreview.
|
16 |
-
from agentreview.
|
17 |
-
get_paper_extractor_config
|
18 |
-
from utility.utils import project_setup, get_paper_decision_mapping
|
19 |
|
20 |
# Set up logging configuration
|
21 |
logging.basicConfig(
|
@@ -53,81 +52,84 @@ def main(args: Namespace):
|
|
53 |
sampled_paper_ids = [int(os.path.basename(p).split(".pdf")[0]) for p in paper_paths if p.endswith(".pdf")]
|
54 |
|
55 |
for paper_id in sampled_paper_ids:
|
56 |
-
|
57 |
-
experiment_setting = get_experiment_settings(all_settings[args.experiment_name])
|
58 |
-
|
59 |
# Ground-truth decision in the conference.
|
60 |
# We use this to partition the papers into different quality.
|
61 |
paper_decision = paper_id2decision[paper_id]
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
|
128 |
args=args, experiment_setting=experiment_setting)
|
129 |
|
130 |
-
arena = PaperReviewArena(players=players, environment=env, args=args)
|
131 |
arena.launch_cli(interactive=False)
|
132 |
|
133 |
logger.info("Done!")
|
|
|
4 |
import sys
|
5 |
from argparse import Namespace
|
6 |
|
7 |
+
|
8 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
9 |
|
10 |
+
from agentreview import const
|
11 |
+
from agentreview.arguments import parse_args
|
12 |
from agentreview.experiment_config import all_settings
|
|
|
13 |
from agentreview.environments import PaperReview
|
14 |
from agentreview.paper_review_settings import get_experiment_settings
|
15 |
from agentreview.paper_review_arena import PaperReviewArena
|
16 |
+
from agentreview.utility.experiment_utils import initialize_players
|
17 |
+
from agentreview.utility.utils import project_setup, get_paper_decision_mapping
|
|
|
|
|
18 |
|
19 |
# Set up logging configuration
|
20 |
logging.basicConfig(
|
|
|
52 |
sampled_paper_ids = [int(os.path.basename(p).split(".pdf")[0]) for p in paper_paths if p.endswith(".pdf")]
|
53 |
|
54 |
for paper_id in sampled_paper_ids:
|
|
|
|
|
|
|
55 |
# Ground-truth decision in the conference.
|
56 |
# We use this to partition the papers into different quality.
|
57 |
paper_decision = paper_id2decision[paper_id]
|
58 |
|
59 |
+
experiment_setting = get_experiment_settings(paper_id=paper_id,
|
60 |
+
paper_decision=paper_decision,
|
61 |
+
setting=all_settings[args.experiment_name])
|
62 |
+
|
63 |
+
logger.info(f"Experiment Started!")
|
64 |
+
logger.info(f"Paper ID: {paper_id} (Decision in {args.conference}: {paper_decision})")
|
65 |
+
|
66 |
+
players = initialize_players(experiment_setting=experiment_setting, args=args)
|
67 |
+
|
68 |
+
player_names = [player.name for player in players]
|
69 |
+
|
70 |
+
# for role, players_list in experiment_setting["players"].items():
|
71 |
+
#
|
72 |
+
# for i, player_config in enumerate(players_list):
|
73 |
+
# if role == "Paper Extractor":
|
74 |
+
#
|
75 |
+
# player_config = get_paper_extractor_config(global_settings=experiment_setting['global_settings'], )
|
76 |
+
#
|
77 |
+
# player = PaperExtractorPlayer(data_dir=args.data_dir, paper_id=paper_id,
|
78 |
+
# paper_decision=paper_decision,
|
79 |
+
# args=args,
|
80 |
+
# conference=args.conference, **player_config)
|
81 |
+
#
|
82 |
+
# player_names.append(player.name)
|
83 |
+
#
|
84 |
+
#
|
85 |
+
# elif role == "AC":
|
86 |
+
#
|
87 |
+
# player_config = get_ac_config(env_type="paper_review",
|
88 |
+
# scoring_method=args.ac_scoring_method,
|
89 |
+
# num_papers_per_area_chair=args.num_papers_per_area_chair,
|
90 |
+
# global_settings=experiment_setting['global_settings'],
|
91 |
+
# acceptance_rate=args.acceptance_rate,
|
92 |
+
# **player_config)
|
93 |
+
#
|
94 |
+
# player_config['model'] = args.model_name
|
95 |
+
#
|
96 |
+
# player = AreaChair(data_dir=args.data_dir,
|
97 |
+
# conference=args.conference,
|
98 |
+
# args=args,
|
99 |
+
# **player_config)
|
100 |
+
#
|
101 |
+
# player_names.append(player.name)
|
102 |
+
#
|
103 |
+
#
|
104 |
+
# elif role == "Author":
|
105 |
+
#
|
106 |
+
# # Author requires no behavior customization.
|
107 |
+
# # So we directly use the Player class
|
108 |
+
# player_config = get_author_config()
|
109 |
+
# player = Player(data_dir=args.data_dir,
|
110 |
+
# conference=args.conference,
|
111 |
+
# args=args,
|
112 |
+
# **player_config)
|
113 |
+
#
|
114 |
+
# player_names.append(player.name)
|
115 |
+
#
|
116 |
+
# elif role == "Reviewer":
|
117 |
+
# player_config = get_reviewer_player_config(reviewer_index=i + 1,
|
118 |
+
# global_settings=experiment_setting['global_settings'],
|
119 |
+
# **player_config)
|
120 |
+
# player_config['model'] = args.model_name
|
121 |
+
# player = Reviewer(data_dir=args.data_dir, conference=args.conference, **player_config)
|
122 |
+
# player_names.append(player.name)
|
123 |
+
#
|
124 |
+
# else:
|
125 |
+
# raise NotImplementedError(f"Unknown role: {role}")
|
126 |
+
#
|
127 |
+
# players.append(player)
|
128 |
|
129 |
env = PaperReview(player_names=player_names, paper_decision=paper_decision, paper_id=paper_id,
|
130 |
args=args, experiment_setting=experiment_setting)
|
131 |
|
132 |
+
arena = PaperReviewArena(players=players, environment=env, args=args, global_prompt=const.GLOBAL_PROMPT)
|
133 |
arena.launch_cli(interactive=False)
|
134 |
|
135 |
logger.info("Done!")
|