Spaces:

Gatsby767
/

AbrahamicSolver

Running

App Files Files Community

Gatsby767 committed 1 day ago

Commit

0e6368e

verified ·

1 Parent(s): 47a4065

Upload 11 files

Browse files

Files changed (11) hide show

caller.py +100 -0
caller_penalty.py +151 -0
config.yaml +93 -0
math.py +49 -0
math_format.jinja +1 -0
persona.jinja +1 -0
questioner.jinja +1 -0
r1v.py +47 -0
r1v_format.jinja +1 -0
runtime_env.yaml +9 -0
solver.jinja +1 -0

caller.py ADDED Viewed

	@@ -0,0 +1,100 @@

+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import regex as re
+from typing import Dict, List
+import json
+from mathruler.grader import extract_boxed_content, grade_answer
+import os
+import time
+import random
+import requests
+from concurrent.futures import ThreadPoolExecutor, as_completed
# Root directory for scratch files, read once at import time.
# NOTE(review): no fallback is given, so when STORAGE_PATH is unset the
# generated paths start with the literal text "None/" -- confirm the
# variable is always exported before this module is imported.
STORAGE_PATH = os.environ.get("STORAGE_PATH")


def generate_temp_filename(prefix="temp", suffix=".json"):
    """Build a scratch-file path under ``{STORAGE_PATH}/temp_results``.

    The file name is ``{prefix}_{milliseconds}_{random}{suffix}`` where
    ``milliseconds`` is the current wall-clock time and ``random`` is an
    integer in ``[0, 99999]``. Only the string is produced; neither the
    file nor the directory is created here.
    """
    millis = int(time.time() * 1000)
    nonce = random.randint(0, 99999)
    directory = f"{STORAGE_PATH}/temp_results"
    return f"{directory}/{prefix}_{millis}_{nonce}{suffix}"
def split_list(lst, n=4):
    """Partition ``lst`` into ``n`` contiguous slices of near-equal length.

    The first ``len(lst) % n`` slices receive one extra element, so slice
    lengths differ by at most one. Slices are empty when ``lst`` has fewer
    than ``n`` items.
    """
    base, extra = divmod(len(lst), n)
    chunks = []
    start = 0
    for idx in range(n):
        # The leading `extra` chunks absorb the remainder, one item each.
        stop = start + base + (1 if idx < extra else 0)
        chunks.append(lst[start:stop])
        start = stop
    return chunks
# Keep requests to these local addresses from being routed through a proxy.
os.environ["NO_PROXY"] = "0.0.0.0,127.0.0.1"


def fetch(index, i):
    """Send one GET request to the local service on port ``5000 + index``.

    Args:
        index: Offset added to 5000 to select the port.
        i: Value sent as the ``name`` query parameter (the caller in this
            file passes the path of a JSON file).

    Returns:
        Always ``True``; the HTTP response object is only printed.
    """
    # Hand the value to ``params`` so it is percent-encoded. Interpolating a
    # raw path into the URL breaks as soon as it contains '&', '#', '+' or
    # a space.
    response = requests.get(
        f"http://0.0.0.0:{5000 + index}/hello",
        params={"name": i},
    )
    print(response)
    return True
def generate_results(data):
    """Fan ``data`` out to four local services and gather their outputs.

    ``data`` is split into four shards and each shard is written to its own
    temporary JSON file. One request per shard is sent via ``fetch`` (shard
    ``i`` goes to port ``5000 + i``). Afterwards the sibling
    ``*_results.json`` file of every shard is loaded.

    Args:
        data: Sliceable, JSON-serialisable sequence of items.

    Returns:
        The concatenation of the four result files, in shard order.
    """
    shards = split_list(data, 4)
    random_names = [generate_temp_filename(prefix=f"temp_{i}", suffix=".json") for i in range(4)]
    for path, shard in zip(random_names, shards):
        with open(path, 'w') as f:
            json.dump(shard, f, indent=4)
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(fetch, i, path) for i, path in enumerate(random_names)]
        for future in as_completed(futures):
            print(future.result())
    final_results = []
    # Read back by shard, not by future. Looping over the futures while
    # indexing with a stale loop variable loaded the last shard four times
    # and dropped the other three.
    for path in random_names:
        with open(path.replace('.json', '_results.json'), 'r') as f:
            final_results.extend(json.load(f))
    return final_results
def format_reward(predict: str) -> float:
    """Return 1.0 if ``predict`` has the expected layout, otherwise 0.0.

    The whole string must start with a ``<think>...</think>`` section that
    is followed, after any text, by a LaTeX ``boxed{...}`` group. Newlines
    are allowed anywhere.
    """
    think_then_boxed = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
    if think_then_boxed.fullmatch(predict) is None:
        return 0.0
    return 1.0
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Return 1.0 if the boxed answer in ``predict`` is graded as matching ``ground_truth``, else 0.0."""
    boxed = extract_boxed_content(predict)
    if grade_answer(boxed, ground_truth):
        return 1.0
    return 0.0
def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1, file_path: str = "") -> List[Dict[str, float]]:
    """Score a batch of generated question/answer pairs.

    Each prediction is parsed for its last ``<question>...</question>``
    span and its boxed answer. The parsed pairs are sent through
    ``generate_results``, and each returned item is turned into a score
    dict.

    Args:
        predicts: Raw model outputs.
        ground_truths: Unused by this function.
        format_weight: Unused by this function.
        file_path: Unused by this function.

    Returns:
        One dict per item returned by ``generate_results``:
        ``overall`` is ``min(score, 1 - score)`` (largest when the item's
        ``score`` is 0.5), or ``-1`` when no question was parsed;
        ``format`` is 1 when a question was parsed, else 0;
        ``accuracy`` is 1 when an answer was parsed, else 0.
    """
    results = []
    # Snapshot of the raw batch in the working directory, overwritten on
    # every call.
    with open('test.json', 'w') as f:
        json.dump(predicts, f, indent=4)
    for predict in predicts:
        questions = re.findall(r"<question>(.*?)</question>", predict, re.DOTALL)
        answers = extract_boxed_content(predict)
        question, answer = "", ""
        if questions and answers:
            # NOTE(review): ``answers[-1]`` is the last element of whatever
            # extract_boxed_content returns; if that is a plain string this
            # is only its final character -- confirm the return type.
            try:
                question, answer = questions[-1].strip(), answers[-1].strip()
            except Exception:
                # A malformed extraction becomes a blank pair. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt/SystemExit through.
                question, answer = "", ""
        results.append({"question": question, "answer": answer})
    final_results = generate_results(results)
    scores = []
    for item in final_results:
        has_question = bool(item['question'])
        overall = min(item["score"], 1 - item["score"]) if has_question else -1
        scores.append({
            "overall": overall,
            "format": 1 if has_question else 0,
            "accuracy": 1 if item['answer'] else 0,
        })
    return scores

caller_penalty.py ADDED Viewed

	@@ -0,0 +1,151 @@

+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import regex as re
+from typing import Dict, List
+import json
+from mathruler.grader import extract_boxed_content, grade_answer
+import os
+import time
+import random
+import requests
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from collections import Counter
+from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+from sklearn.cluster import AgglomerativeClustering
+import numpy as np
# Root directory for scratch files; the STORAGE_PATH environment variable
# overrides the fallback.
# NOTE(review): the fallback looks like a site-specific absolute path --
# confirm it exists wherever this module runs.
STORAGE_PATH = os.environ.get("STORAGE_PATH", "/apdcephfs_sh2/share_300000800/user/chengchuang")
def _bleu_distance_matrix(sentences):
    """Return a symmetric ``n x n`` matrix of ``1 - BLEU`` between sentences.

    Sentences are tokenised with ``str.split``. For every pair ``i < j``
    BLEU is computed once, with sentence ``j`` as the single reference and
    sentence ``i`` as the hypothesis, and the resulting distance is written
    to both ``[i, j]`` and ``[j, i]``. The diagonal is 0.
    """
    count = len(sentences)
    smoother = SmoothingFunction().method1
    dist = np.zeros((count, count))
    for row in range(count):
        # The diagonal keeps its initial 0.0 (the distance of a sentence to
        # itself), so only the strict upper triangle is computed.
        for col in range(row + 1, count):
            reference = [sentences[col].split()]
            hypothesis = sentences[row].split()
            bleu = sentence_bleu(reference, hypothesis, smoothing_function=smoother)
            dist[row, col] = dist[col, row] = 1 - bleu
    return dist
def cluster_share_per_problem(
        problems,
        distance_threshold: float = 0.5,
        linkage: str = "average"):
    """Return, for each problem, the fraction of the batch in its cluster.

    Problems are clustered agglomeratively on the pairwise ``1 - BLEU``
    distance matrix. The number of clusters is not fixed; merging stops at
    ``distance_threshold``.

    Args:
        problems: Sequence of problem strings.
        distance_threshold: Linkage distance at or above which clusters are
            not merged.
        linkage: Linkage criterion passed to ``AgglomerativeClustering``.

    Returns:
        A list aligned with ``problems`` where entry ``i`` is
        ``cluster_size(i) / len(problems)``. Empty input gives ``[]``.
    """
    if not problems:
        return []
    if len(problems) == 1:
        # AgglomerativeClustering refuses fewer than two samples. A lone
        # problem is trivially a cluster holding the entire batch.
        return [1.0]
    print('start clustering')
    start_time = time.time()
    dist_mat = _bleu_distance_matrix(problems)
    clustering = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=distance_threshold,
        metric="precomputed",
        linkage=linkage
    )
    labels = clustering.fit_predict(dist_mat)
    print(f'end clustering, time: {time.time() - start_time}')
    total = len(problems)
    cluster_size = Counter(labels)
    return [cluster_size[lab] / total for lab in labels]
def generate_temp_filename(prefix="temp", suffix=".json"):
    """Build a scratch-file path under ``{STORAGE_PATH}/temp_results``.

    The file name is ``{prefix}_{milliseconds}_{random}{suffix}`` where
    ``milliseconds`` is the current wall-clock time and ``random`` is an
    integer in ``[0, 99999]``. Only the string is produced; nothing is
    created on disk.
    """
    millis = int(time.time() * 1000)
    nonce = random.randint(0, 99999)
    basename = f"{prefix}_{millis}_{nonce}{suffix}"
    return f"{STORAGE_PATH}/temp_results/{basename}"
def split_list(lst, n=4):
    """Partition ``lst`` into ``n`` contiguous slices of near-equal length.

    The first ``len(lst) % n`` slices receive one extra element. Slices are
    empty when ``lst`` has fewer than ``n`` items.
    """
    base, extra = divmod(len(lst), n)
    # Cut points: chunk `idx` starts after `idx` base-sized chunks plus one
    # bonus item for each earlier chunk that absorbed part of the remainder.
    bounds = [idx * base + min(idx, extra) for idx in range(n + 1)]
    return [lst[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
# Keep requests to these local addresses from being routed through a proxy.
os.environ["NO_PROXY"] = "0.0.0.0,127.0.0.1"


def fetch(index, i):
    """Send one GET request to the local service on port ``5000 + index``.

    Args:
        index: Offset added to 5000 to select the port.
        i: Value sent as the ``name`` query parameter (the caller in this
            file passes the path of a JSON file).

    Returns:
        Always ``True``; the HTTP response object is only printed.
    """
    # Hand the value to ``params`` so it is percent-encoded. Interpolating a
    # raw path into the URL breaks as soon as it contains '&', '#', '+' or
    # a space.
    response = requests.get(
        f"http://0.0.0.0:{5000 + index}/hello",
        params={"name": i},
    )
    print(response)
    return True
def generate_results(data):
    """Fan ``data`` out to four local services and gather their outputs.

    ``data`` is split into four shards and each shard is written to its own
    temporary JSON file. One request per shard is sent via ``fetch`` (shard
    ``i`` goes to port ``5000 + i``). The sibling ``*_results.json`` files
    are then loaded in shard order and deleted once all four have been
    read. The input shard files are not removed here.

    Returns:
        The concatenation of the four result files, in shard order.
    """
    shards = split_list(data, 4)
    input_paths = [generate_temp_filename(prefix=f"temp_{i}", suffix=".json") for i in range(4)]
    for path, shard in zip(input_paths, shards):
        with open(path, 'w') as f:
            json.dump(shard, f, indent=4)
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(fetch, offset, path) for offset, path in enumerate(input_paths)]
        for done in as_completed(futures):
            print(done.result())
    result_paths = [path.replace('.json', '_results.json') for path in input_paths]
    final_results = []
    for path in result_paths:
        with open(path, 'r') as f:
            final_results.extend(json.load(f))
    # Clean up only after every result file was read successfully.
    for path in result_paths:
        os.remove(path)
    return final_results
def format_reward(predict: str) -> float:
    """Return 1.0 if ``predict`` has the expected layout, otherwise 0.0.

    The whole string must start with a ``<think>...</think>`` section that
    is followed, after any text, by a LaTeX ``boxed{...}`` group. Newlines
    are allowed anywhere.
    """
    layout = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
    matched = layout.fullmatch(predict)
    return 0.0 if not matched else 1.0
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Return 1.0 if the boxed answer in ``predict`` is graded as matching ``ground_truth``, else 0.0."""
    boxed = extract_boxed_content(predict)
    verdict = grade_answer(boxed, ground_truth)
    if verdict:
        return 1.0
    return 0.0
def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1, file_path: str = "") -> List[Dict[str, float]]:
    """Score generated question/answer pairs, penalising near-duplicates.

    Each prediction is parsed for its last ``<question>...</question>``
    span and its boxed answer. The parsed pairs are sent through
    ``generate_results``. Every returned question then gets a penalty equal
    to the share of the batch that falls in its BLEU cluster.

    Args:
        predicts: Raw model outputs.
        ground_truths: Unused by this function.
        format_weight: Unused by this function.
        file_path: Unused by this function.

    Returns:
        One dict per item returned by ``generate_results``:
        ``overall`` is ``min(score, 1 - score)`` (or ``-1`` when no
        question was parsed) minus the penalty;
        ``format`` is 1 when a question was parsed, else 0;
        ``accuracy`` carries the penalty value itself.
    """
    results = []
    # Snapshot of the raw batch in the working directory, overwritten on
    # every call.
    with open('test.json', 'w') as f:
        json.dump(predicts, f, indent=4)
    for predict in predicts:
        questions = re.findall(r"<question>(.*?)</question>", predict, re.DOTALL)
        answers = extract_boxed_content(predict)
        question, answer = "", ""
        if questions and answers:
            # NOTE(review): ``answers[-1]`` is the last element of whatever
            # extract_boxed_content returns; if that is a plain string this
            # is only its final character -- confirm the return type.
            try:
                question, answer = questions[-1].strip(), answers[-1].strip()
            except Exception:
                # A malformed extraction becomes a blank pair. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt/SystemExit through.
                question, answer = "", ""
        results.append({"question": question, "answer": answer})
    final_results = generate_results(results)
    penalty = cluster_share_per_problem([result['question'] for result in final_results], distance_threshold=0.5)
    # Internal invariant: one penalty per returned item.
    assert len(penalty) == len(final_results)
    scores = []
    for item, share in zip(final_results, penalty):
        has_question = bool(item['question'])
        base = min(item["score"], 1 - item["score"]) if has_question else -1
        scores.append({
            "overall": base - share,
            "format": 1 if has_question else 0,
            "accuracy": share,
        })
    return scores

config.yaml ADDED Viewed

	@@ -0,0 +1,93 @@

+data:
+  train_files: hiyouga/math12k@train
+  val_files: hiyouga/math12k@test
+  prompt_key: problem
+  answer_key: answer
+  image_key: images
+  max_prompt_length: 2048
+  max_response_length: 2048
+  rollout_batch_size: 512
+  val_batch_size: 1024
+  format_prompt: ./examples/format_prompt/math_format.jinja
+  override_chat_template: null
+  shuffle: true
+  seed: 1
+  max_pixels: 4194304
+  min_pixels: 262144
+  filter_overlong_prompts: true
+algorithm:
+  adv_estimator: grpo
+  disable_kl: false
+  use_kl_loss: true
+  kl_penalty: low_var_kl
+  kl_coef: 1.0e-2
+  mock_data: test
+worker:
+  actor:
+    global_batch_size: 128
+    micro_batch_size_per_device_for_update: 2
+    micro_batch_size_per_device_for_experience: 8
+    max_grad_norm: 1.0
+    padding_free: true
+    ulysses_sequence_parallel_size: 1
+    model:
+      model_path: Qwen/Qwen2.5-7B-Instruct
+      enable_gradient_checkpointing: true
+      trust_remote_code: false
+      freeze_vision_tower: false
+    optim:
+      lr: 1.0e-6
+      weight_decay: 1.0e-2
+      strategy: adamw  # {adamw, adamw_bf16}
+      lr_warmup_ratio: 0.0
+    fsdp:
+      enable_full_shard: true
+      enable_cpu_offload: false
+      enable_rank0_init: true
+    offload:
+      offload_params: true  # true: more CPU memory; false: more GPU memory
+      offload_optimizer: true  # true: more CPU memory; false: more GPU memory
+  rollout:
+    n: 5
+    temperature: 1.0
+    top_p: 0.99
+    gpu_memory_utilization: 0.7
+    enforce_eager: false
+    enable_chunked_prefill: false
+    tensor_parallel_size: 2
+    limit_images: 0
+    val_override_config:
+      temperature: 1.0
+      n: 1
+  ref:
+    fsdp:
+      enable_full_shard: true
+      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
+      enable_rank0_init: true
+    offload:
+      offload_params: true
+  reward:
+    reward_type: batch
+    reward_function: ./examples/reward_function/math.py:compute_score
+trainer:
+  total_epochs: 2
+  max_steps: null
+  project_name: easy_r1
+  experiment_name: qwen2_5_7b_math_grpo
+  logger: ["console", "wandb"]
+  nnodes: 1
+  n_gpus_per_node: 8
+  val_freq: 3  # -1 to disable
+  val_before_train: true
+  val_only: false
+  val_generations_to_log: 3
+  save_freq: 5  # -1 to disable
+  save_limit: 3  # -1 to disable
+  save_checkpoint_path: your_checkpoint_path
+  load_checkpoint_path: null

math.py ADDED Viewed

	@@ -0,0 +1,49 @@

+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Dict, List
+from mathruler.grader import extract_boxed_content, grade_answer
def format_reward(predict: str) -> float:
    """Return 1.0 if ``predict`` has the expected layout, otherwise 0.0.

    The whole string must start with a ``<think>...</think>`` section that
    is followed, after any text, by a LaTeX ``boxed{...}`` group. Newlines
    are allowed anywhere.
    """
    layout = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
    return 1.0 if layout.fullmatch(predict) is not None else 0.0
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Return 1.0 if the boxed answer in ``predict`` is graded as matching ``ground_truth``, else 0.0.

    Any error raised while grading is treated as an incorrect answer.
    """
    answer = extract_boxed_content(predict)
    try:
        return 1.0 if grade_answer(answer, ground_truth) else 0.0
    except Exception:
        # Grader failures score as wrong. ``Exception`` (rather than a bare
        # ``except:``) keeps KeyboardInterrupt/SystemExit propagating.
        return 0.0
def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Score each prediction against its ground truth.

    Whitespace around ``<``, ``>`` and ``/`` is stripped from every
    prediction before it is checked. Pairs are formed with ``zip``, so
    extra items in the longer list are ignored.

    Returns:
        One dict per pair with ``format`` and ``accuracy`` rewards and
        ``overall = (1 - format_weight) * accuracy + format_weight * format``.
    """
    def _score_one(raw: str, truth: str) -> Dict[str, float]:
        cleaned = re.sub(r"\s*(<|>|/)\s*", r"\1", raw)  # handle qwen2.5vl-32b format
        fmt = format_reward(cleaned)
        acc = accuracy_reward(cleaned, truth)
        return {
            "overall": (1 - format_weight) * acc + format_weight * fmt,
            "format": fmt,
            "accuracy": acc,
        }

    return [_score_one(raw, truth) for raw, truth in zip(predicts, ground_truths)]

math_format.jinja ADDED Viewed

	@@ -0,0 +1 @@


1	+ {{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.

persona.jinja ADDED Viewed

	@@ -0,0 +1 @@


1	+ questioner_format_with_persona

questioner.jinja ADDED Viewed

	@@ -0,0 +1 @@


1	+ questioner_format

r1v.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Dict
+from mathruler.grader import grade_answer
def format_reward(predict: str) -> float:
    """Return 1.0 if ``predict`` is exactly a think section followed by an answer section, else 0.0.

    The whole string must be ``<think>...</think>``, optional whitespace,
    then ``<answer>...</answer>``, with nothing before or after. Newlines
    are allowed inside both sections.
    """
    layout = re.compile(r"<think>.*?</think>\s*<answer>.*?</answer>", re.DOTALL)
    if layout.fullmatch(predict) is None:
        return 0.0
    return 1.0
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Return 1.0 if the answer in ``predict`` is graded as matching ``ground_truth``, else 0.0.

    The answer is the content of the first ``<answer>...</answer>`` span.
    When no such span exists, the whole stripped prediction is graded
    instead. Any exception raised along the way yields 0.0.
    """
    try:
        # DOTALL keeps extraction in step with format_reward, which accepts
        # answers spanning several lines. Without it a multi-line answer
        # was never extracted and the whole response was graded.
        content_match = re.search(r"<answer>(.*?)</answer>", predict, re.DOTALL)
        given_answer = content_match.group(1).strip() if content_match else predict.strip()
        if grade_answer(given_answer, ground_truth.strip()):
            return 1.0
    except Exception:
        pass
    return 0.0
def compute_score(predict: str, ground_truth: str, format_weight: float = 0.5) -> Dict[str, float]:
    """Score one prediction on layout and correctness.

    Returns:
        A dict with the ``format`` and ``accuracy`` rewards and
        ``overall = (1 - format_weight) * accuracy + format_weight * format``.
    """
    fmt = format_reward(predict)
    acc = accuracy_reward(predict, ground_truth)
    overall = (1 - format_weight) * acc + format_weight * fmt
    return {"overall": overall, "format": fmt, "accuracy": acc}

r1v_format.jinja ADDED Viewed

	@@ -0,0 +1 @@

+ {{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>

runtime_env.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+working_dir: ./
+excludes: ["/.git/"]
+env_vars:
+  TOKENIZERS_PARALLELISM: "true"
+  NCCL_DEBUG: "WARN"
+  VLLM_LOGGING_LEVEL: "WARN"
+  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
+  PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"
+  PYTHONUNBUFFERED: "1"

solver.jinja ADDED Viewed

	@@ -0,0 +1 @@


1	+ solver_format