Spaces:
Sleeping
Sleeping
""" | |
Collect data from the multiple sources and create a base datafranme for the LLMCalculator table | |
Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency | |
Pricing - pricing.json | |
Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json | |
""" | |
import pandas as pd | |
import json | |
import requests | |
from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX | |
import os | |
def validate_request(url: str, response) -> bool: | |
""" | |
Validate if an HTTP request was successful. | |
Args: | |
url (str): The URL that was requested | |
response (requests.Response): The response object from the request | |
Returns: | |
bool: True if request was successful (status code 200), False otherwise | |
""" | |
if response.status_code != 200: | |
print(f"Failed to read file - {url}. Status Code: {response.status_code}") | |
return False | |
return True | |
def fetch_benchmark_data(benchmark: str = "text", version_names: list = []) -> tuple: | |
""" | |
Fetch and parse benchmark results and latency data from CSV files. | |
Args: | |
benchmark (str): Type of benchmark to fetch ('text' or 'multimodal') | |
version_names (list): List of version names to search through, sorted by latest first | |
Returns: | |
tuple[pd.DataFrame, pd.DataFrame]: A tuple containing: | |
- results_df: DataFrame with benchmark results | |
- latency_df: DataFrame with latency measurements | |
Returns (None, None) if no matching version is found or requests fail | |
Raises: | |
requests.RequestException: If there's an error fetching the data | |
pd.errors.EmptyDataError: If CSV file is empty | |
pd.errors.ParserError: If CSV parsing fails | |
""" | |
for v in version_names: | |
# Check if version matches benchmark type | |
is_multimodal = 'multimodal' in v | |
if (benchmark == "multimodal") != is_multimodal: | |
continue | |
# Construct URLs | |
results_url = os.path.join(CLEMBENCH_RUNS_REPO, v, RESULT_FILE) | |
latency_url = os.path.join(CLEMBENCH_RUNS_REPO, LATENCY_FOLDER, v + LATENCY_SUFFIX) | |
try: | |
results = requests.get(results_url) | |
latency = requests.get(latency_url) | |
if validate_request(results_url, results) and validate_request(latency_url, latency): | |
# Convert the CSV content to pandas DataFrames | |
results_df = pd.read_csv(pd.io.common.StringIO(results.text)) | |
latency_df = pd.read_csv(pd.io.common.StringIO(latency.text)) | |
return results_df, latency_df | |
except requests.RequestException as e: | |
print(f"Error fetching data for version {v}: {e}") | |
except pd.errors.EmptyDataError: | |
print(f"Error: Empty CSV file found for version {v}") | |
except pd.errors.ParserError: | |
print(f"Error: Unable to parse CSV data for version {v}") | |
return None, None | |
def fetch_version_metadata() -> tuple: | |
""" | |
Fetch and process benchmark metadata from the Clembench GitHub repository. | |
The data is sourced from: https://github.com/clembench/clembench-runs | |
Configure the repository path in src/assets/text_content/CLEMBENCH_RUNS_REPO | |
Returns: | |
tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing: | |
- mm_result: Multimodal benchmark results | |
- mm_latency: Multimodal latency data | |
- text_result: Text benchmark results | |
- text_latency: Text latency data | |
Returns (None, None, None, None) if the request fails | |
""" | |
json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE | |
response = requests.get(json_url) | |
# Check if the JSON file request was successful | |
if not validate_request(json_url, response): | |
return None, None, None, None | |
json_data = response.json() | |
versions = json_data['versions'] | |
# Sort the versions in benchmark by latest first | |
version_names = sorted( | |
[ver['version'] for ver in versions], | |
key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))), | |
reverse=True | |
) | |
# Latency is in seconds | |
mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names) | |
text_result, text_latency = fetch_benchmark_data("text", version_names) | |
return mm_latency, mm_result, text_latency, text_result | |
def fetch_registry_data() -> dict: | |
""" | |
Fetch and parse model registry data from the Clembench registry URL. | |
The data is sourced from the model registry defined in REGISTRY_URL. | |
Contains information about various LLM models including their specifications | |
and capabilities. | |
Returns: | |
dict: Dictionary containing model registry data. | |
Returns None if the request fails or the JSON is invalid. | |
Raises: | |
requests.RequestException: If there's an error fetching the data | |
json.JSONDecodeError: If the response cannot be parsed as JSON | |
""" | |
try: | |
response = requests.get(REGISTRY_URL) | |
if not validate_request(REGISTRY_URL, response): | |
return None | |
return response.json() | |
except requests.RequestException as e: | |
print(f"Error fetching registry data: {e}") | |
except json.JSONDecodeError as e: | |
print(f"Error parsing registry JSON: {e}") | |
return None | |
if __name__=="__main__": | |
fetch_version_metadata() | |
registry_data = fetch_registry_data() | |
print(registry_data[0]) | |