import gradio as gr
import pandas as pd
import json
from datasets import load_dataset
import requests
from huggingface_hub import list_datasets, list_models, list_spaces
from collections import Counter
import numpy as np
def compute_ranking(
    df: pd.DataFrame, column: str, method: str = "sum", keep: str = "last"
) -> pd.DataFrame:
    """Build a per-value percentile lookup table across authors.

    Rows of ``df`` are grouped by their ``"author"`` column, ``column`` is
    aggregated with ``method``, and the authors are ordered ascending by the
    aggregated value. Each position ``i`` (0-based, out of ``n`` authors) is
    labelled with ``100 * (1 - i / n)`` formatted to two decimals, so the
    author with the largest value gets the smallest percentage.

    Args:
        df: Frame containing an ``"author"`` column and ``column``.
        column: Name of the column to aggregate and rank.
        method: Aggregation passed to ``groupby(...).aggregate`` (for example
            ``"sum"`` or ``"count"``).
        keep: Forwarded to ``DataFrame.drop_duplicates``; decides which row
            survives when several authors share the same aggregated value.

    Returns:
        A frame with one row per distinct aggregated value and two columns:
        ``"value"`` (the aggregated value) and ``"top_perc"`` (the percentage
        as a string). The index is the row's position in the ascending order.
    """
    per_author = df.groupby("author").aggregate({column: method})[[column]]
    # Dropping the author index leaves a 0..n-1 positional index that doubles
    # as the rank of each author.
    ranked = per_author.sort_values(by=column).reset_index(drop=True)

    total = len(ranked)
    # Computed from positions directly instead of a row-wise ``apply``, which
    # would build one Series per author just to read its index label.
    ranked["top_perc"] = [
        f"{100 * (1 - (position / total)):.2f}" for position in range(total)
    ]

    # Authors tied on the same value collapse into a single lookup row.
    ranked = ranked.drop_duplicates(subset=column, keep=keep)
    return ranked.rename(columns={column: "value"})
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types.

    Pass it as ``cls=NpEncoder`` to ``json.dumps`` / ``json.dump`` so values
    taken from NumPy or pandas objects can be serialized.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        NumPy booleans, integers, and floating-point scalars become ``bool``,
        ``int``, and ``float``; arrays become (nested) lists. Anything else
        is delegated to ``json.JSONEncoder.default``, which raises
        ``TypeError``.
        """
        # ``np.bool_`` is not a subclass of ``np.integer``, so it needs its
        # own branch; without it NumPy booleans fall through to TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
# Load the model repository statistics once at import time; every user
# summary is computed from this in-memory frame.
ds = load_dataset("open-source-metrics/model-repos-stats", split="train")
df = ds.to_pandas()

# Per-author ranking tables, one per metric, each produced by compute_ranking.
df_ranks = {}
df_ranks["likes"] = compute_ranking(df, "likes")
df_ranks["downloads"] = compute_ranking(df, "downloads_30d")
# Repository count per author: count the repo_id entries instead of summing.
df_ranks["repos"] = compute_ranking(df, "repo_id", method="count")

# Read the HTML template with an explicit encoding so decoding does not depend
# on the platform's locale default.
with open("./html_template.html", "r", encoding="utf-8") as f:
    template = f.read()
def create_user_summary(user_name):
summary = {}
df_user = df.loc[df["author"]==user_name]
if len(df_user) == 0:
return """
Unfortunately there is not enough data for your report.
Enter your HF user name:
""") with gr.Row(): username = gr.Textbox(lines=1, max_lines=1, label="User name") with gr.Row(): run = gr.Button() with gr.Row(): output = gr.HTML(label="Generated code") event = run.click(create_user_summary, [username], output) demo.launch()