Spaces:

AlekseyKorshuk
/

role-play-crowdsource-signup

Runtime error

App Files Files Community

AlekseyKorshuk committed on Jun 26, 2023

Commit

b58f9ec

1 Parent(s): 7d65544

initial commit

Browse files

Files changed (5) hide show

README.md +1 -1
app.py +57 -0
config.py +7 -0
requirements.txt +3 -0
utils.py +168 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 title: Role Play Crowdsource Signup
-emoji: 🌍
 colorFrom: purple
 colorTo: green
 sdk: gradio

 ---
 title: Role Play Crowdsource Signup
+emoji: 👥
 colorFrom: purple
 colorTo: green
 sdk: gradio

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import os
+from functools import partial
+import argilla as rg
+from huggingface_hub import login
+import gradio as gr
+import config
+import utils
+def main():
+    """Build the sign-up page and launch the Gradio app.
+    The records returned by ``init()`` are loaded once, before the UI is built, and are bound
+    to ``signup`` with ``functools.partial`` so that every button click reuses the same list.
+    """
+    records = init()
+    with gr.Blocks() as demo:
+        gr.Markdown("# Role-Play Crowdsource\n"
+                    "TODO")
+        username = gr.Textbox(label="Username", placeholder="alekseykorshuk")
+        # Mask the password while it is typed; the default "text" type would show it on screen.
+        password = gr.Textbox(label="Password", placeholder="12345678", type="password")
+        btn = gr.Button("Run")
+        status = gr.Textbox(label="Status")
+        # signup(username, password, records) -> text shown in the status box.
+        btn.click(
+            fn=partial(signup, records=records),
+            inputs=[username, password], outputs=status)
+    demo.launch()
+def init():
+    """Initialise the Argilla client, log in to the Hugging Face Hub and load the records.
+    All credentials come from ``config``. Returns the result of ``utils.get_records()``.
+    """
+    rg.init(api_url=config.api_url, api_key=config.api_key)
+    login(config.hf_token)
+    return utils.get_records()
+def signup(username, password, records):
+    """Handle one sign-up request and return the text for the status box.
+    Steps, in order: validate the inputs, fetch or create the user, check the credentials
+    against the API, make sure the user's workspace exists, then push a new dataset built
+    from ``records``. A validation or authorization failure returns its message early.
+    """
+    validation_error = utils.check_inputs(username, password)
+    if validation_error:
+        return validation_error
+    account = utils.get_user(username, password)
+    auth_response = utils.authorize_user(username, password)
+    if auth_response.status_code != 200:
+        return "Unable to authorize, please check your credentials."
+    user_workspace = utils.add_workspace(account)
+    dataset_count = utils.get_num_datasets(account)
+    batch = utils.get_records_to_add(account, records, dataset_count)
+    new_dataset_name = utils.push_dataset(dataset_count, user_workspace, batch)
+    return utils.get_response_message(username, password, new_dataset_name)
+if __name__ == "__main__":
+    main()

config.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import os
+# Settings read from environment variables when this module is imported.
+# Size of each group of samples; falls back to 250 when SAMPLES_PER_GROUP is unset.
+samples_per_group = int(os.getenv("SAMPLES_PER_GROUP", 250))
+# Argilla endpoint and API key (None when the variable is unset).
+api_url = os.getenv("ARGILLA_API_URL")
+api_key = os.getenv("ARGILLA_API_KEY")
+# Hugging Face dataset path and access token (None when the variable is unset).
+hf_dataset_path = os.getenv("HF_DATASET_PATH")
+hf_token = os.getenv("HF_TOKEN")

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==3.24.1
+datasets==2.11.0
+argilla==1.11.0

utils.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import re
+import requests
+import tqdm
+from datasets import load_dataset
+import argilla as rg
+import config
+def push_dataset(num_datasets, workspace, records_to_add):
+    """Build a feedback dataset from ``records_to_add`` and push it to Argilla.
+    The dataset is named by ``get_dataset_name(num_datasets)`` and pushed into the workspace
+    called ``workspace.name``. Returns the dataset name.
+    """
+    feedback_dataset = rg.FeedbackDataset(
+        guidelines=_get_guidelines(), fields=_get_fields(), questions=_get_questions(),
+    )
+    feedback_dataset.add_records(records_to_add)
+    name = get_dataset_name(num_datasets)
+    feedback_dataset.push_to_argilla(name=name, workspace=workspace.name, show_progress=True)
+    return name
+def _get_fields():
+    """Return the dataset's text fields: ``system`` and ``conversation_history``."""
+    field_specs = (
+        ("system", "Character description"),
+        ("conversation_history", "Conversation history"),
+    )
+    return [rg.TextField(name=field_name, title=field_title) for field_name, field_title in field_specs]
+def get_records():
+    """Turn every row of the source dataset into an ``rg.FeedbackRecord``.
+    Each row supplies the ``system`` and ``conversation_history`` fields and its
+    ``external_id``. Progress is reported through ``tqdm``. Returns the list of records.
+    """
+    records = []
+    for row in tqdm.tqdm(_get_dataset()):
+        record_fields = {key: row[key] for key in ("system", "conversation_history")}
+        records.append(rg.FeedbackRecord(fields=record_fields, external_id=row['external_id']))
+    return records
+def _get_dataset():
+    """Load the ``train`` split of the dataset at ``config.hf_dataset_path``."""
+    return load_dataset(config.hf_dataset_path, split="train")
+def _get_questions():
+    """Return the dataset's questions: one required text question named ``new-response``."""
+    description = (
+        "Write the final version of the Character response, making sure that it matches the character "
+        "description and makes sense for the conversation history."
+    )
+    response_question = rg.TextQuestion(
+        name="new-response", title="Character response:", description=description, required=True,
+    )
+    return [response_question]
+def _get_guidelines():
+    """Return the dataset guidelines; none are defined, so this is always ``None``."""
+    return None
+def authorize_user(username, password):
+    """POST the credentials to the API's ``/api/security/token`` endpoint.
+    Returns the raw ``requests.Response``; no status check is done here, so the caller
+    decides what a non-200 answer means. Raises ``requests.Timeout`` if the server does
+    not answer within 30 seconds.
+    """
+    credentials = {
+        "username": username,
+        "password": password,
+    }
+    # requests waits indefinitely by default; bound the wait so a stalled server cannot
+    # hang the request handler forever.
+    response = requests.post(f"{config.api_url}/api/security/token", data=credentials, timeout=30)
+    return response
+def get_user(username, password):
+    """Return the user named ``username``, creating it with ``password`` when none exists."""
+    existing = get_existing_user(username)
+    return existing if existing is not None else create_new_user(username, password)
+def create_new_user(username, password):
+    """Create and return a new user with the ``annotator`` role.
+    The number of users that exist at creation time is stored, as a string, in
+    ``first_name``; ``get_records_to_add`` parses it back into an integer index.
+    """
+    user_count = len(list(rg.User.list()))
+    return rg.User.create(
+        username=username,
+        first_name=str(user_count),
+        last_name="-",
+        password=password,
+        role="annotator",
+    )
+def get_existing_user(username):
+    """Return the first listed user whose ``username`` matches, or ``None`` if there is none."""
+    matches = (candidate for candidate in rg.User.list() if candidate.username == username)
+    return next(matches, None)
+def add_workspace(user):
+    """Return the workspace named after ``user.username``, creating it when needed.
+    A newly created workspace gets ``user`` added to it. A ``ValueError`` raised by either
+    call in the ``try`` body is treated as "the workspace already exists", and the existing
+    workspace is then looked up by name.
+    """
+    try:
+        workspace = rg.Workspace.create(name=user.username)
+        workspace.add_user(user.id)
+    except ValueError:
+        # NOTE(review): assumes ValueError only ever signals an existing workspace; on this
+        # path the user is not (re-)added to it - confirm that is intended.
+        print("Workspace for this user already exists.")
+        workspace = rg.Workspace.from_name(name=user.username)
+    return workspace
+def get_records_to_add(user, records, num_dataset):
+    """Select the group of ``records`` for this user's next dataset.
+    The shift passed to ``assign_samples`` is the integer stored in ``user.first_name``
+    plus ``num_dataset``; the group size is ``config.samples_per_group``.
+    """
+    shift = int(user.first_name) + num_dataset
+    return assign_samples(records, config.samples_per_group, shift)
+def assign_samples(records, samples_per_group, shifts):
+    """Return one group of consecutive records, wrapping around the end of ``records``.
+    The group starts at ``samples_per_group * (shifts - 1)`` modulo ``len(records)``.
+    Args:
+        records: sequence to draw from.
+        samples_per_group: requested group size.
+        shifts: 1-based group number; values below 1 wrap around via the modulo.
+    Returns:
+        A list of at most ``samples_per_group`` records. It is empty when ``records`` is
+        empty or the group size is not positive, and it is capped at ``len(records)`` so
+        that no record appears twice in one group.
+    """
+    total = len(records)
+    # Guard the modulo below against an empty input (previously a ZeroDivisionError).
+    if total == 0 or samples_per_group <= 0:
+        return []
+    start = (samples_per_group * (shifts - 1)) % total
+    # A group larger than the input used to wrap inconsistently and repeat records.
+    count = min(samples_per_group, total)
+    return [records[(start + offset) % total] for offset in range(count)]
+def get_num_datasets(user):
+    """Return the number of datasets the API lists for ``user``.
+    Calls ``/api/v1/me/datasets`` authenticated with ``user.api_key`` and counts the
+    entries under ``"items"`` in the JSON answer.
+    Raises:
+        requests.HTTPError: the API answered with an error status.
+        requests.Timeout: no answer within 30 seconds.
+    """
+    headers = {"X-Argilla-Api-Key": user.api_key}
+    # requests waits indefinitely by default; bound the wait.
+    response = requests.get(f"{config.api_url}/api/v1/me/datasets", headers=headers, timeout=30)
+    # Surface the HTTP failure itself instead of a KeyError on a body without "items".
+    response.raise_for_status()
+    return len(response.json()["items"])
+def get_dataset_name(num_datasets):
+    """Return the name of the next dataset: ``dataset-group-<num_datasets + 1>``."""
+    next_index = num_datasets + 1
+    return "dataset-group-{}".format(next_index)
+def check_inputs(username, password):
+    """Validate the sign-up form values.
+    The username must consist only of lowercase letters, digits, ``-`` and ``_`` and must
+    not start with ``-`` or ``_``; the password needs at least 8 characters.
+    Returns:
+        ``None`` when both values are acceptable, otherwise a message describing the
+        first problem found (the username is checked first).
+    """
+    # fullmatch, not match: with match the trailing "$" also accepts a username that ends
+    # in a newline, which would slip through validation.
+    if not re.fullmatch(r"^(?!-|_)[a-z0-9-_]+$", username):
+        return "Your username does not match the pattern '^(?!-|_)[a-z0-9-_]+$', please fix and try again.\n" \
+               "Tips:\n" \
+               "1. Make it lowercase.\n" \
+               "2. Use only english.\n" \
+               "3. Use only '_' as special symbol."
+    if len(password) < 8:
+        return "Your password is less than 8 symbols, please fix and try again."
+    return None
+def get_response_message(username, password, dataset_name):
+    """Compose the success message returned after a dataset has been pushed.
+    The text repeats the credentials, names the new dataset and states the group size
+    from ``config.samples_per_group``.
+    """
+    parts = (
+        f"Successfully created/updated your profile at {config.api_url}. ",
+        "Use the following credential to login:\n",
+        f"Username: {username}\n",
+        f"Password: {password}\n\n",
+        f"You will find the dataset '{dataset_name}' with {config.samples_per_group} new samples.\n",
+        "Please take your time to annotate the data. If you finished all provided samples, ",
+        "simply use the same credentials in this Gradio Space and we will add you another dataset with ",
+        "new samples. ",
+    )
+    return "".join(parts)