yuntian-deng committed on
Commit
ce0d1e8
·
verified ·
1 Parent(s): 3ff8b6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -81,7 +81,7 @@ def model_inference(title, authors, abstract):
81
  score = probs[1].item()
82
  return score
83
 
84
- def predict(title, authors, abstract):
85
  # Your model prediction logic here
86
  score = model_inference(title, authors, abstract)
87
 
@@ -98,14 +98,16 @@ def predict(title, authors, abstract):
98
  example_title = "WildChat: 1M ChatGPT Interaction Logs in the Wild"
99
  example_authors = "Wenting Zhao, Xiang Ren, Jack Hessel, Claire Cardie, Yejin Choi, Yuntian Deng"
100
  example_abstract = "Chatbots such as GPT-4 and ChatGPT are now serving millions of users. Despite their widespread use, there remains a lack of public datasets showcasing how these tools are used by a population of users in practice. To bridge this gap, we offered free access to ChatGPT for online users in exchange for their affirmative, consensual opt-in to anonymously collect their chat transcripts and request headers. From this, we compiled WildChat, a corpus of 1 million user-ChatGPT conversations, which consists of over 2.5 million interaction turns. We compare WildChat with other popular user-chatbot interaction datasets, and find that our dataset offers the most diverse user prompts, contains the largest number of languages, and presents the richest variety of potentially toxic use-cases for researchers to study. In addition to timestamped chat transcripts, we enrich the dataset with demographic data, including state, country, and hashed IP addresses, alongside request headers. This augmentation allows for more detailed analysis of user behaviors across different geographical regions and temporal dimensions. Finally, because it captures a broad range of use cases, we demonstrate the dataset’s potential utility in fine-tuning instruction-following models. WildChat is released at https://wildchat.allen.ai under AI2 ImpACT Licenses."
101
-
 
 
 
 
 
 
102
  iface = gr.Interface(
103
  fn=predict,
104
- inputs=[gr.Textbox(label="Paper Title", placeholder="Enter paper title", value=example_title),
105
- gr.Textbox(label="Authors (separated by comma)", placeholder="Enter authors (separated by comma)", value=example_authors),
106
- gr.TextArea(label="Abstract", placeholder="Enter abstract", value=example_abstract),
107
- gr.Textbox(label="[Optional] Autofill using arXiv URL/ID", placeholder="[Optional] Autofill using arXiv URL/ID", on_change=update_fields, change_elements=["title", "authors", "abstract"]),
108
- ],
109
  outputs=[gr.Textbox(label="Predicted Score"), gr.Textbox(label="Predicted Selection Probability")],
110
  title="Paper Selection Prediction",
111
  description="Predict if @_akhaliq will select your paper into Hugging Face papers. Enter the title, authors, and abstract of your paper, or enter an arXiv URL/ID.",
 
81
  score = probs[1].item()
82
  return score
83
 
84
+ def predict(title, authors, abstract, _):
85
  # Your model prediction logic here
86
  score = model_inference(title, authors, abstract)
87
 
 
98
  example_title = "WildChat: 1M ChatGPT Interaction Logs in the Wild"
99
  example_authors = "Wenting Zhao, Xiang Ren, Jack Hessel, Claire Cardie, Yejin Choi, Yuntian Deng"
100
  example_abstract = "Chatbots such as GPT-4 and ChatGPT are now serving millions of users. Despite their widespread use, there remains a lack of public datasets showcasing how these tools are used by a population of users in practice. To bridge this gap, we offered free access to ChatGPT for online users in exchange for their affirmative, consensual opt-in to anonymously collect their chat transcripts and request headers. From this, we compiled WildChat, a corpus of 1 million user-ChatGPT conversations, which consists of over 2.5 million interaction turns. We compare WildChat with other popular user-chatbot interaction datasets, and find that our dataset offers the most diverse user prompts, contains the largest number of languages, and presents the richest variety of potentially toxic use-cases for researchers to study. In addition to timestamped chat transcripts, we enrich the dataset with demographic data, including state, country, and hashed IP addresses, alongside request headers. This augmentation allows for more detailed analysis of user behaviors across different geographical regions and temporal dimensions. Finally, because it captures a broad range of use cases, we demonstrate the dataset’s potential utility in fine-tuning instruction-following models. WildChat is released at https://wildchat.allen.ai under AI2 ImpACT Licenses."
101
+
102
+ title_box = gr.Textbox(label="Paper Title", placeholder="Enter paper title", value=example_title)
103
+ author_box = gr.Textbox(label="Authors (separated by comma)", placeholder="Enter authors (separated by comma)", value=example_authors)
104
+ abstract_box = gr.TextArea(label="Abstract", placeholder="Enter abstract", value=example_abstract)
105
+ arxiv_box = gr.Textbox(label="[Optional] Autofill using arXiv URL/ID", placeholder="[Optional] Autofill using arXiv URL/ID", change_elements=["title", "authors", "abstract"])
106
+ arxiv_box.input(update_fields, inputs=[arxiv_box], outputs=[title_box, author_box, abstract_box])
107
+
108
  iface = gr.Interface(
109
  fn=predict,
110
+ inputs=[title_box, author_box, abstract_box, arxiv_box],
 
 
 
 
111
  outputs=[gr.Textbox(label="Predicted Score"), gr.Textbox(label="Predicted Selection Probability")],
112
  title="Paper Selection Prediction",
113
  description="Predict if @_akhaliq will select your paper into Hugging Face papers. Enter the title, authors, and abstract of your paper, or enter an arXiv URL/ID.",