HowardZhangdqs
committed on
Commit
·
ccb6afe
1
Parent(s):
188ab1e
懒得写commit了
Browse files- app-dev.py +14 -0
- app.py +4 -176
- assets.py +1 -0
- css/interface.css +39 -9
- css/table.css +46 -0
- date.py +81 -0
- fetch_paper.py +33 -11
- interface.py +282 -0
- sorter.py +6 -6
app-dev.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from interface import create_interface
|
4 |
+
|
5 |
+
gr.close_all()
|
6 |
+
|
7 |
+
demo = create_interface()
|
8 |
+
|
9 |
+
if __name__ == "__main__":
|
10 |
+
demo.launch(
|
11 |
+
server_name="localhost",
|
12 |
+
server_port=7860,
|
13 |
+
# share=True
|
14 |
+
)
|
app.py
CHANGED
@@ -1,182 +1,10 @@
|
|
1 |
import gradio as gr
|
2 |
-
from datetime import datetime, timedelta
|
3 |
-
from fetch_paper import fetch_papers, fetch_papers_with_daterange
|
4 |
-
from sorter import sort_by_upvotes
|
5 |
|
|
|
6 |
|
7 |
-
|
8 |
-
"""格式化作者信息"""
|
9 |
-
if not author:
|
10 |
-
return ""
|
11 |
-
hidden_status = "(隐藏)" if author.hidden else ""
|
12 |
-
if author.name:
|
13 |
-
return f"<a href='https://scholar.google.com/citations?view_op=search_authors&mauthors={author.name.replace(' ', '+')}'>{author.name}</a>{hidden_status}"
|
14 |
-
return f"匿名作者{hidden_status}"
|
15 |
-
|
16 |
-
|
17 |
-
def format_paper_info(article):
|
18 |
-
"""生成论文展示的 HTML 内容"""
|
19 |
-
if not article.paper:
|
20 |
-
return "论文信息缺失"
|
21 |
-
|
22 |
-
info = []
|
23 |
-
# 标题部分
|
24 |
-
info.append(f"<h2>{article.title or '无标题论文'}</h2>")
|
25 |
-
|
26 |
-
# 缩略图
|
27 |
-
if article.thumbnail:
|
28 |
-
info.append(f"<p><img src='{article.thumbnail}' style='max-width: 30em; width: 100%; margin: auto'/></p>")
|
29 |
-
|
30 |
-
# 基本信息
|
31 |
-
info.append(f"<p><strong>论文 ID</strong>:<a href='https://huggingface.co/papers/{article.paper.id}'>{article.paper.id or '未知'}</a></p>")
|
32 |
-
info.append(f"<p><strong>发布时间</strong>:{article.paper.publishedAt.strftime('%Y-%m-%d %H:%M') if article.paper.publishedAt else '未知'}</p>")
|
33 |
-
|
34 |
-
# 作者信息
|
35 |
-
authors = "、".join([format_author(a) for a in article.paper.authors]) if article.paper.authors else "作者信息暂缺"
|
36 |
-
info.append(f"<p><strong>作者</strong>:{authors}</p>")
|
37 |
-
|
38 |
-
# 摘要
|
39 |
-
if article.paper.summary:
|
40 |
-
summary = article.paper.summary.replace('{{', '{').replace('}}', '}').replace('\n', ' ')
|
41 |
-
info.append(f"<h3>摘要</h3><p>{summary}</p>")
|
42 |
-
|
43 |
-
# 讨论信息
|
44 |
-
info.append(f"<p><strong>点赞数</strong>:{article.paper.upvotes or 0}<span style='margin-left: .5rem'></span>")
|
45 |
-
info.append(f"<strong>评论数</strong>:{article.numComments or 0}</p>")
|
46 |
-
if article.paper.discussionId:
|
47 |
-
info.append(f"<a href='https://huggingface.co/papers/{article.paper.id}/discussion/{article.paper.discussionId}'>进入讨论</a></p>")
|
48 |
-
|
49 |
-
# 提交者信息
|
50 |
-
if article.submittedBy:
|
51 |
-
submitter = article.submittedBy
|
52 |
-
info.append(f"<hr><p><strong>提交者</strong>: ")
|
53 |
-
info.append(
|
54 |
-
f"<span><img src='{submitter.avatarUrl}' class='author' /></span>{submitter.fullname}(<a href='https://huggingface.co/{submitter.name}'>@{submitter.name}</a>) ")
|
55 |
-
info.append(f"粉丝数:{submitter.followerCount or 0}</p>")
|
56 |
-
|
57 |
-
return "".join(info)
|
58 |
-
|
59 |
-
|
60 |
-
def generate_table_html(papers):
|
61 |
-
"""生成带可点击标题的表格 HTML"""
|
62 |
-
html = ['<table class="paper-table"><tr><th>标题</th><th>👍点赞</th><th>💬评论</th><th>📅日期</th></tr>']
|
63 |
-
|
64 |
-
for article in papers:
|
65 |
-
title = article.title or "无标题"
|
66 |
-
upvotes = article.paper.upvotes or 0
|
67 |
-
comments = article.numComments or 0
|
68 |
-
date = article.paper.publishedAt.strftime("%Y-%m-%d") if article.paper.publishedAt else "未知"
|
69 |
-
paper_id = article.paper.id
|
70 |
-
|
71 |
-
row = f"""
|
72 |
-
<tr>
|
73 |
-
<td><a class="paper-title" href="javascript:void(0)" onclick="showDetail('{paper_id}')">{title}</a></td>
|
74 |
-
<td>{upvotes}</td>
|
75 |
-
<td>{comments}</td>
|
76 |
-
<td>{date}</td>
|
77 |
-
</tr>
|
78 |
-
"""
|
79 |
-
html.append(row)
|
80 |
-
|
81 |
-
html.append("</table>")
|
82 |
-
return "".join(html)
|
83 |
-
|
84 |
-
|
85 |
-
def build_html(papers):
|
86 |
-
# 将所有的papers转换为一个html字符串,每个paper用一个div包裹,div内部包含paper的信息,div的id为paper的id
|
87 |
-
html = ""
|
88 |
-
for article in papers:
|
89 |
-
article_html = format_paper_info(article)
|
90 |
-
html += f"<div id='smartflow-paper-{article.paper.id.replace('.', '-')}' style='display: none'>{article_html}</div>"
|
91 |
-
return html
|
92 |
-
|
93 |
-
|
94 |
-
def query_papers(start_date_str, end_date_str):
|
95 |
-
"""处理日期查询"""
|
96 |
-
try:
|
97 |
-
start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
|
98 |
-
end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
|
99 |
-
papers = fetch_papers_with_daterange(start_date, end_date)
|
100 |
-
papers = sort_by_upvotes(papers)
|
101 |
-
return generate_table_html(papers), build_html(papers)
|
102 |
-
except Exception as e:
|
103 |
-
print(f"查询出错: {e}")
|
104 |
-
return "<p>⚠️ 查询失败,请检查日期格式(YYYY-MM-DD)</p>", "<p>⚠️ 查询失败,请检查日期格式(YYYY-MM-DD)</p>"
|
105 |
-
|
106 |
-
|
107 |
-
def show_detail(paper_id, papers):
|
108 |
-
"""显示论文详情"""
|
109 |
-
if not papers:
|
110 |
-
return "请先进行查询"
|
111 |
-
|
112 |
-
return build_html(papers)
|
113 |
-
|
114 |
-
|
115 |
-
# CSS 样式(可放入单独文件)
|
116 |
-
custom_css = """
|
117 |
-
.paper-table { width: 100%; border-collapse: collapse; }
|
118 |
-
.paper-table td { padding: 12px; border-bottom: 1px solid #ddd; }
|
119 |
-
.paper-table th { font-weight: bold; background: #f9f9f920; }
|
120 |
-
.paper-table tr:hover { background: #f9f9f920; }
|
121 |
-
.paper-title { color: #1a73e8; cursor: pointer; text-decoration: none !important; }
|
122 |
-
.paper-title:hover { text-decoration: underline !important; }
|
123 |
-
.paper-table td:nth-child(2), .paper-table td:nth-child(3), .paper-table td:nth-child(4) { text-align: center; }
|
124 |
-
.paper-table th:nth-child(2), .paper-table th:nth-child(3), .paper-table th:nth-child(4) { text-align: center; }
|
125 |
-
.detail-area { margin-top: 20px; padding: 20px; border: 1px solid #ddd; border-radius: 5px; }
|
126 |
-
"""
|
127 |
-
|
128 |
-
custom_js = """
|
129 |
-
function showDetail(paperId) {
|
130 |
-
// 隐藏 smartflow-paper-paperId 的所有兄弟节点
|
131 |
-
var siblings = document.querySelectorAll(`div[id^='smartflow-paper-']:not(#smartflow-paper-${paperId.replace('.', '-')})`);
|
132 |
-
siblings.forEach(sibling => sibling.style.display = 'none');
|
133 |
-
|
134 |
-
// 显示当前节点
|
135 |
-
var paper = document.getElementById(`smartflow-paper-${paperId.replace('.', '-')}`);
|
136 |
-
if (paper) {
|
137 |
-
paper.style.display = 'block';
|
138 |
-
}
|
139 |
-
}
|
140 |
-
"""
|
141 |
-
|
142 |
-
|
143 |
-
def create_interface():
|
144 |
-
"""创建新的界面布局"""
|
145 |
-
with gr.Blocks(title="Hugging Face Daily Paper", css=custom_css, head=f"<script>{custom_js}</script>") as app:
|
146 |
-
|
147 |
-
# 主界面
|
148 |
-
gr.Markdown("# 📚 Hugging Face Daily Paper")
|
149 |
-
|
150 |
-
# 查询控制区
|
151 |
-
with gr.Row():
|
152 |
-
start_date = gr.Textbox(label="起始日期", placeholder="YYYY-MM-DD", value=datetime.now().strftime("%Y-%m-%d"))
|
153 |
-
end_date = gr.Textbox(label="结束日期", placeholder="YYYY-MM-DD", value=datetime.now().strftime("%Y-%m-%d"))
|
154 |
-
query_btn = gr.Button("🔍 查询", variant="primary")
|
155 |
-
|
156 |
-
# 结果显示区
|
157 |
-
with gr.Column(visible=True):
|
158 |
-
results_html = gr.HTML(label="查询结果")
|
159 |
-
|
160 |
-
# 论文详情区
|
161 |
-
with gr.Column(visible=True, elem_classes="detail-area"):
|
162 |
-
gr.Markdown("## 论文详情")
|
163 |
-
detail_html = gr.HTML(elem_id="detail-html")
|
164 |
-
|
165 |
-
# 事件处理
|
166 |
-
query_btn.click(
|
167 |
-
fn=query_papers,
|
168 |
-
inputs=[start_date, end_date],
|
169 |
-
outputs=[results_html, detail_html]
|
170 |
-
)
|
171 |
-
|
172 |
-
return app
|
173 |
|
|
|
174 |
|
175 |
if __name__ == "__main__":
|
176 |
-
|
177 |
-
app = create_interface()
|
178 |
-
app.launch(
|
179 |
-
# server_name="localhost",
|
180 |
-
# server_port=7860,
|
181 |
-
# share=True
|
182 |
-
)
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
2 |
|
3 |
+
from interface import create_interface
|
4 |
|
5 |
+
gr.close_all()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
demo = create_interface()
|
8 |
|
9 |
if __name__ == "__main__":
|
10 |
+
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
assets.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
SVG_LINK = """<svg class="sf-svg" xmlns="http://www.w3.org/2000/svg" width="15" height="15" viewBox="0 0 15 15"><path fill="currentColor" d="M8.293 2.293a1 1 0 0 1 1.414 0l4.5 4.5a1 1 0 0 1 0 1.414l-4.5 4.5a1 1 0 0 1-1.414-1.414L11 8.5H1.5a1 1 0 0 1 0-2H11L8.293 3.707a1 1 0 0 1 0-1.414"/></svg>"""
|
css/interface.css
CHANGED
@@ -1,12 +1,42 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
4 |
}
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
}
|
|
|
1 |
+
.sf-svg {
|
2 |
+
width: 0.8rem;
|
3 |
+
height: 0.8rem;
|
4 |
+
display: inline;
|
5 |
+
margin-left: 0.1rem;
|
6 |
+
transform: rotate(-45deg);
|
7 |
}
|
8 |
|
9 |
+
.sf-svg path,
|
10 |
+
.sf-svg {
|
11 |
+
color: inherit;
|
12 |
+
}
|
13 |
+
|
14 |
+
.sf-button {
|
15 |
+
background: black;
|
16 |
+
border: 1px solid #f9f9f9;
|
17 |
+
border-radius: 6px;
|
18 |
+
padding: 0 .5rem;
|
19 |
+
cursor: pointer;
|
20 |
+
color: white;
|
21 |
+
font-weight: bold;
|
22 |
+
text-align: center;
|
23 |
+
text-decoration: none;
|
24 |
+
display: inline-block;
|
25 |
+
margin: 0.5rem 0;
|
26 |
+
transition: background 0.3s, color 0.3s;
|
27 |
+
margin: 5px;
|
28 |
+
}
|
29 |
+
|
30 |
+
.sf-button:hover {
|
31 |
+
background: #1a73e8;
|
32 |
+
color: #f9f9f9;
|
33 |
+
}
|
34 |
+
|
35 |
+
.sf-button:active {
|
36 |
+
background: #f9f9f9;
|
37 |
+
color: #1a73e8;
|
38 |
+
}
|
39 |
+
|
40 |
+
hr {
|
41 |
+
margin: 1rem 0;
|
42 |
}
|
css/table.css
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.paper-table {
|
2 |
+
width: 100%;
|
3 |
+
border-collapse: collapse;
|
4 |
+
}
|
5 |
+
|
6 |
+
.paper-table td {
|
7 |
+
padding: 12px;
|
8 |
+
border-bottom: 1px solid #ddd;
|
9 |
+
}
|
10 |
+
|
11 |
+
.paper-table th {
|
12 |
+
font-weight: bold;
|
13 |
+
background: #f9f9f920;
|
14 |
+
}
|
15 |
+
|
16 |
+
.paper-table tr:hover {
|
17 |
+
background: #f9f9f920;
|
18 |
+
}
|
19 |
+
|
20 |
+
.paper-table tbody {
|
21 |
+
max-width: 400px;
|
22 |
+
}
|
23 |
+
|
24 |
+
.paper-title {
|
25 |
+
color: #1a73e8;
|
26 |
+
cursor: pointer;
|
27 |
+
text-decoration: none !important;
|
28 |
+
}
|
29 |
+
|
30 |
+
.paper-title:hover {
|
31 |
+
text-decoration: underline !important;
|
32 |
+
}
|
33 |
+
|
34 |
+
.paper-table td:nth-child(2),
|
35 |
+
.paper-table td:nth-child(3),
|
36 |
+
.paper-table td:nth-child(4),
|
37 |
+
.paper-table td:nth-child(5) {
|
38 |
+
text-align: center;
|
39 |
+
}
|
40 |
+
|
41 |
+
.paper-table th:nth-child(2),
|
42 |
+
.paper-table th:nth-child(3),
|
43 |
+
.paper-table th:nth-child(4),
|
44 |
+
.paper-table th:nth-child(5) {
|
45 |
+
text-align: center;
|
46 |
+
}
|
date.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import date as dt_date, timedelta
|
2 |
+
|
3 |
+
|
4 |
+
class Date:
    """Calendar date (no time of day) wrapping ``datetime.date``.

    Accepted constructor forms:

    * ``Date()`` -- today's date.
    * ``Date("2025-01-28")`` or ``Date("2025-01-31T13:33:38.548Z")`` -- an
      ISO string; everything from the first ``T`` onward is discarded.
    * ``Date(2025, 1, 28)`` -- explicit year, month, day.

    ``Date + int`` and ``int + Date`` shift the date by whole days (use a
    negative int to go backwards). Instances compare and order against
    other ``Date`` objects and are hashable, so they can be used as dict
    keys or set members.

    Raises:
        TypeError: wrong number of arguments, or a single non-string argument.
        ValueError: the string / numbers do not form a valid date.
    """

    def __init__(self, *args):
        if len(args) == 0:
            self._date = dt_date.today()
        elif len(args) == 1:
            value = args[0]
            if not isinstance(value, str):
                raise TypeError("Invalid argument type. Expected string.")
            # Keep only the calendar part of a full ISO timestamp.
            self._date = dt_date.fromisoformat(value.split("T")[0])
        elif len(args) == 3:
            year, month, day = args
            self._date = dt_date(year, month, day)
        else:
            raise TypeError("Date() requires 0, 1, or 3 arguments")

    def __eq__(self, other):
        if isinstance(other, Date):
            return self._date == other._date
        # Let Python try the reflected operation; ``==`` still ends up False.
        return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; equal
        # dates must hash equally.
        return hash(self._date)

    def __add__(self, days):
        if isinstance(days, int):
            shifted = self._date + timedelta(days=days)
            return Date(shifted.year, shifted.month, shifted.day)
        return NotImplemented

    # Addition of a day count is commutative.
    __radd__ = __add__

    def __repr__(self):
        return f"Date({self._date.year}, {self._date.month}, {self._date.day})"

    def __str__(self):
        # ISO format: zero-padded YYYY-MM-DD.
        return self._date.isoformat()

    # Ordering against a non-Date returns NotImplemented so that Python
    # raises TypeError instead of silently answering False.
    def __lt__(self, other):
        if isinstance(other, Date):
            return self._date < other._date
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, Date):
            return self._date <= other._date
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, Date):
            return self._date > other._date
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, Date):
            return self._date >= other._date
        return NotImplemented
|
67 |
+
|
68 |
+
|
69 |
+
if __name__ == "__main__":
|
70 |
+
date1 = Date() # 返回今天日期
|
71 |
+
date2 = Date("2025-01-28")
|
72 |
+
date3 = Date(2025, 1, 28)
|
73 |
+
date4 = Date("2025-01-31T13:33:38.548Z") # 删除所有时间信息,仅保留日期
|
74 |
+
print(date2 == date3) # True
|
75 |
+
|
76 |
+
date4 = Date("2025-01-29")
|
77 |
+
print(date2 + 1 == date4)
|
78 |
+
|
79 |
+
print(date1, date2, date3, date4)
|
80 |
+
|
81 |
+
print(date1 < date2) # False
|
fetch_paper.py
CHANGED
@@ -4,6 +4,9 @@ import requests
|
|
4 |
import datetime
|
5 |
import hashlib
|
6 |
import json
|
|
|
|
|
|
|
7 |
|
8 |
|
9 |
API_URL = "https://huggingface.co/api/daily_papers"
|
@@ -51,35 +54,54 @@ def fetch_papers():
|
|
51 |
|
52 |
|
53 |
def fetch_papers_with_date(date: datetime):
|
54 |
-
formatted_date = date
|
55 |
data = make_request(API_URL + "?date=" + formatted_date)
|
56 |
return [parse_article(item) for item in data]
|
57 |
|
58 |
|
59 |
-
def fetch_papers_with_daterange(start_date:
|
60 |
-
# return []
|
61 |
-
# 每天的数据都是独立的,所以只需要遍历日期范围即可
|
62 |
articles = []
|
63 |
current_date = start_date
|
|
|
|
|
64 |
while current_date <= end_date:
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
# 根据每个文章的.paper.id去重
|
71 |
unique_articles = {}
|
72 |
for article in articles:
|
73 |
if article.paper.id not in unique_articles:
|
74 |
unique_articles[article.paper.id] = article
|
75 |
|
|
|
|
|
76 |
return list(unique_articles.values())
|
77 |
|
78 |
|
79 |
if __name__ == "__main__":
|
80 |
from rich import print
|
81 |
-
start_date =
|
82 |
-
end_date =
|
83 |
articles = fetch_papers_with_daterange(start_date=start_date, end_date=end_date)
|
84 |
# print(articles)
|
85 |
print(f"Total articles: {len(articles)}")
|
|
|
4 |
import datetime
|
5 |
import hashlib
|
6 |
import json
|
7 |
+
from rich import print
|
8 |
+
from date import Date
|
9 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
10 |
|
11 |
|
12 |
API_URL = "https://huggingface.co/api/daily_papers"
|
|
|
54 |
|
55 |
|
56 |
def fetch_papers_with_date(date: "Date"):
    """Fetch the daily papers listed for one specific day.

    Args:
        date: The day to query. Its ``str()`` form is sent as the ``date``
            query parameter, so it must render as ``YYYY-MM-DD``.

    Returns:
        A list with one parsed article per item returned by the API.
    """
    # The previous annotation named the ``datetime`` module, not a type;
    # callers pass a ``Date`` whose str() is the ISO day.
    data = make_request(f"{API_URL}?date={date}")
    return [parse_article(item) for item in data]
|
60 |
|
61 |
|
62 |
+
def fetch_papers_with_daterange(start_date: Date, end_date: Date):
    """Fetch papers for every day in ``[start_date, end_date]`` (inclusive).

    Days are fetched concurrently, then merged in chronological order and
    de-duplicated by ``article.paper.id`` (first occurrence wins). A day
    whose request fails is reported and skipped, so the result may be
    partial. An empty range (``start_date > end_date``) yields ``[]``.

    Args:
        start_date: First day to fetch.
        end_date: Last day to fetch.

    Returns:
        A list of unique articles.
    """
    days = []
    current_date = start_date
    while current_date <= end_date:
        days.append(current_date)
        current_date += 1

    today = Date()

    def fetch_for_date(day):
        # "Today" goes through the undated endpoint wrapper.
        if day == today:
            print("Fetching papers for today")
            return fetch_papers()
        print(f"Fetching papers for {day}")
        return fetch_papers_with_date(day)

    articles = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        pending = [(day, executor.submit(fetch_for_date, day)) for day in days]
        # Collect in submission order (not completion order) so that the
        # merged list, and therefore which duplicate survives, does not
        # depend on network timing.
        for day, future in pending:
            try:
                articles.extend(future.result())
            except Exception as e:
                print(f"Error fetching articles for date {day}: {e}")

    # NOTE(review): a filter on each article's publication date against the
    # requested range was previously sketched here but left disabled.

    unique_articles = {}
    for article in articles:
        unique_articles.setdefault(article.paper.id, article)

    print(f"Unique articles: {len(unique_articles)}")

    return list(unique_articles.values())
|
99 |
|
100 |
|
101 |
if __name__ == "__main__":
|
102 |
from rich import print
|
103 |
+
start_date = Date(2025, 1, 21)
|
104 |
+
end_date = Date(2025, 2, 1)
|
105 |
articles = fetch_papers_with_daterange(start_date=start_date, end_date=end_date)
|
106 |
# print(articles)
|
107 |
print(f"Total articles: {len(articles)}")
|
interface.py
ADDED
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
import gradio as gr
|
3 |
+
from datetime import datetime, timedelta
|
4 |
+
from fetch_paper import fetch_papers_with_daterange
|
5 |
+
from sorter import sort_by_upvotes, sort_by_comments, sort_by_date
|
6 |
+
from date import Date
|
7 |
+
import assets
|
8 |
+
|
9 |
+
|
10 |
+
def format_author(author):
    """Format one author as an HTML link to a Google Scholar author search.

    Args:
        author: Object with a ``name`` attribute, or a falsy value.

    Returns:
        ``""`` for a missing author, an ``<a>`` element for a named author,
        or the text ``"Anonymous author"`` when the name is empty.
    """
    from html import escape
    from urllib.parse import quote_plus

    if not author:
        return ""
    if not author.name:
        return "Anonymous author"

    # The name comes from a remote API: URL-encode it for the query string
    # and HTML-escape it for the visible label so it cannot break the markup.
    query = quote_plus(author.name)
    label = escape(author.name)
    return (
        "<a href='https://scholar.google.com/citations?view_op=search_authors"
        f"&mauthors={query}' target='_blank'>{label}</a>"
    )
|
18 |
+
|
19 |
+
|
20 |
+
def format_paper_info(article):
    """Generate the HTML fragment shown in the paper detail panel.

    Sections, in order: title, link buttons (Hugging Face / arXiv / PDF),
    optional thumbnail, paper id and publication time, authors, optional
    summary, upvote/comment counts with an optional discussion link, and
    optional submitter details.

    Args:
        article: Article object; ``article.paper`` may be missing.

    Returns:
        An HTML string, or ``"Paper information missing"`` when the article
        has no paper attached.
    """
    from html import escape

    paper = article.paper
    if not paper:
        return "Paper information missing"

    # Everything below originates from a remote API, so text is escaped
    # before being placed into markup.
    paper_id = escape(str(paper.id)) if paper.id else ""
    icon = assets.SVG_LINK

    info = [
        f"<h2 class='sf-paper-title'>{escape(article.title or 'Untitled Paper')}</h2>",
        "<p style='text-align: center'>",
        f"<a href='https://huggingface.co/papers/{paper_id}' target='_blank' class='sf-button'>Hugging Face{icon}</a>",
        f"<a href='https://arxiv.org/abs/{paper_id}' target='_blank' class='sf-button'>arXiv{icon}</a>",
        f"<a href='https://arxiv.org/pdf/{paper_id}' target='_blank' class='sf-button'>PDF{icon}</a>",
        "</p>",
    ]

    # Thumbnail
    if article.thumbnail:
        info.append(
            f"<p><img src='{escape(article.thumbnail)}' style='max-width: 30em; width: 100%; margin: auto'/></p>")

    # Basic information
    published = paper.publishedAt.strftime('%Y-%m-%d %H:%M') if paper.publishedAt else 'Unknown'
    info.append(
        f"<p><strong>Paper ID</strong>: <a href='https://huggingface.co/papers/{paper_id}' target='_blank'>{paper_id or 'Unknown'}</a></p>")
    info.append(f"<p><strong>Published At</strong>: {published}</p>")

    # Author information
    if paper.authors:
        authors = ", ".join(format_author(a) for a in paper.authors)
    else:
        authors = "Author information not available"
    info.append(f"<p><strong>Authors</strong>: {authors}</p>")

    # Summary
    if paper.summary:
        summary = paper.summary.replace('{{', '{').replace('}}', '}').replace('\n', ' ')
        info.append(f"<h3>Summary</h3><p>{escape(summary)}</p>")

    # Discussion information
    info.append(f"<p><strong>Upvotes</strong>: {paper.upvotes or 0}<span style='margin-left: .5rem'></span>")
    info.append(f"<strong>Comments</strong>: {article.numComments or 0}</p>")
    if paper.discussionId:
        # The link gets its own paragraph; previously it was followed by a
        # closing </p> that had no matching opening tag.
        info.append(
            f"<p><a href='https://huggingface.co/papers/{paper_id}#community' target='_blank'>Join Discussion</a></p>")

    # Submitter information
    submitter = article.submittedBy
    if submitter:
        info.append("<hr><p><strong>Submitter</strong>: ")
        avatar_url = submitter.avatarUrl or ""
        # Relative avatar paths are resolved against the Hugging Face host.
        if avatar_url and not avatar_url.startswith("http"):
            avatar_url = f"https://huggingface.co{avatar_url}"
        avatar_html = (
            f"<span><img src='{escape(avatar_url)}' class='sf-author' /></span>" if avatar_url else ""
        )
        handle = escape(str(submitter.name))
        profile_url = f"https://huggingface.co/{handle}"
        info.append(
            f"{avatar_html}{escape(str(submitter.fullname))}(<a href='{profile_url}' target='_blank'>@{handle}</a>) ")
        info.append(f"Followers: {submitter.followerCount or 0}</p>")

    return "".join(info)
|
71 |
+
|
72 |
+
|
73 |
+
def generate_table_html(papers):
    """Generate the results table: clickable title, counts, date, arXiv link.

    Clicking a title calls the page's ``showDetail(paperId)`` JavaScript
    function. Articles without an attached paper are skipped, matching how
    the detail view treats them as missing.

    Args:
        papers: Iterable of article objects.

    Returns:
        A short usage hint followed by the ``<table>`` markup.
    """
    import json
    from html import escape

    html = ['<table class="paper-table"><thead><tr>'
            '<th>Title</th>'
            '<th>👍 Upvotes</th>'
            '<th>💬 Comments</th>'
            '<th>📅 Date</th>'
            '<th>arXiv</th>'
            '</tr></thead><tbody>']

    for article in papers:
        paper = article.paper
        if not paper:
            continue

        title = escape(article.title or "Untitled")
        upvotes = paper.upvotes or 0
        comments = article.numComments or 0
        published = paper.publishedAt.strftime("%Y-%m-%d") if paper.publishedAt else "Unknown"
        paper_id = str(paper.id)

        # JSON-encode the id as a JS string literal, then HTML-escape it for
        # the attribute, so neither layer can be broken out of.
        js_arg = escape(json.dumps(paper_id))
        arxiv_abs_link = f"https://arxiv.org/abs/{escape(paper_id)}"

        row = f"""
            <tr>
                <td><a class="paper-title" href="javascript:void(0)" onclick="showDetail({js_arg})">{title}</a></td>
                <td>{upvotes}</td>
                <td>{comments}</td>
                <td>{published}</td>
                <td><a href="{arxiv_abs_link}" target="_blank" class="sf-button">arXiv{assets.SVG_LINK}</a></td>
            </tr>
        """
        html.append(row)

    html.append("</tbody></table>")
    return "Click the paper title to view the details of the paper." + "".join(html)
|
106 |
+
|
107 |
+
|
108 |
+
def build_html(papers):
    """Render every paper's detail view into one HTML string.

    Each paper is wrapped in an initially hidden ``<div>`` whose id is
    ``smartflow-paper-`` followed by the paper id with dots replaced by
    dashes. Articles without an attached paper are skipped because they
    have no id to build the wrapper from.

    Args:
        papers: Iterable of article objects.

    Returns:
        The concatenated ``<div>`` blocks (empty string for no papers).
    """
    blocks = []
    for article in papers:
        if not article.paper:
            continue
        article_html = format_paper_info(article)
        element_id = article.paper.id.replace('.', '-')
        blocks.append(
            f"<div id='smartflow-paper-{element_id}' style='display: none'>{article_html}</div>")
    # Join once instead of growing a string inside the loop.
    return "".join(blocks)
|
115 |
+
|
116 |
+
|
117 |
+
def query_papers(start_date_str: str, end_date_str: str, sort_method: str):
    """Handle a date-range query and render the two result panels.

    Args:
        start_date_str: First day of the range, ``YYYY-MM-DD``.
        end_date_str: Last day of the range, ``YYYY-MM-DD``.
        sort_method: One of the "Sort by ... ascending/descending" labels
            offered by the UI; any other value leaves the fetched order.

    Returns:
        ``(table_html, detail_html)``. On failure both entries carry the
        same warning paragraph: a date-format hint when the dates cannot be
        parsed, a generic failure notice for anything else.
    """
    # Label -> (sort function, reverse flag).
    sorters = {
        "Sort by upvotes ascending": (sort_by_upvotes, False),
        "Sort by upvotes descending": (sort_by_upvotes, True),
        "Sort by comments ascending": (sort_by_comments, False),
        "Sort by comments descending": (sort_by_comments, True),
        "Sort by date ascending": (sort_by_date, False),
        "Sort by date descending": (sort_by_date, True),
    }

    # Only parsing problems are the user's date format; keep them apart
    # from fetch/render failures so the message is not misleading.
    try:
        start_date = Date(start_date_str)
        end_date = Date(end_date_str)
    except (ValueError, TypeError) as e:
        print(f"Query error: {e}")
        message = "<p>⚠️ Query failed, please check the date format (YYYY-MM-DD)</p>"
        return message, message

    try:
        papers = fetch_papers_with_daterange(start_date, end_date)

        chosen = sorters.get(sort_method)
        if chosen is not None:
            sorter, reverse = chosen
            papers = sorter(papers, reverse=reverse)

        return generate_table_html(papers), build_html(papers)
    except Exception as e:
        # UI boundary: report and show a notice instead of crashing the app.
        print(f"Query error: {e}")
        message = "<p>⚠️ Query failed, please try again later</p>"
        return message, message
|
142 |
+
|
143 |
+
|
144 |
+
def show_detail(paper_id, papers):
|
145 |
+
"""Show paper details"""
|
146 |
+
if not papers:
|
147 |
+
return "Please perform a query first"
|
148 |
+
|
149 |
+
return build_html(papers)
|
150 |
+
|
151 |
+
|
152 |
+
# CSS 样式(可放入单独文件)
|
153 |
+
custom_css = """
|
154 |
+
.detail-area { margin-top: 20px; padding: 20px; border: 1px solid #ddd; border-radius: 5px; }
|
155 |
+
.sf-paper-title { text-align: center; }
|
156 |
+
|
157 |
+
img.sf-author {
|
158 |
+
height: 1.3rem;
|
159 |
+
border: 1px solid #000;
|
160 |
+
vertical-align: middle;
|
161 |
+
border-radius: 50%;
|
162 |
+
display: inline;
|
163 |
+
margin: 0 0.1rem;
|
164 |
+
}
|
165 |
+
|
166 |
+
#paper-detail-area { display: none; }
|
167 |
+
#query-results-html { min-height: 100px; }
|
168 |
+
"""
|
169 |
+
|
170 |
+
# 遍历./css文件夹下的所有文件,将文件内容作为CSS样式添加到页面中
|
171 |
+
for css_file in Path("./css").glob("*.css"):
|
172 |
+
with open(css_file, "r") as f:
|
173 |
+
custom_css += "\n" + f.read() + "\n"
|
174 |
+
|
175 |
+
custom_js = """
|
176 |
+
function showDetail(paperId) {
|
177 |
+
// 隐藏 smartflow-paper-paperId 的所有兄弟节点
|
178 |
+
var siblings = document.querySelectorAll(`div[id^='smartflow-paper-']:not(#smartflow-paper-${paperId.replace('.', '-')})`);
|
179 |
+
siblings.forEach(sibling => sibling.style.display = 'none');
|
180 |
+
|
181 |
+
// 显示当前节点
|
182 |
+
var paper = document.getElementById(`smartflow-paper-${paperId.replace('.', '-')}`);
|
183 |
+
if (paper) {
|
184 |
+
paper.style.display = 'block';
|
185 |
+
}
|
186 |
+
|
187 |
+
document.getElementById("paper-detail-area").style.display = "block";
|
188 |
+
document.getElementById("paper-detail-area").scrollIntoView({
|
189 |
+
behavior: 'smooth'
|
190 |
+
});
|
191 |
+
}
|
192 |
+
"""
|
193 |
+
|
194 |
+
|
195 |
+
def create_interface():
|
196 |
+
"""Create a new interface layout"""
|
197 |
+
with gr.Blocks(title="Hugging Face Daily Paper", css=custom_css, head=f"<script>{custom_js}</script>") as app:
|
198 |
+
|
199 |
+
# Main interface
|
200 |
+
gr.Markdown("# 📚 Hugging Face Daily Paper")
|
201 |
+
|
202 |
+
# Query control area
|
203 |
+
with gr.Row():
|
204 |
+
with gr.Column():
|
205 |
+
with gr.Row():
|
206 |
+
start_date = gr.Textbox(label="Start Date", placeholder="YYYY-MM-DD", value=str(Date() + (-1)))
|
207 |
+
end_date = gr.Textbox(label="End Date", placeholder="YYYY-MM-DD", value=str(Date() + (-1)))
|
208 |
+
with gr.Column():
|
209 |
+
with gr.Row():
|
210 |
+
today_btn = gr.Button("Today")
|
211 |
+
last_week_btn = gr.Button("Last Week")
|
212 |
+
last_month_btn = gr.Button("Last Month")
|
213 |
+
|
214 |
+
query_btn = gr.Button("🔍 Query", variant="primary")
|
215 |
+
|
216 |
+
with gr.Row():
|
217 |
+
|
218 |
+
# Add sorting method selection
|
219 |
+
sort_method = gr.Radio(
|
220 |
+
label="Sort Method",
|
221 |
+
choices=[
|
222 |
+
"Sort by upvotes descending",
|
223 |
+
"Sort by comments descending",
|
224 |
+
"Sort by date descending",
|
225 |
+
"Sort by upvotes ascending",
|
226 |
+
"Sort by comments ascending",
|
227 |
+
"Sort by date ascending",
|
228 |
+
],
|
229 |
+
value="Sort by upvotes descending",
|
230 |
+
)
|
231 |
+
|
232 |
+
# Results display area
|
233 |
+
with gr.Column(visible=True):
|
234 |
+
results_html = gr.HTML(label="Query Results", elem_id="query-results-html")
|
235 |
+
|
236 |
+
# Paper details area
|
237 |
+
with gr.Column(visible=True, elem_classes="detail-area", elem_id="paper-detail-area"):
|
238 |
+
gr.Markdown("## Paper Details")
|
239 |
+
detail_html = gr.HTML(elem_id="detail-html")
|
240 |
+
|
241 |
+
# Event handling
|
242 |
+
query_btn.click(
|
243 |
+
fn=query_papers,
|
244 |
+
inputs=[start_date, end_date, sort_method],
|
245 |
+
outputs=[results_html, detail_html]
|
246 |
+
)
|
247 |
+
|
248 |
+
sort_method.change(
|
249 |
+
fn=query_papers,
|
250 |
+
inputs=[start_date, end_date, sort_method],
|
251 |
+
outputs=[results_html, detail_html]
|
252 |
+
)
|
253 |
+
|
254 |
+
# Add button event handling
|
255 |
+
today_btn.click(
|
256 |
+
fn=lambda: (str(Date()), str(Date())),
|
257 |
+
outputs=[start_date, end_date]
|
258 |
+
).then(
|
259 |
+
fn=query_papers,
|
260 |
+
inputs=[start_date, end_date, sort_method],
|
261 |
+
outputs=[results_html, detail_html]
|
262 |
+
)
|
263 |
+
|
264 |
+
last_week_btn.click(
|
265 |
+
fn=lambda: (str(Date() + (-7)), str(Date())),
|
266 |
+
outputs=[start_date, end_date]
|
267 |
+
).then(
|
268 |
+
fn=query_papers,
|
269 |
+
inputs=[start_date, end_date, sort_method],
|
270 |
+
outputs=[results_html, detail_html]
|
271 |
+
)
|
272 |
+
|
273 |
+
last_month_btn.click(
|
274 |
+
fn=lambda: (str(Date() + (-30)), str(Date())),
|
275 |
+
outputs=[start_date, end_date]
|
276 |
+
).then(
|
277 |
+
fn=query_papers,
|
278 |
+
inputs=[start_date, end_date, sort_method],
|
279 |
+
outputs=[results_html, detail_html]
|
280 |
+
)
|
281 |
+
|
282 |
+
return app
|
sorter.py
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
from parser import Article
|
2 |
|
3 |
|
4 |
-
def sort_by_date(articles):
|
5 |
-
return sorted(articles, key=lambda x: x.publishedAt, reverse=
|
6 |
|
7 |
|
8 |
-
def sort_by_upvotes(articles):
|
9 |
-
return sorted(articles, key=lambda x: x.paper.upvotes, reverse=
|
10 |
|
11 |
|
12 |
-
def sort_by_comments(articles):
|
13 |
-
return sorted(articles, key=lambda x: x.numComments, reverse=
|
14 |
|
15 |
|
16 |
if __name__ == "__main__":
|
|
|
1 |
from parser import Article
|
2 |
|
3 |
|
4 |
+
def sort_by_date(articles, reverse=True):
    """Return the articles ordered by ``publishedAt``.

    Args:
        articles: Iterable of objects with a ``publishedAt`` attribute.
        reverse: Newest first when true (default), oldest first otherwise.

    Returns:
        A new list. Articles whose ``publishedAt`` is ``None`` cannot be
        compared with real timestamps, so they are placed at the end in
        their original order regardless of ``reverse``.
    """
    dated = []
    undated = []
    for article in articles:
        (undated if article.publishedAt is None else dated).append(article)
    dated.sort(key=lambda article: article.publishedAt, reverse=reverse)
    return dated + undated
|
6 |
|
7 |
|
8 |
+
def sort_by_upvotes(articles, reverse=True):
    """Return the articles ordered by ``paper.upvotes``.

    Args:
        articles: Iterable of objects with a ``paper.upvotes`` attribute.
        reverse: Most upvoted first when true (default).

    Returns:
        A new list. A missing (``None``) upvote count is treated as 0,
        matching how the count is displayed elsewhere.
    """
    return sorted(articles, key=lambda article: article.paper.upvotes or 0, reverse=reverse)
|
10 |
|
11 |
|
12 |
+
def sort_by_comments(articles, reverse=True):
    """Return the articles ordered by ``numComments``.

    Args:
        articles: Iterable of objects with a ``numComments`` attribute.
        reverse: Most commented first when true (default).

    Returns:
        A new list. A missing (``None``) comment count is treated as 0,
        matching how the count is displayed elsewhere.
    """
    return sorted(articles, key=lambda article: article.numComments or 0, reverse=reverse)
|
14 |
|
15 |
|
16 |
if __name__ == "__main__":
|