Luigi committed on
Commit
32fe845
·
1 Parent(s): e5da3db

show class names

Browse files
Files changed (2) hide show
  1. app.py +44 -11
  2. kinetics-400-class-names.csv +401 -0
app.py CHANGED
@@ -7,12 +7,39 @@ import numpy as np
7
  import matplotlib.pyplot as plt
8
  from io import BytesIO
9
  from PIL import Image
10
-
11
  from transformers import AutoFeatureExtractor, AutoModelForVideoClassification
12
 
13
  # Specify the model checkpoint for TimeSformer.
14
  MODEL_NAME = "facebook/timesformer-base-finetuned-k400"
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def extract_frames(video_path, num_frames=16, target_size=(224, 224)):
17
  """
18
  Extract up to `num_frames` uniformly-sampled frames from the video.
@@ -43,14 +70,23 @@ def classify_video(video_path):
43
  """
44
  Loads the TimeSformer model and feature extractor inside the GPU context,
45
  extracts frames from the video, runs inference, and returns:
46
- 1. A text string of the top 5 predicted action labels with their class IDs and probabilities.
47
- 2. A bar chart image showing the distribution over the top predictions.
 
48
  """
49
- # Load the feature extractor and model inside the GPU context.
50
  feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
51
  model = AutoModelForVideoClassification.from_pretrained(MODEL_NAME)
52
  model.eval()
53
 
 
 
 
 
 
 
 
 
54
  # Determine the device.
55
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
56
  model.to(device)
@@ -77,14 +113,11 @@ def classify_video(video_path):
77
  top_probs = top_probs.cpu().numpy()
78
  top_indices = top_indices.cpu().numpy()
79
 
80
- # Retrieve the label mapping from model config.
81
- id2label = model.config.id2label if hasattr(model.config, "id2label") else {}
82
-
83
- # Prepare textual results showing both ID and label.
84
  results = []
85
  x_labels = []
86
  for idx, prob in zip(top_indices, top_probs):
87
- label = id2label.get(str(idx), f"Class {idx}")
88
  results.append(f"ID {idx} - {label}: {prob:.3f}")
89
  x_labels.append(f"ID {idx}\n{label}")
90
  results_text = "\n".join(results)
@@ -124,8 +157,8 @@ demo = gr.Interface(
124
  title="Video Human Detection Demo using TimeSformer",
125
  description=(
126
  "Upload a video clip to see the top predicted human action labels using the TimeSformer model "
127
- "(fine-tuned on Kinetics-400). The output shows each prediction (with class ID and label) and a bar chart "
128
- "of the distribution over the top 5 predictions."
129
  )
130
  )
131
 
 
7
  import matplotlib.pyplot as plt
8
  from io import BytesIO
9
  from PIL import Image
10
+ import csv
11
  from transformers import AutoFeatureExtractor, AutoModelForVideoClassification
12
 
13
  # Specify the model checkpoint for TimeSformer.
14
  MODEL_NAME = "facebook/timesformer-base-finetuned-k400"
15
 
16
def load_kinetics_labels(csv_path="kinetics-400-class-names.csv"):
    """
    Load the Kinetics-400 class-name mapping from a CSV file.

    Expected CSV format (the header row is optional):
        id,name
        0,abseiling
        1,air drumming
        ...
        399,zumba

    Blank lines are ignored. The first non-blank row is treated as a header
    and skipped when its first cell contains "id" (case-insensitive). Rows
    with fewer than two columns are ignored. IDs and names are stripped of
    surrounding whitespace.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        A dictionary mapping string IDs to label names. On a read or parse
        error a message is printed and whatever was parsed so far (possibly
        an empty dict) is returned; the function is best-effort and does not
        raise for missing or malformed files.
    """
    labels = {}
    try:
        # "utf-8-sig" transparently drops a leading BOM (otherwise the first
        # key of a headerless file would become "\ufeff0"); newline="" is what
        # the csv module requires so quoted fields with newlines parse right.
        with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
            header_checked = False
            for row in csv.reader(f):
                if not row:
                    # Blank line: csv.reader yields an empty list.
                    continue
                if not header_checked:
                    header_checked = True
                    # Skip header if present.
                    if "id" in row[0].lower():
                        continue
                if len(row) >= 2:
                    labels[row[0].strip()] = row[1].strip()
    except (OSError, ValueError, csv.Error) as e:
        # ValueError also covers UnicodeDecodeError for badly encoded files.
        print("Error reading CSV mapping:", e)
    return labels
42
+
43
  def extract_frames(video_path, num_frames=16, target_size=(224, 224)):
44
  """
45
  Extract up to `num_frames` uniformly-sampled frames from the video.
 
70
  """
71
  Loads the TimeSformer model and feature extractor inside the GPU context,
72
  extracts frames from the video, runs inference, and returns:
73
+ 1. A text string of the top 5 predicted actions (with class ID and descriptive label)
74
+ along with their probabilities.
75
+ 2. A bar chart (as a PIL Image) showing the prediction distribution.
76
  """
77
+ # Load the feature extractor and model.
78
  feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
79
  model = AutoModelForVideoClassification.from_pretrained(MODEL_NAME)
80
  model.eval()
81
 
82
+ # Load the complete Kinetics-400 mapping from CSV.
83
+ kinetics_id2label = load_kinetics_labels("kinetics-400-class-names.csv")
84
+ if kinetics_id2label:
85
+ print("Loaded complete Kinetics-400 mapping from CSV.")
86
+ else:
87
+ print("Warning: Could not load Kinetics-400 mapping; using default labels.")
88
+ model.config.id2label = kinetics_id2label if kinetics_id2label else model.config.id2label
89
+
90
  # Determine the device.
91
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
92
  model.to(device)
 
113
  top_probs = top_probs.cpu().numpy()
114
  top_indices = top_indices.cpu().numpy()
115
 
116
+ # Prepare textual results including both ID and label.
 
 
 
117
  results = []
118
  x_labels = []
119
  for idx, prob in zip(top_indices, top_probs):
120
+ label = kinetics_id2label.get(str(idx), f"Class {idx}")
121
  results.append(f"ID {idx} - {label}: {prob:.3f}")
122
  x_labels.append(f"ID {idx}\n{label}")
123
  results_text = "\n".join(results)
 
157
  title="Video Human Detection Demo using TimeSformer",
158
  description=(
159
  "Upload a video clip to see the top predicted human action labels using the TimeSformer model "
160
+ "(fine-tuned on Kinetics-400). The output displays each prediction's class ID and label, along with "
161
+ "a bar chart distribution of the top 5 predictions. A complete Kinetics-400 mapping is loaded from a CSV file."
162
  )
163
  )
164
 
kinetics-400-class-names.csv ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id,name
2
+ 0,abseiling
3
+ 1,air drumming
4
+ 2,answering questions
5
+ 3,applauding
6
+ 4,applying cream
7
+ 5,archery
8
+ 6,arm wrestling
9
+ 7,arranging flowers
10
+ 8,assembling computer
11
+ 9,auctioning
12
+ 10,baby waking up
13
+ 11,baking cookies
14
+ 12,balloon blowing
15
+ 13,bandaging
16
+ 14,barbequing
17
+ 15,bartending
18
+ 16,beatboxing
19
+ 17,bee keeping
20
+ 18,belly dancing
21
+ 19,bench pressing
22
+ 20,bending back
23
+ 21,bending metal
24
+ 22,biking through snow
25
+ 23,blasting sand
26
+ 24,blowing glass
27
+ 25,blowing leaves
28
+ 26,blowing nose
29
+ 27,blowing out candles
30
+ 28,bobsledding
31
+ 29,bookbinding
32
+ 30,bouncing on trampoline
33
+ 31,bowling
34
+ 32,braiding hair
35
+ 33,breading or breadcrumbing
36
+ 34,breakdancing
37
+ 35,brush painting
38
+ 36,brushing hair
39
+ 37,brushing teeth
40
+ 38,building cabinet
41
+ 39,building shed
42
+ 40,bungee jumping
43
+ 41,busking
44
+ 42,canoeing or kayaking
45
+ 43,capoeira
46
+ 44,carrying baby
47
+ 45,cartwheeling
48
+ 46,carving pumpkin
49
+ 47,catching fish
50
+ 48,catching or throwing baseball
51
+ 49,catching or throwing frisbee
52
+ 50,catching or throwing softball
53
+ 51,celebrating
54
+ 52,changing oil
55
+ 53,changing wheel
56
+ 54,checking tires
57
+ 55,cheerleading
58
+ 56,chopping wood
59
+ 57,clapping
60
+ 58,clay pottery making
61
+ 59,clean and jerk
62
+ 60,cleaning floor
63
+ 61,cleaning gutters
64
+ 62,cleaning pool
65
+ 63,cleaning shoes
66
+ 64,cleaning toilet
67
+ 65,cleaning windows
68
+ 66,climbing a rope
69
+ 67,climbing ladder
70
+ 68,climbing tree
71
+ 69,contact juggling
72
+ 70,cooking chicken
73
+ 71,cooking egg
74
+ 72,cooking on campfire
75
+ 73,cooking sausages
76
+ 74,counting money
77
+ 75,country line dancing
78
+ 76,cracking neck
79
+ 77,crawling baby
80
+ 78,crossing river
81
+ 79,crying
82
+ 80,curling hair
83
+ 81,cutting nails
84
+ 82,cutting pineapple
85
+ 83,cutting watermelon
86
+ 84,dancing ballet
87
+ 85,dancing charleston
88
+ 86,dancing gangnam style
89
+ 87,dancing macarena
90
+ 88,deadlifting
91
+ 89,decorating the christmas tree
92
+ 90,digging
93
+ 91,dining
94
+ 92,disc golfing
95
+ 93,diving cliff
96
+ 94,dodgeball
97
+ 95,doing aerobics
98
+ 96,doing laundry
99
+ 97,doing nails
100
+ 98,drawing
101
+ 99,dribbling basketball
102
+ 100,drinking
103
+ 101,drinking beer
104
+ 102,drinking shots
105
+ 103,driving car
106
+ 104,driving tractor
107
+ 105,drop kicking
108
+ 106,drumming fingers
109
+ 107,dunking basketball
110
+ 108,dying hair
111
+ 109,eating burger
112
+ 110,eating cake
113
+ 111,eating carrots
114
+ 112,eating chips
115
+ 113,eating doughnuts
116
+ 114,eating hotdog
117
+ 115,eating ice cream
118
+ 116,eating spaghetti
119
+ 117,eating watermelon
120
+ 118,egg hunting
121
+ 119,exercising arm
122
+ 120,exercising with an exercise ball
123
+ 121,extinguishing fire
124
+ 122,faceplanting
125
+ 123,feeding birds
126
+ 124,feeding fish
127
+ 125,feeding goats
128
+ 126,filling eyebrows
129
+ 127,finger snapping
130
+ 128,fixing hair
131
+ 129,flipping pancake
132
+ 130,flying kite
133
+ 131,folding clothes
134
+ 132,folding napkins
135
+ 133,folding paper
136
+ 134,front raises
137
+ 135,frying vegetables
138
+ 136,garbage collecting
139
+ 137,gargling
140
+ 138,getting a haircut
141
+ 139,getting a tattoo
142
+ 140,giving or receiving award
143
+ 141,golf chipping
144
+ 142,golf driving
145
+ 143,golf putting
146
+ 144,grinding meat
147
+ 145,grooming dog
148
+ 146,grooming horse
149
+ 147,gymnastics tumbling
150
+ 148,hammer throw
151
+ 149,headbanging
152
+ 150,headbutting
153
+ 151,high jump
154
+ 152,high kick
155
+ 153,hitting baseball
156
+ 154,hockey stop
157
+ 155,holding snake
158
+ 156,hopscotch
159
+ 157,hoverboarding
160
+ 158,hugging
161
+ 159,hula hooping
162
+ 160,hurdling
163
+ 161,hurling (sport)
164
+ 162,ice climbing
165
+ 163,ice fishing
166
+ 164,ice skating
167
+ 165,ironing
168
+ 166,javelin throw
169
+ 167,jetskiing
170
+ 168,jogging
171
+ 169,juggling balls
172
+ 170,juggling fire
173
+ 171,juggling soccer ball
174
+ 172,jumping into pool
175
+ 173,jumpstyle dancing
176
+ 174,kicking field goal
177
+ 175,kicking soccer ball
178
+ 176,kissing
179
+ 177,kitesurfing
180
+ 178,knitting
181
+ 179,krumping
182
+ 180,laughing
183
+ 181,laying bricks
184
+ 182,long jump
185
+ 183,lunge
186
+ 184,making a cake
187
+ 185,making a sandwich
188
+ 186,making bed
189
+ 187,making jewelry
190
+ 188,making pizza
191
+ 189,making snowman
192
+ 190,making sushi
193
+ 191,making tea
194
+ 192,marching
195
+ 193,massaging back
196
+ 194,massaging feet
197
+ 195,massaging legs
198
+ 196,massaging person's head
199
+ 197,milking cow
200
+ 198,mopping floor
201
+ 199,motorcycling
202
+ 200,moving furniture
203
+ 201,mowing lawn
204
+ 202,news anchoring
205
+ 203,opening bottle
206
+ 204,opening present
207
+ 205,paragliding
208
+ 206,parasailing
209
+ 207,parkour
210
+ 208,passing American football (in game)
211
+ 209,passing American football (not in game)
212
+ 210,peeling apples
213
+ 211,peeling potatoes
214
+ 212,petting animal (not cat)
215
+ 213,petting cat
216
+ 214,picking fruit
217
+ 215,planting trees
218
+ 216,plastering
219
+ 217,playing accordion
220
+ 218,playing badminton
221
+ 219,playing bagpipes
222
+ 220,playing basketball
223
+ 221,playing bass guitar
224
+ 222,playing cards
225
+ 223,playing cello
226
+ 224,playing chess
227
+ 225,playing clarinet
228
+ 226,playing controller
229
+ 227,playing cricket
230
+ 228,playing cymbals
231
+ 229,playing didgeridoo
232
+ 230,playing drums
233
+ 231,playing flute
234
+ 232,playing guitar
235
+ 233,playing harmonica
236
+ 234,playing harp
237
+ 235,playing ice hockey
238
+ 236,playing keyboard
239
+ 237,playing kickball
240
+ 238,playing monopoly
241
+ 239,playing organ
242
+ 240,playing paintball
243
+ 241,playing piano
244
+ 242,playing poker
245
+ 243,playing recorder
246
+ 244,playing saxophone
247
+ 245,playing squash or racquetball
248
+ 246,playing tennis
249
+ 247,playing trombone
250
+ 248,playing trumpet
251
+ 249,playing ukulele
252
+ 250,playing violin
253
+ 251,playing volleyball
254
+ 252,playing xylophone
255
+ 253,pole vault
256
+ 254,presenting weather forecast
257
+ 255,pull ups
258
+ 256,pumping fist
259
+ 257,pumping gas
260
+ 258,punching bag
261
+ 259,punching person (boxing)
262
+ 260,push up
263
+ 261,pushing car
264
+ 262,pushing cart
265
+ 263,pushing wheelchair
266
+ 264,reading book
267
+ 265,reading newspaper
268
+ 266,recording music
269
+ 267,riding a bike
270
+ 268,riding camel
271
+ 269,riding elephant
272
+ 270,riding mechanical bull
273
+ 271,riding mountain bike
274
+ 272,riding mule
275
+ 273,riding or walking with horse
276
+ 274,riding scooter
277
+ 275,riding unicycle
278
+ 276,ripping paper
279
+ 277,robot dancing
280
+ 278,rock climbing
281
+ 279,rock scissors paper
282
+ 280,roller skating
283
+ 281,running on treadmill
284
+ 282,sailing
285
+ 283,salsa dancing
286
+ 284,sanding floor
287
+ 285,scrambling eggs
288
+ 286,scuba diving
289
+ 287,setting table
290
+ 288,shaking hands
291
+ 289,shaking head
292
+ 290,sharpening knives
293
+ 291,sharpening pencil
294
+ 292,shaving head
295
+ 293,shaving legs
296
+ 294,shearing sheep
297
+ 295,shining shoes
298
+ 296,shooting basketball
299
+ 297,shooting goal (soccer)
300
+ 298,shot put
301
+ 299,shoveling snow
302
+ 300,shredding paper
303
+ 301,shuffling cards
304
+ 302,side kick
305
+ 303,sign language interpreting
306
+ 304,singing
307
+ 305,situp
308
+ 306,skateboarding
309
+ 307,ski jumping
310
+ 308,skiing (not slalom or crosscountry)
311
+ 309,skiing crosscountry
312
+ 310,skiing slalom
313
+ 311,skipping rope
314
+ 312,skydiving
315
+ 313,slacklining
316
+ 314,slapping
317
+ 315,sled dog racing
318
+ 316,smoking
319
+ 317,smoking hookah
320
+ 318,snatch weight lifting
321
+ 319,sneezing
322
+ 320,sniffing
323
+ 321,snorkeling
324
+ 322,snowboarding
325
+ 323,snowkiting
326
+ 324,snowmobiling
327
+ 325,somersaulting
328
+ 326,spinning poi
329
+ 327,spray painting
330
+ 328,spraying
331
+ 329,springboard diving
332
+ 330,squat
333
+ 331,sticking tongue out
334
+ 332,stomping grapes
335
+ 333,stretching arm
336
+ 334,stretching leg
337
+ 335,strumming guitar
338
+ 336,surfing crowd
339
+ 337,surfing water
340
+ 338,sweeping floor
341
+ 339,swimming backstroke
342
+ 340,swimming breast stroke
343
+ 341,swimming butterfly stroke
344
+ 342,swing dancing
345
+ 343,swinging legs
346
+ 344,swinging on something
347
+ 345,sword fighting
348
+ 346,tai chi
349
+ 347,taking a shower
350
+ 348,tango dancing
351
+ 349,tap dancing
352
+ 350,tapping guitar
353
+ 351,tapping pen
354
+ 352,tasting beer
355
+ 353,tasting food
356
+ 354,testifying
357
+ 355,texting
358
+ 356,throwing axe
359
+ 357,throwing ball
360
+ 358,throwing discus
361
+ 359,tickling
362
+ 360,tobogganing
363
+ 361,tossing coin
364
+ 362,tossing salad
365
+ 363,training dog
366
+ 364,trapezing
367
+ 365,trimming or shaving beard
368
+ 366,trimming trees
369
+ 367,triple jump
370
+ 368,tying bow tie
371
+ 369,tying knot (not on a tie)
372
+ 370,tying tie
373
+ 371,unboxing
374
+ 372,unloading truck
375
+ 373,using computer
376
+ 374,using remote controller (not gaming)
377
+ 375,using segway
378
+ 376,vault
379
+ 377,waiting in line
380
+ 378,walking the dog
381
+ 379,washing dishes
382
+ 380,washing feet
383
+ 381,washing hair
384
+ 382,washing hands
385
+ 383,water skiing
386
+ 384,water sliding
387
+ 385,watering plants
388
+ 386,waxing back
389
+ 387,waxing chest
390
+ 388,waxing eyebrows
391
+ 389,waxing legs
392
+ 390,weaving basket
393
+ 391,welding
394
+ 392,whistling
395
+ 393,windsurfing
396
+ 394,wrapping present
397
+ 395,wrestling
398
+ 396,writing
399
+ 397,yawning
400
+ 398,yoga
401
+ 399,zumba