ics") >>> model = TimesformerForVideoClassification.from_pretrained("facebook/timesformer-base-finetuned-k400") >>> inputs = image_processor(list(video), return_tensors="pt") >>> with torch.no_grad(): ... outputs = model(**inputs) ... logits = outputs.logits >>> # model predicts one of the 400 Kinetics-400 classes >>> predicted_label = logits.argmax(-1).item() >>> print(model.config.id2label[predicted_label]) eating spaghetti ```NrĘ