io"].sampling_rate >>> processor = AutoProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt") >>> with torch.no_grad(): ... outputs = model(**inputs) >>> last_hidden_states = outputs.last_hidden_state >>> list(last_hidden_states.shape) {expected_output} ``` a]