nsors="pt").input_values # Batch size 1 >>> # compute masked indices >>> batch_size, raw_sequence_length = input_values.shape >>> sequence_length = model._get_feat_extract_output_lengths(raw_sequence_length).item() >>> mask_time_indices = _compute_mask_indices( ... shape=(batch_size, sequence_length), mask_prob=0.2, mask_length=2 ... ) >>> sampled_negative_indices = _sample_negative_indices( ... features_shape=(batch_size, sequence_length), ... num_negatives=model.config.num_negatives, ... mask_time_indices=mask_time_indices, ... ) >>> mask_time_indices = torch.tensor(data=mask_time_indices, device=input_values.device, dtype=torch.long) >>> sampled_negative_indices = torch.tensor( ... data=sampled_negative_indices, device=input_values.device, dtype=torch.long ... ) >>> with torch.no_grad(): ... outputs = model(input_values, mask_time_indices=mask_time_indices) >>> # compute cosine similarity between predicted (=projected_states) and target (=projected_quantized_states) >>> cosine_sim = torch.cosine_similarity(outputs.projected_states, outputs.projected_quantized_states, dim=-1) >>> # show that cosine similarity is much higher than random >>> cosine_sim[mask_time_indices.to(torch.bool)].mean() > 0.5 tensor(True) >>> # for contrastive loss training model should be put into train mode >>> model = model.train() >>> loss = model( ... input_values, mask_time_indices=mask_time_indices, sampled_negative_indices=sampled_negative_indices ... ).loss ```N)