ntized_states) >>> cosine_sim = torch.cosine_similarity(outputs.projected_states, outputs.projected_quantized_states, dim=-1) >>> # show that cosine similarity is much higher than random >>> cosine_sim[mask_time_indices.to(torch.bool)].mean() > 0.5 tensor(True) >>> # for contrastive loss training model should be put into train mode >>> model = model.train() >>> loss = model( ... input_values, mask_time_indices=mask_time_indices, sampled_negative_indices=sampled_negative_indices ... ).loss ```N)