k?" >>> encoding = processor(image, question, return_tensors="pt") >>> outputs = model(**encoding) >>> predicted_start_idx = outputs.start_logits.argmax(-1).item() >>> predicted_end_idx = outputs.end_logits.argmax(-1).item() >>> predicted_start_idx, predicted_end_idx (154, 287) >>> predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1] >>> predicted_answer = processor.tokenizer.decode(predicted_answer_tokens) >>> predicted_answer # results are not very good without further fine-tuning 'council mem - bers conducted by trrf treasurer philip g. kuehn to get answers which the public ... ``` ```python >>> target_start_index = torch.tensor([7]) >>> target_end_index = torch.tensor([14]) >>> outputs = model(**encoding, start_positions=target_start_index, end_positions=target_end_index) >>> predicted_answer_span_start = outputs.start_logits.argmax(-1).item() >>> predicted_answer_span_end = outputs.end_logits.argmax(-1).item() >>> predicted_answer_span_start, predicted_answer_span_end (154, 287) ``` NrE