requests.get(url, stream=True).raw) >>> text = "How many cats are in the picture?" >>> inputs = processor(images=image, text=text, return_tensors="tf") >>> outputs = model.generate(**inputs) >>> print(processor.decode(outputs[0], skip_special_tokens=True)) 2 ``` r