tf >>> from transformers import AutoTokenizer, RagRetriever, TFRagTokenForGeneration >>> tokenizer = AutoTokenizer.from_pretrained("facebook/rag-token-nq") >>> retriever = RagRetriever.from_pretrained( ... "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True ... ) >>> # initialize with RagRetriever to do everything in one forward call >>> model = TFRagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever, from_pt=True) >>> input_dict = tokenizer.prepare_seq2seq_batch( ... "How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf" ... ) >>> outputs = model(input_dict, output_retrieved=True) >>> # or use retriever separately >>> # 1. Encode >>> input_ids = input_dict["input_ids"] >>> question_hidden_states = model.question_encoder(input_ids)[0] >>> # 2. Retrieve >>> docs_dict = retriever(input_ids.numpy(), question_hidden_states.numpy(), return_tensors="tf") >>> doc_scores = tf.squeeze( ... tf.matmul( ... tf.expand_dims(question_hidden_states, axis=1), docs_dict["retrieved_doc_embeds"], transpose_b=True ... ), ... axis=1, ... ) >>> # 3. Forward to generator >>> outputs = model( ... inputs=None, ... context_input_ids=docs_dict["context_input_ids"], ... context_attention_mask=docs_dict["context_attention_mask"], ... doc_scores=doc_scores, ... decoder_input_ids=input_dict["labels"], ... ) >>> # or directly generate >>> generated = model.generate( ... context_input_ids=docs_dict["context_input_ids"], ... context_attention_mask=docs_dict["context_attention_mask"], ... doc_scores=doc_scores, ... ) >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True) ```r„