-attention layers will be randomly initialized
>>> model = TFEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")

>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

>>> # forward
>>> input_ids = tokenizer.encode(
...     "Hello, my dog is cute", add_special_tokens=True, return_tensors="tf"
... )  # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)

>>> # training
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
>>> loss, logits = outputs.loss, outputs.logits

>>> # save and load from pretrained
>>> model.save_pretrained("bert2gpt2")
>>> model = TFEncoderDecoderModel.from_pretrained("bert2gpt2")

>>> # generation
>>> generated = model.generate(input_ids, decoder_start_token_id=model.config.decoder.bos_token_id)
```