# Wrap the encoder in DistributedDataParallel exactly once and expose that
# single wrapper under both names. Previously the same underlying module was
# wrapped twice with identical arguments, producing two independent DDP
# wrappers over one set of parameters.
# NOTE(review): if any code outside this view relied on `model_encoder_ddp`
# and `model.encoder` being *distinct* wrapper objects, confirm before merging.
model_encoder_ddp = DistributedDataParallel(
    model.encoder,
    device_ids=[rank],
    output_device=rank,
    # Set to True because the ViT is not trained, so some parameters may not
    # take part in the backward pass.
    find_unused_parameters=True,
)
# Route calls made through `model` via the same DDP wrapper.
model.encoder = model_encoder_ddp