Skip to content
Snippets Groups Projects
Commit 02f95633 authored by Schneider Leo's avatar Schneider Leo
Browse files

del raysesult

parent b844726e
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 137 deletions
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1284.8956039308564,1719330178,checkpoint_000000,True,False,1,25a9f43a,2024-06-25_17-42-59,79.39262270927429,79.39262270927429,2195253,r8i6n2,10.159.28.60,79.39262270927429,1
517.7363889198604,1719330947,checkpoint_000001,True,False,2,25a9f43a,2024-06-25_17-55-47,77.99471831321716,157.38734102249146,2195253,r8i6n2,10.159.28.60,77.99471831321716,1
424.9116310660295,1719331554,checkpoint_000002,True,False,3,25a9f43a,2024-06-25_18-05-54,94.57213068008423,251.95947170257568,2195253,r8i6n2,10.159.28.60,94.57213068008423,1
381.2108863169753,1719331634,checkpoint_000003,True,False,4,25a9f43a,2024-06-25_18-07-14,80.07035231590271,332.0298240184784,2195253,r8i6n2,10.159.28.60,174.64248299598694,2
{"loss": 1284.8956039308564, "timestamp": 1719330178, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "25a9f43a", "date": "2024-06-25_17-42-59", "time_this_iter_s": 79.39262270927429, "time_total_s": 79.39262270927429, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7474706763430264, "lr": 0.00045083297498032556, "batch_size": 1024}, "time_since_restore": 79.39262270927429, "iterations_since_restore": 1}
{"loss": 517.7363889198604, "timestamp": 1719330947, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "25a9f43a", "date": "2024-06-25_17-55-47", "time_this_iter_s": 77.99471831321716, "time_total_s": 157.38734102249146, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7474706763430264, "lr": 0.00045083297498032556, "batch_size": 1024}, "time_since_restore": 77.99471831321716, "iterations_since_restore": 1}
{"loss": 424.9116310660295, "timestamp": 1719331554, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "25a9f43a", "date": "2024-06-25_18-05-54", "time_this_iter_s": 94.57213068008423, "time_total_s": 251.95947170257568, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7474706763430264, "lr": 0.00045083297498032556, "batch_size": 1024}, "time_since_restore": 94.57213068008423, "iterations_since_restore": 1}
{"loss": 381.2108863169753, "timestamp": 1719331634, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "25a9f43a", "date": "2024-06-25_18-07-14", "time_this_iter_s": 80.07035231590271, "time_total_s": 332.0298240184784, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7474706763430264, "lr": 0.00045083297498032556, "batch_size": 1024}, "time_since_restore": 174.64248299598694, "iterations_since_restore": 2}
{
"batch_size": 2048,
"decoder_int_ff": 2048,
"decoder_int_num_layer": 8,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 1,
"drop_rate": 0.3256301932631026,
"embedding_dim": 1024,
"encoder_ff": 512,
"encoder_num_layer": 1,
"lr": 0.0018178629337167497,
"n_head": 16
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
5061.415592704232,1719326792,checkpoint_000000,True,False,1,25b42194,2024-06-25_16-46-32,194.36064219474792,194.36064219474792,122466,r3i5n6,10.159.8.159,194.36064219474792,1
{"loss": 5061.415592704232, "timestamp": 1719326792, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "25b42194", "date": "2024-06-25_16-46-32", "time_this_iter_s": 194.36064219474792, "time_total_s": 194.36064219474792, "pid": 122466, "hostname": "r3i5n6", "node_ip": "10.159.8.159", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 8, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 2048, "n_head": 16, "drop_rate": 0.3256301932631026, "lr": 0.0018178629337167497, "batch_size": 2048}, "time_since_restore": 194.36064219474792, "iterations_since_restore": 1}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 4,
"drop_rate": 0.2474625892184107,
"embedding_dim": 1024,
"encoder_ff": 1024,
"encoder_num_layer": 2,
"lr": 0.09815210368028619,
"n_head": 16
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
5146.327555979331,1719552388,checkpoint_000000,True,False,1,261349af,2024-06-28_07-26-29,641.6589081287384,641.6589081287384,242786,r8i6n8,10.159.28.66,641.6589081287384,1
5048.819774237205,1719553016,checkpoint_000001,True,False,2,261349af,2024-06-28_07-36-56,627.6112632751465,1269.270171403885,242786,r8i6n8,10.159.28.66,1269.270171403885,2
4911.544510488435,1719553644,checkpoint_000002,True,False,3,261349af,2024-06-28_07-47-24,627.9598400592804,1897.2300114631653,242786,r8i6n8,10.159.28.66,1897.2300114631653,3
4737.393370140256,1719554272,checkpoint_000003,True,False,4,261349af,2024-06-28_07-57-52,628.01415848732,2525.2441699504852,242786,r8i6n8,10.159.28.66,2525.2441699504852,4
4523.467796505905,1719554899,checkpoint_000004,True,False,5,261349af,2024-06-28_08-08-20,627.539436340332,3152.7836062908173,242786,r8i6n8,10.159.28.66,3152.7836062908173,5
4273.132577971211,1719555527,checkpoint_000005,True,False,6,261349af,2024-06-28_08-18-47,627.5760381221771,3780.3596444129944,242786,r8i6n8,10.159.28.66,3780.3596444129944,6
3991.0541396253693,1719556155,checkpoint_000006,True,False,7,261349af,2024-06-28_08-29-15,627.5778048038483,4407.937449216843,242786,r8i6n8,10.159.28.66,4407.937449216843,7
3685.58821166031,1719556782,checkpoint_000007,True,False,8,261349af,2024-06-28_08-39-43,627.666775226593,5035.604224443436,242786,r8i6n8,10.159.28.66,5035.604224443436,8
3365.655848225271,1719557410,checkpoint_000008,True,False,9,261349af,2024-06-28_08-50-10,627.4417996406555,5663.046024084091,242786,r8i6n8,10.159.28.66,5663.046024084091,9
3048.0907587967517,1719558037,checkpoint_000009,True,False,10,261349af,2024-06-28_09-00-38,627.6434020996094,6290.689426183701,242786,r8i6n8,10.159.28.66,6290.689426183701,10
{"loss": 5146.327555979331, "timestamp": 1719552388, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "261349af", "date": "2024-06-28_07-26-29", "time_this_iter_s": 641.6589081287384, "time_total_s": 641.6589081287384, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 641.6589081287384, "iterations_since_restore": 1}
{"loss": 5048.819774237205, "timestamp": 1719553016, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "261349af", "date": "2024-06-28_07-36-56", "time_this_iter_s": 627.6112632751465, "time_total_s": 1269.270171403885, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 1269.270171403885, "iterations_since_restore": 2}
{"loss": 4911.544510488435, "timestamp": 1719553644, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "261349af", "date": "2024-06-28_07-47-24", "time_this_iter_s": 627.9598400592804, "time_total_s": 1897.2300114631653, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 1897.2300114631653, "iterations_since_restore": 3}
{"loss": 4737.393370140256, "timestamp": 1719554272, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "261349af", "date": "2024-06-28_07-57-52", "time_this_iter_s": 628.01415848732, "time_total_s": 2525.2441699504852, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 2525.2441699504852, "iterations_since_restore": 4}
{"loss": 4523.467796505905, "timestamp": 1719554899, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "261349af", "date": "2024-06-28_08-08-20", "time_this_iter_s": 627.539436340332, "time_total_s": 3152.7836062908173, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 3152.7836062908173, "iterations_since_restore": 5}
{"loss": 4273.132577971211, "timestamp": 1719555527, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "261349af", "date": "2024-06-28_08-18-47", "time_this_iter_s": 627.5760381221771, "time_total_s": 3780.3596444129944, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 3780.3596444129944, "iterations_since_restore": 6}
{"loss": 3991.0541396253693, "timestamp": 1719556155, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "261349af", "date": "2024-06-28_08-29-15", "time_this_iter_s": 627.5778048038483, "time_total_s": 4407.937449216843, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 4407.937449216843, "iterations_since_restore": 7}
{"loss": 3685.58821166031, "timestamp": 1719556782, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "261349af", "date": "2024-06-28_08-39-43", "time_this_iter_s": 627.666775226593, "time_total_s": 5035.604224443436, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 5035.604224443436, "iterations_since_restore": 8}
{"loss": 3365.655848225271, "timestamp": 1719557410, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "261349af", "date": "2024-06-28_08-50-10", "time_this_iter_s": 627.4417996406555, "time_total_s": 5663.046024084091, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 5663.046024084091, "iterations_since_restore": 9}
{"loss": 3048.0907587967517, "timestamp": 1719558037, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "261349af", "date": "2024-06-28_09-00-38", "time_this_iter_s": 627.6434020996094, "time_total_s": 6290.689426183701, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2474625892184107, "lr": 0.09815210368028619, "batch_size": 2048}, "time_since_restore": 6290.689426183701, "iterations_since_restore": 10}
Failure # 1 (occurred at 2024-06-27_16-33-47)
ray::ImplicitFunc.train() (pid=69992, ip=10.159.28.66, actor_id=4f23fe319defb9cd61c97da301000000, repr=train_model)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/ray/tune/trainable/trainable.py", line 342, in train
raise skipped from exception_cause(skipped)
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/ray/air/_internal/util.py", line 88, in run
self._ret = self._target(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/ray/tune/trainable/function_trainable.py", line 249, in <lambda>
training_func=lambda: self._trainable_func(self.config),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/ray/tune/trainable/function_trainable.py", line 332, in _trainable_func
output = fn()
^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/ray/tune/trainable/util.py", line 138, in inner
return trainable(config, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfswork/rech/ute/ucg81ws/these/LC-MS-RT-prediction/main_ray_tune.py", line 66, in train_model
pred_rt = net.module.forward_rt(seq)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfsdswork/projects/rech/ute/ucg81ws/these/LC-MS-RT-prediction/model_custom.py", line 111, in forward_rt
out_rt = self.decoder_RT(enc)
^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/container.py", line 217, in forward
input = module(input)
^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/transformer.py", line 391, in forward
output = mod(output, src_mask=mask, is_causal=is_causal, src_key_padding_mask=src_key_padding_mask_for_layers)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/transformer.py", line 715, in forward
x = self.norm2(x + self._ff_block(x))
^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/transformer.py", line 730, in _ff_block
x = self.linear2(self.dropout(self.activation(self.linear1(x))))
^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.2.0+py3.11.7/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 400.00 MiB. GPU 0 has a total capacity of 31.73 GiB of which 116.69 MiB is free. Including non-PyTorch memory, this process has 31.62 GiB memory in use. Of the allocated memory 30.71 GiB is allocated by PyTorch, and 541.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 8,
"drop_rate": 0.8248444707020037,
"embedding_dim": 1024,
"encoder_ff": 1024,
"encoder_num_layer": 4,
"lr": 0.027832194286055118,
"n_head": 16
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment