Skip to content
Snippets Groups Projects
Commit 915e419e authored by Léo Schneider's avatar Léo Schneider Committed by Schneider Leo
Browse files

res

parent a790c5c2
No related branches found
No related tags found
No related merge requests found
Showing
with 82 additions and 0 deletions
{"loss": 4021.740253598671, "timestamp": 1719492153, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "3c9a1c0c", "date": "2024-06-27_14-42-33", "time_this_iter_s": 270.95838141441345, "time_total_s": 270.95838141441345, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 1024, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.3031602001882847, "lr": 0.019401170360006482, "batch_size": 1024}, "time_since_restore": 270.95838141441345, "iterations_since_restore": 1}
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 8,
"drop_rate": 0.9833194838066408,
"embedding_dim": 16,
"encoder_ff": 512,
"encoder_num_layer": 2,
"lr": 0.0004542749659626088,
"n_head": 2
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
nan,1719548436,checkpoint_000000,True,False,1,3d8a1340,2024-06-28_06-20-36,91.73844981193542,91.73844981193542,242786,r8i6n8,10.159.28.66,91.73844981193542,1
nan,1719548515,checkpoint_000001,True,False,2,3d8a1340,2024-06-28_06-21-55,78.92051219940186,170.65896201133728,242786,r8i6n8,10.159.28.66,170.65896201133728,2
nan,1719548594,checkpoint_000002,True,False,3,3d8a1340,2024-06-28_06-23-14,78.98558306694031,249.6445450782776,242786,r8i6n8,10.159.28.66,249.6445450782776,3
nan,1719548672,checkpoint_000003,True,False,4,3d8a1340,2024-06-28_06-24-32,78.50712847709656,328.15167355537415,242786,r8i6n8,10.159.28.66,328.15167355537415,4
nan,1719548751,checkpoint_000004,True,False,5,3d8a1340,2024-06-28_06-25-51,78.95465588569641,407.10632944107056,242786,r8i6n8,10.159.28.66,407.10632944107056,5
nan,1719548830,checkpoint_000005,True,False,6,3d8a1340,2024-06-28_06-27-10,79.0093162059784,486.11564564704895,242786,r8i6n8,10.159.28.66,486.11564564704895,6
nan,1719548909,checkpoint_000006,True,False,7,3d8a1340,2024-06-28_06-28-29,78.55737948417664,564.6730251312256,242786,r8i6n8,10.159.28.66,564.6730251312256,7
nan,1719548988,checkpoint_000007,True,False,8,3d8a1340,2024-06-28_06-29-48,79.00080251693726,643.6738276481628,242786,r8i6n8,10.159.28.66,643.6738276481628,8
nan,1719549067,checkpoint_000008,True,False,9,3d8a1340,2024-06-28_06-31-07,78.99766731262207,722.6714949607849,242786,r8i6n8,10.159.28.66,722.6714949607849,9
nan,1719549146,checkpoint_000009,True,False,10,3d8a1340,2024-06-28_06-32-26,79.03019595146179,801.7016909122467,242786,r8i6n8,10.159.28.66,801.7016909122467,10
{"loss": NaN, "timestamp": 1719548436, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "3d8a1340", "date": "2024-06-28_06-20-36", "time_this_iter_s": 91.73844981193542, "time_total_s": 91.73844981193542, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 91.73844981193542, "iterations_since_restore": 1}
{"loss": NaN, "timestamp": 1719548515, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "3d8a1340", "date": "2024-06-28_06-21-55", "time_this_iter_s": 78.92051219940186, "time_total_s": 170.65896201133728, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 170.65896201133728, "iterations_since_restore": 2}
{"loss": NaN, "timestamp": 1719548594, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "3d8a1340", "date": "2024-06-28_06-23-14", "time_this_iter_s": 78.98558306694031, "time_total_s": 249.6445450782776, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 249.6445450782776, "iterations_since_restore": 3}
{"loss": NaN, "timestamp": 1719548672, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "3d8a1340", "date": "2024-06-28_06-24-32", "time_this_iter_s": 78.50712847709656, "time_total_s": 328.15167355537415, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 328.15167355537415, "iterations_since_restore": 4}
{"loss": NaN, "timestamp": 1719548751, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "3d8a1340", "date": "2024-06-28_06-25-51", "time_this_iter_s": 78.95465588569641, "time_total_s": 407.10632944107056, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 407.10632944107056, "iterations_since_restore": 5}
{"loss": NaN, "timestamp": 1719548830, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "3d8a1340", "date": "2024-06-28_06-27-10", "time_this_iter_s": 79.0093162059784, "time_total_s": 486.11564564704895, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 486.11564564704895, "iterations_since_restore": 6}
{"loss": NaN, "timestamp": 1719548909, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "3d8a1340", "date": "2024-06-28_06-28-29", "time_this_iter_s": 78.55737948417664, "time_total_s": 564.6730251312256, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 564.6730251312256, "iterations_since_restore": 7}
{"loss": NaN, "timestamp": 1719548988, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "3d8a1340", "date": "2024-06-28_06-29-48", "time_this_iter_s": 79.00080251693726, "time_total_s": 643.6738276481628, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 643.6738276481628, "iterations_since_restore": 8}
{"loss": NaN, "timestamp": 1719549067, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "3d8a1340", "date": "2024-06-28_06-31-07", "time_this_iter_s": 78.99766731262207, "time_total_s": 722.6714949607849, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 722.6714949607849, "iterations_since_restore": 9}
{"loss": NaN, "timestamp": 1719549146, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "3d8a1340", "date": "2024-06-28_06-32-26", "time_this_iter_s": 79.03019595146179, "time_total_s": 801.7016909122467, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9833194838066408, "lr": 0.0004542749659626088, "batch_size": 1024}, "time_since_restore": 801.7016909122467, "iterations_since_restore": 10}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 512,
"decoder_rt_num_layer": 8,
"drop_rate": 0.9072626058636482,
"embedding_dim": 1024,
"encoder_ff": 512,
"encoder_num_layer": 1,
"lr": 0.003236929771052812,
"n_head": 4
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2134.4770325187624,1719493181,checkpoint_000000,True,False,1,41e4bd86,2024-06-27_14-59-41,720.7119467258453,720.7119467258453,69992,r8i6n8,10.159.28.66,720.7119467258453,1
{"loss": 2134.4770325187624, "timestamp": 1719493181, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "41e4bd86", "date": "2024-06-27_14-59-41", "time_this_iter_s": 720.7119467258453, "time_total_s": 720.7119467258453, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.9072626058636482, "lr": 0.003236929771052812, "batch_size": 2048}, "time_since_restore": 720.7119467258453, "iterations_since_restore": 1}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 1,
"drop_rate": 0.584365221617647,
"embedding_dim": 256,
"encoder_ff": 1024,
"encoder_num_layer": 4,
"lr": 0.0009124143820205934,
"n_head": 4
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1976.3005707507998,1719490613,checkpoint_000000,True,False,1,45b0a7c2,2024-06-27_14-16-53,105.02805352210999,105.02805352210999,35196,r8i6n8,10.159.28.66,105.02805352210999,1
1972.5077211875616,1719493830,checkpoint_000001,True,False,2,45b0a7c2,2024-06-27_15-10-30,103.25011920928955,208.27817273139954,69992,r8i6n8,10.159.28.66,103.25011920928955,1
{"loss": 1976.3005707507998, "timestamp": 1719490613, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "45b0a7c2", "date": "2024-06-27_14-16-53", "time_this_iter_s": 105.02805352210999, "time_total_s": 105.02805352210999, "pid": 35196, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 1024, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.584365221617647, "lr": 0.0009124143820205934, "batch_size": 2048}, "time_since_restore": 105.02805352210999, "iterations_since_restore": 1}
{"loss": 1972.5077211875616, "timestamp": 1719493830, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "45b0a7c2", "date": "2024-06-27_15-10-30", "time_this_iter_s": 103.25011920928955, "time_total_s": 208.27817273139954, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 1024, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.584365221617647, "lr": 0.0009124143820205934, "batch_size": 2048}, "time_since_restore": 103.25011920928955, "iterations_since_restore": 1}
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 512,
"decoder_rt_num_layer": 4,
"drop_rate": 0.701917375344629,
"embedding_dim": 1024,
"encoder_ff": 512,
"encoder_num_layer": 1,
"lr": 0.07215703780021865,
"n_head": 1
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment