Skip to content
Snippets Groups Projects
Commit 02f95633 authored by Schneider Leo's avatar Schneider Leo
Browse files

del raysesult

parent b844726e
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 132 deletions
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 512,
"decoder_rt_num_layer": 1,
"drop_rate": 0.2,
"embedding_dim": 1024,
"encoder_ff": 512,
"encoder_num_layer": 1,
"lr": 0.0001,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
220.77590359665277,1719315958,checkpoint_000000,True,False,1,016f7353,2024-06-25_13-45-58,188.1737368106842,188.1737368106842,339920,r9i1n2,10.159.32.15,188.1737368106842,1
180.26498923714706,1719316230,checkpoint_000001,True,False,2,016f7353,2024-06-25_13-50-30,187.0150330066681,375.1887698173523,347039,r9i1n2,10.159.32.15,187.0150330066681,1
147.3930386821116,1719316493,checkpoint_000002,True,False,3,016f7353,2024-06-25_13-54-53,185.07583117485046,560.2646009922028,353741,r9i1n2,10.159.32.15,185.07583117485046,1
134.21845143235575,1719316663,checkpoint_000003,True,False,4,016f7353,2024-06-25_13-57-43,170.05932211875916,730.3239231109619,353741,r9i1n2,10.159.32.15,355.1351532936096,2
125.97545107143132,1719316969,checkpoint_000004,True,False,5,016f7353,2024-06-25_14-02-49,187.42582607269287,917.7497491836548,366320,r9i1n2,10.159.32.15,187.42582607269287,1
119.25108247291385,1719317141,checkpoint_000005,True,False,6,016f7353,2024-06-25_14-05-41,172.101571559906,1089.8513207435608,366320,r9i1n2,10.159.32.15,359.5273976325989,2
114.85140582707923,1719317314,checkpoint_000006,True,False,7,016f7353,2024-06-25_14-08-34,172.7140347957611,1262.565355539322,366320,r9i1n2,10.159.32.15,532.24143242836,3
110.36972532497616,1719317486,checkpoint_000007,True,False,8,016f7353,2024-06-25_14-11-26,172.59792017936707,1435.163275718689,366320,r9i1n2,10.159.32.15,704.839352607727,4
108.50565037764902,1719317880,checkpoint_000008,True,False,9,016f7353,2024-06-25_14-18-00,187.18606781959534,1622.3493435382843,366320,r9i1n2,10.159.32.15,187.18606781959534,1
107.58732394721564,1719318052,checkpoint_000009,True,False,10,016f7353,2024-06-25_14-20-52,172.15624952316284,1794.5055930614471,366320,r9i1n2,10.159.32.15,359.3423173427582,2
104.85794463871032,1719318224,checkpoint_000010,True,False,11,016f7353,2024-06-25_14-23-44,172.26081562042236,1966.7664086818695,366320,r9i1n2,10.159.32.15,531.6031329631805,3
104.4458197045514,1719318397,checkpoint_000011,True,False,12,016f7353,2024-06-25_14-26-37,172.6046712398529,2139.3710799217224,366320,r9i1n2,10.159.32.15,704.2078042030334,4
99.18223199318713,1719318569,checkpoint_000012,True,False,13,016f7353,2024-06-25_14-29-29,172.24528121948242,2311.616361141205,366320,r9i1n2,10.159.32.15,876.4530854225159,5
99.00881489430826,1719318741,checkpoint_000013,True,False,14,016f7353,2024-06-25_14-32-22,172.6691517829895,2484.2855129241943,366320,r9i1n2,10.159.32.15,1049.1222372055054,6
97.12115343349187,1719318914,checkpoint_000014,True,False,15,016f7353,2024-06-25_14-35-15,172.88193345069885,2657.167446374893,366320,r9i1n2,10.159.32.15,1222.0041706562042,7
97.0790608623835,1719319087,checkpoint_000015,True,False,16,016f7353,2024-06-25_14-38-07,172.1524109840393,2829.3198573589325,366320,r9i1n2,10.159.32.15,1394.1565816402435,8
97.36735979215366,1719319259,checkpoint_000016,True,False,17,016f7353,2024-06-25_14-40-59,172.8087797164917,3002.128637075424,366320,r9i1n2,10.159.32.15,1566.9653613567352,9
94.57831074872355,1719319432,checkpoint_000017,True,False,18,016f7353,2024-06-25_14-43-52,172.65869641304016,3174.7873334884644,366320,r9i1n2,10.159.32.15,1739.6240577697754,10
{"loss": 220.77590359665277, "timestamp": 1719315958, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "016f7353", "date": "2024-06-25_13-45-58", "time_this_iter_s": 188.1737368106842, "time_total_s": 188.1737368106842, "pid": 339920, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 188.1737368106842, "iterations_since_restore": 1}
{"loss": 147.3930386821116, "timestamp": 1719316493, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "016f7353", "date": "2024-06-25_13-54-53", "time_this_iter_s": 185.07583117485046, "time_total_s": 560.2646009922028, "pid": 353741, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 185.07583117485046, "iterations_since_restore": 1}
{"loss": 125.97545107143132, "timestamp": 1719316969, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "016f7353", "date": "2024-06-25_14-02-49", "time_this_iter_s": 187.42582607269287, "time_total_s": 917.7497491836548, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 187.42582607269287, "iterations_since_restore": 1}
{"loss": 119.25108247291385, "timestamp": 1719317141, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "016f7353", "date": "2024-06-25_14-05-41", "time_this_iter_s": 172.101571559906, "time_total_s": 1089.8513207435608, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 359.5273976325989, "iterations_since_restore": 2}
{"loss": 114.85140582707923, "timestamp": 1719317314, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "016f7353", "date": "2024-06-25_14-08-34", "time_this_iter_s": 172.7140347957611, "time_total_s": 1262.565355539322, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 532.24143242836, "iterations_since_restore": 3}
{"loss": 110.36972532497616, "timestamp": 1719317486, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "016f7353", "date": "2024-06-25_14-11-26", "time_this_iter_s": 172.59792017936707, "time_total_s": 1435.163275718689, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 704.839352607727, "iterations_since_restore": 4}
{"loss": 108.50565037764902, "timestamp": 1719317880, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "016f7353", "date": "2024-06-25_14-18-00", "time_this_iter_s": 187.18606781959534, "time_total_s": 1622.3493435382843, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 187.18606781959534, "iterations_since_restore": 1}
{"loss": 107.58732394721564, "timestamp": 1719318052, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "016f7353", "date": "2024-06-25_14-20-52", "time_this_iter_s": 172.15624952316284, "time_total_s": 1794.5055930614471, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 359.3423173427582, "iterations_since_restore": 2}
{"loss": 104.85794463871032, "timestamp": 1719318224, "checkpoint_dir_name": "checkpoint_000010", "should_checkpoint": true, "done": false, "training_iteration": 11, "trial_id": "016f7353", "date": "2024-06-25_14-23-44", "time_this_iter_s": 172.26081562042236, "time_total_s": 1966.7664086818695, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 531.6031329631805, "iterations_since_restore": 3}
{"loss": 104.4458197045514, "timestamp": 1719318397, "checkpoint_dir_name": "checkpoint_000011", "should_checkpoint": true, "done": false, "training_iteration": 12, "trial_id": "016f7353", "date": "2024-06-25_14-26-37", "time_this_iter_s": 172.6046712398529, "time_total_s": 2139.3710799217224, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 704.2078042030334, "iterations_since_restore": 4}
{"loss": 99.18223199318713, "timestamp": 1719318569, "checkpoint_dir_name": "checkpoint_000012", "should_checkpoint": true, "done": false, "training_iteration": 13, "trial_id": "016f7353", "date": "2024-06-25_14-29-29", "time_this_iter_s": 172.24528121948242, "time_total_s": 2311.616361141205, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 876.4530854225159, "iterations_since_restore": 5}
{"loss": 99.00881489430826, "timestamp": 1719318741, "checkpoint_dir_name": "checkpoint_000013", "should_checkpoint": true, "done": false, "training_iteration": 14, "trial_id": "016f7353", "date": "2024-06-25_14-32-22", "time_this_iter_s": 172.6691517829895, "time_total_s": 2484.2855129241943, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 1049.1222372055054, "iterations_since_restore": 6}
{"loss": 97.12115343349187, "timestamp": 1719318914, "checkpoint_dir_name": "checkpoint_000014", "should_checkpoint": true, "done": false, "training_iteration": 15, "trial_id": "016f7353", "date": "2024-06-25_14-35-15", "time_this_iter_s": 172.88193345069885, "time_total_s": 2657.167446374893, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 1222.0041706562042, "iterations_since_restore": 7}
{"loss": 97.0790608623835, "timestamp": 1719319087, "checkpoint_dir_name": "checkpoint_000015", "should_checkpoint": true, "done": false, "training_iteration": 16, "trial_id": "016f7353", "date": "2024-06-25_14-38-07", "time_this_iter_s": 172.1524109840393, "time_total_s": 2829.3198573589325, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 1394.1565816402435, "iterations_since_restore": 8}
{"loss": 97.36735979215366, "timestamp": 1719319259, "checkpoint_dir_name": "checkpoint_000016", "should_checkpoint": true, "done": false, "training_iteration": 17, "trial_id": "016f7353", "date": "2024-06-25_14-40-59", "time_this_iter_s": 172.8087797164917, "time_total_s": 3002.128637075424, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 1566.9653613567352, "iterations_since_restore": 9}
{"loss": 94.57831074872355, "timestamp": 1719319432, "checkpoint_dir_name": "checkpoint_000017", "should_checkpoint": true, "done": false, "training_iteration": 18, "trial_id": "016f7353", "date": "2024-06-25_14-43-52", "time_this_iter_s": 172.65869641304016, "time_total_s": 3174.7873334884644, "pid": 366320, "hostname": "r9i1n2", "node_ip": "10.159.32.15", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.2, "lr": 0.0001, "batch_size": 1024}, "time_since_restore": 1739.6240577697754, "iterations_since_restore": 10}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 512,
"decoder_rt_num_layer": 4,
"drop_rate": 0.18759871111111892,
"embedding_dim": 16,
"encoder_ff": 1024,
"encoder_num_layer": 1,
"lr": 0.02750689103642202,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2019.4176592488927,1719329610,checkpoint_000000,True,False,1,040fcf09,2024-06-25_17-33-30,65.59718012809753,65.59718012809753,2195253,r8i6n2,10.159.28.60,65.59718012809753,1
{"loss": 2019.4176592488927, "timestamp": 1719329610, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "040fcf09", "date": "2024-06-25_17-33-30", "time_this_iter_s": 65.59718012809753, "time_total_s": 65.59718012809753, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.18759871111111892, "lr": 0.02750689103642202, "batch_size": 2048}, "time_since_restore": 65.59718012809753, "iterations_since_restore": 1}
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 8,
"drop_rate": 0.9904174177662719,
"embedding_dim": 16,
"encoder_ff": 512,
"encoder_num_layer": 4,
"lr": 0.0015488652457456295,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
nan,1719523981,checkpoint_000000,True,False,1,04dab8e5,2024-06-27_23-33-01,90.10185432434082,90.10185432434082,242786,r8i6n8,10.159.28.66,90.10185432434082,1
nan,1719524057,checkpoint_000001,True,False,2,04dab8e5,2024-06-27_23-34-17,75.47974467277527,165.5815989971161,242786,r8i6n8,10.159.28.66,165.5815989971161,2
nan,1719524133,checkpoint_000002,True,False,3,04dab8e5,2024-06-27_23-35-33,76.8026487827301,242.3842477798462,242786,r8i6n8,10.159.28.66,242.3842477798462,3
nan,1719524210,checkpoint_000003,True,False,4,04dab8e5,2024-06-27_23-36-50,76.45394325256348,318.83819103240967,242786,r8i6n8,10.159.28.66,318.83819103240967,4
nan,1719524286,checkpoint_000004,True,False,5,04dab8e5,2024-06-27_23-38-06,76.52552080154419,395.36371183395386,242786,r8i6n8,10.159.28.66,395.36371183395386,5
nan,1719524372,checkpoint_000005,True,False,6,04dab8e5,2024-06-27_23-39-32,85.11763429641724,480.4813461303711,242786,r8i6n8,10.159.28.66,480.4813461303711,6
nan,1719524463,checkpoint_000006,True,False,7,04dab8e5,2024-06-27_23-41-03,90.97299361228943,571.4543397426605,242786,r8i6n8,10.159.28.66,571.4543397426605,7
nan,1719524553,checkpoint_000007,True,False,8,04dab8e5,2024-06-27_23-42-33,90.95869708061218,662.4130368232727,242786,r8i6n8,10.159.28.66,662.4130368232727,8
nan,1719524644,checkpoint_000008,True,False,9,04dab8e5,2024-06-27_23-44-04,90.53222966194153,752.9452664852142,242786,r8i6n8,10.159.28.66,752.9452664852142,9
nan,1719524735,checkpoint_000009,True,False,10,04dab8e5,2024-06-27_23-45-35,90.93833804130554,843.8836045265198,242786,r8i6n8,10.159.28.66,843.8836045265198,10
{"loss": NaN, "timestamp": 1719523981, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "04dab8e5", "date": "2024-06-27_23-33-01", "time_this_iter_s": 90.10185432434082, "time_total_s": 90.10185432434082, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 90.10185432434082, "iterations_since_restore": 1}
{"loss": NaN, "timestamp": 1719524057, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "04dab8e5", "date": "2024-06-27_23-34-17", "time_this_iter_s": 75.47974467277527, "time_total_s": 165.5815989971161, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 165.5815989971161, "iterations_since_restore": 2}
{"loss": NaN, "timestamp": 1719524133, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "04dab8e5", "date": "2024-06-27_23-35-33", "time_this_iter_s": 76.8026487827301, "time_total_s": 242.3842477798462, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 242.3842477798462, "iterations_since_restore": 3}
{"loss": NaN, "timestamp": 1719524210, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "04dab8e5", "date": "2024-06-27_23-36-50", "time_this_iter_s": 76.45394325256348, "time_total_s": 318.83819103240967, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 318.83819103240967, "iterations_since_restore": 4}
{"loss": NaN, "timestamp": 1719524286, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "04dab8e5", "date": "2024-06-27_23-38-06", "time_this_iter_s": 76.52552080154419, "time_total_s": 395.36371183395386, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 395.36371183395386, "iterations_since_restore": 5}
{"loss": NaN, "timestamp": 1719524372, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "04dab8e5", "date": "2024-06-27_23-39-32", "time_this_iter_s": 85.11763429641724, "time_total_s": 480.4813461303711, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 480.4813461303711, "iterations_since_restore": 6}
{"loss": NaN, "timestamp": 1719524463, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "04dab8e5", "date": "2024-06-27_23-41-03", "time_this_iter_s": 90.97299361228943, "time_total_s": 571.4543397426605, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 571.4543397426605, "iterations_since_restore": 7}
{"loss": NaN, "timestamp": 1719524553, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "04dab8e5", "date": "2024-06-27_23-42-33", "time_this_iter_s": 90.95869708061218, "time_total_s": 662.4130368232727, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 662.4130368232727, "iterations_since_restore": 8}
{"loss": NaN, "timestamp": 1719524644, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "04dab8e5", "date": "2024-06-27_23-44-04", "time_this_iter_s": 90.53222966194153, "time_total_s": 752.9452664852142, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 752.9452664852142, "iterations_since_restore": 9}
{"loss": NaN, "timestamp": 1719524735, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "04dab8e5", "date": "2024-06-27_23-45-35", "time_this_iter_s": 90.93833804130554, "time_total_s": 843.8836045265198, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.9904174177662719, "lr": 0.0015488652457456295, "batch_size": 1024}, "time_since_restore": 843.8836045265198, "iterations_since_restore": 10}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 2,
"drop_rate": 0.7685314783666725,
"embedding_dim": 64,
"encoder_ff": 512,
"encoder_num_layer": 8,
"lr": 0.056339001711097965,
"n_head": 4
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
3700.8752076156497,1719551095,checkpoint_000000,True,False,1,08bd9367,2024-06-28_07-04-55,85.83756732940674,85.83756732940674,242786,r8i6n8,10.159.28.66,85.83756732940674,1
2637.508512241634,1719551167,checkpoint_000001,True,False,2,08bd9367,2024-06-28_07-06-07,72.44411158561707,158.2816789150238,242786,r8i6n8,10.159.28.66,158.2816789150238,2
2160.1915196696605,1719551239,checkpoint_000002,True,False,3,08bd9367,2024-06-28_07-07-20,72.27386951446533,230.55554842948914,242786,r8i6n8,10.159.28.66,230.55554842948914,3
2001.967397614727,1719551312,checkpoint_000003,True,False,4,08bd9367,2024-06-28_07-08-32,72.46391272544861,303.01946115493774,242786,r8i6n8,10.159.28.66,303.01946115493774,4
1963.2728338767224,1719551384,checkpoint_000004,True,False,5,08bd9367,2024-06-28_07-09-44,72.46509909629822,375.48456025123596,242786,r8i6n8,10.159.28.66,375.48456025123596,5
1955.7411042461245,1719551457,checkpoint_000005,True,False,6,08bd9367,2024-06-28_07-10-57,72.14350414276123,447.6280643939972,242786,r8i6n8,10.159.28.66,447.6280643939972,6
1954.256914754552,1719551529,checkpoint_000006,True,False,7,08bd9367,2024-06-28_07-12-09,72.5984845161438,520.226548910141,242786,r8i6n8,10.159.28.66,520.226548910141,7
1953.846070297121,1719551602,checkpoint_000007,True,False,8,08bd9367,2024-06-28_07-13-22,72.54622983932495,592.7727787494659,242786,r8i6n8,10.159.28.66,592.7727787494659,8
1954.4387774129552,1719551674,checkpoint_000008,True,False,9,08bd9367,2024-06-28_07-14-34,72.2018780708313,664.9746568202972,242786,r8i6n8,10.159.28.66,664.9746568202972,9
1954.1531540277435,1719551747,checkpoint_000009,True,False,10,08bd9367,2024-06-28_07-15-47,72.91625618934631,737.8909130096436,242786,r8i6n8,10.159.28.66,737.8909130096436,10
{"loss": 3700.8752076156497, "timestamp": 1719551095, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "08bd9367", "date": "2024-06-28_07-04-55", "time_this_iter_s": 85.83756732940674, "time_total_s": 85.83756732940674, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 85.83756732940674, "iterations_since_restore": 1}
{"loss": 2637.508512241634, "timestamp": 1719551167, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "08bd9367", "date": "2024-06-28_07-06-07", "time_this_iter_s": 72.44411158561707, "time_total_s": 158.2816789150238, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 158.2816789150238, "iterations_since_restore": 2}
{"loss": 2160.1915196696605, "timestamp": 1719551239, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "08bd9367", "date": "2024-06-28_07-07-20", "time_this_iter_s": 72.27386951446533, "time_total_s": 230.55554842948914, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 230.55554842948914, "iterations_since_restore": 3}
{"loss": 2001.967397614727, "timestamp": 1719551312, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "08bd9367", "date": "2024-06-28_07-08-32", "time_this_iter_s": 72.46391272544861, "time_total_s": 303.01946115493774, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 303.01946115493774, "iterations_since_restore": 4}
{"loss": 1963.2728338767224, "timestamp": 1719551384, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "08bd9367", "date": "2024-06-28_07-09-44", "time_this_iter_s": 72.46509909629822, "time_total_s": 375.48456025123596, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 375.48456025123596, "iterations_since_restore": 5}
{"loss": 1955.7411042461245, "timestamp": 1719551457, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "08bd9367", "date": "2024-06-28_07-10-57", "time_this_iter_s": 72.14350414276123, "time_total_s": 447.6280643939972, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 447.6280643939972, "iterations_since_restore": 6}
{"loss": 1954.256914754552, "timestamp": 1719551529, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "08bd9367", "date": "2024-06-28_07-12-09", "time_this_iter_s": 72.5984845161438, "time_total_s": 520.226548910141, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 520.226548910141, "iterations_since_restore": 7}
{"loss": 1953.846070297121, "timestamp": 1719551602, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "08bd9367", "date": "2024-06-28_07-13-22", "time_this_iter_s": 72.54622983932495, "time_total_s": 592.7727787494659, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 592.7727787494659, "iterations_since_restore": 8}
{"loss": 1954.4387774129552, "timestamp": 1719551674, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "08bd9367", "date": "2024-06-28_07-14-34", "time_this_iter_s": 72.2018780708313, "time_total_s": 664.9746568202972, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 664.9746568202972, "iterations_since_restore": 9}
{"loss": 1954.1531540277435, "timestamp": 1719551747, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "08bd9367", "date": "2024-06-28_07-15-47", "time_this_iter_s": 72.91625618934631, "time_total_s": 737.8909130096436, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.7685314783666725, "lr": 0.056339001711097965, "batch_size": 2048}, "time_since_restore": 737.8909130096436, "iterations_since_restore": 10}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment