Skip to content
Snippets Groups Projects
Commit 02f95633 authored by Schneider Leo's avatar Schneider Leo
Browse files

del raysesult

parent b844726e
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 114 deletions
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 2,
"drop_rate": 0.2811887966312956,
"embedding_dim": 256,
"encoder_ff": 2048,
"encoder_num_layer": 1,
"lr": 0.0005389075253520159,
"n_head": 8
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
234.06637176753966,1719329693,checkpoint_000000,True,False,1,934c5104,2024-06-25_17-34-53,82.2380793094635,82.2380793094635,2195253,r8i6n2,10.159.28.60,82.2380793094635,1
191.28313151682457,1719330754,checkpoint_000001,True,False,2,934c5104,2024-06-25_17-52-34,82.23219752311707,164.47027683258057,2195253,r8i6n2,10.159.28.60,82.23219752311707,1
166.0584006722518,1719331377,checkpoint_000002,True,False,3,934c5104,2024-06-25_18-02-57,95.2866895198822,259.75696635246277,2195253,r8i6n2,10.159.28.60,95.2866895198822,1
159.9789658794253,1719331459,checkpoint_000003,True,False,4,934c5104,2024-06-25_18-04-19,81.72880601882935,341.4857723712921,2195253,r8i6n2,10.159.28.60,177.01549553871155,2
141.96990696464,1719332153,checkpoint_000004,True,False,5,934c5104,2024-06-25_18-15-54,82.40532422065735,423.89109659194946,2195253,r8i6n2,10.159.28.60,82.40532422065735,1
117.70846791905682,1719332222,checkpoint_000005,True,False,6,934c5104,2024-06-25_18-17-02,68.08270859718323,491.9738051891327,2195253,r8i6n2,10.159.28.60,150.48803281784058,2
113.39667721245233,1719332289,checkpoint_000006,True,False,7,934c5104,2024-06-25_18-18-09,67.7198805809021,559.6936857700348,2195253,r8i6n2,10.159.28.60,218.20791339874268,3
111.7227829460084,1719332358,checkpoint_000007,True,False,8,934c5104,2024-06-25_18-19-18,68.60852479934692,628.3022105693817,2195253,r8i6n2,10.159.28.60,286.8164381980896,4
111.3588055588129,1719332426,checkpoint_000008,True,False,9,934c5104,2024-06-25_18-20-26,67.80738162994385,696.1095921993256,2195253,r8i6n2,10.159.28.60,354.62381982803345,5
108.66629178505244,1719332494,checkpoint_000009,True,False,10,934c5104,2024-06-25_18-21-35,68.99091339111328,765.1005055904388,2195253,r8i6n2,10.159.28.60,423.61473321914673,6
102.03670796071451,1719332563,checkpoint_000010,True,False,11,934c5104,2024-06-25_18-22-43,68.06165027618408,833.1621558666229,2195253,r8i6n2,10.159.28.60,491.6763834953308,7
100.34690394364004,1719332631,checkpoint_000011,True,False,12,934c5104,2024-06-25_18-23-51,68.09097814559937,901.2531340122223,2195253,r8i6n2,10.159.28.60,559.7673616409302,8
102.1303488663801,1719332699,checkpoint_000012,True,False,13,934c5104,2024-06-25_18-24-59,68.49995398521423,969.7530879974365,2195253,r8i6n2,10.159.28.60,628.2673156261444,9
98.24798722154512,1719332767,checkpoint_000013,True,False,14,934c5104,2024-06-25_18-26-08,68.14825201034546,1037.901340007782,2195253,r8i6n2,10.159.28.60,696.4155676364899,10
{"loss": 234.06637176753966, "timestamp": 1719329693, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "934c5104", "date": "2024-06-25_17-34-53", "time_this_iter_s": 82.2380793094635, "time_total_s": 82.2380793094635, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 82.2380793094635, "iterations_since_restore": 1}
{"loss": 191.28313151682457, "timestamp": 1719330754, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "934c5104", "date": "2024-06-25_17-52-34", "time_this_iter_s": 82.23219752311707, "time_total_s": 164.47027683258057, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 82.23219752311707, "iterations_since_restore": 1}
{"loss": 166.0584006722518, "timestamp": 1719331377, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "934c5104", "date": "2024-06-25_18-02-57", "time_this_iter_s": 95.2866895198822, "time_total_s": 259.75696635246277, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 95.2866895198822, "iterations_since_restore": 1}
{"loss": 159.9789658794253, "timestamp": 1719331459, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "934c5104", "date": "2024-06-25_18-04-19", "time_this_iter_s": 81.72880601882935, "time_total_s": 341.4857723712921, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 177.01549553871155, "iterations_since_restore": 2}
{"loss": 141.96990696464, "timestamp": 1719332153, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "934c5104", "date": "2024-06-25_18-15-54", "time_this_iter_s": 82.40532422065735, "time_total_s": 423.89109659194946, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 82.40532422065735, "iterations_since_restore": 1}
{"loss": 117.70846791905682, "timestamp": 1719332222, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "934c5104", "date": "2024-06-25_18-17-02", "time_this_iter_s": 68.08270859718323, "time_total_s": 491.9738051891327, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 150.48803281784058, "iterations_since_restore": 2}
{"loss": 113.39667721245233, "timestamp": 1719332289, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "934c5104", "date": "2024-06-25_18-18-09", "time_this_iter_s": 67.7198805809021, "time_total_s": 559.6936857700348, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 218.20791339874268, "iterations_since_restore": 3}
{"loss": 111.7227829460084, "timestamp": 1719332358, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "934c5104", "date": "2024-06-25_18-19-18", "time_this_iter_s": 68.60852479934692, "time_total_s": 628.3022105693817, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 286.8164381980896, "iterations_since_restore": 4}
{"loss": 111.3588055588129, "timestamp": 1719332426, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "934c5104", "date": "2024-06-25_18-20-26", "time_this_iter_s": 67.80738162994385, "time_total_s": 696.1095921993256, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 354.62381982803345, "iterations_since_restore": 5}
{"loss": 108.66629178505244, "timestamp": 1719332494, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "934c5104", "date": "2024-06-25_18-21-35", "time_this_iter_s": 68.99091339111328, "time_total_s": 765.1005055904388, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 423.61473321914673, "iterations_since_restore": 6}
{"loss": 102.03670796071451, "timestamp": 1719332563, "checkpoint_dir_name": "checkpoint_000010", "should_checkpoint": true, "done": false, "training_iteration": 11, "trial_id": "934c5104", "date": "2024-06-25_18-22-43", "time_this_iter_s": 68.06165027618408, "time_total_s": 833.1621558666229, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 491.6763834953308, "iterations_since_restore": 7}
{"loss": 100.34690394364004, "timestamp": 1719332631, "checkpoint_dir_name": "checkpoint_000011", "should_checkpoint": true, "done": false, "training_iteration": 12, "trial_id": "934c5104", "date": "2024-06-25_18-23-51", "time_this_iter_s": 68.09097814559937, "time_total_s": 901.2531340122223, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 559.7673616409302, "iterations_since_restore": 8}
{"loss": 102.1303488663801, "timestamp": 1719332699, "checkpoint_dir_name": "checkpoint_000012", "should_checkpoint": true, "done": false, "training_iteration": 13, "trial_id": "934c5104", "date": "2024-06-25_18-24-59", "time_this_iter_s": 68.49995398521423, "time_total_s": 969.7530879974365, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 628.2673156261444, "iterations_since_restore": 9}
{"loss": 98.24798722154512, "timestamp": 1719332767, "checkpoint_dir_name": "checkpoint_000013", "should_checkpoint": true, "done": false, "training_iteration": 14, "trial_id": "934c5104", "date": "2024-06-25_18-26-08", "time_this_iter_s": 68.14825201034546, "time_total_s": 1037.901340007782, "pid": 2195253, "hostname": "r8i6n2", "node_ip": "10.159.28.60", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.2811887966312956, "lr": 0.0005389075253520159, "batch_size": 2048}, "time_since_restore": 696.4155676364899, "iterations_since_restore": 10}
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 8,
"drop_rate": 0.7914041407453669,
"embedding_dim": 16,
"encoder_ff": 1024,
"encoder_num_layer": 1,
"lr": 0.0017184389008014717,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1996.2721538393516,1719502422,checkpoint_000000,True,False,1,93c649bd,2024-06-27_17-33-42,84.1758508682251,84.1758508682251,242786,r8i6n8,10.159.28.66,84.1758508682251,1
1999.4757651982345,1719502493,checkpoint_000001,True,False,2,93c649bd,2024-06-27_17-34-53,70.39586234092712,154.57171320915222,242786,r8i6n8,10.159.28.66,154.57171320915222,2
2008.213741753045,1719502563,checkpoint_000002,True,False,3,93c649bd,2024-06-27_17-36-03,70.20663857460022,224.77835178375244,242786,r8i6n8,10.159.28.66,224.77835178375244,3
1996.1505780557948,1719502633,checkpoint_000003,True,False,4,93c649bd,2024-06-27_17-37-13,70.27228903770447,295.0506408214569,242786,r8i6n8,10.159.28.66,295.0506408214569,4
{"loss": 1996.2721538393516, "timestamp": 1719502422, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "93c649bd", "date": "2024-06-27_17-33-42", "time_this_iter_s": 84.1758508682251, "time_total_s": 84.1758508682251, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.7914041407453669, "lr": 0.0017184389008014717, "batch_size": 1024}, "time_since_restore": 84.1758508682251, "iterations_since_restore": 1}
{"loss": 1999.4757651982345, "timestamp": 1719502493, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "93c649bd", "date": "2024-06-27_17-34-53", "time_this_iter_s": 70.39586234092712, "time_total_s": 154.57171320915222, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.7914041407453669, "lr": 0.0017184389008014717, "batch_size": 1024}, "time_since_restore": 154.57171320915222, "iterations_since_restore": 2}
{"loss": 2008.213741753045, "timestamp": 1719502563, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "93c649bd", "date": "2024-06-27_17-36-03", "time_this_iter_s": 70.20663857460022, "time_total_s": 224.77835178375244, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.7914041407453669, "lr": 0.0017184389008014717, "batch_size": 1024}, "time_since_restore": 224.77835178375244, "iterations_since_restore": 3}
{"loss": 1996.1505780557948, "timestamp": 1719502633, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "93c649bd", "date": "2024-06-27_17-37-13", "time_this_iter_s": 70.27228903770447, "time_total_s": 295.0506408214569, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.7914041407453669, "lr": 0.0017184389008014717, "batch_size": 1024}, "time_since_restore": 295.0506408214569, "iterations_since_restore": 4}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 1,
"drop_rate": 0.7686353033166121,
"embedding_dim": 256,
"encoder_ff": 2048,
"encoder_num_layer": 8,
"lr": 0.00011707230271575167,
"n_head": 16
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1995.6765771099901,1719521818,checkpoint_000000,True,False,1,951ea23d,2024-06-27_22-56-58,243.84869742393494,243.84869742393494,242786,r8i6n8,10.159.28.66,243.84869742393494,1
1996.344031626784,1719522048,checkpoint_000001,True,False,2,951ea23d,2024-06-27_23-00-48,230.28329491615295,474.1319923400879,242786,r8i6n8,10.159.28.66,474.1319923400879,2
1999.2121284064347,1719522278,checkpoint_000002,True,False,3,951ea23d,2024-06-27_23-04-38,230.10066199302673,704.2326543331146,242786,r8i6n8,10.159.28.66,704.2326543331146,3
1990.6904046967275,1719522509,checkpoint_000003,True,False,4,951ea23d,2024-06-27_23-08-29,230.43604636192322,934.6687006950378,242786,r8i6n8,10.159.28.66,934.6687006950378,4
1992.301810677596,1719522739,checkpoint_000004,True,False,5,951ea23d,2024-06-27_23-12-19,230.36418223381042,1165.0328829288483,242786,r8i6n8,10.159.28.66,1165.0328829288483,5
1994.9048264871433,1719522970,checkpoint_000005,True,False,6,951ea23d,2024-06-27_23-16-10,230.64503145217896,1395.6779143810272,242786,r8i6n8,10.159.28.66,1395.6779143810272,6
1997.1295665831078,1719523200,checkpoint_000006,True,False,7,951ea23d,2024-06-27_23-20-00,230.41295719146729,1626.0908715724945,242786,r8i6n8,10.159.28.66,1626.0908715724945,7
1995.2905263825664,1719523430,checkpoint_000007,True,False,8,951ea23d,2024-06-27_23-23-50,230.26359033584595,1856.3544619083405,242786,r8i6n8,10.159.28.66,1856.3544619083405,8
1990.7004615603469,1719523661,checkpoint_000008,True,False,9,951ea23d,2024-06-27_23-27-41,230.41418027877808,2086.7686421871185,242786,r8i6n8,10.159.28.66,2086.7686421871185,9
1996.574927142286,1719523891,checkpoint_000009,True,False,10,951ea23d,2024-06-27_23-31-31,230.07325339317322,2316.8418955802917,242786,r8i6n8,10.159.28.66,2316.8418955802917,10
{"loss": 1995.6765771099901, "timestamp": 1719521818, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "951ea23d", "date": "2024-06-27_22-56-58", "time_this_iter_s": 243.84869742393494, "time_total_s": 243.84869742393494, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 243.84869742393494, "iterations_since_restore": 1}
{"loss": 1996.344031626784, "timestamp": 1719522048, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "951ea23d", "date": "2024-06-27_23-00-48", "time_this_iter_s": 230.28329491615295, "time_total_s": 474.1319923400879, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 474.1319923400879, "iterations_since_restore": 2}
{"loss": 1999.2121284064347, "timestamp": 1719522278, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "951ea23d", "date": "2024-06-27_23-04-38", "time_this_iter_s": 230.10066199302673, "time_total_s": 704.2326543331146, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 704.2326543331146, "iterations_since_restore": 3}
{"loss": 1990.6904046967275, "timestamp": 1719522509, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "951ea23d", "date": "2024-06-27_23-08-29", "time_this_iter_s": 230.43604636192322, "time_total_s": 934.6687006950378, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 934.6687006950378, "iterations_since_restore": 4}
{"loss": 1992.301810677596, "timestamp": 1719522739, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "951ea23d", "date": "2024-06-27_23-12-19", "time_this_iter_s": 230.36418223381042, "time_total_s": 1165.0328829288483, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 1165.0328829288483, "iterations_since_restore": 5}
{"loss": 1994.9048264871433, "timestamp": 1719522970, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "951ea23d", "date": "2024-06-27_23-16-10", "time_this_iter_s": 230.64503145217896, "time_total_s": 1395.6779143810272, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 1395.6779143810272, "iterations_since_restore": 6}
{"loss": 1997.1295665831078, "timestamp": 1719523200, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "951ea23d", "date": "2024-06-27_23-20-00", "time_this_iter_s": 230.41295719146729, "time_total_s": 1626.0908715724945, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 1626.0908715724945, "iterations_since_restore": 7}
{"loss": 1995.2905263825664, "timestamp": 1719523430, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "951ea23d", "date": "2024-06-27_23-23-50", "time_this_iter_s": 230.26359033584595, "time_total_s": 1856.3544619083405, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 1856.3544619083405, "iterations_since_restore": 8}
{"loss": 1990.7004615603469, "timestamp": 1719523661, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "951ea23d", "date": "2024-06-27_23-27-41", "time_this_iter_s": 230.41418027877808, "time_total_s": 2086.7686421871185, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 2086.7686421871185, "iterations_since_restore": 9}
{"loss": 1996.574927142286, "timestamp": 1719523891, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "951ea23d", "date": "2024-06-27_23-31-31", "time_this_iter_s": 230.07325339317322, "time_total_s": 2316.8418955802917, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 2048, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.7686353033166121, "lr": 0.00011707230271575167, "batch_size": 2048}, "time_since_restore": 2316.8418955802917, "iterations_since_restore": 10}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 2,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 1,
"drop_rate": 0.8156275326399022,
"embedding_dim": 64,
"encoder_ff": 1024,
"encoder_num_layer": 1,
"lr": 0.00010404955423531405,
"n_head": 16
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2002.7163720318651,1719324792,checkpoint_000000,True,False,1,975aa6a0,2024-06-25_16-13-12,64.14145565032959,64.14145565032959,69318,r3i5n6,10.159.8.159,64.14145565032959,1
{"loss": 2002.7163720318651, "timestamp": 1719324792, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "975aa6a0", "date": "2024-06-25_16-13-12", "time_this_iter_s": 64.14145565032959, "time_total_s": 64.14145565032959, "pid": 69318, "hostname": "r3i5n6", "node_ip": "10.159.8.159", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 2, "embedding_dim": 64, "encoder_ff": 1024, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.8156275326399022, "lr": 0.00010404955423531405, "batch_size": 2048}, "time_since_restore": 64.14145565032959, "iterations_since_restore": 1}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment