Skip to content
Snippets Groups Projects
Commit 915e419e authored by Léo Schneider's avatar Léo Schneider Committed by Schneider Leo
Browse files

res

parent a790c5c2
No related branches found
No related tags found
No related merge requests found
Showing
with 110 additions and 0 deletions
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 512,
"decoder_rt_num_layer": 8,
"drop_rate": 0.620662951741282,
"embedding_dim": 1024,
"encoder_ff": 2048,
"encoder_num_layer": 2,
"lr": 0.00347738554616066,
"n_head": 4
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2050.3518681563733,1719540226,checkpoint_000000,True,False,1,aaac0ba4,2024-06-28_04-03-47,859.009360074997,859.009360074997,242786,r8i6n8,10.159.28.66,859.009360074997,1
2052.4004069651205,1719541069,checkpoint_000001,True,False,2,aaac0ba4,2024-06-28_04-17-50,843.1840569972992,1702.1934170722961,242786,r8i6n8,10.159.28.66,1702.1934170722961,2
2052.2435062438485,1719541912,checkpoint_000002,True,False,3,aaac0ba4,2024-06-28_04-31-53,842.8456587791443,2545.0390758514404,242786,r8i6n8,10.159.28.66,2545.0390758514404,3
2050.0776482529527,1719542755,checkpoint_000003,True,False,4,aaac0ba4,2024-06-28_04-45-55,842.6302690505981,3387.6693449020386,242786,r8i6n8,10.159.28.66,3387.6693449020386,4
2056.0218419352855,1719543597,checkpoint_000004,True,False,5,aaac0ba4,2024-06-28_04-59-57,841.9567799568176,4229.626124858856,242786,r8i6n8,10.159.28.66,4229.626124858856,5
2051.6352116141734,1719544438,checkpoint_000005,True,False,6,aaac0ba4,2024-06-28_05-13-59,841.3759407997131,5071.002065658569,242786,r8i6n8,10.159.28.66,5071.002065658569,6
2049.202928918553,1719545280,checkpoint_000006,True,False,7,aaac0ba4,2024-06-28_05-28-01,842.0156388282776,5913.017704486847,242786,r8i6n8,10.159.28.66,5913.017704486847,7
2061.2424547090304,1719546122,checkpoint_000007,True,False,8,aaac0ba4,2024-06-28_05-42-02,841.3607151508331,6754.37841963768,242786,r8i6n8,10.159.28.66,6754.37841963768,8
2040.9990743802289,1719546963,checkpoint_000008,True,False,9,aaac0ba4,2024-06-28_05-56-04,841.6030082702637,7595.981427907944,242786,r8i6n8,10.159.28.66,7595.981427907944,9
2036.6472908080093,1719547805,checkpoint_000009,True,False,10,aaac0ba4,2024-06-28_06-10-05,841.7590441703796,8437.740472078323,242786,r8i6n8,10.159.28.66,8437.740472078323,10
{"loss": 2050.3518681563733, "timestamp": 1719540226, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "aaac0ba4", "date": "2024-06-28_04-03-47", "time_this_iter_s": 859.009360074997, "time_total_s": 859.009360074997, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 859.009360074997, "iterations_since_restore": 1}
{"loss": 2052.4004069651205, "timestamp": 1719541069, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "aaac0ba4", "date": "2024-06-28_04-17-50", "time_this_iter_s": 843.1840569972992, "time_total_s": 1702.1934170722961, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 1702.1934170722961, "iterations_since_restore": 2}
{"loss": 2052.2435062438485, "timestamp": 1719541912, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "aaac0ba4", "date": "2024-06-28_04-31-53", "time_this_iter_s": 842.8456587791443, "time_total_s": 2545.0390758514404, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 2545.0390758514404, "iterations_since_restore": 3}
{"loss": 2050.0776482529527, "timestamp": 1719542755, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "aaac0ba4", "date": "2024-06-28_04-45-55", "time_this_iter_s": 842.6302690505981, "time_total_s": 3387.6693449020386, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 3387.6693449020386, "iterations_since_restore": 4}
{"loss": 2056.0218419352855, "timestamp": 1719543597, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "aaac0ba4", "date": "2024-06-28_04-59-57", "time_this_iter_s": 841.9567799568176, "time_total_s": 4229.626124858856, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 4229.626124858856, "iterations_since_restore": 5}
{"loss": 2051.6352116141734, "timestamp": 1719544438, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "aaac0ba4", "date": "2024-06-28_05-13-59", "time_this_iter_s": 841.3759407997131, "time_total_s": 5071.002065658569, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 5071.002065658569, "iterations_since_restore": 6}
{"loss": 2049.202928918553, "timestamp": 1719545280, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "aaac0ba4", "date": "2024-06-28_05-28-01", "time_this_iter_s": 842.0156388282776, "time_total_s": 5913.017704486847, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 5913.017704486847, "iterations_since_restore": 7}
{"loss": 2061.2424547090304, "timestamp": 1719546122, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "aaac0ba4", "date": "2024-06-28_05-42-02", "time_this_iter_s": 841.3607151508331, "time_total_s": 6754.37841963768, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 6754.37841963768, "iterations_since_restore": 8}
{"loss": 2040.9990743802289, "timestamp": 1719546963, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "aaac0ba4", "date": "2024-06-28_05-56-04", "time_this_iter_s": 841.6030082702637, "time_total_s": 7595.981427907944, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 7595.981427907944, "iterations_since_restore": 9}
{"loss": 2036.6472908080093, "timestamp": 1719547805, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "aaac0ba4", "date": "2024-06-28_06-10-05", "time_this_iter_s": 841.7590441703796, "time_total_s": 8437.740472078323, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.620662951741282, "lr": 0.00347738554616066, "batch_size": 2048}, "time_since_restore": 8437.740472078323, "iterations_since_restore": 10}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 1,
"drop_rate": 0.9916390007635185,
"embedding_dim": 256,
"encoder_ff": 512,
"encoder_num_layer": 1,
"lr": 0.02082451872516576,
"n_head": 2
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2619.9586787186267,1719510844,checkpoint_000000,True,False,1,b167b5be,2024-06-27_19-54-04,71.05141735076904,71.05141735076904,242786,r8i6n8,10.159.28.66,71.05141735076904,1
2487.16093865342,1719510902,checkpoint_000001,True,False,2,b167b5be,2024-06-27_19-55-02,58.0530526638031,129.10447001457214,242786,r8i6n8,10.159.28.66,129.10447001457214,2
2400.946604330709,1719510960,checkpoint_000002,True,False,3,b167b5be,2024-06-27_19-56-00,57.610605001449585,186.71507501602173,242786,r8i6n8,10.159.28.66,186.71507501602173,3
2286.2255494125247,1719511017,checkpoint_000003,True,False,4,b167b5be,2024-06-27_19-56-57,57.45701217651367,244.1720871925354,242786,r8i6n8,10.159.28.66,244.1720871925354,4
2179.0624384842517,1719511075,checkpoint_000004,True,False,5,b167b5be,2024-06-27_19-57-55,57.4915931224823,301.6636803150177,242786,r8i6n8,10.159.28.66,301.6636803150177,5
{"loss": 2619.9586787186267, "timestamp": 1719510844, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "b167b5be", "date": "2024-06-27_19-54-04", "time_this_iter_s": 71.05141735076904, "time_total_s": 71.05141735076904, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9916390007635185, "lr": 0.02082451872516576, "batch_size": 2048}, "time_since_restore": 71.05141735076904, "iterations_since_restore": 1}
{"loss": 2487.16093865342, "timestamp": 1719510902, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "b167b5be", "date": "2024-06-27_19-55-02", "time_this_iter_s": 58.0530526638031, "time_total_s": 129.10447001457214, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9916390007635185, "lr": 0.02082451872516576, "batch_size": 2048}, "time_since_restore": 129.10447001457214, "iterations_since_restore": 2}
{"loss": 2400.946604330709, "timestamp": 1719510960, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "b167b5be", "date": "2024-06-27_19-56-00", "time_this_iter_s": 57.610605001449585, "time_total_s": 186.71507501602173, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9916390007635185, "lr": 0.02082451872516576, "batch_size": 2048}, "time_since_restore": 186.71507501602173, "iterations_since_restore": 3}
{"loss": 2286.2255494125247, "timestamp": 1719511017, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "b167b5be", "date": "2024-06-27_19-56-57", "time_this_iter_s": 57.45701217651367, "time_total_s": 244.1720871925354, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9916390007635185, "lr": 0.02082451872516576, "batch_size": 2048}, "time_since_restore": 244.1720871925354, "iterations_since_restore": 4}
{"loss": 2179.0624384842517, "timestamp": 1719511075, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "b167b5be", "date": "2024-06-27_19-57-55", "time_this_iter_s": 57.4915931224823, "time_total_s": 301.6636803150177, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 1, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 2, "drop_rate": 0.9916390007635185, "lr": 0.02082451872516576, "batch_size": 2048}, "time_since_restore": 301.6636803150177, "iterations_since_restore": 5}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 1,
"drop_rate": 0.2473324871734921,
"embedding_dim": 16,
"encoder_ff": 512,
"encoder_num_layer": 2,
"lr": 0.08715018752570974,
"n_head": 16
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2342.1553426427167,1719547872,checkpoint_000000,True,False,1,b2637af1,2024-06-28_06-11-12,66.37161922454834,66.37161922454834,242786,r8i6n8,10.159.28.66,66.37161922454834,1
2230.1564153235727,1719547925,checkpoint_000001,True,False,2,b2637af1,2024-06-28_06-12-05,52.5772385597229,118.94885778427124,242786,r8i6n8,10.159.28.66,118.94885778427124,2
2089.4225843534696,1719547978,checkpoint_000002,True,False,3,b2637af1,2024-06-28_06-12-58,52.95364809036255,171.9025058746338,242786,r8i6n8,10.159.28.66,171.9025058746338,3
2035.8427676703986,1719548030,checkpoint_000003,True,False,4,b2637af1,2024-06-28_06-13-50,52.54171442985535,224.44422030448914,242786,r8i6n8,10.159.28.66,224.44422030448914,4
1988.0233682947835,1719548083,checkpoint_000004,True,False,5,b2637af1,2024-06-28_06-14-43,52.50262928009033,276.94684958457947,242786,r8i6n8,10.159.28.66,276.94684958457947,5
1975.179330900898,1719548135,checkpoint_000005,True,False,6,b2637af1,2024-06-28_06-15-35,52.05119323730469,328.99804282188416,242786,r8i6n8,10.159.28.66,328.99804282188416,6
1963.0230732114296,1719548187,checkpoint_000006,True,False,7,b2637af1,2024-06-28_06-16-27,52.52018332481384,381.518226146698,242786,r8i6n8,10.159.28.66,381.518226146698,7
1959.1778295321728,1719548239,checkpoint_000007,True,False,8,b2637af1,2024-06-28_06-17-19,52.090354681015015,433.608580827713,242786,r8i6n8,10.159.28.66,433.608580827713,8
1956.1374309870203,1719548292,checkpoint_000008,True,False,9,b2637af1,2024-06-28_06-18-12,52.52081227302551,486.1293931007385,242786,r8i6n8,10.159.28.66,486.1293931007385,9
1954.741417591966,1719548344,checkpoint_000009,True,False,10,b2637af1,2024-06-28_06-19-04,52.11372709274292,538.2431201934814,242786,r8i6n8,10.159.28.66,538.2431201934814,10
{"loss": 2342.1553426427167, "timestamp": 1719547872, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "b2637af1", "date": "2024-06-28_06-11-12", "time_this_iter_s": 66.37161922454834, "time_total_s": 66.37161922454834, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 66.37161922454834, "iterations_since_restore": 1}
{"loss": 2230.1564153235727, "timestamp": 1719547925, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "b2637af1", "date": "2024-06-28_06-12-05", "time_this_iter_s": 52.5772385597229, "time_total_s": 118.94885778427124, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 118.94885778427124, "iterations_since_restore": 2}
{"loss": 2089.4225843534696, "timestamp": 1719547978, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "b2637af1", "date": "2024-06-28_06-12-58", "time_this_iter_s": 52.95364809036255, "time_total_s": 171.9025058746338, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 171.9025058746338, "iterations_since_restore": 3}
{"loss": 2035.8427676703986, "timestamp": 1719548030, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "b2637af1", "date": "2024-06-28_06-13-50", "time_this_iter_s": 52.54171442985535, "time_total_s": 224.44422030448914, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 224.44422030448914, "iterations_since_restore": 4}
{"loss": 1988.0233682947835, "timestamp": 1719548083, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "b2637af1", "date": "2024-06-28_06-14-43", "time_this_iter_s": 52.50262928009033, "time_total_s": 276.94684958457947, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 276.94684958457947, "iterations_since_restore": 5}
{"loss": 1975.179330900898, "timestamp": 1719548135, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "b2637af1", "date": "2024-06-28_06-15-35", "time_this_iter_s": 52.05119323730469, "time_total_s": 328.99804282188416, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 328.99804282188416, "iterations_since_restore": 6}
{"loss": 1963.0230732114296, "timestamp": 1719548187, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "b2637af1", "date": "2024-06-28_06-16-27", "time_this_iter_s": 52.52018332481384, "time_total_s": 381.518226146698, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 381.518226146698, "iterations_since_restore": 7}
{"loss": 1959.1778295321728, "timestamp": 1719548239, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "b2637af1", "date": "2024-06-28_06-17-19", "time_this_iter_s": 52.090354681015015, "time_total_s": 433.608580827713, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 433.608580827713, "iterations_since_restore": 8}
{"loss": 1956.1374309870203, "timestamp": 1719548292, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "b2637af1", "date": "2024-06-28_06-18-12", "time_this_iter_s": 52.52081227302551, "time_total_s": 486.1293931007385, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 486.1293931007385, "iterations_since_restore": 9}
{"loss": 1954.741417591966, "timestamp": 1719548344, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "b2637af1", "date": "2024-06-28_06-19-04", "time_this_iter_s": 52.11372709274292, "time_total_s": 538.2431201934814, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.2473324871734921, "lr": 0.08715018752570974, "batch_size": 2048}, "time_since_restore": 538.2431201934814, "iterations_since_restore": 10}
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 1,
"drop_rate": 0.40559456476144173,
"embedding_dim": 256,
"encoder_ff": 512,
"encoder_num_layer": 4,
"lr": 0.0012413894518875446,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1965.450800377553,1719490702,checkpoint_000000,True,False,1,b6132594,2024-06-27_14-18-22,88.76505160331726,88.76505160331726,35196,r8i6n8,10.159.28.66,88.76505160331726,1
1968.0043185977486,1719493918,checkpoint_000001,True,False,2,b6132594,2024-06-27_15-11-58,87.441162109375,176.20621371269226,69992,r8i6n8,10.159.28.66,87.441162109375,1
{"loss": 1965.450800377553, "timestamp": 1719490702, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "b6132594", "date": "2024-06-27_14-18-22", "time_this_iter_s": 88.76505160331726, "time_total_s": 88.76505160331726, "pid": 35196, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.40559456476144173, "lr": 0.0012413894518875446, "batch_size": 1024}, "time_since_restore": 88.76505160331726, "iterations_since_restore": 1}
{"loss": 1968.0043185977486, "timestamp": 1719493918, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "b6132594", "date": "2024-06-27_15-11-58", "time_this_iter_s": 87.441162109375, "time_total_s": 176.20621371269226, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.40559456476144173, "lr": 0.0012413894518875446, "batch_size": 1024}, "time_since_restore": 87.441162109375, "iterations_since_restore": 1}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment