Skip to content
Snippets Groups Projects
Commit 915e419e authored by Léo Schneider's avatar Léo Schneider Committed by Schneider Leo
Browse files

res

parent a790c5c2
No related branches found
No related tags found
No related merge requests found
Showing
with 114 additions and 0 deletions
{"loss": 1988.827157088152, "timestamp": 1719497844, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "ce13f5b5", "date": "2024-06-27_16-17-25", "time_this_iter_s": 980.2613203525543, "time_total_s": 980.2613203525543, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.5002709319790926, "lr": 0.0010079586534441948, "batch_size": 2048}, "time_since_restore": 980.2613203525543, "iterations_since_restore": 1}
{"loss": 1978.954319751169, "timestamp": 1719498811, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "ce13f5b5", "date": "2024-06-27_16-33-32", "time_this_iter_s": 966.9953911304474, "time_total_s": 1947.2567114830017, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.5002709319790926, "lr": 0.0010079586534441948, "batch_size": 2048}, "time_since_restore": 1947.2567114830017, "iterations_since_restore": 2}
{"loss": 1980.5698492095225, "timestamp": 1719503758, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "ce13f5b5", "date": "2024-06-27_17-55-58", "time_this_iter_s": 980.9022901058197, "time_total_s": 2928.1590015888214, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.5002709319790926, "lr": 0.0010079586534441948, "batch_size": 2048}, "time_since_restore": 980.9022901058197, "iterations_since_restore": 1}
{"loss": 1991.1704995463213, "timestamp": 1719504722, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "ce13f5b5", "date": "2024-06-27_18-12-02", "time_this_iter_s": 964.2863810062408, "time_total_s": 3892.4453825950623, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.5002709319790926, "lr": 0.0010079586534441948, "batch_size": 2048}, "time_since_restore": 1945.1886711120605, "iterations_since_restore": 2}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 8,
"drop_rate": 0.049653684746237436,
"embedding_dim": 256,
"encoder_ff": 512,
"encoder_num_layer": 8,
"lr": 0.0014072769080781988,
"n_head": 8
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1958.2553874338705,1719492460,checkpoint_000000,True,False,1,d2831dee,2024-06-27_14-47-40,306.8074827194214,306.8074827194214,69992,r8i6n8,10.159.28.66,306.8074827194214,1
1955.7293941467765,1719494783,checkpoint_000001,True,False,2,d2831dee,2024-06-27_15-26-23,309.7399287223816,616.547411441803,69992,r8i6n8,10.159.28.66,309.7399287223816,1
1960.276611328125,1719495627,checkpoint_000002,True,False,3,d2831dee,2024-06-27_15-40-27,308.24083185195923,924.7882432937622,69992,r8i6n8,10.159.28.66,308.24083185195923,1
1955.4441956046999,1719495922,checkpoint_000003,True,True,4,d2831dee,2024-06-27_15-45-22,294.4752688407898,1219.263512134552,69992,r8i6n8,10.159.28.66,602.716100692749,2
{"loss": 1958.2553874338705, "timestamp": 1719492460, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "d2831dee", "date": "2024-06-27_14-47-40", "time_this_iter_s": 306.8074827194214, "time_total_s": 306.8074827194214, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.049653684746237436, "lr": 0.0014072769080781988, "batch_size": 2048}, "time_since_restore": 306.8074827194214, "iterations_since_restore": 1}
{"loss": 1955.7293941467765, "timestamp": 1719494783, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "d2831dee", "date": "2024-06-27_15-26-23", "time_this_iter_s": 309.7399287223816, "time_total_s": 616.547411441803, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.049653684746237436, "lr": 0.0014072769080781988, "batch_size": 2048}, "time_since_restore": 309.7399287223816, "iterations_since_restore": 1}
{"loss": 1960.276611328125, "timestamp": 1719495627, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "d2831dee", "date": "2024-06-27_15-40-27", "time_this_iter_s": 308.24083185195923, "time_total_s": 924.7882432937622, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.049653684746237436, "lr": 0.0014072769080781988, "batch_size": 2048}, "time_since_restore": 308.24083185195923, "iterations_since_restore": 1}
{"loss": 1955.4441956046999, "timestamp": 1719495922, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": true, "training_iteration": 4, "trial_id": "d2831dee", "date": "2024-06-27_15-45-22", "time_this_iter_s": 294.4752688407898, "time_total_s": 1219.263512134552, "pid": 69992, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 256, "encoder_ff": 512, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.049653684746237436, "lr": 0.0014072769080781988, "batch_size": 2048}, "time_since_restore": 602.716100692749, "iterations_since_restore": 2}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 1,
"drop_rate": 0.6203685879053435,
"embedding_dim": 16,
"encoder_ff": 2048,
"encoder_num_layer": 8,
"lr": 0.00010069872409405673,
"n_head": 8
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1976.3505897822342,1719508904,checkpoint_000000,True,False,1,d78b949d,2024-06-27_19-21-44,89.9982442855835,89.9982442855835,242786,r8i6n8,10.159.28.66,89.9982442855835,1
1974.78995543953,1719508980,checkpoint_000001,True,False,2,d78b949d,2024-06-27_19-23-00,76.56657218933105,166.56481647491455,242786,r8i6n8,10.159.28.66,166.56481647491455,2
1974.6597150667446,1719509057,checkpoint_000002,True,False,3,d78b949d,2024-06-27_19-24-17,76.93186378479004,243.4966802597046,242786,r8i6n8,10.159.28.66,243.4966802597046,3
1624.830540454294,1719509134,checkpoint_000003,True,False,4,d78b949d,2024-06-27_19-25-34,76.92754554748535,320.42422580718994,242786,r8i6n8,10.159.28.66,320.42422580718994,4
1135.5229694036047,1719509211,checkpoint_000004,True,False,5,d78b949d,2024-06-27_19-26-51,76.4197883605957,396.84401416778564,242786,r8i6n8,10.159.28.66,396.84401416778564,5
935.2523327925073,1719512025,checkpoint_000005,True,False,6,d78b949d,2024-06-27_20-13-45,90.96137690544128,487.80539107322693,242786,r8i6n8,10.159.28.66,90.96137690544128,1
820.0057286540355,1719512102,checkpoint_000006,True,False,7,d78b949d,2024-06-27_20-15-02,77.09065294265747,564.8960440158844,242786,r8i6n8,10.159.28.66,168.05202984809875,2
743.699236531896,1719512179,checkpoint_000007,True,False,8,d78b949d,2024-06-27_20-16-19,76.6766791343689,641.5727231502533,242786,r8i6n8,10.159.28.66,244.72870898246765,3
700.0018103892409,1719512256,checkpoint_000008,True,False,9,d78b949d,2024-06-27_20-17-36,77.02537822723389,718.5981013774872,242786,r8i6n8,10.159.28.66,321.75408720970154,4
663.1148220272514,1719512333,checkpoint_000009,True,False,10,d78b949d,2024-06-27_20-18-53,77.04872226715088,795.6468236446381,242786,r8i6n8,10.159.28.66,398.8028094768524,5
626.2061512864481,1719513310,checkpoint_000010,True,False,11,d78b949d,2024-06-27_20-35-10,91.49215126037598,887.138974905014,242786,r8i6n8,10.159.28.66,91.49215126037598,1
593.4698202778974,1719513386,checkpoint_000011,True,False,12,d78b949d,2024-06-27_20-36-26,76.48094964027405,963.6199245452881,242786,r8i6n8,10.159.28.66,167.97310090065002,2
573.8050527497539,1719513463,checkpoint_000012,True,False,13,d78b949d,2024-06-27_20-37-43,76.42547011375427,1040.0453946590424,242786,r8i6n8,10.159.28.66,244.3985710144043,3
550.3660424900805,1719513539,checkpoint_000013,True,False,14,d78b949d,2024-06-27_20-38-59,76.87634134292603,1116.9217360019684,242786,r8i6n8,10.159.28.66,321.2749123573303,4
528.3513029804379,1719513616,checkpoint_000014,True,False,15,d78b949d,2024-06-27_20-40-16,76.89268255233765,1193.814418554306,242786,r8i6n8,10.159.28.66,398.16759490966797,5
507.8953131728285,1719513693,checkpoint_000015,True,False,16,d78b949d,2024-06-27_20-41-33,76.85758066177368,1270.6719992160797,242786,r8i6n8,10.159.28.66,475.02517557144165,6
487.3210242564284,1719513770,checkpoint_000016,True,False,17,d78b949d,2024-06-27_20-42-50,76.83648180961609,1347.5084810256958,242786,r8i6n8,10.159.28.66,551.8616573810577,7
475.32467507189654,1719513846,checkpoint_000017,True,False,18,d78b949d,2024-06-27_20-44-06,76.3912570476532,1423.899738073349,242786,r8i6n8,10.159.28.66,628.2529144287109,8
467.49932188499633,1719513923,checkpoint_000018,True,False,19,d78b949d,2024-06-27_20-45-23,76.9852523803711,1500.88499045372,242786,r8i6n8,10.159.28.66,705.238166809082,9
459.3377106433778,1719514000,checkpoint_000019,True,False,20,d78b949d,2024-06-27_20-46-40,76.87024331092834,1577.7552337646484,242786,r8i6n8,10.159.28.66,782.1084101200104,10
{"loss": 1976.3505897822342, "timestamp": 1719508904, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "d78b949d", "date": "2024-06-27_19-21-44", "time_this_iter_s": 89.9982442855835, "time_total_s": 89.9982442855835, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 89.9982442855835, "iterations_since_restore": 1}
{"loss": 1974.78995543953, "timestamp": 1719508980, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "d78b949d", "date": "2024-06-27_19-23-00", "time_this_iter_s": 76.56657218933105, "time_total_s": 166.56481647491455, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 166.56481647491455, "iterations_since_restore": 2}
{"loss": 1974.6597150667446, "timestamp": 1719509057, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "d78b949d", "date": "2024-06-27_19-24-17", "time_this_iter_s": 76.93186378479004, "time_total_s": 243.4966802597046, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 243.4966802597046, "iterations_since_restore": 3}
{"loss": 1624.830540454294, "timestamp": 1719509134, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "d78b949d", "date": "2024-06-27_19-25-34", "time_this_iter_s": 76.92754554748535, "time_total_s": 320.42422580718994, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 320.42422580718994, "iterations_since_restore": 4}
{"loss": 1135.5229694036047, "timestamp": 1719509211, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "d78b949d", "date": "2024-06-27_19-26-51", "time_this_iter_s": 76.4197883605957, "time_total_s": 396.84401416778564, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 396.84401416778564, "iterations_since_restore": 5}
{"loss": 935.2523327925073, "timestamp": 1719512025, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "d78b949d", "date": "2024-06-27_20-13-45", "time_this_iter_s": 90.96137690544128, "time_total_s": 487.80539107322693, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 90.96137690544128, "iterations_since_restore": 1}
{"loss": 820.0057286540355, "timestamp": 1719512102, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "d78b949d", "date": "2024-06-27_20-15-02", "time_this_iter_s": 77.09065294265747, "time_total_s": 564.8960440158844, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 168.05202984809875, "iterations_since_restore": 2}
{"loss": 743.699236531896, "timestamp": 1719512179, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "d78b949d", "date": "2024-06-27_20-16-19", "time_this_iter_s": 76.6766791343689, "time_total_s": 641.5727231502533, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 244.72870898246765, "iterations_since_restore": 3}
{"loss": 700.0018103892409, "timestamp": 1719512256, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "d78b949d", "date": "2024-06-27_20-17-36", "time_this_iter_s": 77.02537822723389, "time_total_s": 718.5981013774872, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 321.75408720970154, "iterations_since_restore": 4}
{"loss": 663.1148220272514, "timestamp": 1719512333, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "d78b949d", "date": "2024-06-27_20-18-53", "time_this_iter_s": 77.04872226715088, "time_total_s": 795.6468236446381, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 398.8028094768524, "iterations_since_restore": 5}
{"loss": 626.2061512864481, "timestamp": 1719513310, "checkpoint_dir_name": "checkpoint_000010", "should_checkpoint": true, "done": false, "training_iteration": 11, "trial_id": "d78b949d", "date": "2024-06-27_20-35-10", "time_this_iter_s": 91.49215126037598, "time_total_s": 887.138974905014, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 91.49215126037598, "iterations_since_restore": 1}
{"loss": 593.4698202778974, "timestamp": 1719513386, "checkpoint_dir_name": "checkpoint_000011", "should_checkpoint": true, "done": false, "training_iteration": 12, "trial_id": "d78b949d", "date": "2024-06-27_20-36-26", "time_this_iter_s": 76.48094964027405, "time_total_s": 963.6199245452881, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 167.97310090065002, "iterations_since_restore": 2}
{"loss": 573.8050527497539, "timestamp": 1719513463, "checkpoint_dir_name": "checkpoint_000012", "should_checkpoint": true, "done": false, "training_iteration": 13, "trial_id": "d78b949d", "date": "2024-06-27_20-37-43", "time_this_iter_s": 76.42547011375427, "time_total_s": 1040.0453946590424, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 244.3985710144043, "iterations_since_restore": 3}
{"loss": 550.3660424900805, "timestamp": 1719513539, "checkpoint_dir_name": "checkpoint_000013", "should_checkpoint": true, "done": false, "training_iteration": 14, "trial_id": "d78b949d", "date": "2024-06-27_20-38-59", "time_this_iter_s": 76.87634134292603, "time_total_s": 1116.9217360019684, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 321.2749123573303, "iterations_since_restore": 4}
{"loss": 528.3513029804379, "timestamp": 1719513616, "checkpoint_dir_name": "checkpoint_000014", "should_checkpoint": true, "done": false, "training_iteration": 15, "trial_id": "d78b949d", "date": "2024-06-27_20-40-16", "time_this_iter_s": 76.89268255233765, "time_total_s": 1193.814418554306, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 398.16759490966797, "iterations_since_restore": 5}
{"loss": 507.8953131728285, "timestamp": 1719513693, "checkpoint_dir_name": "checkpoint_000015", "should_checkpoint": true, "done": false, "training_iteration": 16, "trial_id": "d78b949d", "date": "2024-06-27_20-41-33", "time_this_iter_s": 76.85758066177368, "time_total_s": 1270.6719992160797, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 475.02517557144165, "iterations_since_restore": 6}
{"loss": 487.3210242564284, "timestamp": 1719513770, "checkpoint_dir_name": "checkpoint_000016", "should_checkpoint": true, "done": false, "training_iteration": 17, "trial_id": "d78b949d", "date": "2024-06-27_20-42-50", "time_this_iter_s": 76.83648180961609, "time_total_s": 1347.5084810256958, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 551.8616573810577, "iterations_since_restore": 7}
{"loss": 475.32467507189654, "timestamp": 1719513846, "checkpoint_dir_name": "checkpoint_000017", "should_checkpoint": true, "done": false, "training_iteration": 18, "trial_id": "d78b949d", "date": "2024-06-27_20-44-06", "time_this_iter_s": 76.3912570476532, "time_total_s": 1423.899738073349, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 628.2529144287109, "iterations_since_restore": 8}
{"loss": 467.49932188499633, "timestamp": 1719513923, "checkpoint_dir_name": "checkpoint_000018", "should_checkpoint": true, "done": false, "training_iteration": 19, "trial_id": "d78b949d", "date": "2024-06-27_20-45-23", "time_this_iter_s": 76.9852523803711, "time_total_s": 1500.88499045372, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 705.238166809082, "iterations_since_restore": 9}
{"loss": 459.3377106433778, "timestamp": 1719514000, "checkpoint_dir_name": "checkpoint_000019", "should_checkpoint": true, "done": false, "training_iteration": 20, "trial_id": "d78b949d", "date": "2024-06-27_20-46-40", "time_this_iter_s": 76.87024331092834, "time_total_s": 1577.7552337646484, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 8, "decoder_rt_num_layer": 1, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 8, "drop_rate": 0.6203685879053435, "lr": 0.00010069872409405673, "batch_size": 2048}, "time_since_restore": 782.1084101200104, "iterations_since_restore": 10}
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 4,
"drop_rate": 0.8381852944111963,
"embedding_dim": 1024,
"encoder_ff": 512,
"encoder_num_layer": 4,
"lr": 0.012410063874557094,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
3407.492049089567,1719525405,checkpoint_000000,True,False,1,dd70a43a,2024-06-27_23-56-45,669.9892313480377,669.9892313480377,242786,r8i6n8,10.159.28.66,669.9892313480377,1
2909.059064729946,1719526061,checkpoint_000001,True,False,2,dd70a43a,2024-06-28_00-07-41,655.8329515457153,1325.822182893753,242786,r8i6n8,10.159.28.66,1325.822182893753,2
2528.305686169722,1719526717,checkpoint_000002,True,False,3,dd70a43a,2024-06-28_00-18-37,656.1143062114716,1981.9364891052246,242786,r8i6n8,10.159.28.66,1981.9364891052246,3
2249.8190595972255,1719527372,checkpoint_000003,True,False,4,dd70a43a,2024-06-28_00-29-33,655.7129034996033,2637.649392604828,242786,r8i6n8,10.159.28.66,2637.649392604828,4
2087.238125057671,1719528028,checkpoint_000004,True,False,5,dd70a43a,2024-06-28_00-40-29,655.7882659435272,3293.437658548355,242786,r8i6n8,10.159.28.66,3293.437658548355,5
2001.7169612373893,1719528684,checkpoint_000005,True,False,6,dd70a43a,2024-06-28_00-51-24,655.2618434429169,3948.699501991272,242786,r8i6n8,10.159.28.66,3948.699501991272,6
1972.5492111566498,1719529337,checkpoint_000006,True,False,7,dd70a43a,2024-06-28_01-02-17,652.9857096672058,4601.685211658478,242786,r8i6n8,10.159.28.66,4601.685211658478,7
1953.9414043276329,1719529988,checkpoint_000007,True,False,8,dd70a43a,2024-06-28_01-13-08,651.4188046455383,5253.104016304016,242786,r8i6n8,10.159.28.66,5253.104016304016,8
1953.827157088152,1719530635,checkpoint_000008,True,False,9,dd70a43a,2024-06-28_01-23-55,646.7677278518677,5899.871744155884,242786,r8i6n8,10.159.28.66,5899.871744155884,9
1955.5640210729885,1719531281,checkpoint_000009,True,False,10,dd70a43a,2024-06-28_01-34-41,645.8664748668671,6545.738219022751,242786,r8i6n8,10.159.28.66,6545.738219022751,10
{"loss": 3407.492049089567, "timestamp": 1719525405, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "dd70a43a", "date": "2024-06-27_23-56-45", "time_this_iter_s": 669.9892313480377, "time_total_s": 669.9892313480377, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 669.9892313480377, "iterations_since_restore": 1}
{"loss": 2909.059064729946, "timestamp": 1719526061, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "dd70a43a", "date": "2024-06-28_00-07-41", "time_this_iter_s": 655.8329515457153, "time_total_s": 1325.822182893753, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 1325.822182893753, "iterations_since_restore": 2}
{"loss": 2528.305686169722, "timestamp": 1719526717, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "dd70a43a", "date": "2024-06-28_00-18-37", "time_this_iter_s": 656.1143062114716, "time_total_s": 1981.9364891052246, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 1981.9364891052246, "iterations_since_restore": 3}
{"loss": 2249.8190595972255, "timestamp": 1719527372, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "dd70a43a", "date": "2024-06-28_00-29-33", "time_this_iter_s": 655.7129034996033, "time_total_s": 2637.649392604828, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 2637.649392604828, "iterations_since_restore": 4}
{"loss": 2087.238125057671, "timestamp": 1719528028, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "dd70a43a", "date": "2024-06-28_00-40-29", "time_this_iter_s": 655.7882659435272, "time_total_s": 3293.437658548355, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 3293.437658548355, "iterations_since_restore": 5}
{"loss": 2001.7169612373893, "timestamp": 1719528684, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "dd70a43a", "date": "2024-06-28_00-51-24", "time_this_iter_s": 655.2618434429169, "time_total_s": 3948.699501991272, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 3948.699501991272, "iterations_since_restore": 6}
{"loss": 1972.5492111566498, "timestamp": 1719529337, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "dd70a43a", "date": "2024-06-28_01-02-17", "time_this_iter_s": 652.9857096672058, "time_total_s": 4601.685211658478, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 4601.685211658478, "iterations_since_restore": 7}
{"loss": 1953.9414043276329, "timestamp": 1719529988, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "dd70a43a", "date": "2024-06-28_01-13-08", "time_this_iter_s": 651.4188046455383, "time_total_s": 5253.104016304016, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 5253.104016304016, "iterations_since_restore": 8}
{"loss": 1953.827157088152, "timestamp": 1719530635, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "dd70a43a", "date": "2024-06-28_01-23-55", "time_this_iter_s": 646.7677278518677, "time_total_s": 5899.871744155884, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 5899.871744155884, "iterations_since_restore": 9}
{"loss": 1955.5640210729885, "timestamp": 1719531281, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "dd70a43a", "date": "2024-06-28_01-34-41", "time_this_iter_s": 645.8664748668671, "time_total_s": 6545.738219022751, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8381852944111963, "lr": 0.012410063874557094, "batch_size": 1024}, "time_since_restore": 6545.738219022751, "iterations_since_restore": 10}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment