Skip to content
Snippets Groups Projects
Commit 915e419e authored by Léo Schneider's avatar Léo Schneider Committed by Schneider Leo
Browse files

res

parent a790c5c2
No related branches found
No related tags found
No related merge requests found
Showing
with 120 additions and 0 deletions
{
"batch_size": 1024,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 4,
"drop_rate": 0.8131411649283373,
"embedding_dim": 64,
"encoder_ff": 2048,
"encoder_num_layer": 4,
"lr": 0.001721462936357609,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2018.4078945850763,1719517449,checkpoint_000000,True,False,1,126476e9,2024-06-27_21-44-09,88.88545632362366,88.88545632362366,242786,r8i6n8,10.159.28.66,88.88545632362366,1
2020.5109916146346,1719517524,checkpoint_000001,True,False,2,126476e9,2024-06-27_21-45-25,75.36214351654053,164.24759984016418,242786,r8i6n8,10.159.28.66,164.24759984016418,2
2015.3791253998525,1719517600,checkpoint_000002,True,False,3,126476e9,2024-06-27_21-46-40,75.27645611763,239.5240559577942,242786,r8i6n8,10.159.28.66,239.5240559577942,3
2016.1682864211675,1719517675,checkpoint_000003,True,False,4,126476e9,2024-06-27_21-47-55,75.18996810913086,314.71402406692505,242786,r8i6n8,10.159.28.66,314.71402406692505,4
2017.8277967558133,1719517750,checkpoint_000004,True,False,5,126476e9,2024-06-27_21-49-10,75.32126784324646,390.0352919101715,242786,r8i6n8,10.159.28.66,390.0352919101715,5
2015.020520788478,1719517826,checkpoint_000005,True,False,6,126476e9,2024-06-27_21-50-26,75.29626226425171,465.3315541744232,242786,r8i6n8,10.159.28.66,465.3315541744232,6
2009.168837659941,1719517901,checkpoint_000006,True,False,7,126476e9,2024-06-27_21-51-41,75.25082015991211,540.5823743343353,242786,r8i6n8,10.159.28.66,540.5823743343353,7
2007.0465179203063,1719517976,checkpoint_000007,True,False,8,126476e9,2024-06-27_21-52-56,75.2639148235321,615.8462891578674,242786,r8i6n8,10.159.28.66,615.8462891578674,8
2014.9992853600208,1719518051,checkpoint_000008,True,False,9,126476e9,2024-06-27_21-54-11,75.32482957839966,691.1711187362671,242786,r8i6n8,10.159.28.66,691.1711187362671,9
2009.24755859375,1719518127,checkpoint_000009,True,False,10,126476e9,2024-06-27_21-55-27,75.30808591842651,766.4792046546936,242786,r8i6n8,10.159.28.66,766.4792046546936,10
{"loss": 2018.4078945850763, "timestamp": 1719517449, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "126476e9", "date": "2024-06-27_21-44-09", "time_this_iter_s": 88.88545632362366, "time_total_s": 88.88545632362366, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 88.88545632362366, "iterations_since_restore": 1}
{"loss": 2020.5109916146346, "timestamp": 1719517524, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "126476e9", "date": "2024-06-27_21-45-25", "time_this_iter_s": 75.36214351654053, "time_total_s": 164.24759984016418, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 164.24759984016418, "iterations_since_restore": 2}
{"loss": 2015.3791253998525, "timestamp": 1719517600, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "126476e9", "date": "2024-06-27_21-46-40", "time_this_iter_s": 75.27645611763, "time_total_s": 239.5240559577942, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 239.5240559577942, "iterations_since_restore": 3}
{"loss": 2016.1682864211675, "timestamp": 1719517675, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "126476e9", "date": "2024-06-27_21-47-55", "time_this_iter_s": 75.18996810913086, "time_total_s": 314.71402406692505, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 314.71402406692505, "iterations_since_restore": 4}
{"loss": 2017.8277967558133, "timestamp": 1719517750, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "126476e9", "date": "2024-06-27_21-49-10", "time_this_iter_s": 75.32126784324646, "time_total_s": 390.0352919101715, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 390.0352919101715, "iterations_since_restore": 5}
{"loss": 2015.020520788478, "timestamp": 1719517826, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "126476e9", "date": "2024-06-27_21-50-26", "time_this_iter_s": 75.29626226425171, "time_total_s": 465.3315541744232, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 465.3315541744232, "iterations_since_restore": 6}
{"loss": 2009.168837659941, "timestamp": 1719517901, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "126476e9", "date": "2024-06-27_21-51-41", "time_this_iter_s": 75.25082015991211, "time_total_s": 540.5823743343353, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 540.5823743343353, "iterations_since_restore": 7}
{"loss": 2007.0465179203063, "timestamp": 1719517976, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "126476e9", "date": "2024-06-27_21-52-56", "time_this_iter_s": 75.2639148235321, "time_total_s": 615.8462891578674, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 615.8462891578674, "iterations_since_restore": 8}
{"loss": 2014.9992853600208, "timestamp": 1719518051, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "126476e9", "date": "2024-06-27_21-54-11", "time_this_iter_s": 75.32482957839966, "time_total_s": 691.1711187362671, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 691.1711187362671, "iterations_since_restore": 9}
{"loss": 2009.24755859375, "timestamp": 1719518127, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "126476e9", "date": "2024-06-27_21-55-27", "time_this_iter_s": 75.30808591842651, "time_total_s": 766.4792046546936, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 4, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 64, "encoder_ff": 2048, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.8131411649283373, "lr": 0.001721462936357609, "batch_size": 1024}, "time_since_restore": 766.4792046546936, "iterations_since_restore": 10}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 2048,
"decoder_rt_num_layer": 2,
"drop_rate": 0.6087504471626306,
"embedding_dim": 16,
"encoder_ff": 1024,
"encoder_num_layer": 2,
"lr": 0.0008285643583924775,
"n_head": 16
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
965.1062155896284,1719538848,checkpoint_000000,True,False,1,157597b6,2024-06-28_03-40-48,71.3250789642334,71.3250789642334,242786,r8i6n8,10.159.28.66,71.3250789642334,1
533.6004703551765,1719538906,checkpoint_000001,True,False,2,157597b6,2024-06-28_03-41-46,57.970258474349976,129.29533743858337,242786,r8i6n8,10.159.28.66,129.29533743858337,2
439.40802842988745,1719538964,checkpoint_000002,True,False,3,157597b6,2024-06-28_03-42-44,57.57848072052002,186.8738181591034,242786,r8i6n8,10.159.28.66,186.8738181591034,3
389.7957477719765,1719539022,checkpoint_000003,True,False,4,157597b6,2024-06-28_03-43-42,57.97455716133118,244.84837532043457,242786,r8i6n8,10.159.28.66,244.84837532043457,4
352.0992381178488,1719539079,checkpoint_000004,True,False,5,157597b6,2024-06-28_03-44-39,57.5468053817749,302.3951807022095,242786,r8i6n8,10.159.28.66,302.3951807022095,5
335.8013344111405,1719539137,checkpoint_000005,True,False,6,157597b6,2024-06-28_03-45-37,58.07193851470947,360.46711921691895,242786,r8i6n8,10.159.28.66,360.46711921691895,6
311.3050854299951,1719539195,checkpoint_000006,True,False,7,157597b6,2024-06-28_03-46-35,58.00367760658264,418.4707968235016,242786,r8i6n8,10.159.28.66,418.4707968235016,7
296.5074265847995,1719539252,checkpoint_000007,True,False,8,157597b6,2024-06-28_03-47-32,57.036139488220215,475.5069363117218,242786,r8i6n8,10.159.28.66,475.5069363117218,8
282.12910100981946,1719539310,checkpoint_000008,True,False,9,157597b6,2024-06-28_03-48-30,57.58231806755066,533.0892543792725,242786,r8i6n8,10.159.28.66,533.0892543792725,9
265.69599794402836,1719539367,checkpoint_000009,True,False,10,157597b6,2024-06-28_03-49-27,57.52658176422119,590.6158361434937,242786,r8i6n8,10.159.28.66,590.6158361434937,10
{"loss": 965.1062155896284, "timestamp": 1719538848, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "157597b6", "date": "2024-06-28_03-40-48", "time_this_iter_s": 71.3250789642334, "time_total_s": 71.3250789642334, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 71.3250789642334, "iterations_since_restore": 1}
{"loss": 533.6004703551765, "timestamp": 1719538906, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "157597b6", "date": "2024-06-28_03-41-46", "time_this_iter_s": 57.970258474349976, "time_total_s": 129.29533743858337, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 129.29533743858337, "iterations_since_restore": 2}
{"loss": 439.40802842988745, "timestamp": 1719538964, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "157597b6", "date": "2024-06-28_03-42-44", "time_this_iter_s": 57.57848072052002, "time_total_s": 186.8738181591034, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 186.8738181591034, "iterations_since_restore": 3}
{"loss": 389.7957477719765, "timestamp": 1719539022, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "157597b6", "date": "2024-06-28_03-43-42", "time_this_iter_s": 57.97455716133118, "time_total_s": 244.84837532043457, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 244.84837532043457, "iterations_since_restore": 4}
{"loss": 352.0992381178488, "timestamp": 1719539079, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "157597b6", "date": "2024-06-28_03-44-39", "time_this_iter_s": 57.5468053817749, "time_total_s": 302.3951807022095, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 302.3951807022095, "iterations_since_restore": 5}
{"loss": 335.8013344111405, "timestamp": 1719539137, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "157597b6", "date": "2024-06-28_03-45-37", "time_this_iter_s": 58.07193851470947, "time_total_s": 360.46711921691895, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 360.46711921691895, "iterations_since_restore": 6}
{"loss": 311.3050854299951, "timestamp": 1719539195, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "157597b6", "date": "2024-06-28_03-46-35", "time_this_iter_s": 58.00367760658264, "time_total_s": 418.4707968235016, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 418.4707968235016, "iterations_since_restore": 7}
{"loss": 296.5074265847995, "timestamp": 1719539252, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "157597b6", "date": "2024-06-28_03-47-32", "time_this_iter_s": 57.036139488220215, "time_total_s": 475.5069363117218, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 475.5069363117218, "iterations_since_restore": 8}
{"loss": 282.12910100981946, "timestamp": 1719539310, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "157597b6", "date": "2024-06-28_03-48-30", "time_this_iter_s": 57.58231806755066, "time_total_s": 533.0892543792725, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 533.0892543792725, "iterations_since_restore": 9}
{"loss": 265.69599794402836, "timestamp": 1719539367, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "157597b6", "date": "2024-06-28_03-49-27", "time_this_iter_s": 57.52658176422119, "time_total_s": 590.6158361434937, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 2, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 1024, "decoder_rt_ff": 2048, "decoder_int_ff": 512, "n_head": 16, "drop_rate": 0.6087504471626306, "lr": 0.0008285643583924775, "batch_size": 2048}, "time_since_restore": 590.6158361434937, "iterations_since_restore": 10}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 1024,
"decoder_rt_num_layer": 4,
"drop_rate": 0.28239791341845644,
"embedding_dim": 1024,
"encoder_ff": 512,
"encoder_num_layer": 2,
"lr": 0.0010916820440167283,
"n_head": 1
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
2012.1308699480192,1719499938,checkpoint_000000,True,False,1,1cd059c8,2024-06-27_16-52-19,525.3962342739105,525.3962342739105,242786,r8i6n8,10.159.28.66,525.3962342739105,1
2025.7421327125369,1719500447,checkpoint_000001,True,False,2,1cd059c8,2024-06-27_17-00-47,508.4716944694519,1033.8679287433624,242786,r8i6n8,10.159.28.66,1033.8679287433624,2
{"loss": 2012.1308699480192, "timestamp": 1719499938, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "1cd059c8", "date": "2024-06-27_16-52-19", "time_this_iter_s": 525.3962342739105, "time_total_s": 525.3962342739105, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.28239791341845644, "lr": 0.0010916820440167283, "batch_size": 2048}, "time_since_restore": 525.3962342739105, "iterations_since_restore": 1}
{"loss": 2025.7421327125369, "timestamp": 1719500447, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "1cd059c8", "date": "2024-06-27_17-00-47", "time_this_iter_s": 508.4716944694519, "time_total_s": 1033.8679287433624, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 4, "decoder_int_num_layer": 1, "embedding_dim": 1024, "encoder_ff": 512, "decoder_rt_ff": 1024, "decoder_int_ff": 512, "n_head": 1, "drop_rate": 0.28239791341845644, "lr": 0.0010916820440167283, "batch_size": 2048}, "time_since_restore": 1033.8679287433624, "iterations_since_restore": 2}
{
"batch_size": 2048,
"decoder_int_ff": 512,
"decoder_int_num_layer": 1,
"decoder_rt_ff": 512,
"decoder_rt_num_layer": 8,
"drop_rate": 0.04978685082912593,
"embedding_dim": 16,
"encoder_ff": 2048,
"encoder_num_layer": 2,
"lr": 0.010469557594705402,
"n_head": 4
}
\ No newline at end of file
loss,timestamp,checkpoint_dir_name,should_checkpoint,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
1959.484931525283,1719520982,checkpoint_000000,True,False,1,204212fd,2024-06-27_22-43-02,79.25631952285767,79.25631952285767,242786,r8i6n8,10.159.28.66,79.25631952285767,1
1957.8900761641855,1719521048,checkpoint_000001,True,False,2,204212fd,2024-06-27_22-44-08,65.62229895591736,144.87861847877502,242786,r8i6n8,10.159.28.66,144.87861847877502,2
2003.831690029835,1719521114,checkpoint_000002,True,False,3,204212fd,2024-06-27_22-45-14,65.94834351539612,210.82696199417114,242786,r8i6n8,10.159.28.66,210.82696199417114,3
1953.7773927703618,1719521179,checkpoint_000003,True,False,4,204212fd,2024-06-27_22-46-19,65.82598686218262,276.65294885635376,242786,r8i6n8,10.159.28.66,276.65294885635376,4
1955.5008218119463,1719521245,checkpoint_000004,True,False,5,204212fd,2024-06-27_22-47-25,65.71243810653687,342.3653869628906,242786,r8i6n8,10.159.28.66,342.3653869628906,5
1954.6096777728224,1719521311,checkpoint_000005,True,False,6,204212fd,2024-06-27_22-48-31,65.39359998703003,407.75898694992065,242786,r8i6n8,10.159.28.66,407.75898694992065,6
1970.7395346333662,1719521376,checkpoint_000006,True,False,7,204212fd,2024-06-27_22-49-36,65.79213690757751,473.55112385749817,242786,r8i6n8,10.159.28.66,473.55112385749817,7
1975.91074161079,1719521442,checkpoint_000007,True,False,8,204212fd,2024-06-27_22-50-42,65.49610257148743,539.0472264289856,242786,r8i6n8,10.159.28.66,539.0472264289856,8
1954.2565735343874,1719521508,checkpoint_000008,True,False,9,204212fd,2024-06-27_22-51-48,65.86425971984863,604.9114861488342,242786,r8i6n8,10.159.28.66,604.9114861488342,9
1955.8791032926304,1719521574,checkpoint_000009,True,False,10,204212fd,2024-06-27_22-52-54,65.99194502830505,670.9034311771393,242786,r8i6n8,10.159.28.66,670.9034311771393,10
{"loss": 1959.484931525283, "timestamp": 1719520982, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 1, "trial_id": "204212fd", "date": "2024-06-27_22-43-02", "time_this_iter_s": 79.25631952285767, "time_total_s": 79.25631952285767, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 79.25631952285767, "iterations_since_restore": 1}
{"loss": 1957.8900761641855, "timestamp": 1719521048, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": false, "training_iteration": 2, "trial_id": "204212fd", "date": "2024-06-27_22-44-08", "time_this_iter_s": 65.62229895591736, "time_total_s": 144.87861847877502, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 144.87861847877502, "iterations_since_restore": 2}
{"loss": 2003.831690029835, "timestamp": 1719521114, "checkpoint_dir_name": "checkpoint_000002", "should_checkpoint": true, "done": false, "training_iteration": 3, "trial_id": "204212fd", "date": "2024-06-27_22-45-14", "time_this_iter_s": 65.94834351539612, "time_total_s": 210.82696199417114, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 210.82696199417114, "iterations_since_restore": 3}
{"loss": 1953.7773927703618, "timestamp": 1719521179, "checkpoint_dir_name": "checkpoint_000003", "should_checkpoint": true, "done": false, "training_iteration": 4, "trial_id": "204212fd", "date": "2024-06-27_22-46-19", "time_this_iter_s": 65.82598686218262, "time_total_s": 276.65294885635376, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 276.65294885635376, "iterations_since_restore": 4}
{"loss": 1955.5008218119463, "timestamp": 1719521245, "checkpoint_dir_name": "checkpoint_000004", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "204212fd", "date": "2024-06-27_22-47-25", "time_this_iter_s": 65.71243810653687, "time_total_s": 342.3653869628906, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 342.3653869628906, "iterations_since_restore": 5}
{"loss": 1954.6096777728224, "timestamp": 1719521311, "checkpoint_dir_name": "checkpoint_000005", "should_checkpoint": true, "done": false, "training_iteration": 6, "trial_id": "204212fd", "date": "2024-06-27_22-48-31", "time_this_iter_s": 65.39359998703003, "time_total_s": 407.75898694992065, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 407.75898694992065, "iterations_since_restore": 6}
{"loss": 1970.7395346333662, "timestamp": 1719521376, "checkpoint_dir_name": "checkpoint_000006", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "204212fd", "date": "2024-06-27_22-49-36", "time_this_iter_s": 65.79213690757751, "time_total_s": 473.55112385749817, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 473.55112385749817, "iterations_since_restore": 7}
{"loss": 1975.91074161079, "timestamp": 1719521442, "checkpoint_dir_name": "checkpoint_000007", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "204212fd", "date": "2024-06-27_22-50-42", "time_this_iter_s": 65.49610257148743, "time_total_s": 539.0472264289856, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 539.0472264289856, "iterations_since_restore": 8}
{"loss": 1954.2565735343874, "timestamp": 1719521508, "checkpoint_dir_name": "checkpoint_000008", "should_checkpoint": true, "done": false, "training_iteration": 9, "trial_id": "204212fd", "date": "2024-06-27_22-51-48", "time_this_iter_s": 65.86425971984863, "time_total_s": 604.9114861488342, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 604.9114861488342, "iterations_since_restore": 9}
{"loss": 1955.8791032926304, "timestamp": 1719521574, "checkpoint_dir_name": "checkpoint_000009", "should_checkpoint": true, "done": false, "training_iteration": 10, "trial_id": "204212fd", "date": "2024-06-27_22-52-54", "time_this_iter_s": 65.99194502830505, "time_total_s": 670.9034311771393, "pid": 242786, "hostname": "r8i6n8", "node_ip": "10.159.28.66", "config": {"encoder_num_layer": 2, "decoder_rt_num_layer": 8, "decoder_int_num_layer": 1, "embedding_dim": 16, "encoder_ff": 2048, "decoder_rt_ff": 512, "decoder_int_ff": 512, "n_head": 4, "drop_rate": 0.04978685082912593, "lr": 0.010469557594705402, "batch_size": 2048}, "time_since_restore": 670.9034311771393, "iterations_since_restore": 10}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment