summaries
69 rows
This data as json, CSV (advanced)
Suggested facets: task_name, total_examples, duration_human
| id ▼ | task_name | model_tag | total_examples | correct | accuracy | no_answer_count | stop_reason_counts | duration_human | pass_k | temperature | top_p | max_tokens | error | model |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_50 | 1319 | 981 | 0.7437452615617892 | 7 | {"stop:-": 1312, "length:-": 7} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_50/actor/huggingface | |
| 2 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_50 | 5000 | 2493 | 0.4986 | 626 | {"stop:-": 4447, "length:-": 552, "stop:Problem:": 1} | 2m 34s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_50/actor/huggingface | |
| 3 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_100 | 1319 | 993 | 0.7528430629264594 | 10 | {"stop:-": 1309, "length:-": 10} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_100/actor/huggingface | |
| 4 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_100 | 5000 | 2507 | 0.5014 | 624 | {"stop:-": 4452, "length:-": 546, "stop:Problem:": 2} | 2m 32s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_100/actor/huggingface | |
| 5 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_150 | 1319 | 1019 | 0.7725549658832449 | 10 | {"stop:-": 1309, "length:-": 10} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_150/actor/huggingface | |
| 6 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_150 | 5000 | 2501 | 0.5002 | 650 | {"stop:-": 4428, "length:-": 570, "stop:Problem:": 2} | 2m 33s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_150/actor/huggingface | |
| 7 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_200 | 1319 | 1034 | 0.7839272175890827 | 7 | {"stop:-": 1312, "length:-": 7} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_200/actor/huggingface | |
| 8 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_200 | 5000 | 2500 | 0.5 | 677 | {"stop:-": 4404, "length:-": 595, "stop:Problem:": 1} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_200/actor/huggingface | |
| 9 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_250 | 1319 | 1053 | 0.7983320697498104 | 9 | {"stop:-": 1310, "length:-": 9} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_250/actor/huggingface | |
| 10 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_250 | 5000 | 2531 | 0.5062 | 625 | {"stop:-": 4443, "length:-": 557} | 2m 37s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_250/actor/huggingface | |
| 11 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_300 | 1319 | 1058 | 0.8021228203184231 | 7 | {"stop:-": 1312, "length:-": 7} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_300/actor/huggingface | |
| 12 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_300 | 5000 | 2516 | 0.5032 | 659 | {"stop:-": 4405, "length:-": 595} | 2m 37s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_300/actor/huggingface | |
| 13 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_350 | 1319 | 1064 | 0.8066717210007581 | 7 | {"stop:-": 1312, "length:-": 7} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_350/actor/huggingface | |
| 14 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_350 | 5000 | 2540 | 0.508 | 662 | {"stop:-": 4407, "length:-": 591, "stop:Problem:": 2} | 2m 33s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_350/actor/huggingface | |
| 15 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_400 | 1319 | 1095 | 0.8301743745261562 | 1 | {"stop:-": 1318, "length:-": 1} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_400/actor/huggingface | |
| 16 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_400 | 5000 | 2526 | 0.5052 | 644 | {"stop:-": 4432, "length:-": 567, "stop:Problem:": 1} | 2m 34s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_400/actor/huggingface | |
| 17 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_450 | 1319 | 1098 | 0.8324488248673237 | 6 | {"stop:-": 1313, "length:-": 6} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_450/actor/huggingface | |
| 18 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_450 | 5000 | 2570 | 0.514 | 662 | {"stop:-": 4411, "length:-": 586, "stop:Problem:": 3} | 2m 33s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_450/actor/huggingface | |
| 19 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_500 | 1319 | 1113 | 0.8438210765731615 | 3 | {"stop:-": 1316, "length:-": 3} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_500/actor/huggingface | |
| 20 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_500 | 5000 | 2549 | 0.5098 | 667 | {"stop:-": 4412, "length:-": 585, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_500/actor/huggingface | |
| 21 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_550 | 1319 | 1115 | 0.8453373768006065 | 4 | {"stop:-": 1315, "length:-": 4} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_550/actor/huggingface | |
| 22 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_550 | 5000 | 2561 | 0.5122 | 637 | {"stop:-": 4437, "length:-": 558, "stop:Problem:": 5} | 2m 34s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_550/actor/huggingface | |
| 23 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_600 | 1319 | 1149 | 0.8711144806671721 | 3 | {"stop:-": 1316, "length:-": 3} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_600/actor/huggingface | |
| 24 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_600 | 5000 | 2547 | 0.5094 | 658 | {"stop:-": 4420, "length:-": 576, "stop:Problem:": 4} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_600/actor/huggingface | |
| 25 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_650 | 1319 | 1152 | 0.8733889310083397 | 3 | {"stop:-": 1316, "length:-": 3} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_650/actor/huggingface | |
| 26 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_650 | 5000 | 2552 | 0.5104 | 640 | {"stop:-": 4428, "length:-": 570, "stop:Problem:": 2} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_650/actor/huggingface | |
| 27 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_700 | 1319 | 1141 | 0.865049279757392 | 4 | {"stop:-": 1315, "length:-": 4} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_700/actor/huggingface | |
| 28 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_700 | 5000 | 2558 | 0.5116 | 646 | {"stop:-": 4436, "length:-": 560, "stop:Problem:": 4} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_700/actor/huggingface | |
| 29 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_750 | 1319 | 1151 | 0.8726307808946171 | 2 | {"stop:-": 1317, "length:-": 2} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_750/actor/huggingface | |
| 30 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_750 | 5000 | 2532 | 0.5064 | 662 | {"stop:-": 4410, "length:-": 588, "stop:Problem:": 2} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_750/actor/huggingface | |
| 31 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_800 | 1319 | 1169 | 0.8862774829416225 | 3 | {"stop:-": 1315, "length:-": 4} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_800/actor/huggingface | |
| 32 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_800 | 5000 | 2573 | 0.5146 | 665 | {"stop:-": 4413, "length:-": 584, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_800/actor/huggingface | |
| 33 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_850 | 1319 | 1187 | 0.8999241849886277 | 5 | {"stop:-": 1314, "length:-": 5} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_850/actor/huggingface | |
| 34 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_850 | 5000 | 2547 | 0.5094 | 626 | {"stop:-": 4451, "length:-": 545, "stop:Problem:": 4} | 2m 34s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_850/actor/huggingface | |
| 35 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_900 | 1319 | 1197 | 0.9075056861258529 | 6 | {"stop:-": 1313, "length:-": 6} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_900/actor/huggingface | |
| 36 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_900 | 5000 | 2535 | 0.507 | 607 | {"stop:-": 4479, "length:-": 519, "stop:Problem:": 2} | 2m 31s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_900/actor/huggingface | |
| 37 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_950 | 1319 | 1213 | 0.9196360879454132 | 2 | {"stop:-": 1316, "length:-": 3} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_950/actor/huggingface | |
| 38 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_950 | 5000 | 2547 | 0.5094 | 571 | {"stop:-": 4514, "length:-": 485, "stop:Problem:": 1} | 2m 29s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_950/actor/huggingface | |
| 39 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1000 | 1319 | 1207 | 0.9150871872630781 | 2 | {"stop:-": 1317, "length:-": 2} | 22s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1000/actor/huggingface | |
| 40 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1000 | 5000 | 2574 | 0.5148 | 596 | {"stop:-": 4486, "length:-": 513, "stop:Problem:": 1} | 2m 31s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1000/actor/huggingface | |
| 41 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1050 | 1319 | 1204 | 0.9128127369219106 | 1 | {"stop:-": 1318, "length:-": 1} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1050/actor/huggingface | |
| 42 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1050 | 5000 | 2525 | 0.505 | 644 | {"stop:-": 4439, "length:-": 558, "stop:Problem:": 3} | 2m 33s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1050/actor/huggingface | |
| 43 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1100 | 1319 | 1219 | 0.9241849886277483 | 2 | {"stop:-": 1317, "length:-": 2} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1100/actor/huggingface | |
| 44 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1100 | 5000 | 2551 | 0.5102 | 631 | {"stop:-": 4440, "length:-": 557, "stop:Problem:": 3} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1100/actor/huggingface | |
| 45 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1150 | 1319 | 1223 | 0.9272175890826384 | 1 | {"stop:-": 1318, "length:-": 1} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1150/actor/huggingface | |
| 46 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1150 | 5000 | 2572 | 0.5144 | 631 | {"stop:-": 4447, "length:-": 550, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1150/actor/huggingface | |
| 47 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1200 | 1319 | 1232 | 0.934040940106141 | 4 | {"stop:-": 1315, "length:-": 4} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1200/actor/huggingface | |
| 48 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1200 | 5000 | 2559 | 0.5118 | 643 | {"stop:-": 4425, "length:-": 572, "stop:Problem:": 3} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1200/actor/huggingface | |
| 49 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1250 | 1319 | 1241 | 0.9408642911296436 | 1 | {"stop:-": 1318, "length:-": 1} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1250/actor/huggingface | |
| 50 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1250 | 5000 | 2581 | 0.5162 | 628 | {"stop:-": 4446, "length:-": 551, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1250/actor/huggingface | |
| 51 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1300 | 1319 | 1237 | 0.9378316906747536 | 2 | {"stop:-": 1317, "length:-": 2} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1300/actor/huggingface | |
| 52 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1300 | 5000 | 2588 | 0.5176 | 608 | {"stop:-": 4470, "length:-": 527, "stop:Problem:": 3} | 2m 34s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1300/actor/huggingface | |
| 53 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1350 | 1319 | 1243 | 0.9423805913570887 | 2 | {"stop:-": 1317, "length:-": 2} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1350/actor/huggingface | |
| 54 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1350 | 5000 | 2577 | 0.5154 | 612 | {"stop:-": 4459, "length:-": 538, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1350/actor/huggingface | |
| 55 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1400 | 1319 | 1249 | 0.9469294920394238 | 0 | {"stop:-": 1319} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1400/actor/huggingface | |
| 56 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1400 | 5000 | 2577 | 0.5154 | 629 | {"stop:-": 4443, "length:-": 554, "stop:Problem:": 3} | 2m 37s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1400/actor/huggingface | |
| 57 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1450 | 1319 | 1247 | 0.9454131918119788 | 0 | {"stop:-": 1319} | 22s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1450/actor/huggingface | |
| 58 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1450 | 5000 | 2560 | 0.512 | 663 | {"stop:-": 4404, "length:-": 593, "stop:Problem:": 3} | 2m 37s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1450/actor/huggingface | |
| 59 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1500 | 1319 | 1247 | 0.9454131918119788 | 1 | {"stop:-": 1317, "length:-": 2} | 22s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1500/actor/huggingface | |
| 60 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1500 | 5000 | 2589 | 0.5178 | 620 | {"stop:-": 4456, "length:-": 541, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1500/actor/huggingface | |
| 61 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1550 | 1319 | 1264 | 0.9583017437452616 | 0 | {"stop:-": 1319} | 22s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1550/actor/huggingface | |
| 62 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1550 | 5000 | 2585 | 0.517 | 633 | {"stop:-": 4445, "length:-": 552, "stop:Problem:": 3} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1550/actor/huggingface | |
| 63 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1600 | 1319 | 1254 | 0.9507202426080363 | 0 | {"stop:-": 1319} | 22s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1600/actor/huggingface | |
| 64 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1600 | 5000 | 2596 | 0.5192 | 620 | {"stop:-": 4443, "length:-": 555, "stop:Problem:": 2} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1600/actor/huggingface | |
| 65 | gsm8k_main(0) | grpo-gsm8k-test-30ep-success_global_step_1620 | 1319 | 1255 | 0.9514783927217589 | 2 | {"stop:-": 1317, "length:-": 2} | 22s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1620/actor/huggingface | |
| 66 | hendrycks_math(0) | grpo-gsm8k-test-30ep-success_global_step_1620 | 5000 | 2585 | 0.517 | 633 | {"stop:-": 4439, "length:-": 559, "stop:Problem:": 2} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-gsm8k-test-30ep-success/global_step_1620/actor/huggingface | |
| 67 | gsm8k_main(0) | Qwen_Qwen2.5-1.5B-Instruct | 1319 | 920 | 0.6974981046247157 | 44 | {"stop:-": 1311, "length:-": 8} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/2data/Documents/safetensors/Qwen_Qwen2.5-1.5B-Instruct | |
| 68 | hendrycks_math(0) | Qwen_Qwen2.5-1.5B-Instruct | 5000 | 2457 | 0.4914 | 604 | {"stop:-": 4475, "length:-": 519, "stop:Problem:": 6} | 2m 30s | 1 | 0.0 | 0.95 | 1024 | /mnt/2data/Documents/safetensors/Qwen_Qwen2.5-1.5B-Instruct | |
| 69 | TOTAL | 34 models | 214846 | 125885 | 0.585931318246558 | 21764 | {} | 1h 45m 38s | 1 | 0.0 | 0.95 | 1024 | Models: grpo-gsm8k-test-30ep-success_global_step_50, grpo-gsm8k-test-30ep-success_global_step_100, grpo-gsm8k-test-30ep-success_global_step_150, grpo-gsm8k-test-30ep-success_global_step_200, grpo-gsm8k-test-30ep-success_global_step_250, grpo-gsm8k-test-30ep-success_global_step_300, grpo-gsm8k-test-30ep-success_global_step_350, grpo-gsm8k-test-30ep-success_global_step_400, grpo-gsm8k-test-30ep-success_global_step_450, grpo-gsm8k-test-30ep-success_global_step_500, grpo-gsm8k-test-30ep-success_global_step_550, grpo-gsm8k-test-30ep-success_global_step_600, grpo-gsm8k-test-30ep-success_global_step_650, grpo-gsm8k-test-30ep-success_global_step_700, grpo-gsm8k-test-30ep-success_global_step_750, grpo-gsm8k-test-30ep-success_global_step_800, grpo-gsm8k-test-30ep-success_global_step_850, grpo-gsm8k-test-30ep-success_global_step_900, grpo-gsm8k-test-30ep-success_global_step_950, grpo-gsm8k-test-30ep-success_global_step_1000, grpo-gsm8k-test-30ep-success_global_step_1050, grpo-gsm8k-test-30ep-success_global_step_1100, grpo-gsm8k-test-30ep-success_global_step_1150, grpo-gsm8k-test-30ep-success_global_step_1200, grpo-gsm8k-test-30ep-success_global_step_1250, grpo-gsm8k-test-30ep-success_global_step_1300, grpo-gsm8k-test-30ep-success_global_step_1350, grpo-gsm8k-test-30ep-success_global_step_1400, grpo-gsm8k-test-30ep-success_global_step_1450, grpo-gsm8k-test-30ep-success_global_step_1500, grpo-gsm8k-test-30ep-success_global_step_1550, grpo-gsm8k-test-30ep-success_global_step_1600, grpo-gsm8k-test-30ep-success_global_step_1620, Qwen_Qwen2.5-1.5B-Instruct | Tasks: gsm8k_main(0), hendrycks_math(0) |
Advanced export
JSON shape: default, array, newline-delimited, object
-- summaries: one row per (task, model checkpoint) evaluation run.
-- The exported data also contains an aggregate row whose task_name is 'TOTAL'.
-- IF NOT EXISTS makes the statement safe to re-run against an existing database.
CREATE TABLE IF NOT EXISTS summaries (
    id                 INTEGER PRIMARY KEY AUTOINCREMENT,  -- surrogate key
    task_name          TEXT NOT NULL,   -- e.g. 'gsm8k_main(0)', 'hendrycks_math(0)', or 'TOTAL'
    model_tag          TEXT NOT NULL,   -- short model/checkpoint label, e.g. 'Qwen_Qwen2.5-1.5B-Instruct'
    total_examples     INTEGER,         -- number of examples evaluated
    correct            INTEGER,         -- number of examples counted as correct
    accuracy           REAL,            -- equals correct / total_examples in the exported rows
    no_answer_count    INTEGER,         -- presumably examples with no extractable answer; TODO confirm
    stop_reason_counts TEXT,            -- JSON object stored as text, e.g. {"stop:-": 1312, "length:-": 7}
    duration_human     TEXT,            -- human-readable wall time, e.g. '26s', '2m 34s', '1h 45m 38s'
    pass_k             INTEGER,         -- presumably the k of pass@k; TODO confirm
    temperature        REAL,            -- presumably the sampling temperature used for generation
    top_p              REAL,            -- presumably the nucleus-sampling cutoff used for generation
    max_tokens         INTEGER,         -- presumably the generation length limit
    -- NOTE(review): in the exported rows the checkpoint path is displayed under
    -- `error` and `model` looks empty. Confirm whether that is a rendering
    -- artifact or the writer binding these two columns in the wrong order.
    error              TEXT,
    model              TEXT NOT NULL
);
-- Secondary indexes on the model_tag and task_name columns of summaries.
-- IF NOT EXISTS keeps the statements safe to re-run; index names are unchanged.
CREATE INDEX IF NOT EXISTS idx_summaries_model ON summaries (model_tag);
CREATE INDEX IF NOT EXISTS idx_summaries_task ON summaries (task_name);