summaries
83 rows
This data as JSON, CSV (advanced)
Suggested facets: task_name, total_examples, duration_human
| id ▼ | task_name | model_tag | total_examples | correct | accuracy | no_answer_count | stop_reason_counts | duration_human | pass_k | temperature | top_p | max_tokens | error | model |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | gsm8k_main(0) | dsr-one-example-grpo_global_step_50 | 1319 | 945 | 0.7164518574677786 | 8 | {"stop:-": 1313, "length:-": 6} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_50/actor/huggingface | |
| 2 | hendrycks_math(0) | dsr-one-example-grpo_global_step_50 | 5000 | 2462 | 0.4924 | 623 | {"stop:-": 4454, "length:-": 537, "stop:Problem:": 9} | 2m 30s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_50/actor/huggingface | |
| 3 | gsm8k_main(0) | dsr-one-example-grpo_global_step_100 | 1319 | 949 | 0.7194844579226687 | 6 | {"stop:-": 1313, "length:-": 6} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_100/actor/huggingface | |
| 4 | hendrycks_math(0) | dsr-one-example-grpo_global_step_100 | 5000 | 2467 | 0.4934 | 617 | {"stop:-": 4457, "length:-": 537, "stop:Problem:": 6} | 2m 28s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_100/actor/huggingface | |
| 5 | gsm8k_main(0) | dsr-one-example-grpo_global_step_150 | 1319 | 947 | 0.7179681576952237 | 5 | {"stop:-": 1314, "length:-": 5} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_150/actor/huggingface | |
| 6 | hendrycks_math(0) | dsr-one-example-grpo_global_step_150 | 5000 | 2477 | 0.4954 | 644 | {"stop:-": 4430, "length:-": 564, "stop:Problem:": 6} | 2m 28s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_150/actor/huggingface | |
| 7 | gsm8k_main(0) | dsr-one-example-grpo_global_step_200 | 1319 | 967 | 0.733131159969674 | 3 | {"stop:-": 1316, "length:-": 3} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_200/actor/huggingface | |
| 8 | hendrycks_math(0) | dsr-one-example-grpo_global_step_200 | 5000 | 2487 | 0.4974 | 632 | {"stop:-": 4453, "length:-": 539, "stop:Problem:": 8} | 2m 29s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_200/actor/huggingface | |
| 9 | gsm8k_main(0) | dsr-one-example-grpo_global_step_250 | 1319 | 955 | 0.7240333586050038 | 6 | {"stop:-": 1313, "length:-": 6} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_250/actor/huggingface | |
| 10 | hendrycks_math(0) | dsr-one-example-grpo_global_step_250 | 5000 | 2466 | 0.4932 | 617 | {"stop:-": 4458, "length:-": 537, "stop:Problem:": 5} | 2m 28s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_250/actor/huggingface | |
| 11 | gsm8k_main(0) | dsr-one-example-grpo_global_step_300 | 1319 | 949 | 0.7194844579226687 | 6 | {"stop:-": 1313, "length:-": 6} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_300/actor/huggingface | |
| 12 | hendrycks_math(0) | dsr-one-example-grpo_global_step_300 | 5000 | 2493 | 0.4986 | 624 | {"stop:-": 4451, "length:-": 545, "stop:Problem:": 4} | 2m 29s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_300/actor/huggingface | |
| 13 | gsm8k_main(0) | dsr-one-example-grpo_global_step_350 | 1319 | 950 | 0.7202426080363912 | 1 | {"stop:-": 1318, "length:-": 1} | 21s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_350/actor/huggingface | |
| 14 | hendrycks_math(0) | dsr-one-example-grpo_global_step_350 | 5000 | 2490 | 0.498 | 622 | {"stop:-": 4450, "length:-": 538, "stop:Problem:": 12} | 2m 28s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_350/actor/huggingface | |
| 15 | gsm8k_main(0) | dsr-one-example-grpo_global_step_400 | 1319 | 948 | 0.7187263078089462 | 3 | {"stop:-": 1316, "length:-": 3} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_400/actor/huggingface | |
| 16 | hendrycks_math(0) | dsr-one-example-grpo_global_step_400 | 5000 | 2482 | 0.4964 | 623 | {"stop:-": 4457, "length:-": 532, "stop:Problem:": 11} | 2m 29s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_400/actor/huggingface | |
| 17 | gsm8k_main(0) | dsr-one-example-grpo_global_step_450 | 1319 | 965 | 0.731614859742229 | 5 | {"stop:-": 1314, "length:-": 5} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_450/actor/huggingface | |
| 18 | hendrycks_math(0) | dsr-one-example-grpo_global_step_450 | 5000 | 2462 | 0.4924 | 580 | {"stop:-": 4490, "length:-": 497, "stop:Problem:": 13} | 2m 28s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_450/actor/huggingface | |
| 19 | gsm8k_main(0) | dsr-one-example-grpo_global_step_500 | 1319 | 960 | 0.7278241091736164 | 7 | {"stop:-": 1312, "length:-": 7} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_500/actor/huggingface | |
| 20 | hendrycks_math(0) | dsr-one-example-grpo_global_step_500 | 5000 | 2428 | 0.4856 | 613 | {"stop:-": 4456, "length:-": 531, "stop:Problem:": 13} | 2m 28s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_500/actor/huggingface | |
| 21 | gsm8k_main(0) | dsr-one-example-grpo_global_step_550 | 1319 | 948 | 0.7187263078089462 | 9 | {"stop:-": 1310, "length:-": 9} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_550/actor/huggingface | |
| 22 | hendrycks_math(0) | dsr-one-example-grpo_global_step_550 | 5000 | 2445 | 0.489 | 602 | {"stop:-": 4474, "length:-": 512, "stop:Problem:": 14} | 2m 29s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_550/actor/huggingface | |
| 23 | gsm8k_main(0) | dsr-one-example-grpo_global_step_600 | 1319 | 959 | 0.7270659590598939 | 4 | {"stop:-": 1315, "length:-": 4} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_600/actor/huggingface | |
| 24 | hendrycks_math(0) | dsr-one-example-grpo_global_step_600 | 5000 | 2485 | 0.497 | 610 | {"stop:-": 4465, "length:-": 522, "stop:Problem:": 13} | 2m 29s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_600/actor/huggingface | |
| 25 | gsm8k_main(0) | dsr-one-example-grpo_global_step_650 | 1319 | 960 | 0.7278241091736164 | 7 | {"stop:-": 1312, "length:-": 7} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_650/actor/huggingface | |
| 26 | hendrycks_math(0) | dsr-one-example-grpo_global_step_650 | 5000 | 2433 | 0.4866 | 602 | {"stop:-": 4480, "length:-": 507, "stop:Problem:": 13} | 2m 30s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_650/actor/huggingface | |
| 27 | gsm8k_main(0) | dsr-one-example-grpo_global_step_700 | 1319 | 964 | 0.7308567096285065 | 8 | {"stop:-": 1311, "length:-": 8} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_700/actor/huggingface | |
| 28 | hendrycks_math(0) | dsr-one-example-grpo_global_step_700 | 5000 | 2431 | 0.4862 | 625 | {"stop:-": 4451, "length:-": 534, "stop:Problem:": 15} | 2m 30s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_700/actor/huggingface | |
| 29 | gsm8k_main(0) | dsr-one-example-grpo_global_step_750 | 1319 | 960 | 0.7278241091736164 | 7 | {"stop:-": 1312, "length:-": 7} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_750/actor/huggingface | |
| 30 | hendrycks_math(0) | dsr-one-example-grpo_global_step_750 | 5000 | 2432 | 0.4864 | 652 | {"stop:-": 4414, "length:-": 573, "stop:Problem:": 13} | 2m 32s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_750/actor/huggingface | |
| 31 | gsm8k_main(0) | dsr-one-example-grpo_global_step_800 | 1319 | 972 | 0.7369219105382866 | 4 | {"stop:-": 1315, "length:-": 4} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_800/actor/huggingface | |
| 32 | hendrycks_math(0) | dsr-one-example-grpo_global_step_800 | 5000 | 2419 | 0.4838 | 623 | {"stop:-": 4450, "length:-": 533, "stop:Problem:": 17} | 2m 30s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_800/actor/huggingface | |
| 33 | gsm8k_main(0) | dsr-one-example-grpo_global_step_850 | 1319 | 967 | 0.733131159969674 | 10 | {"stop:-": 1309, "length:-": 10} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_850/actor/huggingface | |
| 34 | hendrycks_math(0) | dsr-one-example-grpo_global_step_850 | 5000 | 2442 | 0.4884 | 642 | {"stop:-": 4432, "length:-": 552, "stop:Problem:": 16} | 2m 31s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_850/actor/huggingface | |
| 35 | gsm8k_main(0) | dsr-one-example-grpo_global_step_900 | 1319 | 976 | 0.7399545109931767 | 3 | {"stop:-": 1316, "length:-": 3} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_900/actor/huggingface | |
| 36 | hendrycks_math(0) | dsr-one-example-grpo_global_step_900 | 5000 | 2439 | 0.4878 | 599 | {"stop:-": 4469, "length:-": 516, "stop:Problem:": 15} | 2m 30s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_900/actor/huggingface | |
| 37 | gsm8k_main(0) | dsr-one-example-grpo_global_step_950 | 1319 | 971 | 0.7361637604245641 | 5 | {"stop:-": 1314, "length:-": 5} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_950/actor/huggingface | |
| 38 | hendrycks_math(0) | dsr-one-example-grpo_global_step_950 | 5000 | 2458 | 0.4916 | 648 | {"stop:-": 4418, "length:-": 569, "stop:Problem:": 13} | 2m 31s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_950/actor/huggingface | |
| 39 | gsm8k_main(0) | dsr-one-example-grpo_global_step_1000 | 1319 | 979 | 0.7422289613343442 | 5 | {"stop:-": 1314, "length:-": 5} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_1000/actor/huggingface | |
| 40 | hendrycks_math(0) | dsr-one-example-grpo_global_step_1000 | 5000 | 2469 | 0.4938 | 620 | {"stop:-": 4455, "length:-": 524, "stop:Problem:": 21} | 2m 30s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/dsr-one-example-grpo/global_step_1000/actor/huggingface | |
| 41 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_50 | 1319 | 962 | 0.7293404094010614 | 7 | {"stop:-": 1312, "length:-": 7} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_50/actor/huggingface | |
| 42 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_50 | 5000 | 2468 | 0.4936 | 637 | {"stop:-": 4438, "length:-": 559, "stop:Problem:": 3} | 2m 32s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_50/actor/huggingface | |
| 43 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_100 | 1319 | 985 | 0.7467778620166793 | 2 | {"stop:-": 1317, "length:-": 2} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_100/actor/huggingface | |
| 44 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_100 | 5000 | 2480 | 0.496 | 636 | {"stop:-": 4441, "length:-": 556, "stop:Problem:": 3} | 2m 33s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_100/actor/huggingface | |
| 45 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_150 | 1319 | 982 | 0.7445034116755117 | 5 | {"stop:-": 1314, "length:-": 5} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_150/actor/huggingface | |
| 46 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_150 | 5000 | 2499 | 0.4998 | 633 | {"stop:-": 4453, "length:-": 545, "stop:Problem:": 2} | 2m 32s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_150/actor/huggingface | |
| 47 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_200 | 1319 | 986 | 0.7475360121304018 | 6 | {"stop:-": 1313, "length:-": 6} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_200/actor/huggingface | |
| 48 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_200 | 5000 | 2451 | 0.4902 | 667 | {"stop:-": 4416, "length:-": 582, "stop:Problem:": 2} | 2m 33s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_200/actor/huggingface | |
| 49 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_250 | 1319 | 969 | 0.734647460197119 | 3 | {"stop:-": 1316, "length:-": 3} | 23s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_250/actor/huggingface | |
| 50 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_250 | 5000 | 2456 | 0.4912 | 630 | {"stop:-": 4452, "length:-": 545, "stop:Problem:": 3} | 2m 32s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_250/actor/huggingface | |
| 51 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_300 | 1319 | 989 | 0.7498104624715694 | 4 | {"stop:-": 1315, "length:-": 4} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_300/actor/huggingface | |
| 52 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_300 | 5000 | 2502 | 0.5004 | 620 | {"stop:-": 4460, "length:-": 537, "stop:Problem:": 3} | 2m 31s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_300/actor/huggingface | |
| 53 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_350 | 1319 | 974 | 0.7384382107657316 | 6 | {"stop:-": 1313, "length:-": 6} | 24s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_350/actor/huggingface | |
| 54 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_350 | 5000 | 2493 | 0.4986 | 651 | {"stop:-": 4423, "length:-": 572, "stop:Problem:": 5} | 2m 32s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_350/actor/huggingface | |
| 55 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_400 | 1319 | 984 | 0.7460197119029568 | 6 | {"stop:-": 1313, "length:-": 6} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_400/actor/huggingface | |
| 56 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_400 | 5000 | 2501 | 0.5002 | 616 | {"stop:-": 4457, "length:-": 541, "stop:Problem:": 2} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_400/actor/huggingface | |
| 57 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_450 | 1319 | 968 | 0.7338893100833965 | 6 | {"stop:-": 1313, "length:-": 6} | 27s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_450/actor/huggingface | |
| 58 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_450 | 5000 | 2539 | 0.5078 | 597 | {"stop:-": 4481, "length:-": 516, "stop:Problem:": 3} | 2m 33s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_450/actor/huggingface | |
| 59 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_500 | 1319 | 978 | 0.7414708112206216 | 2 | {"stop:-": 1317, "length:-": 2} | 27s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_500/actor/huggingface | |
| 60 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_500 | 5000 | 2474 | 0.4948 | 640 | {"stop:-": 4441, "length:-": 556, "stop:Problem:": 3} | 2m 34s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_500/actor/huggingface | |
| 61 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_550 | 1319 | 983 | 0.7452615617892343 | 3 | {"stop:-": 1316, "length:-": 3} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_550/actor/huggingface | |
| 62 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_550 | 5000 | 2483 | 0.4966 | 600 | {"stop:-": 4480, "length:-": 519, "stop:Problem:": 1} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_550/actor/huggingface | |
| 63 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_600 | 1319 | 985 | 0.7467778620166793 | 4 | {"stop:-": 1315, "length:-": 4} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_600/actor/huggingface | |
| 64 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_600 | 5000 | 2485 | 0.497 | 618 | {"stop:-": 4457, "length:-": 539, "stop:Problem:": 4} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_600/actor/huggingface | |
| 65 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_650 | 1319 | 971 | 0.7361637604245641 | 4 | {"stop:-": 1315, "length:-": 4} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_650/actor/huggingface | |
| 66 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_650 | 5000 | 2463 | 0.4926 | 605 | {"stop:-": 4469, "length:-": 528, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_650/actor/huggingface | |
| 67 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_700 | 1319 | 977 | 0.7407126611068992 | 1 | {"stop:-": 1318, "length:-": 1} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_700/actor/huggingface | |
| 68 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_700 | 5000 | 2473 | 0.4946 | 651 | {"stop:-": 4425, "length:-": 568, "stop:Problem:": 7} | 2m 37s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_700/actor/huggingface | |
| 69 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_750 | 1319 | 976 | 0.7399545109931767 | 4 | {"stop:-": 1315, "length:-": 4} | 27s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_750/actor/huggingface | |
| 70 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_750 | 5000 | 2457 | 0.4914 | 632 | {"stop:-": 4450, "length:-": 547, "stop:Problem:": 3} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_750/actor/huggingface | |
| 71 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_800 | 1319 | 980 | 0.7429871114480667 | 8 | {"stop:-": 1311, "length:-": 8} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_800/actor/huggingface | |
| 72 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_800 | 5000 | 2471 | 0.4942 | 621 | {"stop:-": 4456, "length:-": 542, "stop:Problem:": 2} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_800/actor/huggingface | |
| 73 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_850 | 1319 | 975 | 0.7391963608794542 | 2 | {"stop:-": 1317, "length:-": 2} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_850/actor/huggingface | |
| 74 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_850 | 5000 | 2439 | 0.4878 | 627 | {"stop:-": 4449, "length:-": 547, "stop:Problem:": 4} | 2m 36s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_850/actor/huggingface | |
| 75 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_900 | 1319 | 986 | 0.7475360121304018 | 5 | {"stop:-": 1314, "length:-": 5} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_900/actor/huggingface | |
| 76 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_900 | 5000 | 2474 | 0.4948 | 616 | {"stop:-": 4459, "length:-": 538, "stop:Problem:": 3} | 2m 32s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_900/actor/huggingface | |
| 77 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_950 | 1319 | 958 | 0.7263078089461713 | 6 | {"stop:-": 1313, "length:-": 6} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_950/actor/huggingface | |
| 78 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_950 | 5000 | 2447 | 0.4894 | 663 | {"stop:-": 4420, "length:-": 575, "stop:Problem:": 5} | 2m 40s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_950/actor/huggingface | |
| 79 | gsm8k_main(0) | grpo-one-example-gsm8k_global_step_1000 | 1319 | 979 | 0.7422289613343442 | 3 | {"stop:-": 1316, "length:-": 3} | 25s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_1000/actor/huggingface | |
| 80 | hendrycks_math(0) | grpo-one-example-gsm8k_global_step_1000 | 5000 | 2470 | 0.494 | 665 | {"stop:-": 4414, "length:-": 580, "stop:Problem:": 6} | 2m 40s | 1 | 0.0 | 0.95 | 1024 | /mnt/data8tb/Documents/project/rlvr_winter/verl-my-rlvr/finished-models/grpo-one-example-gsm8k/global_step_1000/actor/huggingface | |
| 81 | gsm8k_main(0) | Qwen_Qwen2.5-1.5B-Instruct | 1319 | 920 | 0.6974981046247157 | 44 | {"stop:-": 1311, "length:-": 8} | 26s | 1 | 0.0 | 0.95 | 1024 | /mnt/2data/Documents/safetensors/Qwen_Qwen2.5-1.5B-Instruct | |
| 82 | hendrycks_math(0) | Qwen_Qwen2.5-1.5B-Instruct | 5000 | 2457 | 0.4914 | 625 | {"stop:-": 4461, "length:-": 533, "stop:Problem:": 6} | 2m 35s | 1 | 0.0 | 0.95 | 1024 | /mnt/2data/Documents/safetensors/Qwen_Qwen2.5-1.5B-Instruct | |
| 83 | TOTAL | 41 models | 259079 | 140807 | 0.5434905955326368 | 25911 | {} | 2h 06m 20s | 1 | 0.0 | 0.95 | 1024 | Models: dsr-one-example-grpo_global_step_50, dsr-one-example-grpo_global_step_100, dsr-one-example-grpo_global_step_150, dsr-one-example-grpo_global_step_200, dsr-one-example-grpo_global_step_250, dsr-one-example-grpo_global_step_300, dsr-one-example-grpo_global_step_350, dsr-one-example-grpo_global_step_400, dsr-one-example-grpo_global_step_450, dsr-one-example-grpo_global_step_500, dsr-one-example-grpo_global_step_550, dsr-one-example-grpo_global_step_600, dsr-one-example-grpo_global_step_650, dsr-one-example-grpo_global_step_700, dsr-one-example-grpo_global_step_750, dsr-one-example-grpo_global_step_800, dsr-one-example-grpo_global_step_850, dsr-one-example-grpo_global_step_900, dsr-one-example-grpo_global_step_950, dsr-one-example-grpo_global_step_1000, grpo-one-example-gsm8k_global_step_50, grpo-one-example-gsm8k_global_step_100, grpo-one-example-gsm8k_global_step_150, grpo-one-example-gsm8k_global_step_200, grpo-one-example-gsm8k_global_step_250, grpo-one-example-gsm8k_global_step_300, grpo-one-example-gsm8k_global_step_350, grpo-one-example-gsm8k_global_step_400, grpo-one-example-gsm8k_global_step_450, grpo-one-example-gsm8k_global_step_500, grpo-one-example-gsm8k_global_step_550, grpo-one-example-gsm8k_global_step_600, grpo-one-example-gsm8k_global_step_650, grpo-one-example-gsm8k_global_step_700, grpo-one-example-gsm8k_global_step_750, grpo-one-example-gsm8k_global_step_800, grpo-one-example-gsm8k_global_step_850, grpo-one-example-gsm8k_global_step_900, grpo-one-example-gsm8k_global_step_950, grpo-one-example-gsm8k_global_step_1000, Qwen_Qwen2.5-1.5B-Instruct | Tasks: gsm8k_main(0), hendrycks_math(0) |
Advanced export
JSON shape: default, array, newline-delimited, object
-- Evaluation summaries: one row per (task, model checkpoint) run.
-- The exported data also ends with an aggregate row whose task_name is 'TOTAL'.
CREATE TABLE summaries (
    -- Surrogate key, assigned by SQLite.
    id                  INTEGER  PRIMARY KEY AUTOINCREMENT,

    -- What was evaluated.
    task_name           TEXT     NOT NULL,  -- e.g. 'gsm8k_main(0)', 'hendrycks_math(0)'
    model_tag           TEXT     NOT NULL,  -- short label, e.g. 'Qwen_Qwen2.5-1.5B-Instruct'

    -- Outcome counters. In the exported rows accuracy = correct / total_examples.
    total_examples      INTEGER,
    correct             INTEGER,
    accuracy            REAL,
    no_answer_count     INTEGER,            -- presumably responses with no parsable answer; TODO confirm
    stop_reason_counts  TEXT,               -- JSON object stored as text, e.g. {"stop:-": 1313, "length:-": 6}
    duration_human      TEXT,               -- display string such as '25s' or '2m 30s'

    -- Generation settings used for the run.
    pass_k              INTEGER,            -- presumably the k of pass@k; TODO confirm
    temperature         REAL,
    top_p               REAL,
    max_tokens          INTEGER,

    -- NOTE(review): in the rows exported with this schema the checkpoint path
    -- shows up under `error` while `model` is blank. Confirm the writer binds
    -- these two columns in the intended order.
    error               TEXT,
    model               TEXT     NOT NULL
);
-- Single-column secondary indexes on the two identifying text columns
-- of summaries: model_tag and task_name.
CREATE INDEX idx_summaries_model
    ON summaries (model_tag);

CREATE INDEX idx_summaries_task
    ON summaries (task_name);