[Performance] Faster _get_item #1288
Merged
vmoens merged 1 commit into gh/vmoens/51/base from Apr 17, 2025
Merged
Conversation
Contributor
| Name | Max | Mean | Ops | Ops on Repo HEAD | Change |
|---|---|---|---|---|---|
| test_plain_set_nested | 48.0600μs | 11.2876μs | 88.5928 KOps/s | 88.1983 KOps/s | |
| test_plain_set_stack_nested | 30.0800μs | 11.4308μs | 87.4830 KOps/s | 87.9144 KOps/s | |
| test_plain_set_nested_inplace | 42.5600μs | 12.4488μs | 80.3292 KOps/s | 80.0719 KOps/s | |
| test_plain_set_stack_nested_inplace | 40.7510μs | 12.3835μs | 80.7528 KOps/s | 80.6972 KOps/s | |
| test_items | 21.3600μs | 2.9391μs | 340.2434 KOps/s | 343.4497 KOps/s | |
| test_items_nested | 0.3881ms | 0.3588ms | 2.7867 KOps/s | 2.7520 KOps/s | |
| test_items_nested_locked | 0.4416ms | 0.3602ms | 2.7765 KOps/s | 2.7270 KOps/s | |
| test_items_nested_leaf | 88.2510μs | 60.5628μs | 16.5118 KOps/s | 16.5350 KOps/s | |
| test_items_stack_nested | 0.4217ms | 0.3602ms | 2.7760 KOps/s | 2.7065 KOps/s | |
| test_items_stack_nested_leaf | 82.0110μs | 60.6773μs | 16.4806 KOps/s | 16.4886 KOps/s | |
| test_items_stack_nested_locked | 0.4203ms | 0.3606ms | 2.7732 KOps/s | 2.7134 KOps/s | |
| test_keys | 66.6910μs | 3.9286μs | 254.5425 KOps/s | 288.6116 KOps/s | |
| test_keys_nested | 0.1254ms | 87.8920μs | 11.3776 KOps/s | 11.2331 KOps/s | |
| test_keys_nested_locked | 0.8195ms | 93.9604μs | 10.6428 KOps/s | 10.4902 KOps/s | |
| test_keys_nested_leaf | 0.1224ms | 79.3350μs | 12.6048 KOps/s | 12.5225 KOps/s | |
| test_keys_stack_nested | 0.1299ms | 87.4407μs | 11.4363 KOps/s | 11.1932 KOps/s | |
| test_keys_stack_nested_leaf | 0.1220ms | 78.4779μs | 12.7424 KOps/s | 12.4492 KOps/s | |
| test_keys_stack_nested_locked | 0.1349ms | 93.3558μs | 10.7117 KOps/s | 10.5507 KOps/s | |
| test_values | 6.6700μs | 0.8547μs | 1.1700 MOps/s | 1.1672 MOps/s | |
| test_values_nested | 71.9310μs | 37.7296μs | 26.5044 KOps/s | 26.5630 KOps/s | |
| test_values_nested_locked | 97.4710μs | 39.5888μs | 25.2597 KOps/s | 25.1484 KOps/s | |
| test_values_nested_leaf | 70.1110μs | 42.7547μs | 23.3893 KOps/s | 23.3155 KOps/s | |
| test_values_stack_nested | 68.3210μs | 37.8618μs | 26.4119 KOps/s | 26.3393 KOps/s | |
| test_values_stack_nested_leaf | 69.0310μs | 42.9054μs | 23.3071 KOps/s | 23.2580 KOps/s | |
| test_values_stack_nested_locked | 70.6710μs | 40.0552μs | 24.9655 KOps/s | 25.0675 KOps/s | |
| test_membership | 3.9686μs | 0.4988μs | 2.0049 MOps/s | 1.9950 MOps/s | |
| test_membership_nested | 14.3050μs | 1.9872μs | 503.2090 KOps/s | 485.1320 KOps/s | |
| test_membership_nested_leaf | 14.0455μs | 2.0163μs | 495.9602 KOps/s | 495.4655 KOps/s | |
| test_membership_stacked_nested | 24.8800μs | 2.0341μs | 491.6189 KOps/s | 483.9398 KOps/s | |
| test_membership_stacked_nested_leaf | 28.6900μs | 2.0305μs | 492.5003 KOps/s | 491.4090 KOps/s | |
| test_membership_nested_last | 39.9300μs | 3.0282μs | 330.2330 KOps/s | 324.1073 KOps/s | |
| test_membership_nested_leaf_last | 66.1510μs | 3.0203μs | 331.0962 KOps/s | 326.2141 KOps/s | |
| test_membership_stacked_nested_last | 33.1110μs | 3.0048μs | 332.7999 KOps/s | 324.9040 KOps/s | |
| test_membership_stacked_nested_leaf_last | 34.1210μs | 3.0237μs | 330.7230 KOps/s | 326.3436 KOps/s | |
| test_nested_getleaf | 40.5010μs | 12.9856μs | 77.0082 KOps/s | 76.3140 KOps/s | |
| test_nested_get | 78.4710μs | 12.3679μs | 80.8547 KOps/s | 80.8452 KOps/s | |
| test_stacked_getleaf | 46.0510μs | 13.0094μs | 76.8673 KOps/s | 76.9830 KOps/s | |
| test_stacked_get | 31.0510μs | 12.2091μs | 81.9059 KOps/s | 80.4098 KOps/s | |
| test_nested_getitemleaf | 37.1700μs | 13.3882μs | 74.6927 KOps/s | 74.8070 KOps/s | |
| test_nested_getitem | 37.4100μs | 12.7524μs | 78.4166 KOps/s | 78.6500 KOps/s | |
| test_stacked_getitemleaf | 34.9710μs | 13.3846μs | 74.7128 KOps/s | 74.8303 KOps/s | |
| test_stacked_getitem | 38.5000μs | 12.4675μs | 80.2084 KOps/s | 78.4898 KOps/s | |
| test_lock_nested | 0.8313ms | 0.3540ms | 2.8246 KOps/s | 2.8833 KOps/s | |
| test_lock_stack_nested | 0.3830ms | 0.3433ms | 2.9127 KOps/s | 2.9240 KOps/s | |
| test_unlock_nested | 0.5067ms | 0.2955ms | 3.3846 KOps/s | 3.4794 KOps/s | |
| test_unlock_stack_nested | 0.3140ms | 0.2831ms | 3.5320 KOps/s | 3.5803 KOps/s | |
| test_flatten_speed | 0.1165ms | 77.0379μs | 12.9806 KOps/s | 13.0580 KOps/s | |
| test_unflatten_speed | 0.4656ms | 0.3972ms | 2.5177 KOps/s | 2.5120 KOps/s | |
| test_common_ops | 0.8786ms | 0.6284ms | 1.5912 KOps/s | 1.5989 KOps/s | |
| test_creation | 0.1815ms | 1.7097μs | 584.8818 KOps/s | 583.2323 KOps/s | |
| test_creation_empty | 0.8029ms | 7.0054μs | 142.7467 KOps/s | 141.8592 KOps/s | |
| test_creation_nested_1 | 0.1999ms | 9.9381μs | 100.6228 KOps/s | 100.7540 KOps/s | |
| test_creation_nested_2 | 0.1036ms | 12.7433μs | 78.4723 KOps/s | 77.6816 KOps/s | |
| test_clone | 80.8810μs | 10.9530μs | 91.2992 KOps/s | 93.1783 KOps/s | |
| test_getitem[int] | 0.1575ms | 10.2912μs | 97.1708 KOps/s | 95.6069 KOps/s | |
| test_getitem[slice_int] | 0.1138ms | 19.9519μs | 50.1206 KOps/s | 47.7606 KOps/s | |
| test_getitem[range] | 0.1368ms | 37.2879μs | 26.8183 KOps/s | 25.1851 KOps/s | |
| test_getitem[tuple] | 0.1939ms | 17.6590μs | 56.6283 KOps/s | 55.5713 KOps/s | |
| test_getitem[list] | 0.1288ms | 33.2672μs | 30.0596 KOps/s | 29.5081 KOps/s | |
| test_setitem_dim[int] | 0.1347ms | 18.9874μs | 52.6665 KOps/s | 50.3883 KOps/s | |
| test_setitem_dim[slice_int] | 60.4310μs | 37.5852μs | 26.6062 KOps/s | 26.2278 KOps/s | |
| test_setitem_dim[range] | 98.3510μs | 52.0962μs | 19.1953 KOps/s | 18.0956 KOps/s | |
| test_setitem_dim[tuple] | 53.2310μs | 31.2331μs | 32.0173 KOps/s | 30.0832 KOps/s | |
| test_setitem | 0.2138ms | 15.6434μs | 63.9246 KOps/s | 64.6506 KOps/s | |
| test_set | 0.2127ms | 15.2160μs | 65.7205 KOps/s | 67.2031 KOps/s | |
| test_set_shared | 0.5192ms | 0.1577ms | 6.3393 KOps/s | 6.3099 KOps/s | |
| test_update | 0.4176ms | 18.4725μs | 54.1345 KOps/s | 55.2185 KOps/s | |
| test_update_nested | 0.1187ms | 28.8410μs | 34.6728 KOps/s | 34.9929 KOps/s | |
| test_update__nested | 70.0710μs | 25.4557μs | 39.2839 KOps/s | 38.9776 KOps/s | |
| test_set_nested | 0.1379ms | 16.4721μs | 60.7088 KOps/s | 63.4043 KOps/s | |
| test_set_nested_new | 0.1139ms | 19.0783μs | 52.4157 KOps/s | 52.7058 KOps/s | |
| test_select | 0.1274ms | 31.1634μs | 32.0889 KOps/s | 32.1437 KOps/s | |
| test_select_nested | 77.3910μs | 42.7895μs | 23.3702 KOps/s | 22.8609 KOps/s | |
| test_exclude_nested | 89.9610μs | 61.3031μs | 16.3124 KOps/s | 16.1029 KOps/s | |
| test_empty[True] | 0.3993ms | 0.2920ms | 3.4246 KOps/s | 3.4049 KOps/s | |
| test_empty[False] | 3.4771μs | 0.8080μs | 1.2377 MOps/s | 1.2223 MOps/s | |
| test_to | 88.4920μs | 59.2080μs | 16.8896 KOps/s | 17.1870 KOps/s | |
| test_to_nonblocking | 0.1961ms | 49.5292μs | 20.1901 KOps/s | 19.1920 KOps/s | |
| test_unbind_speed | 0.8760ms | 0.2401ms | 4.1651 KOps/s | 4.1923 KOps/s | |
| test_unbind_speed_stack0 | 0.2816ms | 0.2409ms | 4.1511 KOps/s | 4.2661 KOps/s | |
| test_unbind_speed_stack1 | 95.9333ms | 0.7433ms | 1.3453 KOps/s | 1.3464 KOps/s | |
| test_split | 94.8847ms | 1.5816ms | 632.2764 Ops/s | 617.6797 Ops/s | |
| test_chunk | 99.8207ms | 1.5932ms | 627.6745 Ops/s | 614.4800 Ops/s | |
| test_consolidate[False-None] | 99.3116ms | 3.0611ms | 326.6794 Ops/s | 328.5690 Ops/s | |
| test_consolidate[default-None] | 1.8271ms | 1.6785ms | 595.7744 Ops/s | 588.7234 Ops/s | |
| test_consolidate[reduce-overhead-None] | 1.8648ms | 1.7163ms | 582.6610 Ops/s | 578.6402 Ops/s | |
| test_consolidate_njt[False-None] | 6.8052ms | 6.4463ms | 155.1271 Ops/s | 155.2273 Ops/s | |
| test_to[False-False-None] | 1.9607ms | 1.7858ms | 559.9790 Ops/s | 564.0883 Ops/s | |
| test_to[True-False-None] | 1.5046ms | 1.3838ms | 722.6697 Ops/s | 719.3109 Ops/s | |
| test_to[within-False-None] | 4.4170ms | 4.2706ms | 234.1585 Ops/s | 233.8602 Ops/s | |
| test_to[True-default-None] | 5.4019ms | 5.1849ms | 192.8678 Ops/s | 191.5057 Ops/s | |
| test_to_njt[False-False-None] | 7.2741ms | 6.9208ms | 144.4919 Ops/s | 145.3772 Ops/s | |
| test_to_njt[True-False-None] | 5.5314ms | 5.3917ms | 185.4703 Ops/s | 185.1955 Ops/s | |
| test_to_njt[within-False-None] | 0.3233s | 15.8378ms | 63.1402 Ops/s | 83.4748 Ops/s | |
| test_creation[device0] | 0.4651ms | 78.1292μs | 12.7993 KOps/s | 12.8573 KOps/s | |
| test_creation_from_tensor | 0.5398ms | 81.1089μs | 12.3291 KOps/s | 12.1720 KOps/s | |
| test_add_one[memmap_tensor0] | 0.4123ms | 6.7107μs | 149.0150 KOps/s | 148.5790 KOps/s | |
| test_contiguous[memmap_tensor0] | 2.3695μs | 0.3954μs | 2.5292 MOps/s | 2.4888 MOps/s | |
| test_stack[memmap_tensor0] | 30.7000μs | 4.1730μs | 239.6344 KOps/s | 234.3215 KOps/s | |
| test_memmaptd_index | 1.7484ms | 0.2378ms | 4.2055 KOps/s | 4.0802 KOps/s | |
| test_memmaptd_index_astensor | 0.4572ms | 0.3006ms | 3.3271 KOps/s | 3.2644 KOps/s | |
| test_memmaptd_index_op | 0.9701ms | 0.5453ms | 1.8339 KOps/s | 1.8125 KOps/s | |
| test_serialize_model | 0.1332s | 0.1323s | 7.5596 Ops/s | 7.5573 Ops/s | |
| test_serialize_model_pickle | 1.3474s | 1.1875s | 0.8421 Ops/s | 0.8214 Ops/s | |
| test_serialize_weights | 0.1316s | 0.1310s | 7.6365 Ops/s | 7.5723 Ops/s | |
| test_serialize_weights_returnearly | 0.3105s | 52.6205ms | 19.0040 Ops/s | 14.7155 Ops/s | |
| test_serialize_weights_pickle | 1.3874s | 1.1931s | 0.8382 Ops/s | 0.8217 Ops/s | |
| test_reshape_pytree | 52.9410μs | 22.1249μs | 45.1979 KOps/s | 45.0255 KOps/s | |
| test_reshape_td | 53.0510μs | 28.2520μs | 35.3958 KOps/s | 38.5940 KOps/s | |
| test_view_pytree | 89.2710μs | 22.1137μs | 45.2209 KOps/s | 46.7526 KOps/s | |
| test_view_td | 0.1606ms | 34.1611μs | 29.2730 KOps/s | 29.2371 KOps/s | |
| test_unbind_pytree | 0.1175ms | 27.7086μs | 36.0899 KOps/s | 35.8258 KOps/s | |
| test_unbind_td | 0.7988ms | 37.0922μs | 26.9598 KOps/s | 25.5829 KOps/s | |
| test_split_pytree | 0.1127ms | 29.1937μs | 34.2539 KOps/s | 32.5563 KOps/s | |
| test_split_td | 0.1836ms | 38.0058μs | 26.3118 KOps/s | 23.0204 KOps/s | |
| test_add_pytree | 68.4710μs | 36.8419μs | 27.1430 KOps/s | 27.9065 KOps/s | |
| test_add_td | 0.2890ms | 50.9628μs | 19.6222 KOps/s | 19.3048 KOps/s | |
| test_compile_add_one_nested[tensordict-compile] | 0.1859ms | 0.1241ms | 8.0587 KOps/s | 7.9022 KOps/s | |
| test_compile_add_one_nested[tensordict-eager] | 0.2861ms | 0.1434ms | 6.9731 KOps/s | 7.0170 KOps/s | |
| test_compile_add_one_nested[pytree-compile] | 0.2666ms | 97.2410μs | 10.2837 KOps/s | 10.4046 KOps/s | |
| test_compile_add_one_nested[pytree-eager] | 1.7175ms | 0.1570ms | 6.3686 KOps/s | 6.7199 KOps/s | |
| test_compile_copy_nested[tensordict-compile] | 77.5510μs | 24.9066μs | 40.1501 KOps/s | 39.9380 KOps/s | |
| test_compile_copy_nested[tensordict-eager] | 63.8900μs | 34.5825μs | 28.9164 KOps/s | 28.6005 KOps/s | |
| test_compile_copy_nested[pytree-compile] | 0.3100ms | 62.8078μs | 15.9216 KOps/s | 15.6459 KOps/s | |
| test_compile_copy_nested[pytree-eager] | 78.5710μs | 48.0086μs | 20.8296 KOps/s | 20.1921 KOps/s | |
| test_compile_add_one_flat[tensordict-compile] | 0.2113ms | 0.1417ms | 7.0562 KOps/s | 7.0435 KOps/s | |
| test_compile_add_one_flat[tensordict-eager] | 0.3307ms | 0.2181ms | 4.5860 KOps/s | 4.5877 KOps/s | |
| test_compile_add_one_flat[tensorclass-compile] | 0.1590ms | 96.1986μs | 10.3952 KOps/s | 10.4351 KOps/s | |
| test_compile_add_one_flat[tensorclass-eager] | 0.2514ms | 62.0721μs | 16.1103 KOps/s | 17.6107 KOps/s | |
| test_compile_add_one_flat[pytree-compile] | 0.1864ms | 0.1358ms | 7.3612 KOps/s | 7.4172 KOps/s | |
| test_compile_add_one_flat[pytree-eager] | 0.6333ms | 0.4843ms | 2.0648 KOps/s | 2.0918 KOps/s | |
| test_compile_add_self_flat[tensordict-eager] | 0.4544ms | 0.2654ms | 3.7685 KOps/s | 3.7908 KOps/s | |
| test_compile_add_self_flat[tensordict-compile] | 0.1911ms | 0.1428ms | 7.0020 KOps/s | 7.0441 KOps/s | |
| test_compile_add_self_flat[tensorclass-eager] | 0.2081ms | 69.8316μs | 14.3202 KOps/s | 14.6125 KOps/s | |
| test_compile_add_self_flat[tensorclass-compile] | 0.2545ms | 0.1037ms | 9.6429 KOps/s | 10.4184 KOps/s | |
| test_compile_add_self_flat[pytree-eager] | 0.5954ms | 0.4258ms | 2.3484 KOps/s | 2.4357 KOps/s | |
| test_compile_add_self_flat[pytree-compile] | 0.2091ms | 0.1427ms | 7.0067 KOps/s | 7.5066 KOps/s | |
| test_compile_copy_flat[tensordict-compile] | 0.1971ms | 20.4188μs | 48.9746 KOps/s | 53.0272 KOps/s | |
| test_compile_copy_flat[tensordict-eager] | 98.8510μs | 31.9568μs | 31.2923 KOps/s | 31.3872 KOps/s | |
| test_compile_copy_flat[pytree-compile] | 0.2332ms | 69.3687μs | 14.4157 KOps/s | 14.4565 KOps/s | |
| test_compile_copy_flat[pytree-eager] | 0.1436ms | 51.8862μs | 19.2729 KOps/s | 19.2388 KOps/s | |
| test_compile_assign_and_add[tensordict-compile] | 1.6201ms | 0.3969ms | 2.5193 KOps/s | 2.2340 KOps/s | |
| test_compile_assign_and_add[tensordict-eager] | 3.1890ms | 2.7801ms | 359.6938 Ops/s | 363.3327 Ops/s | |
| test_compile_assign_and_add[pytree-compile] | 1.5997ms | 0.4336ms | 2.3064 KOps/s | 2.3019 KOps/s | |
| test_compile_assign_and_add[pytree-eager] | 2.8788ms | 2.6634ms | 375.4648 Ops/s | 382.5620 Ops/s | |
| test_compile_indexing[tensor-tensordict-compile] | 0.2574ms | 0.1117ms | 8.9551 KOps/s | 9.0255 KOps/s | |
| test_compile_indexing[tensor-tensordict-eager] | 0.5547ms | 84.0941μs | 11.8914 KOps/s | 12.1575 KOps/s | |
| test_compile_indexing[tensor-tensorclass-compile] | 0.6462ms | 0.1097ms | 9.1149 KOps/s | 9.4452 KOps/s | |
| test_compile_indexing[tensor-tensorclass-eager] | 0.2332ms | 67.3116μs | 14.8563 KOps/s | 14.7726 KOps/s | |
| test_compile_indexing[tensor-pytree-compile] | 0.2683ms | 0.1088ms | 9.1924 KOps/s | 9.4240 KOps/s | |
| test_compile_indexing[tensor-pytree-eager] | 0.2119ms | 67.4674μs | 14.8220 KOps/s | 14.2772 KOps/s | |
| test_compile_indexing[slice-tensordict-compile] | 0.1666ms | 98.9796μs | 10.1031 KOps/s | 10.1640 KOps/s | |
| test_compile_indexing[slice-tensordict-eager] | 0.2551ms | 19.3421μs | 51.7006 KOps/s | 52.1869 KOps/s | |
| test_compile_indexing[slice-tensorclass-compile] | 0.1476ms | 94.9503μs | 10.5318 KOps/s | 10.4056 KOps/s | |
| test_compile_indexing[slice-tensorclass-eager] | 52.1410μs | 15.4739μs | 64.6248 KOps/s | 64.2434 KOps/s | |
| test_compile_indexing[slice-pytree-compile] | 0.2482ms | 96.1522μs | 10.4002 KOps/s | 10.4912 KOps/s | |
| test_compile_indexing[slice-pytree-eager] | 45.1800μs | 15.2668μs | 65.5018 KOps/s | 64.1249 KOps/s | |
| test_compile_indexing[int-tensordict-compile] | 0.2506ms | 99.2241μs | 10.0782 KOps/s | 10.1359 KOps/s | |
| test_compile_indexing[int-tensordict-eager] | 0.7854ms | 18.7032μs | 53.4669 KOps/s | 52.4954 KOps/s | |
| test_compile_indexing[int-tensorclass-compile] | 0.2447ms | 95.6347μs | 10.4565 KOps/s | 10.4861 KOps/s | |
| test_compile_indexing[int-tensorclass-eager] | 47.5210μs | 15.8352μs | 63.1504 KOps/s | 64.4681 KOps/s | |
| test_compile_indexing[int-pytree-compile] | 0.2547ms | 99.7361μs | 10.0265 KOps/s | 10.5100 KOps/s | |
| test_compile_indexing[int-pytree-eager] | 79.4110μs | 15.1334μs | 66.0789 KOps/s | 65.0284 KOps/s | |
| test_mod_add[eager] | 76.0610μs | 37.2148μs | 26.8711 KOps/s | 25.5133 KOps/s | |
| test_mod_add[compile] | 0.2927ms | 80.3199μs | 12.4502 KOps/s | 12.2438 KOps/s | |
| test_mod_add[compile-overhead] | 0.3330ms | 0.1681ms | 5.9488 KOps/s | 5.7555 KOps/s | |
| test_mod_wrap[eager] | 0.3325ms | 0.2465ms | 4.0567 KOps/s | 3.9560 KOps/s | |
| test_mod_wrap[compile] | 1.1198ms | 0.2928ms | 3.4156 KOps/s | 3.3430 KOps/s | |
| test_mod_wrap[compile-overhead] | 7.3365ms | 3.8564ms | 259.3078 Ops/s | 267.9198 Ops/s | |
| test_mod_wrap_and_backward[eager] | 1.6142ms | 1.3668ms | 731.6338 Ops/s | 680.5066 Ops/s | |
| test_mod_wrap_and_backward[compile] | 1.8173ms | 1.2620ms | 792.4098 Ops/s | 728.3212 Ops/s | |
| test_mod_wrap_and_backward[compile-overhead] | 1.3841ms | 0.9155ms | 1.0923 KOps/s | 959.5106 Ops/s | |
| test_seq_add[eager] | 0.3104ms | 0.1267ms | 7.8944 KOps/s | 7.9219 KOps/s | |
| test_seq_add[compile] | 0.2388ms | 88.6392μs | 11.2817 KOps/s | 11.1878 KOps/s | |
| test_seq_add[compile-overhead] | 0.2882ms | 0.1281ms | 7.8057 KOps/s | 7.7561 KOps/s | |
| test_seq_wrap[eager] | 1.0253ms | 0.4320ms | 2.3147 KOps/s | 2.2940 KOps/s | |
| test_seq_wrap[compile] | 1.1018ms | 0.3016ms | 3.3154 KOps/s | 3.2948 KOps/s | |
| test_seq_wrap[compile-overhead] | 0.3568ms | 0.2231ms | 4.4814 KOps/s | 4.4256 KOps/s | |
| test_func_call_runtime[False-eager] | 0.8118ms | 0.7373ms | 1.3563 KOps/s | 1.3336 KOps/s | |
| test_func_call_runtime[False-compile] | 0.7890ms | 0.7369ms | 1.3571 KOps/s | 1.3568 KOps/s | |
| test_func_call_runtime[False-compile-overhead] | 0.4021ms | 0.3565ms | 2.8051 KOps/s | 2.7741 KOps/s | |
| test_func_call_runtime[True-eager] | 1.0549ms | 0.8975ms | 1.1141 KOps/s | 1.0427 KOps/s | |
| test_func_call_runtime[True-compile] | 1.0401ms | 0.7715ms | 1.2962 KOps/s | 1.3140 KOps/s | |
| test_func_call_runtime[True-compile-overhead] | 0.4345ms | 0.3807ms | 2.6267 KOps/s | 2.6266 KOps/s | |
| test_func_call_cm_runtime[False-eager] | 0.8740ms | 0.7318ms | 1.3664 KOps/s | 1.3434 KOps/s | |
| test_func_call_cm_runtime[False-compile] | 0.8281ms | 0.7425ms | 1.3468 KOps/s | 1.3463 KOps/s | |
| test_func_call_cm_runtime[False-compile-overhead] | 0.4955ms | 0.3591ms | 2.7846 KOps/s | 2.7760 KOps/s | |
| test_func_call_cm_runtime[True-eager] | 1.2710ms | 1.0019ms | 998.0660 Ops/s | 987.7072 Ops/s | |
| test_func_call_cm_runtime[True-compile] | 1.1260ms | 0.9855ms | 1.0147 KOps/s | 964.8854 Ops/s | |
| test_func_call_cm_runtime[True-compile-overhead] | 1.1420ms | 0.9921ms | 1.0080 KOps/s | 991.0679 Ops/s | |
| test_vmap_func_call_cm_runtime[eager] | 2.5179ms | 2.0923ms | 477.9463 Ops/s | 474.6373 Ops/s | |
| test_vmap_func_call_cm_runtime[compile] | 0.9030ms | 0.8049ms | 1.2424 KOps/s | 1.2379 KOps/s | |
| test_vmap_func_call_cm_runtime[compile-overhead] | 0.5694ms | 0.4084ms | 2.4487 KOps/s | 2.3991 KOps/s | |
| test_distributed | 2.7141ms | 0.3569ms | 2.8018 KOps/s | 8.7425 KOps/s | |
| test_tdmodule | 38.5410μs | 19.9003μs | 50.2506 KOps/s | 49.8852 KOps/s | |
| test_tdmodule_dispatch | 58.2600μs | 37.7036μs | 26.5227 KOps/s | 26.4505 KOps/s | |
| test_tdseq | 42.4100μs | 20.0159μs | 49.9603 KOps/s | 50.0190 KOps/s | |
| test_tdseq_dispatch | 59.7610μs | 39.6591μs | 25.2149 KOps/s | 25.0528 KOps/s | |
| test_instantiation_functorch | 1.6266ms | 1.5360ms | 651.0292 Ops/s | 635.9737 Ops/s | |
| test_exec_functorch | 0.2767ms | 0.1428ms | 7.0030 KOps/s | 6.8344 KOps/s | |
| test_exec_functional_call | 0.1861ms | 0.1382ms | 7.2339 KOps/s | 7.2609 KOps/s | |
| test_exec_td_decorator | 0.3790ms | 0.1875ms | 5.3327 KOps/s | 5.2657 KOps/s | |
| test_vmap_mlp_speed_decorator[True-True] | 0.8969ms | 0.6922ms | 1.4446 KOps/s | 1.4318 KOps/s | |
| test_vmap_mlp_speed_decorator[True-False] | 0.8923ms | 0.6903ms | 1.4487 KOps/s | 1.4340 KOps/s | |
| test_vmap_mlp_speed_decorator[False-True] | 0.7449ms | 0.5989ms | 1.6697 KOps/s | 1.5746 KOps/s | |
| test_vmap_mlp_speed_decorator[False-False] | 0.7646ms | 0.6138ms | 1.6291 KOps/s | 1.6694 KOps/s | |
| test_vmap_transformer_speed_decorator[True-True] | 20.0109ms | 19.3911ms | 51.5700 Ops/s | 52.1031 Ops/s | |
| test_vmap_transformer_speed_decorator[True-False] | 19.8340ms | 19.3468ms | 51.6883 Ops/s | 52.3257 Ops/s | |
| test_vmap_transformer_speed_decorator[False-True] | 20.0566ms | 19.2090ms | 52.0591 Ops/s | 52.6514 Ops/s | |
| test_vmap_transformer_speed_decorator[False-False] | 20.2184ms | 19.2792ms | 51.8694 Ops/s | 52.0193 Ops/s | |
| test_to_module_speed[True] | 1.3692ms | 0.9574ms | 1.0445 KOps/s | 1.0293 KOps/s | |
| test_to_module_speed[False] | 1.3888ms | 0.9449ms | 1.0583 KOps/s | 1.0568 KOps/s | |
| test_tc_init | 0.1570ms | 34.6194μs | 28.8855 KOps/s | 29.0144 KOps/s | |
| test_tc_init_tensor_only | 0.1053ms | 10.4634μs | 95.5716 KOps/s | 93.0081 KOps/s | |
| test_tc_init_nested | 0.1817ms | 69.5764μs | 14.3727 KOps/s | 14.4986 KOps/s | |
| test_tc_first_layer_tensor | 25.2900μs | 0.8980μs | 1.1136 MOps/s | 1.1131 MOps/s | |
| test_tc_first_layer_tensor_only | 1.9010μs | 0.4194μs | 2.3845 MOps/s | 2.3553 MOps/s | |
| test_tc_first_layer_tensor_set | 34.0800μs | 2.8642μs | 349.1335 KOps/s | 336.9807 KOps/s | |
| test_tc_first_layer_tensor_only_set | 16.4655μs | 1.8194μs | 549.6212 KOps/s | 550.5454 KOps/s | |
| test_tc_first_layer_nontensor | 32.1410μs | 2.3346μs | 428.3348 KOps/s | 431.5547 KOps/s | |
| test_tc_second_layer_tensor | 31.6000μs | 1.7358μs | 576.0978 KOps/s | 576.2821 KOps/s | |
| test_tc_second_layer_nontensor | 30.0600μs | 3.1584μs | 316.6112 KOps/s | 319.1979 KOps/s | |
| test_unbind | 0.2393s | 10.8003ms | 92.5897 Ops/s | 145.2140 Ops/s | |
| test_full_like | 11.9596ms | 8.8051ms | 113.5706 Ops/s | 221.6308 Ops/s | |
| test_zeros_like | 9.6537ms | 7.3313ms | 136.4023 Ops/s | 138.2435 Ops/s | |
| test_ones_like | 9.5005ms | 6.4866ms | 154.1647 Ops/s | 227.9121 Ops/s | |
| test_clone | 12.1580ms | 9.5025ms | 105.2350 Ops/s | 149.4636 Ops/s | |
| test_squeeze | 79.7410μs | 9.8312μs | 101.7166 KOps/s | 102.8951 KOps/s | |
| test_unsqueeze | 0.1679ms | 73.3897μs | 13.6259 KOps/s | 13.7453 KOps/s | |
| test_split | 0.4227ms | 0.1582ms | 6.3211 KOps/s | 6.0272 KOps/s | |
| test_permute | 0.2262ms | 0.1786ms | 5.6003 KOps/s | 5.5529 KOps/s | |
| test_stack | 53.6563ms | 53.3219ms | 18.7540 Ops/s | 36.7587 Ops/s | |
| test_cat | 53.4677ms | 53.0698ms | 18.8431 Ops/s | 23.3215 Ops/s |
This was referenced Apr 17, 2025
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Stack from ghstack (oldest at bottom):
_get_item #1288