From ed7add71be532549b2c525e2d97aa268bbc69671 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 14 Dec 2025 23:13:54 +0900 Subject: [PATCH 1/5] gh-134584: Eliminate redundant refcounting from _STORE_SUBSCR_DICT --- Include/internal/pycore_opcode_metadata.h | 4 ++-- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 8 ++++---- Lib/test/test_capi/test_opt.py | 19 +++++++++++++++++++ Python/bytecodes.c | 17 +++++++++++++---- Python/executor_cases.c.h | 15 +++++++-------- Python/generated_cases.c.h | 21 +++++++++++++++++---- Python/opcode_targets.h | 1 + Python/optimizer_bytecodes.c | 5 +++++ Python/optimizer_cases.c.h | 12 ++++++++++-- 10 files changed, 79 insertions(+), 25 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 0a29fabe7676dc..23028871c039f6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1283,7 +1283,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [STORE_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1493,7 +1493,7 @@ _PyOpcode_macro_expansion[256] = { [STORE_NAME] = { .nuops = 1, .uops = { { _STORE_NAME, OPARG_SIMPLE, 0 } } }, [STORE_SLICE] = { .nuops = 1, .uops = { { _STORE_SLICE, OPARG_SIMPLE, 0 } } }, [STORE_SUBSCR] = { .nuops = 1, .uops = { { _STORE_SUBSCR, OPARG_SIMPLE, 0 } } }, - [STORE_SUBSCR_DICT] = { .nuops = 2, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 } } }, + [STORE_SUBSCR_DICT] = { .nuops = 3, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [STORE_SUBSCR_LIST_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_LIST_INT, OPARG_SIMPLE, 1 }, { _POP_TOP_INT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [SWAP] = { .nuops = 1, .uops = { { _SWAP, OPARG_SIMPLE, 0 } } }, [TO_BOOL] = { .nuops = 1, .uops = { { _TO_BOOL, OPARG_SIMPLE, 2 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 64e51bd2b8bb58..d1cc7538ab6d02 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -1067,7 +1067,7 @@ extern "C" { #define _STORE_NAME_r10 1260 #define _STORE_SLICE_r30 1261 #define _STORE_SUBSCR_r30 1262 -#define _STORE_SUBSCR_DICT_r30 1263 +#define _STORE_SUBSCR_DICT_r31 1263 #define _STORE_SUBSCR_LIST_INT_r32 1264 #define _SWAP_r11 1265 #define _SWAP_2_r02 1266 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5fa375a8ce6b4a..20507c08644ced 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -135,7 +135,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_STORE_SUBSCR_DICT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_SUBSCR_DICT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -1274,7 +1274,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 0, 0, _STORE_SUBSCR_DICT_r30 }, + { 1, 0, _STORE_SUBSCR_DICT_r31 }, }, }, [_DELETE_SUBSCR] = { @@ -3499,7 +3499,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_SET_ADD_r10] = _SET_ADD, [_STORE_SUBSCR_r30] = _STORE_SUBSCR, [_STORE_SUBSCR_LIST_INT_r32] = _STORE_SUBSCR_LIST_INT, - [_STORE_SUBSCR_DICT_r30] = _STORE_SUBSCR_DICT, + [_STORE_SUBSCR_DICT_r31] = _STORE_SUBSCR_DICT, [_DELETE_SUBSCR_r20] = _DELETE_SUBSCR, [_CALL_INTRINSIC_1_r11] = _CALL_INTRINSIC_1, [_CALL_INTRINSIC_2_r21] = _CALL_INTRINSIC_2, @@ -4867,7 +4867,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_STORE_SUBSCR] = "_STORE_SUBSCR", [_STORE_SUBSCR_r30] = "_STORE_SUBSCR_r30", [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", - [_STORE_SUBSCR_DICT_r30] = "_STORE_SUBSCR_DICT_r30", + [_STORE_SUBSCR_DICT_r31] = "_STORE_SUBSCR_DICT_r31", [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", [_STORE_SUBSCR_LIST_INT_r32] = "_STORE_SUBSCR_LIST_INT_r32", [_SWAP] = "_SWAP", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0f6ed3d85f0330..59f4e85534af38 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2513,10 +2513,29 @@ def testfunc(n): self.assertEqual(res, 10) self.assertIsNotNone(ex) uops = get_opnames(ex) + self.assertIn("_STORE_SUBSCR_LIST_INT", uops) self.assertNotIn("_POP_TOP", uops) self.assertNotIn("_POP_TOP_INT", uops) self.assertIn("_POP_TOP_NOP", uops) + def test_store_susbscr_dict(self): + def testfunc(n): + d = {} + for _ in range(n): + d['a'] = 1 + d['b'] = 2 + d['c'] = 3 + d['d'] = 4 + return sum(d.values()) + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 10) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_STORE_SUBSCR_DICT", uops) + self.assertNotIn("_POP_TOP", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_attr_promotion_failure(self): # We're not testing for any specific uops here, just # testing it doesn't crash. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index daa3d218e387f9..318fb1440f99ae 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1158,9 +1158,9 @@ dummy_func( } macro(STORE_SUBSCR_DICT) = - _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT; + _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT + POP_TOP; - op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- )) { + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { PyObject *dict = PyStackRef_AsPyObjectBorrow(dict_st); assert(PyDict_CheckExact(dict)); @@ -1168,8 +1168,11 @@ dummy_func( int err = _PyDict_SetItem_Take2((PyDictObject *)dict, PyStackRef_AsPyObjectSteal(sub), PyStackRef_AsPyObjectSteal(value)); - PyStackRef_CLOSE(dict_st); - ERROR_IF(err); + if (err) { + ERROR_NO_POP(); + } + INPUTS_DEAD(); + st = dict_st; } inst(DELETE_SUBSCR, (container, sub --)) { @@ -5458,6 +5461,12 @@ dummy_func( } } + label(pop_3_error) { + stack_pointer -= 3; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } + label(pop_2_error) { stack_pointer -= 2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2a1156091e3d37..8cb9a0456007b1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5765,12 +5765,13 @@ break; } - case _STORE_SUBSCR_DICT_r30: { + case _STORE_SUBSCR_DICT_r31: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); _PyStackRef sub; _PyStackRef dict_st; _PyStackRef value; + _PyStackRef st; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; @@ -5790,19 +5791,17 @@ PyStackRef_AsPyObjectSteal(sub), PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(dict_st); - stack_pointer = _PyFrame_GetStackPointer(frame); if (err) { SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } - _tos_cache0 = PyStackRef_ZERO_BITS; + st = dict_st; + _tos_cache0 = st; _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(0); + SET_CURRENT_CACHED_VALUES(1); + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ab9373e0af5afc..51207aaffe1e3d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10973,6 +10973,7 @@ _PyStackRef value; _PyStackRef dict_st; _PyStackRef sub; + _PyStackRef st; // _GUARD_NOS_DICT { nos = stack_pointer[-2]; @@ -10997,14 +10998,19 @@ PyStackRef_AsPyObjectSteal(sub), PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); + if (err) { + JUMP_TO_LABEL(error); + } + st = dict_st; + } + // _POP_TOP + { + value = st; stack_pointer += -3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(dict_st); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); - if (err) { - JUMP_TO_LABEL(error); - } } DISPATCH(); } @@ -11899,6 +11905,13 @@ JUMP_TO_LABEL(error); #endif /* _Py_TAIL_CALL_INTERP */ /* BEGIN LABELS */ + LABEL(pop_3_error) + { + stack_pointer -= 3; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_LABEL(error); + } + LABEL(pop_2_error) { stack_pointer -= 2; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index b2fa7d01e8f6c2..1a8a4392187eca 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -522,6 +522,7 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256]; static py_tail_call_funcptr instruction_funcptr_tracing_table[256]; +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_3_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_2_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_1_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_error(TAIL_CALL_PARAMS); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 66aecf7ef54355..d10cebac8d152b 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -109,6 +109,11 @@ dummy_func(void) { ss = sub_st; } + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { + (void)value; + st = dict_st; + } + op(_PUSH_NULL, (-- res)) { res = sym_new_null(ctx); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1a3d4ad50bd824..70330417c6d99b 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1148,8 +1148,16 @@ } case _STORE_SUBSCR_DICT: { - CHECK_STACK_BOUNDS(-3); - stack_pointer += -3; + JitOptRef dict_st; + JitOptRef value; + JitOptRef st; + dict_st = stack_pointer[-2]; + value = stack_pointer[-3]; + (void)value; + st = dict_st; + CHECK_STACK_BOUNDS(-2); + stack_pointer[-3] = st; + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } From 698b58ecaa1e2d61623bd2fa41c28ba87cf25e74 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 14 Dec 2025 23:39:38 +0900 Subject: [PATCH 2/5] nit --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 +++--- Python/bytecodes.c | 5 +++-- Python/executor_cases.c.h | 8 +++++--- Python/generated_cases.c.h | 14 +++++++++++++- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 9 +++++++-- 8 files changed, 35 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 23028871c039f6..4643fbf4438bbc 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1493,7 +1493,7 @@ _PyOpcode_macro_expansion[256] = { [STORE_NAME] = { .nuops = 1, .uops = { { _STORE_NAME, OPARG_SIMPLE, 0 } } }, [STORE_SLICE] = { .nuops = 1, .uops = { { _STORE_SLICE, OPARG_SIMPLE, 0 } } }, [STORE_SUBSCR] = { .nuops = 1, .uops = { { _STORE_SUBSCR, OPARG_SIMPLE, 0 } } }, - [STORE_SUBSCR_DICT] = { .nuops = 3, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, + [STORE_SUBSCR_DICT] = { .nuops = 4, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [STORE_SUBSCR_LIST_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_LIST_INT, OPARG_SIMPLE, 1 }, { _POP_TOP_INT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [SWAP] = { .nuops = 1, .uops = { { _SWAP, OPARG_SIMPLE, 0 } } }, [TO_BOOL] = { .nuops = 1, .uops = { { _TO_BOOL, OPARG_SIMPLE, 2 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index d1cc7538ab6d02..22bd8c70c05db9 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -1067,7 +1067,7 @@ extern "C" { #define _STORE_NAME_r10 1260 #define _STORE_SLICE_r30 1261 #define _STORE_SUBSCR_r30 1262 -#define _STORE_SUBSCR_DICT_r31 1263 +#define _STORE_SUBSCR_DICT_r32 1263 #define _STORE_SUBSCR_LIST_INT_r32 1264 #define _SWAP_r11 1265 #define _SWAP_2_r02 1266 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 20507c08644ced..2ab98978df41a3 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -1274,7 +1274,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 0, _STORE_SUBSCR_DICT_r31 }, + { 2, 0, _STORE_SUBSCR_DICT_r32 }, }, }, [_DELETE_SUBSCR] = { @@ -3499,7 +3499,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_SET_ADD_r10] = _SET_ADD, [_STORE_SUBSCR_r30] = _STORE_SUBSCR, [_STORE_SUBSCR_LIST_INT_r32] = _STORE_SUBSCR_LIST_INT, - [_STORE_SUBSCR_DICT_r31] = _STORE_SUBSCR_DICT, + [_STORE_SUBSCR_DICT_r32] = _STORE_SUBSCR_DICT, [_DELETE_SUBSCR_r20] = _DELETE_SUBSCR, [_CALL_INTRINSIC_1_r11] = _CALL_INTRINSIC_1, [_CALL_INTRINSIC_2_r21] = _CALL_INTRINSIC_2, @@ -4867,7 +4867,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_STORE_SUBSCR] = "_STORE_SUBSCR", [_STORE_SUBSCR_r30] = "_STORE_SUBSCR_r30", [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", - [_STORE_SUBSCR_DICT_r31] = "_STORE_SUBSCR_DICT_r31", + [_STORE_SUBSCR_DICT_r32] = "_STORE_SUBSCR_DICT_r32", [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", [_STORE_SUBSCR_LIST_INT_r32] = "_STORE_SUBSCR_LIST_INT_r32", [_SWAP] = "_SWAP", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 318fb1440f99ae..16fb27f54e1352 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1158,9 +1158,9 @@ dummy_func( } macro(STORE_SUBSCR_DICT) = - _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT + POP_TOP; + _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT + POP_TOP + POP_TOP; - op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st, sb)) { PyObject *dict = PyStackRef_AsPyObjectBorrow(dict_st); assert(PyDict_CheckExact(dict)); @@ -1173,6 +1173,7 @@ dummy_func( } INPUTS_DEAD(); st = dict_st; + sb = sub; } inst(DELETE_SUBSCR, (container, sub --)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8cb9a0456007b1..5d928ae2a2a6cb 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5765,13 +5765,14 @@ break; } - case _STORE_SUBSCR_DICT_r31: { + case _STORE_SUBSCR_DICT_r32: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); _PyStackRef sub; _PyStackRef dict_st; _PyStackRef value; _PyStackRef st; + _PyStackRef sb; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; @@ -5796,10 +5797,11 @@ JUMP_TO_ERROR(); } st = dict_st; + sb = sub; + _tos_cache1 = sb; _tos_cache0 = st; - _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); + SET_CURRENT_CACHED_VALUES(2); stack_pointer += -3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 51207aaffe1e3d..3d4362ebebbbf9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10974,6 +10974,7 @@ _PyStackRef dict_st; _PyStackRef sub; _PyStackRef st; + _PyStackRef sb; // _GUARD_NOS_DICT { nos = stack_pointer[-2]; @@ -11002,11 +11003,22 @@ JUMP_TO_LABEL(error); } st = dict_st; + sb = sub; + } + // _POP_TOP + { + value = sb; + stack_pointer[-3] = st; + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } // _POP_TOP { value = st; - stack_pointer += -3; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(value); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index d10cebac8d152b..32500bb789aec8 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -109,9 +109,10 @@ dummy_func(void) { ss = sub_st; } - op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st, sb)) { (void)value; st = dict_st; + sb = sub; } op(_PUSH_NULL, (-- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 70330417c6d99b..0fdd662e706901 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1148,16 +1148,21 @@ } case _STORE_SUBSCR_DICT: { + JitOptRef sub; JitOptRef dict_st; JitOptRef value; JitOptRef st; + JitOptRef sb; + sub = stack_pointer[-1]; dict_st = stack_pointer[-2]; value = stack_pointer[-3]; (void)value; st = dict_st; - CHECK_STACK_BOUNDS(-2); + sb = sub; + CHECK_STACK_BOUNDS(-1); stack_pointer[-3] = st; - stack_pointer += -2; + stack_pointer[-2] = sb; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } From 803585446041b50bf974bf600529ae7e2ef67598 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Dec 2025 00:00:41 +0900 Subject: [PATCH 3/5] Address code review --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 +++--- Python/bytecodes.c | 7 +++---- Python/executor_cases.c.h | 8 +++----- Python/generated_cases.c.h | 14 +------------- Python/optimizer_bytecodes.c | 3 +-- Python/optimizer_cases.c.h | 9 ++------- 8 files changed, 15 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 4643fbf4438bbc..23028871c039f6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1493,7 +1493,7 @@ _PyOpcode_macro_expansion[256] = { [STORE_NAME] = { .nuops = 1, .uops = { { _STORE_NAME, OPARG_SIMPLE, 0 } } }, [STORE_SLICE] = { .nuops = 1, .uops = { { _STORE_SLICE, OPARG_SIMPLE, 0 } } }, [STORE_SUBSCR] = { .nuops = 1, .uops = { { _STORE_SUBSCR, OPARG_SIMPLE, 0 } } }, - [STORE_SUBSCR_DICT] = { .nuops = 4, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, + [STORE_SUBSCR_DICT] = { .nuops = 3, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [STORE_SUBSCR_LIST_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_LIST_INT, OPARG_SIMPLE, 1 }, { _POP_TOP_INT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [SWAP] = { .nuops = 1, .uops = { { _SWAP, OPARG_SIMPLE, 0 } } }, [TO_BOOL] = { .nuops = 1, .uops = { { _TO_BOOL, OPARG_SIMPLE, 2 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 22bd8c70c05db9..d1cc7538ab6d02 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -1067,7 +1067,7 @@ extern "C" { #define _STORE_NAME_r10 1260 #define _STORE_SLICE_r30 1261 #define _STORE_SUBSCR_r30 1262 -#define _STORE_SUBSCR_DICT_r32 1263 +#define _STORE_SUBSCR_DICT_r31 1263 #define _STORE_SUBSCR_LIST_INT_r32 1264 #define _SWAP_r11 1265 #define _SWAP_2_r02 1266 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2ab98978df41a3..20507c08644ced 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -1274,7 +1274,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 2, 0, _STORE_SUBSCR_DICT_r32 }, + { 1, 0, _STORE_SUBSCR_DICT_r31 }, }, }, [_DELETE_SUBSCR] = { @@ -3499,7 +3499,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_SET_ADD_r10] = _SET_ADD, [_STORE_SUBSCR_r30] = _STORE_SUBSCR, [_STORE_SUBSCR_LIST_INT_r32] = _STORE_SUBSCR_LIST_INT, - [_STORE_SUBSCR_DICT_r32] = _STORE_SUBSCR_DICT, + [_STORE_SUBSCR_DICT_r31] = _STORE_SUBSCR_DICT, [_DELETE_SUBSCR_r20] = _DELETE_SUBSCR, [_CALL_INTRINSIC_1_r11] = _CALL_INTRINSIC_1, [_CALL_INTRINSIC_2_r21] = _CALL_INTRINSIC_2, @@ -4867,7 +4867,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_STORE_SUBSCR] = "_STORE_SUBSCR", [_STORE_SUBSCR_r30] = "_STORE_SUBSCR_r30", [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", - [_STORE_SUBSCR_DICT_r32] = "_STORE_SUBSCR_DICT_r32", + [_STORE_SUBSCR_DICT_r31] = "_STORE_SUBSCR_DICT_r31", [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", [_STORE_SUBSCR_LIST_INT_r32] = "_STORE_SUBSCR_LIST_INT_r32", [_SWAP] = "_SWAP", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 16fb27f54e1352..3a2c2c0f05f5de 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1158,9 +1158,9 @@ dummy_func( } macro(STORE_SUBSCR_DICT) = - _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT + POP_TOP + POP_TOP; + _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT + POP_TOP; - op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st, sb)) { + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { PyObject *dict = PyStackRef_AsPyObjectBorrow(dict_st); assert(PyDict_CheckExact(dict)); @@ -1171,9 +1171,8 @@ dummy_func( if (err) { ERROR_NO_POP(); } - INPUTS_DEAD(); + DEAD(dict_st); st = dict_st; - sb = sub; } inst(DELETE_SUBSCR, (container, sub --)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5d928ae2a2a6cb..8cb9a0456007b1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5765,14 +5765,13 @@ break; } - case _STORE_SUBSCR_DICT_r32: { + case _STORE_SUBSCR_DICT_r31: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); _PyStackRef sub; _PyStackRef dict_st; _PyStackRef value; _PyStackRef st; - _PyStackRef sb; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; @@ -5797,11 +5796,10 @@ JUMP_TO_ERROR(); } st = dict_st; - sb = sub; - _tos_cache1 = sb; _tos_cache0 = st; + _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(2); + SET_CURRENT_CACHED_VALUES(1); stack_pointer += -3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3d4362ebebbbf9..51207aaffe1e3d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10974,7 +10974,6 @@ _PyStackRef dict_st; _PyStackRef sub; _PyStackRef st; - _PyStackRef sb; // _GUARD_NOS_DICT { nos = stack_pointer[-2]; @@ -11003,22 +11002,11 @@ JUMP_TO_LABEL(error); } st = dict_st; - sb = sub; - } - // _POP_TOP - { - value = sb; - stack_pointer[-3] = st; - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(value); - stack_pointer = _PyFrame_GetStackPointer(frame); } // _POP_TOP { value = st; - stack_pointer += -1; + stack_pointer += -3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(value); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 32500bb789aec8..d10cebac8d152b 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -109,10 +109,9 @@ dummy_func(void) { ss = sub_st; } - op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st, sb)) { + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { (void)value; st = dict_st; - sb = sub; } op(_PUSH_NULL, (-- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0fdd662e706901..70330417c6d99b 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1148,21 +1148,16 @@ } case _STORE_SUBSCR_DICT: { - JitOptRef sub; JitOptRef dict_st; JitOptRef value; JitOptRef st; - JitOptRef sb; - sub = stack_pointer[-1]; dict_st = stack_pointer[-2]; value = stack_pointer[-3]; (void)value; st = dict_st; - sb = sub; - CHECK_STACK_BOUNDS(-1); + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = st; - stack_pointer[-2] = sb; - stack_pointer += -1; + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } From 7840b37ddde12022a84a41d5329c3a81c8762a73 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Dec 2025 14:12:54 +0900 Subject: [PATCH 4/5] Address code review --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 3 ++- Python/executor_cases.c.h | 5 +++++ Python/generated_cases.c.h | 5 +++++ 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 23028871c039f6..72c3a1e1d9d4cc 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1283,7 +1283,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [STORE_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 20507c08644ced..b46786aa4ae1b2 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -135,7 +135,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_STORE_SUBSCR_DICT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_STORE_SUBSCR_DICT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3a2c2c0f05f5de..566616268ac83d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1169,7 +1169,8 @@ dummy_func( PyStackRef_AsPyObjectSteal(sub), PyStackRef_AsPyObjectSteal(value)); if (err) { - ERROR_NO_POP(); + PyStackRef_CLOSE(dict_st); + ERROR_IF(1); } DEAD(dict_st); st = dict_st; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8cb9a0456007b1..3fff5a67d4db9a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5792,6 +5792,11 @@ PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); if (err) { + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(dict_st); + stack_pointer = _PyFrame_GetStackPointer(frame); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 51207aaffe1e3d..b9c57e3eb65983 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10999,6 +10999,11 @@ PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); if (err) { + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(dict_st); + stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } st = dict_st; From 835f6ee387d992039a760d6458599ad5514f9a40 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Mon, 15 Dec 2025 16:09:17 +0000 Subject: [PATCH 5/5] remove unused label --- Python/bytecodes.c | 6 ------ Python/generated_cases.c.h | 7 ------- Python/opcode_targets.h | 1 - 3 files changed, 14 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a5631c0910a6e9..3e6147961f1822 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5467,12 +5467,6 @@ dummy_func( } } - label(pop_3_error) { - stack_pointer -= 3; - assert(WITHIN_STACK_BOUNDS()); - goto error; - } - label(pop_2_error) { stack_pointer -= 2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3d725d0714f670..9c828cba877ad4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -11924,13 +11924,6 @@ JUMP_TO_LABEL(error); #endif /* _Py_TAIL_CALL_INTERP */ /* BEGIN LABELS */ - LABEL(pop_3_error) - { - stack_pointer -= 3; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } - LABEL(pop_2_error) { stack_pointer -= 2; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 1a8a4392187eca..b2fa7d01e8f6c2 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -522,7 +522,6 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256]; static py_tail_call_funcptr instruction_funcptr_tracing_table[256]; -Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_3_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_2_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_1_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_error(TAIL_CALL_PARAMS);