From 02ca84db483221fe3dd61248dab7ad0913933c1e Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 17 Jan 2025 20:45:38 +0100 Subject: [PATCH 1/6] specialize concatenation of lists and tuples --- Lib/test/test_opcache.py | 15 +++++++++++++++ Objects/listobject.c | 2 +- Objects/tupleobject.c | 2 +- Python/specialize.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 4ca108cd6ca43e..60876080577452 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1423,6 +1423,21 @@ def binary_op_add_extend(): self.assert_specialized(binary_op_add_extend, "BINARY_OP_EXTEND") self.assert_no_opcode(binary_op_add_extend, "BINARY_OP") + def binary_op_add_extend_sequences(): + l1 = [1, 2] + l2 = [None] + t1 = (1, 2) + t2 = (None,) + for _ in range(100): + list_sum = l1 + l2 + self.assertEqual(list_sum, [1, 2, None]) + tuple_sum = t1 + t2 + self.assertEqual(tuple_sum, (1, 2, None)) + + binary_op_add_extend_sequences() + self.assert_specialized(binary_op_add_extend_sequences, "BINARY_OP_EXTEND") + self.assert_no_opcode(binary_op_add_extend_sequences, "BINARY_OP") + def binary_op_zero_division(): def compactlong_lhs(arg): 42 / arg diff --git a/Objects/listobject.c b/Objects/listobject.c index 5c9fd55bab1b22..bfa7befd386ebc 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -798,7 +798,7 @@ list_concat_lock_held(PyListObject *a, PyListObject *b) return (PyObject *)np; } -static PyObject * +PyObject * list_concat(PyObject *aa, PyObject *bb) { if (!PyList_Check(bb)) { diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index ee6320e6ca3cfe..af359d7c8d32d9 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -547,7 +547,7 @@ PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j) return tuple_slice((PyTupleObject *)op, i, j); } -static PyObject * +PyObject * tuple_concat(PyObject *aa, PyObject *bb) { PyTupleObject *a = _PyTuple_CAST(aa); diff --git a/Python/specialize.c b/Python/specialize.c index 09ec25767a4c3f..71102ae999074a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2104,6 +2104,38 @@ is_compactlong(PyObject *v) _PyLong_IsCompact((PyLongObject *)v); } +/* list-list */ + +static int +list_list_guard(PyObject *lhs, PyObject *rhs) +{ + return PyList_CheckExact(lhs) && PyList_CheckExact(rhs); +} + +extern PyObject *list_concat(PyObject *aa, PyObject *bb); + +static PyObject * +list_list_add(PyObject *lhs, PyObject *rhs) +{ + return list_concat(lhs, rhs); +} + +/* tuple-tuple */ + +static int +tuple_tuple_guard(PyObject *lhs, PyObject *rhs) +{ + return PyTuple_CheckExact(lhs) && PyTuple_CheckExact(rhs); +} + +extern PyObject *tuple_concat(PyObject *aa, PyObject *bb); + +static PyObject * +tuple_tuple_add(PyObject *lhs, PyObject *rhs) +{ + return tuple_concat(lhs, rhs); +} + static int compactlongs_guard(PyObject *lhs, PyObject *rhs) { @@ -2213,6 +2245,10 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract}, {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div}, {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply}, + + /* list-list and tuple-tuple concatenation */ + {NB_ADD, list_list_guard, list_list_add}, + {NB_ADD, tuple_tuple_guard, tuple_tuple_add}, }; static int From 27f4c56936fce085677243281f3e1481beede59a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 17 Jan 2025 19:48:34 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst new file mode 100644 index 00000000000000..c62497c213507a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst @@ -0,0 +1 @@ +Specialize ``BINARY_OP`` for concatenation of lists and tuples. From 51d1b111d2e76bd81a44c0fde471ce4215852b51 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 5 Apr 2026 21:57:21 +0200 Subject: [PATCH 3/6] refactor for type information --- Include/internal/pycore_code.h | 3 +++ Include/internal/pycore_list.h | 1 + Include/internal/pycore_tuple.h | 1 + Lib/test/test_capi/test_opt.py | 42 ++++++++++++++++++++++++++++++++ Objects/listobject.c | 4 +-- Objects/tupleobject.c | 4 +-- Python/optimizer_bytecodes.c | 9 +++++-- Python/optimizer_cases.c.h | 9 +++++-- Python/specialize.c | 43 +++++++++++++++------------------ 9 files changed, 85 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 376e68a4c8773c..09b11599f34223 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -496,6 +496,9 @@ typedef struct { int oparg; binaryopguardfunc guard; binaryopactionfunc action; + /* Static type of the result, or NULL if unknown. Used by the tier 2 + optimizer to propagate type information through _BINARY_OP_EXTEND. */ + PyTypeObject *result_type; } _PyBinaryOpSpecializationDescr; /* Comparison bit masks. */ diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 6b92dc5d111f3b..df0d00f752573b 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -15,6 +15,7 @@ extern "C" { PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); PyAPI_FUNC(PyObject) *_PyList_SliceSubscript(PyObject*, PyObject*); PyAPI_FUNC(PyObject *) _PyList_BinarySlice(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyList_Concat(PyObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); // _PyList_GetItemRef should be used only when the object is known as a list // because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does. diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h index 9409ec94976d3a..bf80f96396ea4a 100644 --- a/Include/internal/pycore_tuple.h +++ b/Include/internal/pycore_tuple.h @@ -28,6 +28,7 @@ PyAPI_FUNC(void) _PyStolenTuple_Free(PyObject *self); PyAPI_FUNC(PyObject *)_PyTuple_FromStackRefStealOnSuccess(const union _PyStackRef *, Py_ssize_t); PyAPI_FUNC(PyObject *)_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t); PyAPI_FUNC(PyObject *) _PyTuple_BinarySlice(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyTuple_Concat(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) _PyTuple_FromPair(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) _PyTuple_FromPairSteal(PyObject *, PyObject *); diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 56f90194b480a1..24e7f3c85fd5c1 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -3813,6 +3813,48 @@ def f(n): self.assertIn("_UNPACK_SEQUENCE_TWO_TUPLE", uops) self.assertNotIn("_GUARD_TOS_TUPLE", uops) + def test_binary_op_extend_list_concat_type_propagation(self): + # list + list is specialized via BINARY_OP_EXTEND. The tier 2 optimizer + # should learn that the result is a list and eliminate subsequent + # list-type guards. + def testfunc(n): + a = [1, 2] + b = [3, 4] + x = True + for _ in range(n): + c = a + b + if c[0]: + x = False + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, False) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_EXTEND", uops) + # The c[0] subscript emits _GUARD_NOS_LIST before _BINARY_OP_SUBSCR_LIST_INT; + # since _BINARY_OP_EXTEND now propagates PyList_Type, that guard is gone. + self.assertIn("_BINARY_OP_SUBSCR_LIST_INT", uops) + self.assertNotIn("_GUARD_NOS_LIST", uops) + + def test_binary_op_extend_tuple_concat_type_propagation(self): + # tuple + tuple is specialized via BINARY_OP_EXTEND. The tier 2 optimizer + # should learn the result is a tuple and eliminate subsequent tuple guards. + def testfunc(n): + t1 = (1, 2) + t2 = (3, 4) + for _ in range(n): + a, b, c, d = t1 + t2 + return a + b + c + d + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 10) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_EXTEND", uops) + self.assertIn("_UNPACK_SEQUENCE_TUPLE", uops) + self.assertNotIn("_GUARD_TOS_TUPLE", uops) + def test_unary_invert_long_type(self): def testfunc(n): for _ in range(n): diff --git a/Objects/listobject.c b/Objects/listobject.c index bfa7befd386ebc..97869b17cde7a8 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -799,7 +799,7 @@ list_concat_lock_held(PyListObject *a, PyListObject *b) } PyObject * -list_concat(PyObject *aa, PyObject *bb) +_PyList_Concat(PyObject *aa, PyObject *bb) { if (!PyList_Check(bb)) { PyErr_Format(PyExc_TypeError, @@ -3617,7 +3617,7 @@ static PyMethodDef list_methods[] = { static PySequenceMethods list_as_sequence = { list_length, /* sq_length */ - list_concat, /* sq_concat */ + _PyList_Concat, /* sq_concat */ list_repeat, /* sq_repeat */ list_item, /* sq_item */ 0, /* sq_slice */ diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index af359d7c8d32d9..07384acde32e52 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -548,7 +548,7 @@ PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j) } PyObject * -tuple_concat(PyObject *aa, PyObject *bb) +_PyTuple_Concat(PyObject *aa, PyObject *bb) { PyTupleObject *a = _PyTuple_CAST(aa); if (Py_SIZE(a) == 0 && PyTuple_CheckExact(bb)) { @@ -864,7 +864,7 @@ tuple_subtype_new(PyTypeObject *type, PyObject *iterable) static PySequenceMethods tuple_as_sequence = { tuple_length, /* sq_length */ - tuple_concat, /* sq_concat */ + _PyTuple_Concat, /* sq_concat */ tuple_repeat, /* sq_repeat */ tuple_item, /* sq_item */ 0, /* sq_slice */ diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index b8148ef57ede0c..0ed9822ab2e4a7 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -410,8 +410,13 @@ dummy_func(void) { } op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) { - (void)descr; - res = sym_new_not_null(ctx); + _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr; + if (d != NULL && d->result_type != NULL) { + res = sym_new_type(ctx, d->result_type); + } + else { + res = sym_new_not_null(ctx); + } l = left; r = right; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index a15b5ae1d13d3b..7a8a1d20666877 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1168,8 +1168,13 @@ right = stack_pointer[-1]; left = stack_pointer[-2]; PyObject *descr = (PyObject *)this_instr->operand0; - (void)descr; - res = sym_new_not_null(ctx); + _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr; + if (d != NULL && d->result_type != NULL) { + res = sym_new_type(ctx, d->result_type); + } + else { + res = sym_new_not_null(ctx); + } l = left; r = right; CHECK_STACK_BOUNDS(1); diff --git a/Python/specialize.c b/Python/specialize.c index 71102ae999074a..0953eb421554f5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -9,7 +9,8 @@ #include "pycore_function.h" // _PyFunction_GetVersionForCurrentState() #include "pycore_interpframe.h" // FRAME_SPECIALS_SIZE #include "pycore_lazyimportobject.h" // PyLazyImport_CheckExact -#include "pycore_list.h" // _PyListIterObject +#include "pycore_list.h" // _PyListIterObject, _PyList_Concat +#include "pycore_tuple.h" // _PyTuple_Concat #include "pycore_long.h" // _PyLong_IsNonNegativeCompact() #include "pycore_moduleobject.h" #include "pycore_object.h" @@ -2112,12 +2113,10 @@ list_list_guard(PyObject *lhs, PyObject *rhs) return PyList_CheckExact(lhs) && PyList_CheckExact(rhs); } -extern PyObject *list_concat(PyObject *aa, PyObject *bb); - static PyObject * list_list_add(PyObject *lhs, PyObject *rhs) { - return list_concat(lhs, rhs); + return _PyList_Concat(lhs, rhs); } /* tuple-tuple */ @@ -2128,12 +2127,10 @@ tuple_tuple_guard(PyObject *lhs, PyObject *rhs) return PyTuple_CheckExact(lhs) && PyTuple_CheckExact(rhs); } -extern PyObject *tuple_concat(PyObject *aa, PyObject *bb); - static PyObject * tuple_tuple_add(PyObject *lhs, PyObject *rhs) { - return tuple_concat(lhs, rhs); + return _PyTuple_Concat(lhs, rhs); } static int @@ -2227,28 +2224,28 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /) static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { /* long-long arithmetic */ - {NB_OR, compactlongs_guard, compactlongs_or}, - {NB_AND, compactlongs_guard, compactlongs_and}, - {NB_XOR, compactlongs_guard, compactlongs_xor}, - {NB_INPLACE_OR, compactlongs_guard, compactlongs_or}, - {NB_INPLACE_AND, compactlongs_guard, compactlongs_and}, - {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor}, + {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type}, + {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type}, + {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type}, + {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type}, + {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type}, + {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type}, /* float-long arithemetic */ - {NB_ADD, float_compactlong_guard, float_compactlong_add}, - {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract}, - {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div}, - {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply}, + {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type}, + {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type}, + {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type}, + {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type}, /* float-float arithmetic */ - {NB_ADD, compactlong_float_guard, compactlong_float_add}, - {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract}, - {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div}, - {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply}, + {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type}, + {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type}, + {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type}, + {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type}, /* list-list and tuple-tuple concatenation */ - {NB_ADD, list_list_guard, list_list_add}, - {NB_ADD, tuple_tuple_guard, tuple_tuple_add}, + {NB_ADD, list_list_guard, list_list_add, &PyList_Type}, + {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type}, }; static int From e8263f97577c25e11b5f593e157bf9daad057006 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 5 Apr 2026 22:04:23 +0200 Subject: [PATCH 4/6] add unique type propagation --- Include/internal/pycore_code.h | 4 ++ Lib/test/test_capi/test_opt.py | 23 ++++++++++ ...-01-17-19-48-28.gh-issue-100239.7pbTEA.rst | 4 +- Python/optimizer_bytecodes.c | 3 ++ Python/optimizer_cases.c.h | 3 ++ Python/specialize.c | 43 ++++++++++--------- 6 files changed, 59 insertions(+), 21 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 09b11599f34223..fe8d0a54f2af1a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -499,6 +499,10 @@ typedef struct { /* Static type of the result, or NULL if unknown. Used by the tier 2 optimizer to propagate type information through _BINARY_OP_EXTEND. */ PyTypeObject *result_type; + /* Nonzero iff `action` always returns a freshly allocated object (not + aliased to either operand). Used by the tier 2 optimizer to enable + inplace follow-up ops. */ + int result_unique; } _PyBinaryOpSpecializationDescr; /* Comparison bit masks. */ diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 24e7f3c85fd5c1..e114385a9ebc46 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -3813,6 +3813,29 @@ def f(n): self.assertIn("_UNPACK_SEQUENCE_TWO_TUPLE", uops) self.assertNotIn("_GUARD_TOS_TUPLE", uops) + def test_binary_op_extend_float_result_enables_inplace_multiply(self): + # (2 + x) * y with x, y floats: `2 + x` goes through _BINARY_OP_EXTEND + # (int + float). The result_type/result_unique info should let the + # subsequent float multiply use the inplace variant. + def testfunc(n): + x = 3.5 + y = 2.0 + res = 0.0 + for _ in range(n): + res = (2 + x) * y + return res + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 11.0) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_EXTEND", uops) + self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", uops) + self.assertNotIn("_BINARY_OP_MULTIPLY_FLOAT", uops) + # NOS guard on the multiply is eliminated because _BINARY_OP_EXTEND + # propagates PyFloat_Type. + self.assertNotIn("_GUARD_NOS_FLOAT", uops) + def test_binary_op_extend_list_concat_type_propagation(self): # list + list is specialized via BINARY_OP_EXTEND. The tier 2 optimizer # should learn that the result is a list and eliminate subsequent diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst index c62497c213507a..594ef72ac57fae 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst @@ -1 +1,3 @@ -Specialize ``BINARY_OP`` for concatenation of lists and tuples. +Specialize ``BINARY_OP`` for concatenation of lists and tuples, and +propagate the result type through ``_BINARY_OP_EXTEND`` in the tier 2 +optimizer so that follow-up type guards can be eliminated. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 0ed9822ab2e4a7..58b50707e55cee 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -413,6 +413,9 @@ dummy_func(void) { _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr; if (d != NULL && d->result_type != NULL) { res = sym_new_type(ctx, d->result_type); + if (d->result_unique) { + res = PyJitRef_MakeUnique(res); + } } else { res = sym_new_not_null(ctx); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 7a8a1d20666877..891887301119d7 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1171,6 +1171,9 @@ _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr; if (d != NULL && d->result_type != NULL) { res = sym_new_type(ctx, d->result_type); + if (d->result_unique) { + res = PyJitRef_MakeUnique(res); + } } else { res = sym_new_not_null(ctx); diff --git a/Python/specialize.c b/Python/specialize.c index 0953eb421554f5..4b5c10e9d72909 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2224,28 +2224,31 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /) static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { /* long-long arithmetic */ - {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type}, - {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type}, - {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type}, - {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type}, - {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type}, - {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type}, + {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1}, + {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1}, + {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1}, + {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1}, + {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1}, + {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1}, /* float-long arithemetic */ - {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type}, - {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type}, - {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type}, - {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type}, - - /* float-float arithmetic */ - {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type}, - {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type}, - {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type}, - {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type}, - - /* list-list and tuple-tuple concatenation */ - {NB_ADD, list_list_guard, list_list_add, &PyList_Type}, - {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type}, + {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1}, + {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1}, + {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1}, + {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1}, + + /* long-float arithmetic */ + {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1}, + {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1}, + {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1}, + {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1}, + + /* list-list concatenation: _PyList_Concat always allocates a new list */ + {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1}, + /* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut + that can return one of the operands, so the result is not guaranteed + to be a freshly allocated object. */ + {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0}, }; static int From fe63c59490524e5e967fec380478765a946fd9f4 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 6 Apr 2026 23:13:49 +0200 Subject: [PATCH 5/6] special case for concatenation --- Include/internal/pycore_code.h | 5 + Lib/test/test_capi/test_opt.py | 24 +++++ Objects/bytesobject.c | 6 +- Objects/dictobject.c | 4 +- Objects/tupleobject.c | 2 +- Objects/unicodeobject.c | 2 +- Python/optimizer_bytecodes.c | 10 ++ Python/optimizer_cases.c.h | 12 +++ Python/specialize.c | 188 +++++++++++++++++++++++++++++---- 9 files changed, 225 insertions(+), 28 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index fe8d0a54f2af1a..b73dbe123838a4 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -503,6 +503,11 @@ typedef struct { aliased to either operand). Used by the tier 2 optimizer to enable inplace follow-up ops. */ int result_unique; + /* Expected types of the left and right operands. Used by the tier 2 + optimizer to eliminate _GUARD_BINARY_OP_EXTEND when the operand + types are already known. NULL means unknown/don't eliminate. */ + PyTypeObject *lhs_type; + PyTypeObject *rhs_type; } _PyBinaryOpSpecializationDescr; /* Comparison bit masks. */ diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e114385a9ebc46..e4050d3db48cb7 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -3878,6 +3878,30 @@ def testfunc(n): self.assertIn("_UNPACK_SEQUENCE_TUPLE", uops) self.assertNotIn("_GUARD_TOS_TUPLE", uops) + def test_binary_op_extend_guard_elimination(self): + # When both operands have known types (e.g., from a prior + # _BINARY_OP_EXTEND result), the _GUARD_BINARY_OP_EXTEND + # should be eliminated. + def testfunc(n): + a = [1, 2] + b = [3, 4] + total = 0 + for _ in range(n): + c = a + b # first: guard stays, result type = list + d = c + c # second: both operands are list -> guard eliminated + total += d[0] + return total + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # Both list additions use _BINARY_OP_EXTEND + self.assertEqual(uops.count("_BINARY_OP_EXTEND"), 2) + # But the second guard is eliminated because both operands + # are known to be lists from the first _BINARY_OP_EXTEND. + self.assertEqual(uops.count("_GUARD_BINARY_OP_EXTEND"), 1) + def test_unary_invert_long_type(self): def testfunc(n): for _ in range(n): diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 902144e8ec9f83..8a38d2ba0aa463 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1536,8 +1536,8 @@ bytes_length(PyObject *self) return Py_SIZE(a); } -/* This is also used by PyBytes_Concat() */ -static PyObject * +/* This is also used by PyBytes_Concat() and BINARY_OP_EXTEND */ +PyObject * bytes_concat(PyObject *a, PyObject *b) { Py_buffer va, vb; @@ -1581,7 +1581,7 @@ bytes_concat(PyObject *a, PyObject *b) return result; } -static PyObject * +PyObject * bytes_repeat(PyObject *self, Py_ssize_t n) { PyBytesObject *a = _PyBytes_CAST(self); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 67bc4319e0bae2..ae7179e8ba681a 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5041,7 +5041,7 @@ dict___sizeof___impl(PyDictObject *self) return PyLong_FromSsize_t(_PyDict_SizeOf(self)); } -static PyObject * +PyObject * dict_or(PyObject *self, PyObject *other) { if (!PyAnyDict_Check(self) || !PyAnyDict_Check(other)) { @@ -5081,7 +5081,7 @@ frozendict_or(PyObject *self, PyObject *other) } -static PyObject * +PyObject * dict_ior(PyObject *self, PyObject *other) { if (dict_update_arg(self, other)) { diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 07384acde32e52..e917a7124aa7e5 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -594,7 +594,7 @@ _PyTuple_Concat(PyObject *aa, PyObject *bb) return (PyObject *)np; } -static PyObject * +PyObject * tuple_repeat(PyObject *self, Py_ssize_t n) { PyTupleObject *a = _PyTuple_CAST(self); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a0a26a75129929..c4cc19a416429c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12494,7 +12494,7 @@ unicode_rstrip_impl(PyObject *self, PyObject *chars) } -static PyObject* +PyObject* unicode_repeat(PyObject *str, Py_ssize_t len) { PyObject *u; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 58b50707e55cee..0009b5104676ef 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -409,6 +409,16 @@ dummy_func(void) { r = right; } + op(_GUARD_BINARY_OP_EXTEND, (descr/4, left, right -- left, right)) { + _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr; + if (d != NULL && d->lhs_type != NULL && d->rhs_type != NULL) { + if (sym_matches_type(left, d->lhs_type) && + sym_matches_type(right, d->rhs_type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + } + } + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) { _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr; if (d != NULL && d->result_type != NULL) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 891887301119d7..c052c63095ad74 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1156,6 +1156,18 @@ } case _GUARD_BINARY_OP_EXTEND: { + JitOptRef right; + JitOptRef left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *descr = (PyObject *)this_instr->operand0; + _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr; + if (d != NULL && d->lhs_type != NULL && d->rhs_type != NULL) { + if (sym_matches_type(left, d->lhs_type) && + sym_matches_type(right, d->rhs_type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + } break; } diff --git a/Python/specialize.c b/Python/specialize.c index 4b5c10e9d72909..47f46f7918ef45 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2133,6 +2133,121 @@ tuple_tuple_add(PyObject *lhs, PyObject *rhs) return _PyTuple_Concat(lhs, rhs); } +/* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead + by calling sq_repeat directly with PyLong_AsSsize_t. */ + +extern PyObject *unicode_repeat(PyObject *str, Py_ssize_t n); +extern PyObject *bytes_repeat(PyObject *self, Py_ssize_t n); +extern PyObject *bytes_concat(PyObject *a, PyObject *b); +extern PyObject *tuple_repeat(PyObject *self, Py_ssize_t n); +extern PyObject *dict_or(PyObject *self, PyObject *other); +extern PyObject *dict_ior(PyObject *self, PyObject *other); + +static inline PyObject * +seq_int_multiply(PyObject *seq, PyObject *n, + ssizeargfunc repeat) +{ + Py_ssize_t count = PyLong_AsSsize_t(n); + if (count == -1 && PyErr_Occurred()) { + return NULL; + } + return repeat(seq, count); +} + +/* str-int and int-str */ + +static int +str_int_guard(PyObject *lhs, PyObject *rhs) +{ + return PyUnicode_CheckExact(lhs) && PyLong_CheckExact(rhs); +} + +static int +int_str_guard(PyObject *lhs, PyObject *rhs) +{ + return PyLong_CheckExact(lhs) && PyUnicode_CheckExact(rhs); +} + +static PyObject * +str_int_multiply(PyObject *lhs, PyObject *rhs) +{ + return seq_int_multiply(lhs, rhs, unicode_repeat); +} + +static PyObject * +int_str_multiply(PyObject *lhs, PyObject *rhs) +{ + return seq_int_multiply(rhs, lhs, unicode_repeat); +} + +/* bytes-bytes */ + +static int +bytes_bytes_guard(PyObject *lhs, PyObject *rhs) +{ + return PyBytes_CheckExact(lhs) && PyBytes_CheckExact(rhs); +} + +/* bytes-int and int-bytes */ + +static int +bytes_int_guard(PyObject *lhs, PyObject *rhs) +{ + return PyBytes_CheckExact(lhs) && PyLong_CheckExact(rhs); +} + +static int +int_bytes_guard(PyObject *lhs, PyObject *rhs) +{ + return PyLong_CheckExact(lhs) && PyBytes_CheckExact(rhs); +} + +static PyObject * +bytes_int_multiply(PyObject *lhs, PyObject *rhs) +{ + return seq_int_multiply(lhs, rhs, bytes_repeat); +} + +static PyObject * +int_bytes_multiply(PyObject *lhs, PyObject *rhs) +{ + return seq_int_multiply(rhs, lhs, bytes_repeat); +} + +/* tuple-int and int-tuple */ + +static int +tuple_int_guard(PyObject *lhs, PyObject *rhs) +{ + return PyTuple_CheckExact(lhs) && PyLong_CheckExact(rhs); +} + +static int +int_tuple_guard(PyObject *lhs, PyObject *rhs) +{ + return PyLong_CheckExact(lhs) && PyTuple_CheckExact(rhs); +} + +static PyObject * +tuple_int_multiply(PyObject *lhs, PyObject *rhs) +{ + return seq_int_multiply(lhs, rhs, tuple_repeat); +} + +static PyObject * +int_tuple_multiply(PyObject *lhs, PyObject *rhs) +{ + return seq_int_multiply(rhs, lhs, tuple_repeat); +} + +/* dict-dict */ + +static int +dict_dict_guard(PyObject *lhs, PyObject *rhs) +{ + return PyDict_CheckExact(lhs) && PyDict_CheckExact(rhs); +} + static int compactlongs_guard(PyObject *lhs, PyObject *rhs) { @@ -2223,32 +2338,63 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /) #undef LONG_FLOAT_ACTION static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { - /* long-long arithmetic */ - {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1}, - {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1}, - {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1}, - {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1}, - {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1}, - {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1}, - - /* float-long arithemetic */ - {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1}, - {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1}, - {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1}, - {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1}, - - /* long-float arithmetic */ - {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1}, - {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1}, - {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1}, - {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1}, + /* long-long arithmetic: guards also check _PyLong_IsCompact, so + type alone is not sufficient to eliminate the guard. */ + {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL}, + {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL}, + {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL}, + {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL}, + {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL}, + {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL}, + + /* float-long arithmetic: guards also check NaN and compactness. */ + {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1, NULL, NULL}, + {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1, NULL, NULL}, + {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1, NULL, NULL}, + {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1, NULL, NULL}, + + /* long-float arithmetic: guards also check NaN and compactness. */ + {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1, NULL, NULL}, + {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1, NULL, NULL}, + {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1, NULL, NULL}, + {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1, NULL, NULL}, /* list-list concatenation: _PyList_Concat always allocates a new list */ - {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1}, + {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1, &PyList_Type, &PyList_Type}, /* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut that can return one of the operands, so the result is not guaranteed to be a freshly allocated object. */ - {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0}, + {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0, &PyTuple_Type, &PyTuple_Type}, + + /* str * int / int * str: call unicode_repeat directly. + unicode_repeat returns the original when n == 1. */ + {NB_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type}, + {NB_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type}, + {NB_INPLACE_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type}, + {NB_INPLACE_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type}, + + /* bytes + bytes: call bytes_concat directly. bytes_concat may return + an operand when one side is empty, so result is not always unique. */ + {NB_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type}, + {NB_INPLACE_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type}, + + /* bytes * int / int * bytes: call bytes_repeat directly. + bytes_repeat returns the original when n == 1. */ + {NB_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type}, + {NB_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type}, + {NB_INPLACE_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type}, + {NB_INPLACE_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type}, + + /* tuple * int / int * tuple: call tuple_repeat directly. + tuple_repeat returns the original when n == 1. */ + {NB_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type}, + {NB_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type}, + {NB_INPLACE_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type}, + {NB_INPLACE_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type}, + + /* dict | dict: call dict_or directly */ + {NB_OR, dict_dict_guard, dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type}, + {NB_INPLACE_OR, dict_dict_guard, dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type}, }; static int From f099585f65c1b2b41c911b463eb8c4ce5f2ad390 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 7 Apr 2026 00:07:26 +0200 Subject: [PATCH 6/6] fix --- Objects/bytesobject.c | 6 ++--- Objects/dictobject.c | 4 +-- Objects/tupleobject.c | 2 +- Objects/unicodeobject.c | 2 +- Python/specialize.c | 57 ++++++++++++++++++++++++++--------------- 5 files changed, 44 insertions(+), 27 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 8a38d2ba0aa463..902144e8ec9f83 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1536,8 +1536,8 @@ bytes_length(PyObject *self) return Py_SIZE(a); } -/* This is also used by PyBytes_Concat() and BINARY_OP_EXTEND */ -PyObject * +/* This is also used by PyBytes_Concat() */ +static PyObject * bytes_concat(PyObject *a, PyObject *b) { Py_buffer va, vb; @@ -1581,7 +1581,7 @@ bytes_concat(PyObject *a, PyObject *b) return result; } -PyObject * +static PyObject * bytes_repeat(PyObject *self, Py_ssize_t n) { PyBytesObject *a = _PyBytes_CAST(self); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ae7179e8ba681a..67bc4319e0bae2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5041,7 +5041,7 @@ dict___sizeof___impl(PyDictObject *self) return PyLong_FromSsize_t(_PyDict_SizeOf(self)); } -PyObject * +static PyObject * dict_or(PyObject *self, PyObject *other) { if (!PyAnyDict_Check(self) || !PyAnyDict_Check(other)) { @@ -5081,7 +5081,7 @@ frozendict_or(PyObject *self, PyObject *other) } -PyObject * +static PyObject * dict_ior(PyObject *self, PyObject *other) { if (dict_update_arg(self, other)) { diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index e917a7124aa7e5..07384acde32e52 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -594,7 +594,7 @@ _PyTuple_Concat(PyObject *aa, PyObject *bb) return (PyObject *)np; } -PyObject * +static PyObject * tuple_repeat(PyObject *self, Py_ssize_t n) { PyTupleObject *a = _PyTuple_CAST(self); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c4cc19a416429c..a0a26a75129929 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12494,7 +12494,7 @@ unicode_rstrip_impl(PyObject *self, PyObject *chars) } -PyObject* +static PyObject* unicode_repeat(PyObject *str, Py_ssize_t len) { PyObject *u; diff --git a/Python/specialize.c b/Python/specialize.c index 47f46f7918ef45..ed4d3da6e59b05 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2136,13 +2136,6 @@ tuple_tuple_add(PyObject *lhs, PyObject *rhs) /* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead by calling sq_repeat directly with PyLong_AsSsize_t. */ -extern PyObject *unicode_repeat(PyObject *str, Py_ssize_t n); -extern PyObject *bytes_repeat(PyObject *self, Py_ssize_t n); -extern PyObject *bytes_concat(PyObject *a, PyObject *b); -extern PyObject *tuple_repeat(PyObject *self, Py_ssize_t n); -extern PyObject *dict_or(PyObject *self, PyObject *other); -extern PyObject *dict_ior(PyObject *self, PyObject *other); - static inline PyObject * seq_int_multiply(PyObject *seq, PyObject *n, ssizeargfunc repeat) @@ -2171,13 +2164,15 @@ int_str_guard(PyObject *lhs, PyObject *rhs) static PyObject * str_int_multiply(PyObject *lhs, PyObject *rhs) { - return seq_int_multiply(lhs, rhs, unicode_repeat); + return seq_int_multiply(lhs, rhs, + PyUnicode_Type.tp_as_sequence->sq_repeat); } static PyObject * int_str_multiply(PyObject *lhs, PyObject *rhs) { - return seq_int_multiply(rhs, lhs, unicode_repeat); + return seq_int_multiply(rhs, lhs, + PyUnicode_Type.tp_as_sequence->sq_repeat); } /* bytes-bytes */ @@ -2188,6 +2183,12 @@ bytes_bytes_guard(PyObject *lhs, PyObject *rhs) return PyBytes_CheckExact(lhs) && PyBytes_CheckExact(rhs); } +static PyObject * +bytes_bytes_add(PyObject *lhs, PyObject *rhs) +{ + return PyBytes_Type.tp_as_sequence->sq_concat(lhs, rhs); +} + /* bytes-int and int-bytes */ static int @@ -2205,13 +2206,15 @@ int_bytes_guard(PyObject *lhs, PyObject *rhs) static PyObject * bytes_int_multiply(PyObject *lhs, PyObject *rhs) { - return seq_int_multiply(lhs, rhs, bytes_repeat); + return seq_int_multiply(lhs, rhs, + PyBytes_Type.tp_as_sequence->sq_repeat); } static PyObject * int_bytes_multiply(PyObject *lhs, PyObject *rhs) { - return seq_int_multiply(rhs, lhs, bytes_repeat); + return seq_int_multiply(rhs, lhs, + PyBytes_Type.tp_as_sequence->sq_repeat); } /* tuple-int and int-tuple */ @@ -2231,13 +2234,15 @@ int_tuple_guard(PyObject *lhs, PyObject *rhs) static PyObject * tuple_int_multiply(PyObject *lhs, PyObject *rhs) { - return seq_int_multiply(lhs, rhs, tuple_repeat); + return seq_int_multiply(lhs, rhs, + PyTuple_Type.tp_as_sequence->sq_repeat); } static PyObject * int_tuple_multiply(PyObject *lhs, PyObject *rhs) { - return seq_int_multiply(rhs, lhs, tuple_repeat); + return seq_int_multiply(rhs, lhs, + PyTuple_Type.tp_as_sequence->sq_repeat); } /* dict-dict */ @@ -2248,6 +2253,18 @@ dict_dict_guard(PyObject *lhs, PyObject *rhs) return PyDict_CheckExact(lhs) && PyDict_CheckExact(rhs); } +static PyObject * +dict_dict_or(PyObject *lhs, PyObject *rhs) +{ + return PyDict_Type.tp_as_number->nb_or(lhs, rhs); +} + +static PyObject * +dict_dict_ior(PyObject *lhs, PyObject *rhs) +{ + return PyDict_Type.tp_as_number->nb_inplace_or(lhs, rhs); +} + static int compactlongs_guard(PyObject *lhs, PyObject *rhs) { @@ -2373,10 +2390,10 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { {NB_INPLACE_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type}, {NB_INPLACE_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type}, - /* bytes + bytes: call bytes_concat directly. bytes_concat may return - an operand when one side is empty, so result is not always unique. */ - {NB_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type}, - {NB_INPLACE_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type}, + /* bytes + bytes: bytes_concat may return an operand when one side + is empty, so result is not always unique. */ + {NB_ADD, bytes_bytes_guard, bytes_bytes_add, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type}, + {NB_INPLACE_ADD, bytes_bytes_guard, bytes_bytes_add, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type}, /* bytes * int / int * bytes: call bytes_repeat directly. bytes_repeat returns the original when n == 1. */ @@ -2392,9 +2409,9 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { {NB_INPLACE_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type}, {NB_INPLACE_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type}, - /* dict | dict: call dict_or directly */ - {NB_OR, dict_dict_guard, dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type}, - {NB_INPLACE_OR, dict_dict_guard, dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type}, + /* dict | dict */ + {NB_OR, dict_dict_guard, dict_dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type}, + {NB_INPLACE_OR, dict_dict_guard, dict_dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type}, }; static int