From 02ca84db483221fe3dd61248dab7ad0913933c1e Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Fri, 17 Jan 2025 20:45:38 +0100
Subject: [PATCH 1/6] specialize concatenation of lists and tuples

---
 Lib/test/test_opcache.py | 15 +++++++++++++++
 Objects/listobject.c     |  2 +-
 Objects/tupleobject.c    |  2 +-
 Python/specialize.c      | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index 4ca108cd6ca43e..60876080577452 100644
--- a/Lib/test/test_opcache.py
+++ b/Lib/test/test_opcache.py
@@ -1423,6 +1423,21 @@ def binary_op_add_extend():
         self.assert_specialized(binary_op_add_extend, "BINARY_OP_EXTEND")
         self.assert_no_opcode(binary_op_add_extend, "BINARY_OP")
 
+        def binary_op_add_extend_sequences():
+            l1 = [1, 2]
+            l2 = [None]
+            t1 = (1, 2)
+            t2 = (None,)
+            for _ in range(100):
+                list_sum = l1 + l2
+                self.assertEqual(list_sum, [1, 2, None])
+                tuple_sum = t1 + t2
+                self.assertEqual(tuple_sum, (1, 2, None))
+
+        binary_op_add_extend_sequences()
+        self.assert_specialized(binary_op_add_extend_sequences, "BINARY_OP_EXTEND")
+        self.assert_no_opcode(binary_op_add_extend_sequences, "BINARY_OP")
+
         def binary_op_zero_division():
             def compactlong_lhs(arg):
                 42 / arg
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 5c9fd55bab1b22..bfa7befd386ebc 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -798,7 +798,7 @@ list_concat_lock_held(PyListObject *a, PyListObject *b)
     return (PyObject *)np;
 }
 
-static PyObject *
+PyObject *
 list_concat(PyObject *aa, PyObject *bb)
 {
     if (!PyList_Check(bb)) {
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index ee6320e6ca3cfe..af359d7c8d32d9 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -547,7 +547,7 @@ PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j)
     return tuple_slice((PyTupleObject *)op, i, j);
 }
 
-static PyObject *
+PyObject *
 tuple_concat(PyObject *aa, PyObject *bb)
 {
     PyTupleObject *a = _PyTuple_CAST(aa);
diff --git a/Python/specialize.c b/Python/specialize.c
index 09ec25767a4c3f..71102ae999074a 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2104,6 +2104,38 @@ is_compactlong(PyObject *v)
            _PyLong_IsCompact((PyLongObject *)v);
 }
 
+/* list-list */
+
+static int
+list_list_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyList_CheckExact(lhs) && PyList_CheckExact(rhs);
+}
+
+extern PyObject *list_concat(PyObject *aa, PyObject *bb);
+
+static PyObject *
+list_list_add(PyObject *lhs, PyObject *rhs)
+{
+    return list_concat(lhs, rhs);
+}
+
+/* tuple-tuple */
+
+static int
+tuple_tuple_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyTuple_CheckExact(lhs) && PyTuple_CheckExact(rhs);
+}
+
+extern PyObject *tuple_concat(PyObject *aa, PyObject *bb);
+
+static PyObject *
+tuple_tuple_add(PyObject *lhs, PyObject *rhs)
+{
+    return tuple_concat(lhs, rhs);
+}
+
 static int
 compactlongs_guard(PyObject *lhs, PyObject *rhs)
 {
@@ -2213,6 +2245,10 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
     {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract},
     {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div},
     {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply},
+
+    /* list-list and tuple-tuple concatenation */
+    {NB_ADD, list_list_guard, list_list_add},
+    {NB_ADD, tuple_tuple_guard, tuple_tuple_add},
 };
 
 static int

From 27f4c56936fce085677243281f3e1481beede59a Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Fri, 17 Jan 2025 19:48:34 +0000
Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?=
 =?UTF-8?q?rb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst               | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst
new file mode 100644
index 00000000000000..c62497c213507a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst
@@ -0,0 +1 @@
+Specialize ``BINARY_OP`` for concatenation of lists and tuples.

From 51d1b111d2e76bd81a44c0fde471ce4215852b51 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Sun, 5 Apr 2026 21:57:21 +0200
Subject: [PATCH 3/6] refactor for type information

---
 Include/internal/pycore_code.h  |  3 +++
 Include/internal/pycore_list.h  |  1 +
 Include/internal/pycore_tuple.h |  1 +
 Lib/test/test_capi/test_opt.py  | 42 ++++++++++++++++++++++++++++++++
 Objects/listobject.c            |  4 +--
 Objects/tupleobject.c           |  4 +--
 Python/optimizer_bytecodes.c    |  9 +++++--
 Python/optimizer_cases.c.h      |  9 +++++--
 Python/specialize.c             | 43 +++++++++++++++------------------
 9 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 376e68a4c8773c..09b11599f34223 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -496,6 +496,9 @@ typedef struct {
     int oparg;
     binaryopguardfunc guard;
     binaryopactionfunc action;
+    /* Static type of the result, or NULL if unknown. Used by the tier 2
+       optimizer to propagate type information through _BINARY_OP_EXTEND. */
+    PyTypeObject *result_type;
 } _PyBinaryOpSpecializationDescr;
 
 /* Comparison bit masks. */
diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h
index 6b92dc5d111f3b..df0d00f752573b 100644
--- a/Include/internal/pycore_list.h
+++ b/Include/internal/pycore_list.h
@@ -15,6 +15,7 @@ extern "C" {
 PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
 PyAPI_FUNC(PyObject) *_PyList_SliceSubscript(PyObject*, PyObject*);
 PyAPI_FUNC(PyObject *) _PyList_BinarySlice(PyObject *, PyObject *, PyObject *);
+PyAPI_FUNC(PyObject *) _PyList_Concat(PyObject *, PyObject *);
 extern void _PyList_DebugMallocStats(FILE *out);
 // _PyList_GetItemRef should be used only when the object is known as a list
 // because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does.
diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h
index 9409ec94976d3a..bf80f96396ea4a 100644
--- a/Include/internal/pycore_tuple.h
+++ b/Include/internal/pycore_tuple.h
@@ -28,6 +28,7 @@ PyAPI_FUNC(void) _PyStolenTuple_Free(PyObject *self);
 PyAPI_FUNC(PyObject *)_PyTuple_FromStackRefStealOnSuccess(const union _PyStackRef *, Py_ssize_t);
 PyAPI_FUNC(PyObject *)_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t);
 PyAPI_FUNC(PyObject *) _PyTuple_BinarySlice(PyObject *, PyObject *, PyObject *);
+PyAPI_FUNC(PyObject *) _PyTuple_Concat(PyObject *, PyObject *);
 
 PyAPI_FUNC(PyObject *) _PyTuple_FromPair(PyObject *, PyObject *);
 PyAPI_FUNC(PyObject *) _PyTuple_FromPairSteal(PyObject *, PyObject *);
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 56f90194b480a1..24e7f3c85fd5c1 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -3813,6 +3813,48 @@ def f(n):
         self.assertIn("_UNPACK_SEQUENCE_TWO_TUPLE", uops)
         self.assertNotIn("_GUARD_TOS_TUPLE", uops)
 
+    def test_binary_op_extend_list_concat_type_propagation(self):
+        # list + list is specialized via BINARY_OP_EXTEND. The tier 2 optimizer
+        # should learn that the result is a list and eliminate subsequent
+        # list-type guards.
+        def testfunc(n):
+            a = [1, 2]
+            b = [3, 4]
+            x = True
+            for _ in range(n):
+                c = a + b
+                if c[0]:
+                    x = False
+            return x
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, False)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BINARY_OP_EXTEND", uops)
+        # The c[0] subscript emits _GUARD_NOS_LIST before _BINARY_OP_SUBSCR_LIST_INT;
+        # since _BINARY_OP_EXTEND now propagates PyList_Type, that guard is gone.
+        self.assertIn("_BINARY_OP_SUBSCR_LIST_INT", uops)
+        self.assertNotIn("_GUARD_NOS_LIST", uops)
+
+    def test_binary_op_extend_tuple_concat_type_propagation(self):
+        # tuple + tuple is specialized via BINARY_OP_EXTEND. The tier 2 optimizer
+        # should learn the result is a tuple and eliminate subsequent tuple guards.
+        def testfunc(n):
+            t1 = (1, 2)
+            t2 = (3, 4)
+            for _ in range(n):
+                a, b, c, d = t1 + t2
+            return a + b + c + d
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, 10)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BINARY_OP_EXTEND", uops)
+        self.assertIn("_UNPACK_SEQUENCE_TUPLE", uops)
+        self.assertNotIn("_GUARD_TOS_TUPLE", uops)
+
     def test_unary_invert_long_type(self):
         def testfunc(n):
             for _ in range(n):
diff --git a/Objects/listobject.c b/Objects/listobject.c
index bfa7befd386ebc..97869b17cde7a8 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -799,7 +799,7 @@ list_concat_lock_held(PyListObject *a, PyListObject *b)
 }
 
 PyObject *
-list_concat(PyObject *aa, PyObject *bb)
+_PyList_Concat(PyObject *aa, PyObject *bb)
 {
     if (!PyList_Check(bb)) {
         PyErr_Format(PyExc_TypeError,
@@ -3617,7 +3617,7 @@ static PyMethodDef list_methods[] = {
 
 static PySequenceMethods list_as_sequence = {
     list_length,                                /* sq_length */
-    list_concat,                                /* sq_concat */
+    _PyList_Concat,                             /* sq_concat */
     list_repeat,                                /* sq_repeat */
     list_item,                                  /* sq_item */
     0,                                          /* sq_slice */
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index af359d7c8d32d9..07384acde32e52 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -548,7 +548,7 @@ PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j)
 }
 
 PyObject *
-tuple_concat(PyObject *aa, PyObject *bb)
+_PyTuple_Concat(PyObject *aa, PyObject *bb)
 {
     PyTupleObject *a = _PyTuple_CAST(aa);
     if (Py_SIZE(a) == 0 && PyTuple_CheckExact(bb)) {
@@ -864,7 +864,7 @@ tuple_subtype_new(PyTypeObject *type, PyObject *iterable)
 
 static PySequenceMethods tuple_as_sequence = {
     tuple_length,                               /* sq_length */
-    tuple_concat,                               /* sq_concat */
+    _PyTuple_Concat,                            /* sq_concat */
     tuple_repeat,                               /* sq_repeat */
     tuple_item,                                 /* sq_item */
     0,                                          /* sq_slice */
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index b8148ef57ede0c..0ed9822ab2e4a7 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -410,8 +410,13 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) {
-        (void)descr;
-        res = sym_new_not_null(ctx);
+        _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
+        if (d != NULL && d->result_type != NULL) {
+            res = sym_new_type(ctx, d->result_type);
+        }
+        else {
+            res = sym_new_not_null(ctx);
+        }
         l = left;
         r = right;
     }
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index a15b5ae1d13d3b..7a8a1d20666877 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1168,8 +1168,13 @@
             right = stack_pointer[-1];
             left = stack_pointer[-2];
             PyObject *descr = (PyObject *)this_instr->operand0;
-            (void)descr;
-            res = sym_new_not_null(ctx);
+            _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
+            if (d != NULL && d->result_type != NULL) {
+                res = sym_new_type(ctx, d->result_type);
+            }
+            else {
+                res = sym_new_not_null(ctx);
+            }
             l = left;
             r = right;
             CHECK_STACK_BOUNDS(1);
diff --git a/Python/specialize.c b/Python/specialize.c
index 71102ae999074a..0953eb421554f5 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -9,7 +9,8 @@
 #include "pycore_function.h"      // _PyFunction_GetVersionForCurrentState()
 #include "pycore_interpframe.h"   // FRAME_SPECIALS_SIZE
 #include "pycore_lazyimportobject.h" // PyLazyImport_CheckExact
-#include "pycore_list.h"          // _PyListIterObject
+#include "pycore_list.h"          // _PyListIterObject, _PyList_Concat
+#include "pycore_tuple.h"         // _PyTuple_Concat
 #include "pycore_long.h"          // _PyLong_IsNonNegativeCompact()
 #include "pycore_moduleobject.h"
 #include "pycore_object.h"
@@ -2112,12 +2113,10 @@ list_list_guard(PyObject *lhs, PyObject *rhs)
     return PyList_CheckExact(lhs) && PyList_CheckExact(rhs);
 }
 
-extern PyObject *list_concat(PyObject *aa, PyObject *bb);
-
 static PyObject *
 list_list_add(PyObject *lhs, PyObject *rhs)
 {
-    return list_concat(lhs, rhs);
+    return _PyList_Concat(lhs, rhs);
 }
 
 /* tuple-tuple */
@@ -2128,12 +2127,10 @@ tuple_tuple_guard(PyObject *lhs, PyObject *rhs)
     return PyTuple_CheckExact(lhs) && PyTuple_CheckExact(rhs);
 }
 
-extern PyObject *tuple_concat(PyObject *aa, PyObject *bb);
-
 static PyObject *
 tuple_tuple_add(PyObject *lhs, PyObject *rhs)
 {
-    return tuple_concat(lhs, rhs);
+    return _PyTuple_Concat(lhs, rhs);
 }
 
 static int
@@ -2227,28 +2224,28 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
 
 static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
     /* long-long arithmetic */
-    {NB_OR, compactlongs_guard, compactlongs_or},
-    {NB_AND, compactlongs_guard, compactlongs_and},
-    {NB_XOR, compactlongs_guard, compactlongs_xor},
-    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or},
-    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and},
-    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor},
+    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type},
+    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type},
+    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type},
+    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type},
+    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type},
+    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type},
 
     /* float-long arithemetic */
-    {NB_ADD, float_compactlong_guard, float_compactlong_add},
-    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract},
-    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div},
-    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply},
+    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type},
+    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type},
+    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type},
+    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type},
 
     /* float-float arithmetic */
-    {NB_ADD, compactlong_float_guard, compactlong_float_add},
-    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract},
-    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div},
-    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply},
+    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type},
+    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type},
+    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type},
+    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type},
 
     /* list-list and tuple-tuple concatenation */
-    {NB_ADD, list_list_guard, list_list_add},
-    {NB_ADD, tuple_tuple_guard, tuple_tuple_add},
+    {NB_ADD, list_list_guard, list_list_add, &PyList_Type},
+    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type},
 };
 
 static int

From e8263f97577c25e11b5f593e157bf9daad057006 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Sun, 5 Apr 2026 22:04:23 +0200
Subject: [PATCH 4/6] add unique type propagation

---
 Include/internal/pycore_code.h                |  4 ++
 Lib/test/test_capi/test_opt.py                | 23 ++++++++++
 ...-01-17-19-48-28.gh-issue-100239.7pbTEA.rst |  4 +-
 Python/optimizer_bytecodes.c                  |  3 ++
 Python/optimizer_cases.c.h                    |  3 ++
 Python/specialize.c                           | 43 ++++++++++---------
 6 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 09b11599f34223..fe8d0a54f2af1a 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -499,6 +499,10 @@ typedef struct {
     /* Static type of the result, or NULL if unknown. Used by the tier 2
        optimizer to propagate type information through _BINARY_OP_EXTEND. */
     PyTypeObject *result_type;
+    /* Nonzero iff `action` always returns a freshly allocated object (not
+       aliased to either operand). Used by the tier 2 optimizer to enable
+       inplace follow-up ops. */
+    int result_unique;
 } _PyBinaryOpSpecializationDescr;
 
 /* Comparison bit masks. */
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 24e7f3c85fd5c1..e114385a9ebc46 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -3813,6 +3813,29 @@ def f(n):
         self.assertIn("_UNPACK_SEQUENCE_TWO_TUPLE", uops)
         self.assertNotIn("_GUARD_TOS_TUPLE", uops)
 
+    def test_binary_op_extend_float_result_enables_inplace_multiply(self):
+        # (2 + x) * y with x, y floats: `2 + x` goes through _BINARY_OP_EXTEND
+        # (int + float). The result_type/result_unique info should let the
+        # subsequent float multiply use the inplace variant.
+        def testfunc(n):
+            x = 3.5
+            y = 2.0
+            res = 0.0
+            for _ in range(n):
+                res = (2 + x) * y
+            return res
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, 11.0)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BINARY_OP_EXTEND", uops)
+        self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", uops)
+        self.assertNotIn("_BINARY_OP_MULTIPLY_FLOAT", uops)
+        # NOS guard on the multiply is eliminated because _BINARY_OP_EXTEND
+        # propagates PyFloat_Type.
+        self.assertNotIn("_GUARD_NOS_FLOAT", uops)
+
     def test_binary_op_extend_list_concat_type_propagation(self):
         # list + list is specialized via BINARY_OP_EXTEND. The tier 2 optimizer
         # should learn that the result is a list and eliminate subsequent
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst
index c62497c213507a..594ef72ac57fae 100644
--- a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-19-48-28.gh-issue-100239.7pbTEA.rst
@@ -1 +1,3 @@
-Specialize ``BINARY_OP`` for concatenation of lists and tuples.
+Specialize ``BINARY_OP`` for concatenation of lists and tuples, and
+propagate the result type through ``_BINARY_OP_EXTEND`` in the tier 2
+optimizer so that follow-up type guards can be eliminated.
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 0ed9822ab2e4a7..58b50707e55cee 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -413,6 +413,9 @@ dummy_func(void) {
         _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
         if (d != NULL && d->result_type != NULL) {
             res = sym_new_type(ctx, d->result_type);
+            if (d->result_unique) {
+                res = PyJitRef_MakeUnique(res);
+            }
         }
         else {
             res = sym_new_not_null(ctx);
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 7a8a1d20666877..891887301119d7 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1171,6 +1171,9 @@
             _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
             if (d != NULL && d->result_type != NULL) {
                 res = sym_new_type(ctx, d->result_type);
+                if (d->result_unique) {
+                    res = PyJitRef_MakeUnique(res);
+                }
             }
             else {
                 res = sym_new_not_null(ctx);
diff --git a/Python/specialize.c b/Python/specialize.c
index 0953eb421554f5..4b5c10e9d72909 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2224,28 +2224,31 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
 
 static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
     /* long-long arithmetic */
-    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type},
-    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type},
-    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type},
-    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type},
-    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type},
-    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type},
+    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
+    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
+    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
+    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
+    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
+    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
 
     /* float-long arithemetic */
-    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type},
-    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type},
-    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type},
-    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type},
-
-    /* float-float arithmetic */
-    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type},
-    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type},
-    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type},
-    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type},
-
-    /* list-list and tuple-tuple concatenation */
-    {NB_ADD, list_list_guard, list_list_add, &PyList_Type},
-    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type},
+    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1},
+    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1},
+    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1},
+    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1},
+
+    /* long-float arithmetic */
+    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1},
+    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1},
+    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1},
+    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1},
+
+    /* list-list concatenation: _PyList_Concat always allocates a new list */
+    {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1},
+    /* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut
+       that can return one of the operands, so the result is not guaranteed
+       to be a freshly allocated object. */
+    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0},
 };
 
 static int

From fe63c59490524e5e967fec380478765a946fd9f4 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Mon, 6 Apr 2026 23:13:49 +0200
Subject: [PATCH 5/6] special case for concatenation

---
 Include/internal/pycore_code.h |   5 +
 Lib/test/test_capi/test_opt.py |  24 +++++
 Objects/bytesobject.c          |   6 +-
 Objects/dictobject.c           |   4 +-
 Objects/tupleobject.c          |   2 +-
 Objects/unicodeobject.c        |   2 +-
 Python/optimizer_bytecodes.c   |  10 ++
 Python/optimizer_cases.c.h     |  12 +++
 Python/specialize.c            | 188 +++++++++++++++++++++++++++++----
 9 files changed, 225 insertions(+), 28 deletions(-)

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index fe8d0a54f2af1a..b73dbe123838a4 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -503,6 +503,11 @@ typedef struct {
        aliased to either operand). Used by the tier 2 optimizer to enable
        inplace follow-up ops. */
     int result_unique;
+    /* Operand types that make `guard` succeed, or NULL to always keep it.
+       Set only if `guard` tests nothing but these types: the tier 2 optimizer
+       removes _GUARD_BINARY_OP_EXTEND when both types are already known. */
+    PyTypeObject *lhs_type;
+    PyTypeObject *rhs_type;
 } _PyBinaryOpSpecializationDescr;
 
 /* Comparison bit masks. */
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index e114385a9ebc46..e4050d3db48cb7 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -3878,6 +3878,30 @@ def testfunc(n):
         self.assertIn("_UNPACK_SEQUENCE_TUPLE", uops)
         self.assertNotIn("_GUARD_TOS_TUPLE", uops)
 
+    def test_binary_op_extend_guard_elimination(self):
+        # When both operands have known types (e.g., from a prior
+        # _BINARY_OP_EXTEND result), the _GUARD_BINARY_OP_EXTEND
+        # should be eliminated.
+        def testfunc(n):
+            a = [1, 2]
+            b = [3, 4]
+            total = 0
+            for _ in range(n):
+                c = a + b    # first: guard stays, result type = list
+                d = c + c    # second: both operands are list -> guard eliminated
+                total += d[0]
+            return total
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # Both list additions use _BINARY_OP_EXTEND
+        self.assertEqual(uops.count("_BINARY_OP_EXTEND"), 2)
+        # But the second guard is eliminated because both operands
+        # are known to be lists from the first _BINARY_OP_EXTEND.
+        self.assertEqual(uops.count("_GUARD_BINARY_OP_EXTEND"), 1)
+
     def test_unary_invert_long_type(self):
         def testfunc(n):
             for _ in range(n):
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 902144e8ec9f83..8a38d2ba0aa463 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1536,8 +1536,8 @@ bytes_length(PyObject *self)
     return Py_SIZE(a);
 }
 
-/* This is also used by PyBytes_Concat() */
-static PyObject *
+/* This is also used by PyBytes_Concat() and BINARY_OP_EXTEND */
+PyObject *
 bytes_concat(PyObject *a, PyObject *b)
 {
     Py_buffer va, vb;
@@ -1581,7 +1581,7 @@ bytes_concat(PyObject *a, PyObject *b)
     return result;
 }
 
-static PyObject *
+PyObject *
 bytes_repeat(PyObject *self, Py_ssize_t n)
 {
     PyBytesObject *a = _PyBytes_CAST(self);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 67bc4319e0bae2..ae7179e8ba681a 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -5041,7 +5041,7 @@ dict___sizeof___impl(PyDictObject *self)
     return PyLong_FromSsize_t(_PyDict_SizeOf(self));
 }
 
-static PyObject *
+PyObject *
 dict_or(PyObject *self, PyObject *other)
 {
     if (!PyAnyDict_Check(self) || !PyAnyDict_Check(other)) {
@@ -5081,7 +5081,7 @@ frozendict_or(PyObject *self, PyObject *other)
 }
 
 
-static PyObject *
+PyObject *
 dict_ior(PyObject *self, PyObject *other)
 {
     if (dict_update_arg(self, other)) {
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 07384acde32e52..e917a7124aa7e5 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -594,7 +594,7 @@ _PyTuple_Concat(PyObject *aa, PyObject *bb)
     return (PyObject *)np;
 }
 
-static PyObject *
+PyObject *
 tuple_repeat(PyObject *self, Py_ssize_t n)
 {
     PyTupleObject *a = _PyTuple_CAST(self);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a0a26a75129929..c4cc19a416429c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12494,7 +12494,7 @@ unicode_rstrip_impl(PyObject *self, PyObject *chars)
 }
 
 
-static PyObject*
+PyObject*
 unicode_repeat(PyObject *str, Py_ssize_t len)
 {
     PyObject *u;
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 58b50707e55cee..0009b5104676ef 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -409,6 +409,16 @@ dummy_func(void) {
         r = right;
     }
 
+    op(_GUARD_BINARY_OP_EXTEND, (descr/4, left, right -- left, right)) {
+        _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
+        if (d != NULL && d->lhs_type != NULL && d->rhs_type != NULL) {
+            if (sym_matches_type(left, d->lhs_type) &&
+                sym_matches_type(right, d->rhs_type)) {
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            }
+        }
+    }
+
     op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) {
         _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
         if (d != NULL && d->result_type != NULL) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 891887301119d7..c052c63095ad74 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1156,6 +1156,18 @@
         }
 
         case _GUARD_BINARY_OP_EXTEND: {
+            JitOptRef right;
+            JitOptRef left;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            PyObject *descr = (PyObject *)this_instr->operand0;
+            _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
+            if (d != NULL && d->lhs_type != NULL && d->rhs_type != NULL) {
+                if (sym_matches_type(left, d->lhs_type) &&
+                    sym_matches_type(right, d->rhs_type)) {
+                    REPLACE_OP(this_instr, _NOP, 0, 0);
+                }
+            }
             break;
         }
 
diff --git a/Python/specialize.c b/Python/specialize.c
index 4b5c10e9d72909..47f46f7918ef45 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2133,6 +2133,121 @@ tuple_tuple_add(PyObject *lhs, PyObject *rhs)
     return _PyTuple_Concat(lhs, rhs);
 }
 
+/* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead
+   by calling sq_repeat directly with PyLong_AsSsize_t. */
+
+extern PyObject *unicode_repeat(PyObject *str, Py_ssize_t n);
+extern PyObject *bytes_repeat(PyObject *self, Py_ssize_t n);
+extern PyObject *bytes_concat(PyObject *a, PyObject *b);
+extern PyObject *tuple_repeat(PyObject *self, Py_ssize_t n);
+extern PyObject *dict_or(PyObject *self, PyObject *other);
+extern PyObject *dict_ior(PyObject *self, PyObject *other);
+
+static inline PyObject *
+seq_int_multiply(PyObject *seq, PyObject *n,
+                 ssizeargfunc repeat)
+{
+    /* Convert the count as sequence_repeat() in Objects/abstract.c does, so
+       an int that overflows Py_ssize_t raises the same OverflowError as the
+       unspecialized multiply.  On conversion failure NULL is returned. */
+    Py_ssize_t count = PyNumber_AsSsize_t(n, PyExc_OverflowError);
+    return (count == -1 && PyErr_Occurred()) ? NULL : repeat(seq, count);
+}
+
+/* str-int and int-str */
+
+static int
+str_int_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyUnicode_CheckExact(lhs) && PyLong_CheckExact(rhs);
+}
+
+static int
+int_str_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyLong_CheckExact(lhs) && PyUnicode_CheckExact(rhs);
+}
+
+static PyObject *
+str_int_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(lhs, rhs, unicode_repeat);
+}
+
+static PyObject *
+int_str_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(rhs, lhs, unicode_repeat);
+}
+
+/* bytes-bytes */
+
+static int
+bytes_bytes_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyBytes_CheckExact(lhs) && PyBytes_CheckExact(rhs);
+}
+
+/* bytes-int and int-bytes */
+
+static int
+bytes_int_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyBytes_CheckExact(lhs) && PyLong_CheckExact(rhs);
+}
+
+static int
+int_bytes_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyLong_CheckExact(lhs) && PyBytes_CheckExact(rhs);
+}
+
+static PyObject *
+bytes_int_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(lhs, rhs, bytes_repeat);
+}
+
+static PyObject *
+int_bytes_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(rhs, lhs, bytes_repeat);
+}
+
+/* tuple-int and int-tuple */
+
+static int
+tuple_int_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyTuple_CheckExact(lhs) && PyLong_CheckExact(rhs);
+}
+
+static int
+int_tuple_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyLong_CheckExact(lhs) && PyTuple_CheckExact(rhs);
+}
+
+static PyObject *
+tuple_int_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(lhs, rhs, tuple_repeat);
+}
+
+static PyObject *
+int_tuple_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(rhs, lhs, tuple_repeat);
+}
+
+/* dict-dict */
+
+static int
+dict_dict_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyDict_CheckExact(lhs) && PyDict_CheckExact(rhs);
+}
+
 static int
 compactlongs_guard(PyObject *lhs, PyObject *rhs)
 {
@@ -2223,32 +2338,63 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
 #undef LONG_FLOAT_ACTION
 
 static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
-    /* long-long arithmetic */
-    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
-    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
-    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
-    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
-    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
-    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
-
-    /* float-long arithemetic */
-    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1},
-    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1},
-    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1},
-    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1},
-
-    /* long-float arithmetic */
-    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1},
-    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1},
-    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1},
-    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1},
+    /* long-long arithmetic: guards also check _PyLong_IsCompact, so
+       type alone is not sufficient to eliminate the guard. */
+    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
+    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
+    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
+    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
+    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
+    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
+
+    /* float-long arithmetic: guards also check NaN and compactness. */
+    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1, NULL, NULL},
+    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1, NULL, NULL},
+    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1, NULL, NULL},
+    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1, NULL, NULL},
+
+    /* long-float arithmetic: guards also check NaN and compactness. */
+    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1, NULL, NULL},
+    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1, NULL, NULL},
+    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1, NULL, NULL},
+    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1, NULL, NULL},
 
     /* list-list concatenation: _PyList_Concat always allocates a new list */
-    {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1},
+    {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1, &PyList_Type, &PyList_Type},
     /* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut
        that can return one of the operands, so the result is not guaranteed
        to be a freshly allocated object. */
-    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0},
+    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0, &PyTuple_Type, &PyTuple_Type},
+
+    /* str * int / int * str: call unicode_repeat directly.
+       unicode_repeat returns the original when n == 1. */
+    {NB_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
+    {NB_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
+    {NB_INPLACE_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
+    {NB_INPLACE_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
+
+    /* bytes + bytes: call bytes_concat directly. bytes_concat may return
+       an operand when one side is empty, so result is not always unique. */
+    {NB_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
+    {NB_INPLACE_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
+
+    /* bytes * int / int * bytes: call bytes_repeat directly.
+       bytes_repeat returns the original when n == 1. */
+    {NB_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
+    {NB_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
+    {NB_INPLACE_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
+    {NB_INPLACE_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
+
+    /* tuple * int / int * tuple: call tuple_repeat directly.
+       tuple_repeat returns the original when n == 1. */
+    {NB_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
+    {NB_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
+    {NB_INPLACE_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
+    {NB_INPLACE_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
+
+    /* dict | dict: call dict_or directly */
+    {NB_OR, dict_dict_guard, dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type},
+    {NB_INPLACE_OR, dict_dict_guard, dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type},
 };
 
 static int

From f099585f65c1b2b41c911b463eb8c4ce5f2ad390 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Tue, 7 Apr 2026 00:07:26 +0200
Subject: [PATCH 6/6] Call type slots instead of exporting static helpers

---
 Objects/bytesobject.c   |  6 ++---
 Objects/dictobject.c    |  4 +--
 Objects/tupleobject.c   |  2 +-
 Objects/unicodeobject.c |  2 +-
 Python/specialize.c     | 57 ++++++++++++++++++++++++++---------------
 5 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 8a38d2ba0aa463..902144e8ec9f83 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1536,8 +1536,8 @@ bytes_length(PyObject *self)
     return Py_SIZE(a);
 }
 
-/* This is also used by PyBytes_Concat() and BINARY_OP_EXTEND */
-PyObject *
+/* This is also used by PyBytes_Concat() */
+static PyObject *
 bytes_concat(PyObject *a, PyObject *b)
 {
     Py_buffer va, vb;
@@ -1581,7 +1581,7 @@ bytes_concat(PyObject *a, PyObject *b)
     return result;
 }
 
-PyObject *
+static PyObject *
 bytes_repeat(PyObject *self, Py_ssize_t n)
 {
     PyBytesObject *a = _PyBytes_CAST(self);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index ae7179e8ba681a..67bc4319e0bae2 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -5041,7 +5041,7 @@ dict___sizeof___impl(PyDictObject *self)
     return PyLong_FromSsize_t(_PyDict_SizeOf(self));
 }
 
-PyObject *
+static PyObject *
 dict_or(PyObject *self, PyObject *other)
 {
     if (!PyAnyDict_Check(self) || !PyAnyDict_Check(other)) {
@@ -5081,7 +5081,7 @@ frozendict_or(PyObject *self, PyObject *other)
 }
 
 
-PyObject *
+static PyObject *
 dict_ior(PyObject *self, PyObject *other)
 {
     if (dict_update_arg(self, other)) {
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index e917a7124aa7e5..07384acde32e52 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -594,7 +594,7 @@ _PyTuple_Concat(PyObject *aa, PyObject *bb)
     return (PyObject *)np;
 }
 
-PyObject *
+static PyObject *
 tuple_repeat(PyObject *self, Py_ssize_t n)
 {
     PyTupleObject *a = _PyTuple_CAST(self);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c4cc19a416429c..a0a26a75129929 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12494,7 +12494,7 @@ unicode_rstrip_impl(PyObject *self, PyObject *chars)
 }
 
 
-PyObject*
+static PyObject*
 unicode_repeat(PyObject *str, Py_ssize_t len)
 {
     PyObject *u;
diff --git a/Python/specialize.c b/Python/specialize.c
index 47f46f7918ef45..ed4d3da6e59b05 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2136,13 +2136,6 @@ tuple_tuple_add(PyObject *lhs, PyObject *rhs)
 /* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead
    by calling sq_repeat directly with PyLong_AsSsize_t. */
 
-extern PyObject *unicode_repeat(PyObject *str, Py_ssize_t n);
-extern PyObject *bytes_repeat(PyObject *self, Py_ssize_t n);
-extern PyObject *bytes_concat(PyObject *a, PyObject *b);
-extern PyObject *tuple_repeat(PyObject *self, Py_ssize_t n);
-extern PyObject *dict_or(PyObject *self, PyObject *other);
-extern PyObject *dict_ior(PyObject *self, PyObject *other);
-
 static inline PyObject *
 seq_int_multiply(PyObject *seq, PyObject *n,
                  ssizeargfunc repeat)
@@ -2171,13 +2164,15 @@ int_str_guard(PyObject *lhs, PyObject *rhs)
 static PyObject *
 str_int_multiply(PyObject *lhs, PyObject *rhs)
 {
-    return seq_int_multiply(lhs, rhs, unicode_repeat);
+    return seq_int_multiply(lhs, rhs,
+                            PyUnicode_Type.tp_as_sequence->sq_repeat);
 }
 
 static PyObject *
 int_str_multiply(PyObject *lhs, PyObject *rhs)
 {
-    return seq_int_multiply(rhs, lhs, unicode_repeat);
+    return seq_int_multiply(rhs, lhs,
+                            PyUnicode_Type.tp_as_sequence->sq_repeat);
 }
 
 /* bytes-bytes */
@@ -2188,6 +2183,12 @@ bytes_bytes_guard(PyObject *lhs, PyObject *rhs)
     return PyBytes_CheckExact(lhs) && PyBytes_CheckExact(rhs);
 }
 
+static PyObject *
+bytes_bytes_add(PyObject *lhs, PyObject *rhs)
+{
+    return PyBytes_Type.tp_as_sequence->sq_concat(lhs, rhs);
+}
+
 /* bytes-int and int-bytes */
 
 static int
@@ -2205,13 +2206,15 @@ int_bytes_guard(PyObject *lhs, PyObject *rhs)
 static PyObject *
 bytes_int_multiply(PyObject *lhs, PyObject *rhs)
 {
-    return seq_int_multiply(lhs, rhs, bytes_repeat);
+    return seq_int_multiply(lhs, rhs,
+                            PyBytes_Type.tp_as_sequence->sq_repeat);
 }
 
 static PyObject *
 int_bytes_multiply(PyObject *lhs, PyObject *rhs)
 {
-    return seq_int_multiply(rhs, lhs, bytes_repeat);
+    return seq_int_multiply(rhs, lhs,
+                            PyBytes_Type.tp_as_sequence->sq_repeat);
 }
 
 /* tuple-int and int-tuple */
@@ -2231,13 +2234,15 @@ int_tuple_guard(PyObject *lhs, PyObject *rhs)
 static PyObject *
 tuple_int_multiply(PyObject *lhs, PyObject *rhs)
 {
-    return seq_int_multiply(lhs, rhs, tuple_repeat);
+    return seq_int_multiply(lhs, rhs,
+                            PyTuple_Type.tp_as_sequence->sq_repeat);
 }
 
 static PyObject *
 int_tuple_multiply(PyObject *lhs, PyObject *rhs)
 {
-    return seq_int_multiply(rhs, lhs, tuple_repeat);
+    return seq_int_multiply(rhs, lhs,
+                            PyTuple_Type.tp_as_sequence->sq_repeat);
 }
 
 /* dict-dict */
@@ -2248,6 +2253,18 @@ dict_dict_guard(PyObject *lhs, PyObject *rhs)
     return PyDict_CheckExact(lhs) && PyDict_CheckExact(rhs);
 }
 
+static PyObject *
+dict_dict_or(PyObject *lhs, PyObject *rhs)
+{
+    return PyDict_Type.tp_as_number->nb_or(lhs, rhs);
+}
+
+static PyObject *
+dict_dict_ior(PyObject *lhs, PyObject *rhs)
+{
+    return PyDict_Type.tp_as_number->nb_inplace_or(lhs, rhs);
+}
+
 static int
 compactlongs_guard(PyObject *lhs, PyObject *rhs)
 {
@@ -2373,10 +2390,10 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
     {NB_INPLACE_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
     {NB_INPLACE_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
 
-    /* bytes + bytes: call bytes_concat directly. bytes_concat may return
-       an operand when one side is empty, so result is not always unique. */
-    {NB_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
-    {NB_INPLACE_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
+    /* bytes + bytes: bytes_concat may return an operand when one side
+       is empty, so result is not always unique. */
+    {NB_ADD, bytes_bytes_guard, bytes_bytes_add, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
+    {NB_INPLACE_ADD, bytes_bytes_guard, bytes_bytes_add, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
 
     /* bytes * int / int * bytes: call bytes_repeat directly.
        bytes_repeat returns the original when n == 1. */
@@ -2392,9 +2409,9 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
     {NB_INPLACE_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
     {NB_INPLACE_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
 
-    /* dict | dict: call dict_or directly */
-    {NB_OR, dict_dict_guard, dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type},
-    {NB_INPLACE_OR, dict_dict_guard, dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type},
+    /* dict | dict */
+    {NB_OR, dict_dict_guard, dict_dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type},
+    {NB_INPLACE_OR, dict_dict_guard, dict_dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type},
 };
 
 static int