From 1a717574c757f03819a26635fb60a6bfa4cf65f7 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 22 Mar 2026 14:25:34 -0700 Subject: [PATCH 01/12] Add tests for rejecting non-zero padding bits in base64/base32 RFC 4648 section 3.5 allows decoders to reject encoded data containing non-zero pad bits. Both a2b_base64 (strict_mode=True) and a2b_base32 currently silently discard non-zero trailing bits instead of raising binascii.Error. These tests document the expected behavior. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 70 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 1dcd2b25c79087..47bc4872462558 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -334,6 +334,34 @@ def assertInvalidLength(data, strict_mode=True): assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters strict_mode=False) + def test_base64_nonzero_padding_bits(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. + + # 2 data chars + "==": last char has 4 padding bits + # 'A' = 0, 'B' = 1 ->000000 000001 ->byte 0x00, leftover 0001 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AB=='), strict_mode=True) + # 'A' = 0, 'P' = 15 ->000000 001111 ->byte 0x00, leftover 1111 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AP=='), strict_mode=True) + + # 3 data chars + "=": last char has 2 padding bits + # 'A' = 0, 'A' = 0, 'B' = 1 ->000000 000000 000001 ->bytes 0x00 0x00, + # leftover 01 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAB='), strict_mode=True) + # 'A' = 0, 'A' = 0, 'D' = 3 ->leftover 11 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAD='), strict_mode=True) + + # Verify that zero padding bits are accepted + binascii.a2b_base64(self.type2test(b'AA=='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AAA='), strict_mode=True) + + # Full quads with no padding have no leftover bits --always valid + binascii.a2b_base64(self.type2test(b'AAAA'), strict_mode=True) + def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' b'ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr') @@ -824,6 +852,48 @@ def assertInvalidLength(*args): assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") + def test_base32_nonzero_padding_bits(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. + + # 2 data chars + "======": last char has 2 padding bits + # 'AB' ->00000 00001 ->byte 0x00, leftover 01 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AB======')) + # 'AD' ->00000 00011 ->byte 0x00, leftover 11 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AD======')) + + # 4 data chars + "====": last char has 4 padding bits + # 'AAAB' ->00000 00000 00000 00001 ->bytes 0x00 0x00, leftover 0001 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAB====')) + # 'AAAP' ->leftover 1111 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAP====')) + + # 5 data chars + "===": last char has 1 padding bit + # 'AAAAB' ->4*00000 + 00001 ->bytes 0x00*3, leftover 1 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAB===')) + + # 7 data chars + "=": last char has 3 padding bits + # 'AAAAAAB' ->6*00000 + 00001 ->bytes 0x00*4, leftover 001 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAB=')) + # 'AAAAAAH' ->leftover 111 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAH=')) + + # Verify that zero padding bits are accepted + binascii.a2b_base32(self.type2test(b'AA======')) + binascii.a2b_base32(self.type2test(b'AAAA====')) + binascii.a2b_base32(self.type2test(b'AAAAA===')) + binascii.a2b_base32(self.type2test(b'AAAAAAA=')) + + # Full octet with no padding --always valid + binascii.a2b_base32(self.type2test(b'AAAAAAAA')) + def test_base32_alphabet(self): alphabet = b'0Aa1Bb2Cc3Dd4Ee5Ff6Gg7Hh8Ii9JjKk' data = self.type2test(self.rawdata) From 0ca2563a96284ace9bef9c1f979799117cd7ad07 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 22 Mar 2026 14:44:56 -0700 Subject: [PATCH 02/12] Reject non-zero padding bits in base64/base32 decoding Add leftchar validation after the main decode loop in a2b_base64 (strict_mode only) and a2b_base32 (always). Fix existing test data that incidentally had non-zero padding bits to use characters with zero trailing bits while preserving the same decoded output. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 58 +++++++++++++++++++-------------------- Modules/binascii.c | 20 ++++++++++++++ 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 47bc4872462558..40a40f88007aaf 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -183,8 +183,8 @@ def assertExcessPadding(*args): def assertInvalidLength(*args): _assertRegexTemplate(r'(?i)Invalid.+number of data characters', *args) - assertExcessPadding(b'ab===', b'i') - assertExcessPadding(b'ab====', b'i') + assertExcessPadding(b'aQ===', b'i') + assertExcessPadding(b'aQ====', b'i') assertExcessPadding(b'abc==', b'i\xb7') assertExcessPadding(b'abc===', b'i\xb7') assertExcessPadding(b'abc====', b'i\xb7') @@ -201,7 +201,7 @@ def assertInvalidLength(*args): assertLeadingPadding(b'====abcd', b'i\xb7\x1d') assertLeadingPadding(b'=====abcd', b'i\xb7\x1d') - assertInvalidLength(b'a=b==', b'i') + assertInvalidLength(b'a=Q==', b'i') assertInvalidLength(b'a=bc=', b'i\xb7') assertInvalidLength(b'a=bc==', b'i\xb7') assertInvalidLength(b'a=bcd', b'i\xb7\x1d') @@ -241,17 +241,17 @@ def assertNonBase64Data(data, expected, ignorechars): self.assertEqual(binascii.a2b_base64(data, strict_mode=False, ignorechars=b''), expected) - assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') - assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00') - assertNonBase64Data(b'ab:==', b'i', ignorechars=b':') - assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':') - assertNonBase64Data(b'ab==:', b'i', ignorechars=b':') + assertNonBase64Data(b'\naQ==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'aQ:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') + assertNonBase64Data(b'a\nQ==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\x00Q==', b'i', ignorechars=b'\x00') + assertNonBase64Data(b'aQ:==', b'i', ignorechars=b':') + assertNonBase64Data(b'aQ=:=', b'i', ignorechars=b':') + assertNonBase64Data(b'aQ==:', b'i', ignorechars=b':') assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':') - assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n')) - assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n')) + assertNonBase64Data(b'aQ==\n', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\nQ==', b'i', ignorechars=bytearray(b'\n')) + assertNonBase64Data(b'a\nQ==', b'i', ignorechars=memoryview(b'\n')) # Same cell in the cache: '\r' >> 3 == '\n' >> 3. data = self.type2test(b'\r\n') @@ -766,19 +766,19 @@ def assertInvalidLength(*args): assertExcessData(b"ABCDEFG=H") assertExcessData(b"432Z====55555555") - assertExcessData(b"BE======EF", b"\t\x08") + assertExcessData(b"BE======EA", b"\t\x08") assertExcessData(b"BEEF====C", b"\t\x08Q") - assertExcessData(b"BEEFC===AK", b"\t\x08Q\x01") + assertExcessData(b"BEEFC===AI", b"\t\x08Q\x01") assertExcessData(b"BEEFCAK=E", b"\t\x08Q\x01D") assertExcessPadding(b"BE=======", b"\t") assertExcessPadding(b"BE========", b"\t") - assertExcessPadding(b"BEEF=====", b"\t\x08") - assertExcessPadding(b"BEEF======", b"\t\x08") + assertExcessPadding(b"BEEA=====", b"\t\x08") + assertExcessPadding(b"BEEA======", b"\t\x08") assertExcessPadding(b"BEEFC====", b"\t\x08Q") assertExcessPadding(b"BEEFC=====", b"\t\x08Q") - assertExcessPadding(b"BEEFCAK==", b"\t\x08Q\x01") - assertExcessPadding(b"BEEFCAK===", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAI==", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAI===", b"\t\x08Q\x01") assertExcessPadding(b"BEEFCAKE=", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE==", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE===", b"\t\x08Q\x01D") @@ -818,16 +818,16 @@ def assertInvalidLength(*args): assertIncorrectPadding(b"BE===", b"\t") assertIncorrectPadding(b"BE====", b"\t") assertIncorrectPadding(b"BE=====", b"\t") - assertIncorrectPadding(b"BEEF=", b"\t\x08") - assertIncorrectPadding(b"BEEF==", b"\t\x08") - assertIncorrectPadding(b"BEEF===", b"\t\x08") + assertIncorrectPadding(b"BEEA=", b"\t\x08") + assertIncorrectPadding(b"BEEA==", b"\t\x08") + assertIncorrectPadding(b"BEEA===", b"\t\x08") assertIncorrectPadding(b"BEEFC=", b"\t\x08Q") assertIncorrectPadding(b"BEEFC==", b"\t\x08Q") - assertDiscontinuousPadding(b"BE=EF===", b"\t\x08") - assertDiscontinuousPadding(b"BE==EF==", b"\t\x08") + assertDiscontinuousPadding(b"BE=EA===", b"\t\x08") + assertDiscontinuousPadding(b"BE==EA==", b"\t\x08") assertDiscontinuousPadding(b"BEEF=C==", b"\t\x08Q") - assertDiscontinuousPadding(b"BEEFC=AK", b"\t\x08Q\x01") + assertDiscontinuousPadding(b"BEEFC=AI", b"\t\x08Q\x01") assertInvalidLength(b"A") assertInvalidLength(b"ABC") @@ -847,10 +847,10 @@ def assertInvalidLength(*args): assertInvalidLength(b"B=E=====", b"\t") assertInvalidLength(b"B==E====", b"\t") - assertInvalidLength(b"BEE=F===", b"\t\x08") - assertInvalidLength(b"BEE==F==", b"\t\x08") - assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") - assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") + assertInvalidLength(b"BEE=A===", b"\t\x08") + assertInvalidLength(b"BEE==A==", b"\t\x08") + assertInvalidLength(b"BEEFCA=I", b"\t\x08Q\x01") + assertInvalidLength(b"BEEFCA=====I", b"\t\x08Q\x01") def test_base32_nonzero_padding_bits(self): # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 diff --git a/Modules/binascii.c b/Modules/binascii.c index a57bf3ee6339f5..d016cb4f01c4c5 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -902,6 +902,16 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (strict_mode && leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error_end; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); @@ -1652,6 +1662,16 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, goto error; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); From 615b2279cea420a52910e962ca22ca13730bf8fc Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 22 Mar 2026 15:15:21 -0700 Subject: [PATCH 03/12] Fix test_base64 test data with non-zero padding bits Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_base64.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 9648624b267a54..1b3e040a85952c 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -311,12 +311,12 @@ def test_b64decode_padding_error(self): def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. - tests = ((b'%3d==', b'\xdd', b'%$'), - (b'$3d==', b'\xdd', b'%$'), + tests = ((b'%3Q==', b'\xdd', b'%$'), + (b'$3Q==', b'\xdd', b'%$'), (b'[==', b'', b'[='), - (b'YW]3=', b'am', b']'), - (b'3{d==', b'\xdd', b'{}'), - (b'3d}==', b'\xdd', b'{}'), + (b'YW]0=', b'am', b']'), + (b'3{Q==', b'\xdd', b'{}'), + (b'3Q}==', b'\xdd', b'{}'), (b'@@', b'', b'@!'), (b'!', b'', b'@!'), (b"YWJj\n", b"abc", b'\n'), From 819e14eb5a9955b516032add448f68aa95f9f260 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 4 Apr 2026 23:32:55 +0000 Subject: [PATCH 04/12] Add canonical= kwarg to base64/base32/base85/ascii85 decoders Gate non-zero padding bits rejection behind a new canonical= keyword argument independent of strict_mode, per discussion on gh-146311. Per RFC 4648 section 3.5 ("Canonical Encoding"), decoders MAY reject encodings where pad bits are not zero. The new canonical=True flag enables this check for a2b_base64, a2b_base32, a2b_base85, and a2b_ascii85. For base85/ascii85, the canonical check also rejects single-character final groups (never produced by a conforming encoder) and verifies that partial group encodings match what the encoder would produce. Co-Authored-By: Claude Opus 4.6 (1M context) --- Doc/library/base64.rst | 42 +++++-- Doc/library/binascii.rst | 30 ++++- Lib/base64.py | 42 ++++--- Lib/test/test_base64.py | 10 +- Lib/test/test_binascii.py | 225 ++++++++++++++++++++++++------------ Modules/binascii.c | 110 ++++++++++++++++-- Modules/clinic/binascii.c.h | 124 ++++++++++++++------ 7 files changed, 426 insertions(+), 157 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 425dff8f2a9ad1..2b22683fe33510 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -76,8 +76,8 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b64decode(s, altchars=None, validate=False, *, padded=True) - b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True) +.. function:: b64decode(s, altchars=None, validate=False, *, padded=True, canonical=False) + b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True, canonical=False) Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -112,10 +112,13 @@ POST request. If *validate* is true, these non-alphabet characters in the input result in a :exc:`binascii.Error`. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base64` for details. + For more information about the strict base64 check, see :func:`binascii.a2b_base64` .. versionchanged:: 3.15 - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. deprecated:: 3.15 Accepting the ``+`` and ``/`` characters with an alternative alphabet @@ -179,7 +182,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'') +.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', canonical=False) Decode the Base32 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -203,12 +206,15 @@ POST request. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base32` for details. + A :exc:`binascii.Error` is raised if *s* is incorrectly padded or if there are non-alphabet characters present in the input. .. versionchanged:: next - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. function:: b32hexencode(s, *, padded=True, wrapcol=0) @@ -222,7 +228,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'') +.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', canonical=False) Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. @@ -235,7 +241,7 @@ POST request. .. versionadded:: 3.10 .. versionchanged:: next - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. function:: b16encode(s, *, wrapcol=0) @@ -315,7 +321,7 @@ Refer to the documentation of the individual functions for more information. .. versionadded:: 3.4 -.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v') +.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', canonical=False) Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. @@ -332,8 +338,14 @@ Refer to the documentation of the individual functions for more information. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_ascii85` for details. + .. versionadded:: 3.4 + .. versionchanged:: next + Added the *canonical* parameter. + .. function:: b85encode(b, pad=False, *, wrapcol=0) @@ -353,7 +365,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: b85decode(b, *, ignorechars=b'') +.. function:: b85decode(b, *, ignorechars=b'', canonical=False) Decode the base85-encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. Padding is implicitly removed, if @@ -362,10 +374,13 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.4 .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *canonical* parameters. .. function:: z85encode(s, pad=False, *, wrapcol=0) @@ -390,7 +405,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: z85decode(s, *, ignorechars=b'') +.. function:: z85decode(s, *, ignorechars=b'', canonical=False) Decode the Z85-encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. See `Z85 specification @@ -399,10 +414,13 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.13 .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *canonical* parameters. .. _base64-legacy: diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 4f2edb7eff8a8f..5c7888012e27cf 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -48,8 +48,8 @@ The :mod:`!binascii` module defines the following functions: Added the *backtick* parameter. -.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False) - a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True) +.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False, canonical=False) + a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True, canonical=False) Convert a block of base64 data back to binary and return the binary data. More than one line may be passed at a time. @@ -80,11 +80,15 @@ The :mod:`!binascii` module defines the following functions: * Contains no excess data after padding (including excess padding, newlines, etc.). * Does not start with a padding. + If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. This check is independent of *strict_mode*. + .. versionchanged:: 3.11 Added the *strict_mode* parameter. .. versionchanged:: 3.15 - Added the *alphabet*, *ignorechars* and *padded* parameters. + Added the *alphabet*, *ignorechars*, *padded*, and *canonical* parameters. .. function:: b2a_base64(data, *, padded=True, alphabet=BASE64_ALPHABET, wrapcol=0, newline=True) @@ -110,7 +114,7 @@ The :mod:`!binascii` module defines the following functions: Added the *alphabet*, *padded* and *wrapcol* parameters. -.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'') +.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'', canonical=False) Convert Ascii85 data back to binary and return the binary data. @@ -132,6 +136,11 @@ The :mod:`!binascii` module defines the following functions: to ignore from the input. This should only contain whitespace characters. + If *canonical* is true, non-canonical encodings in the final group are + rejected with :exc:`binascii.Error`. This includes single-character + final groups (which no conforming encoder produces) and final groups whose + padding digits are not what the encoder would produce. + Invalid Ascii85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 @@ -160,7 +169,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'') +.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'', canonical=False) Convert Base85 data back to binary and return the binary data. More than one line may be passed at a time. @@ -176,6 +185,11 @@ The :mod:`!binascii` module defines the following functions: *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings in the final group are + rejected with :exc:`binascii.Error`. This includes single-character + final groups (which no conforming encoder produces) and final groups whose + padding digits are not what the encoder would produce. + Invalid Base85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 @@ -199,7 +213,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'') +.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'', canonical=False) Convert base32 data back to binary and return the binary data. @@ -228,6 +242,10 @@ The :mod:`!binascii` module defines the following functions: presented before the end of the encoded data and the excess pad characters will be ignored. + If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. + Invalid base32 data will raise :exc:`binascii.Error`. .. versionadded:: next diff --git a/Lib/base64.py b/Lib/base64.py index a94bec4d031c52..3a60e358ee285f 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -68,7 +68,7 @@ def b64encode(s, altchars=None, *, padded=True, wrapcol=0): def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, - *, padded=True, ignorechars=_NOT_SPECIFIED): + *, padded=True, ignorechars=_NOT_SPECIFIED, canonical=False): """Decode the Base64 encoded bytes-like object or ASCII string s. Optional altchars must be a bytes-like object or ASCII string of length 2 @@ -110,11 +110,13 @@ def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, alphabet = binascii.BASE64_ALPHABET[:-2] + altchars return binascii.a2b_base64(s, strict_mode=validate, alphabet=alphabet, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if ignorechars is _NOT_SPECIFIED: ignorechars = b'' result = binascii.a2b_base64(s, strict_mode=validate, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if badchar is not None: import warnings if validate: @@ -230,7 +232,8 @@ def b32encode(s, *, padded=True, wrapcol=0): return binascii.b2a_base32(s, padded=padded, wrapcol=wrapcol) b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') -def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): +def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # Handle section 2.4 zero and one mapping. The flag map01 will be either # False, or the character to map the digit 1 (one) to. It should be @@ -241,7 +244,8 @@ def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): s = s.translate(bytes.maketrans(b'01', b'O' + map01)) if casefold: s = s.upper() - return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars) + return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars, + canonical=canonical) b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', extra_args=_B32_DECODE_MAP01_DOCSTRING) @@ -250,13 +254,15 @@ def b32hexencode(s, *, padded=True, wrapcol=0): alphabet=binascii.BASE32HEX_ALPHABET) b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') -def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b''): +def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # base32hex does not have the 01 mapping if casefold: s = s.upper() return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', extra_args='') @@ -324,7 +330,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): return binascii.b2a_ascii85(b, foldspaces=foldspaces, adobe=adobe, wrapcol=wrapcol, pad=pad) -def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): +def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', + canonical=False): """Decode the Ascii85 encoded bytes-like object or ASCII string b. foldspaces is a flag that specifies whether the 'y' short sequence should be @@ -338,10 +345,13 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): input. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ return binascii.a2b_ascii85(b, foldspaces=foldspaces, - adobe=adobe, ignorechars=ignorechars) + adobe=adobe, ignorechars=ignorechars, + canonical=canonical) def b85encode(b, pad=False, *, wrapcol=0): """Encode bytes-like object b in base85 format and return a bytes object. @@ -354,12 +364,15 @@ def b85encode(b, pad=False, *, wrapcol=0): """ return binascii.b2a_base85(b, wrapcol=wrapcol, pad=pad) -def b85decode(b, *, ignorechars=b''): +def b85decode(b, *, ignorechars=b'', canonical=False): """Decode the base85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(b, ignorechars=ignorechars) + return binascii.a2b_base85(b, ignorechars=ignorechars, + canonical=canonical) def z85encode(s, pad=False, *, wrapcol=0): """Encode bytes-like object b in z85 format and return a bytes object. @@ -373,12 +386,15 @@ def z85encode(s, pad=False, *, wrapcol=0): return binascii.b2a_base85(s, wrapcol=wrapcol, pad=pad, alphabet=binascii.Z85_ALPHABET) -def z85decode(s, *, ignorechars=b''): +def z85decode(s, *, ignorechars=b'', canonical=False): """Decode the z85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, ignorechars=ignorechars) + return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, + ignorechars=ignorechars, canonical=canonical) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 39a458bb029b40..1a4dd56a553f4d 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -383,12 +383,12 @@ def _common_test_ignorechars(self, func): def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. - tests = ((b'%3Q==', b'\xdd', b'%$'), - (b'$3Q==', b'\xdd', b'%$'), + tests = ((b'%3d==', b'\xdd', b'%$'), + (b'$3d==', b'\xdd', b'%$'), (b'[==', b'', b'[='), - (b'YW]0=', b'am', b']'), - (b'3{Q==', b'\xdd', b'{}'), - (b'3Q}==', b'\xdd', b'{}'), + (b'YW]3=', b'am', b']'), + (b'3{d==', b'\xdd', b'{}'), + (b'3d}==', b'\xdd', b'{}'), (b'@@', b'', b'@!'), (b'!', b'', b'@!'), (b"YWJj\n", b"abc", b'\n'), diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 3880944bf35e47..a345190a71777d 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -186,8 +186,8 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assert_regex = fr"(?i)Invalid.+number of data characters \({length}\)" _assertRegexTemplate(assert_regex, data, *args, **kwargs) - assertExcessPadding(b'aQ===', b'i') - assertExcessPadding(b'aQ====', b'i') + assertExcessPadding(b'ab===', b'i') + assertExcessPadding(b'ab====', b'i') assertExcessPadding(b'abc==', b'i\xb7') assertExcessPadding(b'abc===', b'i\xb7') assertExcessPadding(b'abc====', b'i\xb7') @@ -205,7 +205,7 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertLeadingPadding(b'=====abcd', b'i\xb7\x1d') assertLeadingPadding(b' =abcd', b'i\xb7\x1d', ignorechars=b' ') - assertInvalidLength(b'a=Q==', b'i') + assertInvalidLength(b'a=b==', b'i') assertInvalidLength(b'a=bc=', b'i\xb7') assertInvalidLength(b'a=bc==', b'i\xb7') assertInvalidLength(b'a=bcd', b'i\xb7\x1d') @@ -292,17 +292,17 @@ def assertNonBase64Data(data, expected, ignorechars): self.assertEqual(binascii.a2b_base64(data, strict_mode=False, ignorechars=b''), expected) - assertNonBase64Data(b'\naQ==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'aQ:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') - assertNonBase64Data(b'a\nQ==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\x00Q==', b'i', ignorechars=b'\x00') - assertNonBase64Data(b'aQ:==', b'i', ignorechars=b':') - assertNonBase64Data(b'aQ=:=', b'i', ignorechars=b':') - assertNonBase64Data(b'aQ==:', b'i', ignorechars=b':') + assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') + assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00') + assertNonBase64Data(b'ab:==', b'i', ignorechars=b':') + assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':') + assertNonBase64Data(b'ab==:', b'i', ignorechars=b':') assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':') - assertNonBase64Data(b'aQ==\n', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\nQ==', b'i', ignorechars=bytearray(b'\n')) - assertNonBase64Data(b'a\nQ==', b'i', ignorechars=memoryview(b'\n')) + assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n')) + assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n')) self.assertEqual(binascii.a2b_base64(b'+A-/B_', ignorechars=b'+/-_'), b'\xf8\x0f\xc1') @@ -383,33 +383,37 @@ def assertInvalidLength(data, strict_mode=True): assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters strict_mode=False) - def test_base64_nonzero_padding_bits(self): + def test_base64_canonical(self): # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 # Decoders MAY reject encoded data if the pad bits are not zero. + # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB==')), b'\x00') + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB=='), + strict_mode=True), b'\x00') + # 2 data chars + "==": last char has 4 padding bits - # 'A' = 0, 'B' = 1 ->000000 000001 ->byte 0x00, leftover 0001 (non-zero) + # 'A' = 0, 'B' = 1 -> leftover 0001 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AB=='), strict_mode=True) - # 'A' = 0, 'P' = 15 ->000000 001111 ->byte 0x00, leftover 1111 (non-zero) + binascii.a2b_base64(self.type2test(b'AB=='), canonical=True) + # 'A' = 0, 'P' = 15 -> leftover 1111 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AP=='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AP=='), canonical=True) # 3 data chars + "=": last char has 2 padding bits - # 'A' = 0, 'A' = 0, 'B' = 1 ->000000 000000 000001 ->bytes 0x00 0x00, - # leftover 01 (non-zero) + # 'A' = 0, 'A' = 0, 'B' = 1 -> leftover 01 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AAB='), strict_mode=True) - # 'A' = 0, 'A' = 0, 'D' = 3 ->leftover 11 (non-zero) + binascii.a2b_base64(self.type2test(b'AAB='), canonical=True) + # 'A' = 0, 'A' = 0, 'D' = 3 -> leftover 11 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AAD='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AAD='), canonical=True) # Verify that zero padding bits are accepted - binascii.a2b_base64(self.type2test(b'AA=='), strict_mode=True) - binascii.a2b_base64(self.type2test(b'AAA='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AA=='), canonical=True) + binascii.a2b_base64(self.type2test(b'AAA='), canonical=True) - # Full quads with no padding have no leftover bits --always valid - binascii.a2b_base64(self.type2test(b'AAAA'), strict_mode=True) + # Full quads with no padding have no leftover bits -- always valid + binascii.a2b_base64(self.type2test(b'AAAA'), canonical=True) def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' @@ -795,6 +799,82 @@ def test_base85_alphabet(self): with self.assertRaises(TypeError): binascii.a2b_base64(data, alphabet=bytearray(alphabet)) + def test_base85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_base85(b'VF'), b'a') + self.assertEqual(binascii.a2b_base85(b'V'), b'') + + # 1-char partial groups are never produced by a conforming encoder + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'V', canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'0', canonical=True) + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_base85(data) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Non-canonical 2-char group (1 output byte) + canonical_enc = binascii.b2a_base85(b'a') + self.assertEqual(canonical_enc, b'VE') + # VF decodes to b'a' but is not canonical + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'VF', canonical=True) + + # Non-canonical 3-char group (2 output bytes) + canonical_enc = binascii.b2a_base85(b'ab') + decoded_canonical = binascii.a2b_base85(canonical_enc, canonical=True) + self.assertEqual(decoded_canonical, b'ab') + # Increment last digit to make non-canonical + non_canonical = canonical_enc[:-1] + bytes([canonical_enc[-1] + 1]) + self.assertEqual(binascii.a2b_base85(non_canonical), b'ab') + with self.assertRaises(binascii.Error): + binascii.a2b_base85(non_canonical, canonical=True) + + # Full 5-char groups are always canonical (no padding bits) + self.assertEqual( + binascii.a2b_base85(b'VPa!s', canonical=True), b'abcd') + + # Empty input is valid + self.assertEqual(binascii.a2b_base85(b'', canonical=True), b'') + + def test_ascii85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') + self.assertEqual(binascii.a2b_ascii85(b'@'), b'') + + # 1-char partial groups are never produced by a conforming encoder + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'@', canonical=True) + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_ascii85(data) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Non-canonical 2-char group + canonical_enc = binascii.b2a_ascii85(b'a') + self.assertEqual(canonical_enc, b'@/') + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'@0', canonical=True) + + # Full 5-char groups are always canonical + self.assertEqual( + binascii.a2b_ascii85(b'@:E_W', canonical=True), b'abcd') + + # Empty input is valid + self.assertEqual(binascii.a2b_ascii85(b'', canonical=True), b'') + + # Adobe-wrapped with canonical + self.assertEqual( + binascii.a2b_ascii85(b'<~@:E_W~>', canonical=True, adobe=True), + b'abcd') + def test_base32_valid(self): # Test base32 with valid data lines = [] @@ -863,19 +943,19 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertExcessData(b"ABCDEFG=H") assertExcessData(b"432Z====55555555") - assertExcessData(b"BE======EA", b"\t\x08") + assertExcessData(b"BE======EF", b"\t\x08") assertExcessData(b"BEEF====C", b"\t\x08Q") - assertExcessData(b"BEEFC===AI", b"\t\x08Q\x01") + assertExcessData(b"BEEFC===AK", b"\t\x08Q\x01") assertExcessData(b"BEEFCAK=E", b"\t\x08Q\x01D") assertExcessPadding(b"BE=======", b"\t") assertExcessPadding(b"BE========", b"\t") - assertExcessPadding(b"BEEA=====", b"\t\x08") - assertExcessPadding(b"BEEA======", b"\t\x08") + assertExcessPadding(b"BEEF=====", b"\t\x08") + assertExcessPadding(b"BEEF======", b"\t\x08") assertExcessPadding(b"BEEFC====", b"\t\x08Q") assertExcessPadding(b"BEEFC=====", b"\t\x08Q") - assertExcessPadding(b"BEEFCAI==", b"\t\x08Q\x01") - assertExcessPadding(b"BEEFCAI===", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAK==", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAK===", b"\t\x08Q\x01") assertExcessPadding(b"BEEFCAKE=", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE==", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE===", b"\t\x08Q\x01D") @@ -916,16 +996,16 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertIncorrectPadding(b"BE===", b"\t") assertIncorrectPadding(b"BE====", b"\t") assertIncorrectPadding(b"BE=====", b"\t") - assertIncorrectPadding(b"BEEA=", b"\t\x08") - assertIncorrectPadding(b"BEEA==", b"\t\x08") - assertIncorrectPadding(b"BEEA===", b"\t\x08") + assertIncorrectPadding(b"BEEF=", b"\t\x08") + assertIncorrectPadding(b"BEEF==", b"\t\x08") + assertIncorrectPadding(b"BEEF===", b"\t\x08") assertIncorrectPadding(b"BEEFC=", b"\t\x08Q") assertIncorrectPadding(b"BEEFC==", b"\t\x08Q") - assertDiscontinuousPadding(b"BE=EA===", b"\t\x08") - assertDiscontinuousPadding(b"BE==EA==", b"\t\x08") + assertDiscontinuousPadding(b"BE=EF===", b"\t\x08") + assertDiscontinuousPadding(b"BE==EF==", b"\t\x08") assertDiscontinuousPadding(b"BEEF=C==", b"\t\x08Q") - assertDiscontinuousPadding(b"BEEFC=AI", b"\t\x08Q\x01") + assertDiscontinuousPadding(b"BEEFC=AK", b"\t\x08Q\x01") assertInvalidLength(b"A") assertInvalidLength(b"ABC") @@ -948,62 +1028,59 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertInvalidLength(b"B=E=====", b"\t") assertInvalidLength(b"B==E====", b"\t") - assertInvalidLength(b"BEE=A===", b"\t\x08") - assertInvalidLength(b"BEE==A==", b"\t\x08") - assertInvalidLength(b"BEEFCA=I", b"\t\x08Q\x01") - assertInvalidLength(b"BEEFCA=====I", b"\t\x08Q\x01") + assertInvalidLength(b"BEE=F===", b"\t\x08") + assertInvalidLength(b"BEE==F==", b"\t\x08") + assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") + assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") - def test_base32_nonzero_padding_bits(self): + assertInvalidLength(b" A", ignorechars=b' ') + assertInvalidLength(b" ABC", ignorechars=b' ') + assertInvalidLength(b" ABCDEF", ignorechars=b' ') + assertInvalidLength(b" ABCDEFGHI", ignorechars=b' ') + assertInvalidLength(b" ABCDEFGHIJK", ignorechars=b' ') + assertInvalidLength(b" ABCDEFGHIJKLMN", ignorechars=b' ') + assertInvalidLength(b" A=======", ignorechars=b' ') + assertInvalidLength(b" ABC=====", ignorechars=b' ') + assertInvalidLength(b" ABCDEF==", ignorechars=b' ') + + def test_base32_canonical(self): # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 # Decoders MAY reject encoded data if the pad bits are not zero. + # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base32(self.type2test(b'AB======')), + b'\x00') + # 2 data chars + "======": last char has 2 padding bits - # 'AB' ->00000 00001 ->byte 0x00, leftover 01 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AB======')) - # 'AD' ->00000 00011 ->byte 0x00, leftover 11 (non-zero) + binascii.a2b_base32(self.type2test(b'AB======'), canonical=True) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AD======')) + binascii.a2b_base32(self.type2test(b'AD======'), canonical=True) # 4 data chars + "====": last char has 4 padding bits - # 'AAAB' ->00000 00000 00000 00001 ->bytes 0x00 0x00, leftover 0001 with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAB====')) - # 'AAAP' ->leftover 1111 + binascii.a2b_base32(self.type2test(b'AAAB===='), canonical=True) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAP====')) + binascii.a2b_base32(self.type2test(b'AAAP===='), canonical=True) # 5 data chars + "===": last char has 1 padding bit - # 'AAAAB' ->4*00000 + 00001 ->bytes 0x00*3, leftover 1 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAAB===')) + binascii.a2b_base32(self.type2test(b'AAAAB==='), canonical=True) # 7 data chars + "=": last char has 3 padding bits - # 'AAAAAAB' ->6*00000 + 00001 ->bytes 0x00*4, leftover 001 with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAAAAB=')) - # 'AAAAAAH' ->leftover 111 + binascii.a2b_base32(self.type2test(b'AAAAAAB='), canonical=True) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAAAAH=')) + binascii.a2b_base32(self.type2test(b'AAAAAAH='), canonical=True) # Verify that zero padding bits are accepted - binascii.a2b_base32(self.type2test(b'AA======')) - binascii.a2b_base32(self.type2test(b'AAAA====')) - binascii.a2b_base32(self.type2test(b'AAAAA===')) - binascii.a2b_base32(self.type2test(b'AAAAAAA=')) - - # Full octet with no padding --always valid - binascii.a2b_base32(self.type2test(b'AAAAAAAA')) + binascii.a2b_base32(self.type2test(b'AA======'), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAA===='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAA==='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAAAA='), canonical=True) - assertInvalidLength(b" A", ignorechars=b' ') - assertInvalidLength(b" ABC", ignorechars=b' ') - assertInvalidLength(b" ABCDEF", ignorechars=b' ') - assertInvalidLength(b" ABCDEFGHI", ignorechars=b' ') - assertInvalidLength(b" ABCDEFGHIJK", ignorechars=b' ') - assertInvalidLength(b" ABCDEFGHIJKLMN", ignorechars=b' ') - assertInvalidLength(b" A=======", ignorechars=b' ') - assertInvalidLength(b" ABC=====", ignorechars=b' ') - assertInvalidLength(b" ABCDEF==", ignorechars=b' ') + # Full octet with no padding -- always valid + binascii.a2b_base32(self.type2test(b'AAAAAAAA'), canonical=True) def test_a2b_base32_padded(self): a2b_base32 = binascii.a2b_base32 diff --git a/Modules/binascii.c b/Modules/binascii.c index dcfb0e5113f1bf..5e650518df4896 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -729,6 +729,8 @@ binascii.a2b_base64 ignorechars: Py_buffer = NULL A byte string containing characters to ignore from the input when strict_mode is true. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base64 data. [clinic start generated code]*/ @@ -736,8 +738,8 @@ Decode a line of base64 data. static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars) -/*[clinic end generated code: output=525d840a299ff132 input=74a53dd3b23474b3]*/ + Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=77c46dcbf4239527 input=c99096d071deeec8]*/ { assert(data->len >= 0); @@ -911,7 +913,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 * Decoders MAY reject non-zero padding bits. */ - if (strict_mode && leftchar != 0) { + if (canonical && leftchar != 0) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Non-zero padding bits"); @@ -1047,14 +1049,16 @@ binascii.a2b_ascii85 Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings per RFC 4648 section 3.5. Decode Ascii85 data. [clinic start generated code]*/ static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars) -/*[clinic end generated code: output=599aa3e41095a651 input=f39abd11eab4bac0]*/ + int adobe, Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=09b35f1eac531357 input=75a0dcab97528ade]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1174,6 +1178,43 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-canonical encodings. */ + if (canonical && chunk_len < 4) { + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-canonical Ascii85 group size"); + } + goto error; + } + uint32_t canon = 0; + for (Py_ssize_t i = chunk_len; i > 0; i--) { + canon = (canon << 8) | bin_data[-i]; + } + canon <<= (4 - chunk_len) * 8; + unsigned char digits[5]; + uint32_t tmp = canon; + for (int i = 4; i >= 0; i--) { + digits[i] = tmp % 85; + tmp /= 85; + } + uint32_t expected = 0; + for (int i = 0; i < 5; i++) { + expected = expected * 85 + + (i <= chunk_len ? digits[i] : 84); + } + if (expected != leftchar) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + group_pos = 0; leftchar = 0; } @@ -1325,14 +1366,17 @@ binascii.a2b_base85 alphabet: PyBytesObject(c_default="NULL") = BASE85_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings per RFC 4648 section 3.5. Decode a line of Base85 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=6a8d6eae798818d7 input=04d72a319712bdf3]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=90dfef0c6b51e5f3 input=fe3bb2d3a66b9842]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1418,6 +1462,49 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-canonical encodings. */ + if (canonical && chunk_len < 4) { + if (chunk_len == 0) { + /* 1-char partial group is never produced by a conforming + * encoder. */ + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-canonical Base85 group size"); + } + goto error; + } + /* Re-encode the output bytes to verify canonical form. + * Build the canonical uint32 from output bytes (zero-padded). */ + uint32_t canon = 0; + for (Py_ssize_t i = chunk_len; i > 0; i--) { + canon = (canon << 8) | bin_data[-i]; + } + canon <<= (4 - chunk_len) * 8; + /* Extract first (chunk_len + 1) base85 digits. */ + unsigned char digits[5]; + uint32_t tmp = canon; + for (int i = 4; i >= 0; i--) { + digits[i] = tmp % 85; + tmp /= 85; + } + /* Reconstruct expected value: canonical digits + 84-padding. */ + uint32_t expected = 0; + for (int i = 0; i < 5; i++) { + expected = expected * 85 + + (i <= chunk_len ? digits[i] : 84); + } + if (expected != leftchar) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + group_pos = 0; leftchar = 0; } @@ -1545,14 +1632,17 @@ binascii.a2b_base32 alphabet: PyBytesObject(c_default="NULL") = BASE32_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base32 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=7dbbaa816d956b1c input=07a3721acdf9b688]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=bc70f2bb6001fb55 input=5bfe6d1ea2f30e3b]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1735,7 +1825,7 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 * Decoders MAY reject non-zero padding bits. */ - if (leftchar != 0) { + if (canonical && leftchar != 0) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Non-zero padding bits"); diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 0a2d33c428d10a..793af5b1a7e743 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -119,7 +119,7 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(binascii_a2b_base64__doc__, "a2b_base64($module, data, /, *, strict_mode=,\n" " padded=True, alphabet=BASE64_ALPHABET,\n" -" ignorechars=)\n" +" ignorechars=, canonical=False)\n" "--\n" "\n" "Decode a line of base64 data.\n" @@ -132,7 +132,9 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, " When set to false, padding in input is not required.\n" " ignorechars\n" " A byte string containing characters to ignore from the input when\n" -" strict_mode is true."); +" strict_mode is true.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE64_METHODDEF \ {"a2b_base64", _PyCFunction_CAST(binascii_a2b_base64), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__}, @@ -140,7 +142,7 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars); + Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -148,7 +150,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 4 + #define NUM_KEYWORDS 5 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -157,7 +159,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -166,20 +168,21 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base64", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[5]; + PyObject *argsbuf[6]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int strict_mode = -1; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {NULL, NULL}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -220,11 +223,20 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[4]) { + if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[5]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -352,7 +364,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_ascii85__doc__, "a2b_ascii85($module, data, /, *, foldspaces=False, adobe=False,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode Ascii85 data.\n" @@ -362,14 +374,16 @@ PyDoc_STRVAR(binascii_a2b_ascii85__doc__, " adobe\n" " Expect data to be wrapped in \'<~\' and \'~>\' as in Adobe Ascii85.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings per RFC 4648 section 3.5."); #define BINASCII_A2B_ASCII85_METHODDEF \ {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars); + int adobe, Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -377,7 +391,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -386,7 +400,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -395,19 +409,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_ascii85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int foldspaces = 0; int adobe = 0; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -438,11 +453,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars); + return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -573,20 +597,23 @@ binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyDoc_STRVAR(binascii_a2b_base85__doc__, "a2b_base85($module, data, /, *, alphabet=BASE85_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of Base85 data.\n" "\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE85_METHODDEF \ {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars); + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -594,7 +621,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -603,7 +630,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -612,18 +639,19 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -646,11 +674,20 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[2]) { + if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[3]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars); + return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -768,7 +805,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_base32__doc__, "a2b_base32($module, data, /, *, padded=True, alphabet=BASE32_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of base32 data.\n" @@ -776,14 +813,17 @@ PyDoc_STRVAR(binascii_a2b_base32__doc__, " padded\n" " When set to false, padding in input is not required.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE32_METHODDEF \ {"a2b_base32", _PyCFunction_CAST(binascii_a2b_base32), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base32__doc__}, static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars); + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -791,7 +831,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -800,7 +840,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -809,19 +849,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -853,11 +894,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -1634,4 +1684,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=2acab1ceb0058b1a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d1e630dd194dfddd input=a9049054013a1b77]*/ From 4b7c6ae0cdccb9c988517db7608ec507af12e9cd Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 4 Apr 2026 23:43:01 +0000 Subject: [PATCH 05/12] Add 'canonical' to global strings tables The _Py_ID(canonical) identifier used by the clinic-generated argument parsing code needs to be registered in the global strings. Co-Authored-By: Claude Opus 4.6 (1M context) --- Include/internal/pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init_generated.h | 1 + Include/internal/pycore_unicodeobject_generated.h | 4 ++++ 4 files changed, 7 insertions(+) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index beae65213a27b6..4fd42185d8a4a1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1636,6 +1636,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cancel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(canonical)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capitals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(category)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index bb1c6dbaf03906..f2d43c22069b92 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -359,6 +359,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(callable) STRUCT_FOR_ID(callback) STRUCT_FOR_ID(cancel) + STRUCT_FOR_ID(canonical) STRUCT_FOR_ID(capath) STRUCT_FOR_ID(capitals) STRUCT_FOR_ID(category) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 64b029797ab9b3..6ee64a461d8568 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1634,6 +1634,7 @@ extern "C" { INIT_ID(callable), \ INIT_ID(callback), \ INIT_ID(cancel), \ + INIT_ID(canonical), \ INIT_ID(capath), \ INIT_ID(capitals), \ INIT_ID(category), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 461ee36dcebb6d..bcb117e1091674 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1216,6 +1216,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(canonical); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(capath); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); From 308433af1cb7d0940f41b7998a02f9b83aef798b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 4 Apr 2026 23:51:48 +0000 Subject: [PATCH 06/12] Remove incorrect RFC 4648 references from base85/ascii85 RFC 4648 only covers base16, base32, and base64. The canonical encoding concept applies to base85 but is not defined by that RFC. Co-Authored-By: Claude Opus 4.6 (1M context) --- Modules/binascii.c | 14 ++++++-------- Modules/clinic/binascii.c.h | 6 +++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 5e650518df4896..43167a43ef7a79 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1050,7 +1050,7 @@ binascii.a2b_ascii85 ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. canonical: bool = False - When set to true, reject non-canonical encodings per RFC 4648 section 3.5. + When set to true, reject non-canonical encodings. Decode Ascii85 data. [clinic start generated code]*/ @@ -1058,7 +1058,7 @@ Decode Ascii85 data. static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, int adobe, Py_buffer *ignorechars, int canonical) -/*[clinic end generated code: output=09b35f1eac531357 input=75a0dcab97528ade]*/ +/*[clinic end generated code: output=09b35f1eac531357 input=dd050604ed30199e]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1178,8 +1178,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 - * Decoders MAY reject non-canonical encodings. */ + /* Reject non-canonical encodings in the final group. */ if (canonical && chunk_len < 4) { if (chunk_len == 0) { state = get_binascii_state(module); @@ -1367,7 +1366,7 @@ binascii.a2b_base85 ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. canonical: bool = False - When set to true, reject non-canonical encodings per RFC 4648 section 3.5. + When set to true, reject non-canonical encodings. Decode a line of Base85 data. [clinic start generated code]*/ @@ -1376,7 +1375,7 @@ static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, PyBytesObject *alphabet, Py_buffer *ignorechars, int canonical) -/*[clinic end generated code: output=90dfef0c6b51e5f3 input=fe3bb2d3a66b9842]*/ +/*[clinic end generated code: output=90dfef0c6b51e5f3 input=2819dc8aeffee5a2]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1462,8 +1461,7 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 - * Decoders MAY reject non-canonical encodings. */ + /* Reject non-canonical encodings in the final group. */ if (canonical && chunk_len < 4) { if (chunk_len == 0) { /* 1-char partial group is never produced by a conforming diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 793af5b1a7e743..ed695758ef998c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -376,7 +376,7 @@ PyDoc_STRVAR(binascii_a2b_ascii85__doc__, " ignorechars\n" " A byte string containing characters to ignore from the input.\n" " canonical\n" -" When set to true, reject non-canonical encodings per RFC 4648 section 3.5."); +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_ASCII85_METHODDEF \ {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, @@ -605,7 +605,7 @@ PyDoc_STRVAR(binascii_a2b_base85__doc__, " ignorechars\n" " A byte string containing characters to ignore from the input.\n" " canonical\n" -" When set to true, reject non-canonical encodings per RFC 4648 section 3.5."); +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_BASE85_METHODDEF \ {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, @@ -1684,4 +1684,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=d1e630dd194dfddd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b41544f39b0ef681 input=a9049054013a1b77]*/ From 9b78bddc9357aad073a033e53238cd4e7b563299 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 00:19:54 +0000 Subject: [PATCH 07/12] Simplify base85 canonical check using integer division Replace the re-encode-and-compare loops with a quotient comparison: two divisions by 85**n_pad tell us whether the decoded uint32 and the zero-padded output bytes share the same leading base-85 digits. Co-Authored-By: Claude Opus 4.6 (1M context) --- Modules/binascii.c | 73 ++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 42 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 43167a43ef7a79..7e2a0b7c173e6a 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -244,6 +244,9 @@ static const _Py_ALIGNED_DEF(64, unsigned char) table_b2a_base85_a85[] = #define BASE85_A85_Z 0x00000000 #define BASE85_A85_Y 0x20202020 +/* 85**0 through 85**4, used for canonical encoding checks. */ +static const uint32_t pow85[] = {1, 85, 7225, 614125, 52200625}; + static const _Py_ALIGNED_DEF(64, unsigned char) table_a2b_base32[] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, @@ -1178,7 +1181,20 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* Reject non-canonical encodings in the final group. */ + /* Reject non-canonical encodings in the final group. + * + * A partial group of N chars (2-4) encodes N-1 bytes. The + * decoder pads missing chars with digit 84 (the maximum). + * The encoder produces the unique N chars for those bytes by + * zero-padding the bytes to a uint32 and taking the leading + * N base-85 digits. Two encodings are equivalent iff they + * yield the same leading digits, i.e. the same quotient when + * the decoded uint32 is divided by 85**(5-N). + * + * So we zero the bottom (4-chunk_len) bytes of leftchar to + * get the canonical uint32 ("canonical_top") and compare + * quotients. A 1-char group (chunk_len==0) is always + * non-canonical since no conforming encoder produces it. */ if (canonical && chunk_len < 4) { if (chunk_len == 0) { state = get_binascii_state(module); @@ -1188,23 +1204,12 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, } goto error; } - uint32_t canon = 0; - for (Py_ssize_t i = chunk_len; i > 0; i--) { - canon = (canon << 8) | bin_data[-i]; - } - canon <<= (4 - chunk_len) * 8; - unsigned char digits[5]; - uint32_t tmp = canon; - for (int i = 4; i >= 0; i--) { - digits[i] = tmp % 85; - tmp /= 85; - } - uint32_t expected = 0; - for (int i = 0; i < 5; i++) { - expected = expected * 85 - + (i <= chunk_len ? digits[i] : 84); - } - if (expected != leftchar) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, @@ -1461,11 +1466,10 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* Reject non-canonical encodings in the final group. */ + /* Reject non-canonical encodings in the final group. + * See the comment in a2b_ascii85 for the full explanation. */ if (canonical && chunk_len < 4) { if (chunk_len == 0) { - /* 1-char partial group is never produced by a conforming - * encoder. */ state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, @@ -1473,27 +1477,12 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, } goto error; } - /* Re-encode the output bytes to verify canonical form. - * Build the canonical uint32 from output bytes (zero-padded). */ - uint32_t canon = 0; - for (Py_ssize_t i = chunk_len; i > 0; i--) { - canon = (canon << 8) | bin_data[-i]; - } - canon <<= (4 - chunk_len) * 8; - /* Extract first (chunk_len + 1) base85 digits. */ - unsigned char digits[5]; - uint32_t tmp = canon; - for (int i = 4; i >= 0; i--) { - digits[i] = tmp % 85; - tmp /= 85; - } - /* Reconstruct expected value: canonical digits + 84-padding. */ - uint32_t expected = 0; - for (int i = 0; i < 5; i++) { - expected = expected * 85 - + (i <= chunk_len ? digits[i] : 84); - } - if (expected != leftchar) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, From 101edf6ce964361daba051b26d4ee027080defc5 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 00:24:15 +0000 Subject: [PATCH 08/12] Improve base85/ascii85 canonical test coverage - Test non-canonical rejection for all partial group sizes (2/3/4 chars) - Test digit-0 1-char group for ascii85 (exercises chunk_len==0 guard) - Test boundary byte values (\x00, \xff) at each group size Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 56 ++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index a345190a71777d..a3426ae7392f5d 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -807,6 +807,8 @@ def test_base85_canonical(self): # 1-char partial groups are never produced by a conforming encoder with self.assertRaises(binascii.Error): binascii.a2b_base85(b'V', canonical=True) + # Digit 0 in a 1-char group exercises the explicit chunk_len==0 + # guard (without it the division check would see 0/P == 0/P). with self.assertRaises(binascii.Error): binascii.a2b_base85(b'0', canonical=True) @@ -817,22 +819,27 @@ def test_base85_canonical(self): decoded = binascii.a2b_base85(encoded, canonical=True) self.assertEqual(decoded, data) - # Non-canonical 2-char group (1 output byte) - canonical_enc = binascii.b2a_base85(b'a') - self.assertEqual(canonical_enc, b'VE') - # VF decodes to b'a' but is not canonical - with self.assertRaises(binascii.Error): - binascii.a2b_base85(b'VF', canonical=True) - - # Non-canonical 3-char group (2 output bytes) - canonical_enc = binascii.b2a_base85(b'ab') - decoded_canonical = binascii.a2b_base85(canonical_enc, canonical=True) - self.assertEqual(decoded_canonical, b'ab') - # Increment last digit to make non-canonical - non_canonical = canonical_enc[:-1] + bytes([canonical_enc[-1] + 1]) - self.assertEqual(binascii.a2b_base85(non_canonical), b'ab') - with self.assertRaises(binascii.Error): - binascii.a2b_base85(non_canonical, canonical=True) + # Test non-canonical rejection for each partial group size + # (2-char/1-byte, 3-char/2-byte, 4-char/3-byte). + # Incrementing the last digit by 1 produces a non-canonical + # encoding. For 4-char groups (n_pad=1) a +1 can change the + # output byte, so we use b'ab\x00' whose canonical form allows + # a +1 that still decodes to the same 3 bytes. + for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_base85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + # Same decoded output without canonical check + self.assertEqual(binascii.a2b_base85(non_canonical), data) + # Rejected with canonical=True + with self.assertRaises(binascii.Error): + binascii.a2b_base85(non_canonical, canonical=True) + + # Boundary bytes: \x00 and \xff for each partial group size + for data in [b'\x00', b'\x00\x00', b'\x00\x00\x00', + b'\xff', b'\xff\xff', b'\xff\xff\xff']: + canonical_enc = binascii.b2a_base85(data) + binascii.a2b_base85(canonical_enc, canonical=True) # Full 5-char groups are always canonical (no padding bits) self.assertEqual( @@ -849,6 +856,9 @@ def test_ascii85_canonical(self): # 1-char partial groups are never produced by a conforming encoder with self.assertRaises(binascii.Error): binascii.a2b_ascii85(b'@', canonical=True) + # Digit 0 ('!' in ascii85) exercises the explicit chunk_len==0 guard + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!', canonical=True) # Verify round-trip: encode then decode with canonical=True works for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', @@ -857,11 +867,15 @@ def test_ascii85_canonical(self): decoded = binascii.a2b_ascii85(encoded, canonical=True) self.assertEqual(decoded, data) - # Non-canonical 2-char group - canonical_enc = binascii.b2a_ascii85(b'a') - self.assertEqual(canonical_enc, b'@/') - with self.assertRaises(binascii.Error): - binascii.a2b_ascii85(b'@0', canonical=True) + # Test non-canonical rejection for each partial group size. + # See test_base85_canonical for why b'ab\x00' is used for 3 bytes. + for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_ascii85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + self.assertEqual(binascii.a2b_ascii85(non_canonical), data) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(non_canonical, canonical=True) # Full 5-char groups are always canonical self.assertEqual( From b618655765b6b33d753f026b2f2fe5d904bf0677 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 00:30:18 +0000 Subject: [PATCH 09/12] Add hypothesis tests for canonical encoding - Round-trip tests: encoder always produces canonical output (base64, base32, base85, ascii85) - Uniqueness tests: for base85/ascii85 partial groups, sweep all 85 last-digit values and verify exactly one decodes to the original payload with canonical=True Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 82 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index a3426ae7392f5d..b598cc30418fbe 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -415,6 +415,17 @@ def test_base64_canonical(self): # Full quads with no padding have no leftover bits -- always valid binascii.a2b_base64(self.type2test(b'AAAA'), canonical=True) + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base64_canonical_roundtrip(self, payload): + # The encoder must always produce canonical output. + encoded = binascii.b2a_base64(payload, newline=False) + decoded = binascii.a2b_base64(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' b'ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr') @@ -848,6 +859,38 @@ def test_base85_canonical(self): # Empty input is valid self.assertEqual(binascii.a2b_base85(b'', canonical=True), b'') + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base85(payload) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_base85_canonical_unique(self, payload): + # For a partial group, sweeping all 85 last-digit values should + # yield exactly one encoding that both decodes to the original + # payload AND passes canonical=True. + hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_base85(payload) + table = binascii.BASE85_ALPHABET + accepted = [] + for digit in table: + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_base85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + def test_ascii85_canonical(self): # Non-canonical encodings are accepted without canonical=True self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') @@ -889,6 +932,35 @@ def test_ascii85_canonical(self): binascii.a2b_ascii85(b'<~@:E_W~>', canonical=True, adobe=True), b'abcd') + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_ascii85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_ascii85(payload) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_ascii85_canonical_unique(self, payload): + hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_ascii85(payload) + # Ascii85 alphabet: '!' (33) through 'u' (117) + accepted = [] + for digit in range(33, 118): + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_ascii85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + def test_base32_valid(self): # Test base32 with valid data lines = [] @@ -1096,6 +1168,16 @@ def test_base32_canonical(self): # Full octet with no padding -- always valid binascii.a2b_base32(self.type2test(b'AAAAAAAA'), canonical=True) + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base32_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base32(payload) + decoded = binascii.a2b_base32(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_a2b_base32_padded(self): a2b_base32 = binascii.a2b_base32 t = self.type2test From b5391bd378c6e7aab6c8897a3ddb487eeb24155e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 19:29:34 +0000 Subject: [PATCH 10/12] Reject 1-char base85/ascii85 groups unconditionally Per the PLRM spec (section 3.13.3), a final partial 5-tuple containing only one character is an encoding violation. Move this check outside the `canonical=` guard so it is always enforced. Also change chunk_len and i from Py_ssize_t to int per review feedback. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 54 +++++++++++++++++++-------------------- Modules/binascii.c | 51 ++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 49 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index b598cc30418fbe..181b0b052e73b9 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -486,20 +486,22 @@ def test_ascii85_valid(self): res += b self.assertEqual(res, rawdata) - # Test decoding inputs with length 1 mod 5 - params = [ - (b"a", False, False, b"", b""), - (b"xbw", False, False, b"wx", b""), - (b"<~c~>", False, True, b"", b""), - (b"{d ~>", False, True, b" {", b""), - (b"ye", True, False, b"", b" "), - (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "), - (b"<~FCfN8yg~>", True, True, b"", b"test "), - (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "), + # Inputs with length 1 mod 5 end with a 1-char group, which is + # an encoding violation per the PLRM spec. + error_params = [ + (b"a", False, False, b""), + (b"xbw", False, False, b"wx"), + (b"<~c~>", False, True, b""), + (b"{d ~>", False, True, b" {"), + (b"ye", True, False, b""), + (b"z\x01y\x00f", True, False, b"\x00\x01"), + (b"<~FCfN8yg~>", True, True, b""), + (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03"), ] - for a, foldspaces, adobe, ignorechars, b in params: + for a, foldspaces, adobe, ignorechars in error_params: kwargs = {"foldspaces": foldspaces, "adobe": adobe, "ignorechars": ignorechars} - self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(self.type2test(a), **kwargs) def test_ascii85_invalid(self): # Test Ascii85 with invalid characters interleaved @@ -713,16 +715,18 @@ def test_base85_valid(self): self.assertEqual(res, self.rawdata) # Test decoding inputs with different length - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') + # 1-char groups are rejected (encoding violation) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'a')) self.assertEqual(binascii.a2b_base85(self.type2test(b'ab')), b'q') self.assertEqual(binascii.a2b_base85(self.type2test(b'abc')), b'qa') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcd')), b'qa\x9e') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcde')), b'qa\x9e\xb6') - self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdef')), - b'qa\x9e\xb6') + # 6-char input = full 5-char group + trailing 1-char group (rejected) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'abcdef')) self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdefg')), b'qa\x9e\xb6\x81') @@ -813,15 +817,13 @@ def test_base85_alphabet(self): def test_base85_canonical(self): # Non-canonical encodings are accepted without canonical=True self.assertEqual(binascii.a2b_base85(b'VF'), b'a') - self.assertEqual(binascii.a2b_base85(b'V'), b'') - # 1-char partial groups are never produced by a conforming encoder + # 1-char partial groups are always rejected (encoding violation: + # no conforming encoder produces them) with self.assertRaises(binascii.Error): - binascii.a2b_base85(b'V', canonical=True) - # Digit 0 in a 1-char group exercises the explicit chunk_len==0 - # guard (without it the division check would see 0/P == 0/P). + binascii.a2b_base85(b'V') with self.assertRaises(binascii.Error): - binascii.a2b_base85(b'0', canonical=True) + binascii.a2b_base85(b'0') # Verify round-trip: encode then decode with canonical=True works for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', @@ -894,14 +896,12 @@ def test_base85_canonical_unique(self, payload): def test_ascii85_canonical(self): # Non-canonical encodings are accepted without canonical=True self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') - self.assertEqual(binascii.a2b_ascii85(b'@'), b'') - # 1-char partial groups are never produced by a conforming encoder + # 1-char partial groups are always rejected (PLRM encoding violation) with self.assertRaises(binascii.Error): - binascii.a2b_ascii85(b'@', canonical=True) - # Digit 0 ('!' in ascii85) exercises the explicit chunk_len==0 guard + binascii.a2b_ascii85(b'@') with self.assertRaises(binascii.Error): - binascii.a2b_ascii85(b'!', canonical=True) + binascii.a2b_ascii85(b'!') # Verify round-trip: encode then decode with canonical=True works for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', diff --git a/Modules/binascii.c b/Modules/binascii.c index 7e2a0b7c173e6a..a4c4ce6b910b36 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1176,8 +1176,20 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4; + + /* A final partial 5-tuple containing only one character is an + * encoding violation per the PLRM spec; reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Ascii85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } @@ -1193,17 +1205,8 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, * * So we zero the bottom (4-chunk_len) bytes of leftchar to * get the canonical uint32 ("canonical_top") and compare - * quotients. A 1-char group (chunk_len==0) is always - * non-canonical since no conforming encoder produces it. */ + * quotients. */ if (canonical && chunk_len < 4) { - if (chunk_len == 0) { - state = get_binascii_state(module); - if (state != NULL) { - PyErr_SetString(state->Error, - "Non-canonical Ascii85 group size"); - } - goto error; - } int n_pad = 4 - chunk_len; uint32_t canonical_top = (leftchar >> (n_pad * 8)) << (n_pad * 8); @@ -1461,22 +1464,26 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4; + + /* A 1-char final group is an encoding violation (no conforming + * encoder produces it); reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Base85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } /* Reject non-canonical encodings in the final group. * See the comment in a2b_ascii85 for the full explanation. */ if (canonical && chunk_len < 4) { - if (chunk_len == 0) { - state = get_binascii_state(module); - if (state != NULL) { - PyErr_SetString(state->Error, - "Non-canonical Base85 group size"); - } - goto error; - } int n_pad = 4 - chunk_len; uint32_t canonical_top = (leftchar >> (n_pad * 8)) << (n_pad * 8); From 2a1d91d3a26cca86af0a53436fc1e725ae506cf3 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 19:31:09 +0000 Subject: [PATCH 11/12] Enforce z-abbreviation for all-zero groups in ascii85 canonical mode When canonical=True, reject '!!!!!' (five zero digits) in favor of the 'z' abbreviation. The PLRM spec defines 'z' as the representation for all-zero groups, so '!!!!!' is a non-canonical encoding. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 16 ++++++++++++ Modules/binascii.c | 54 ++++++++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 181b0b052e73b9..6991e2ef6815e3 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -924,6 +924,21 @@ def test_ascii85_canonical(self): self.assertEqual( binascii.a2b_ascii85(b'@:E_W', canonical=True), b'abcd') + # 'z' is the canonical form for all-zero groups per the PLRM. + # '!!!!!' decodes identically but is non-canonical. + self.assertEqual(binascii.a2b_ascii85(b'!!!!!'), b'\x00' * 4) + self.assertEqual(binascii.a2b_ascii85(b'z'), b'\x00' * 4) + self.assertEqual( + binascii.a2b_ascii85(b'z', canonical=True), b'\x00' * 4) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!!!!!', canonical=True) + # Multiple groups: z + !!!!! should fail + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'z!!!!!', canonical=True) + # Multiple z groups are fine + self.assertEqual( + binascii.a2b_ascii85(b'zz', canonical=True), b'\x00' * 8) + # Empty input is valid self.assertEqual(binascii.a2b_ascii85(b'', canonical=True), b'') @@ -935,6 +950,7 @@ def test_ascii85_canonical(self): @hypothesis.given(payload=hypothesis.strategies.binary()) @hypothesis.example(b'') @hypothesis.example(b'\x00') + @hypothesis.example(b'\x00\x00\x00\x00') # triggers z abbreviation @hypothesis.example(b'\xff\xff') @hypothesis.example(b'abc') def test_ascii85_canonical_roundtrip(self, payload): diff --git a/Modules/binascii.c b/Modules/binascii.c index a4c4ce6b910b36..da91280a355440 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1124,6 +1124,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, uint32_t leftchar = 0; int group_pos = 0; + int from_z = 0; /* true when current group came from 'z' shorthand */ for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { /* Shift (in radix-85) data or padding into our buffer. */ unsigned char this_digit; @@ -1159,6 +1160,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, goto error; } leftchar = this_ch == 'y' ? BASE85_A85_Y : BASE85_A85_Z; + from_z = (this_ch == 'z'); group_pos = 5; } else if (!ignorechar(this_ch, ignorechars, ignorecache)) { @@ -1193,35 +1195,45 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* Reject non-canonical encodings in the final group. - * - * A partial group of N chars (2-4) encodes N-1 bytes. The - * decoder pads missing chars with digit 84 (the maximum). - * The encoder produces the unique N chars for those bytes by - * zero-padding the bytes to a uint32 and taking the leading - * N base-85 digits. Two encodings are equivalent iff they - * yield the same leading digits, i.e. the same quotient when - * the decoded uint32 is divided by 85**(5-N). - * - * So we zero the bottom (4-chunk_len) bytes of leftchar to - * get the canonical uint32 ("canonical_top") and compare - * quotients. */ - if (canonical && chunk_len < 4) { - int n_pad = 4 - chunk_len; - uint32_t canonical_top = - (leftchar >> (n_pad * 8)) << (n_pad * 8); - if (canonical_top / pow85[n_pad] - != leftchar / pow85[n_pad]) - { + if (canonical) { + /* The PLRM spec requires all-zero groups to use the 'z' + * abbreviation. Reject '!!!!!' (five zero digits). */ + if (chunk_len == 4 && leftchar == 0 && !from_z) { state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, - "Non-zero padding bits"); + "Non-canonical encoding, " + "use 'z' for all-zero groups"); } goto error; } + /* Reject non-canonical partial groups. + * + * A partial group of N chars (2-4) encodes N-1 bytes. + * The decoder pads missing chars with digit 84 (the max). + * The encoder produces the unique N chars for those bytes + * by zero-padding the bytes to a uint32 and taking the + * leading N base-85 digits. Two encodings are equivalent + * iff they yield the same quotient when divided by + * 85**(5-N). */ + if (chunk_len < 4) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } } + from_z = 0; group_pos = 0; leftchar = 0; } From dea498279046d7241f4527dab287f8e04d11d607 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 19:35:24 +0000 Subject: [PATCH 12/12] Update docs for base85/ascii85 canonical and 1-char group changes - Document that single-character final groups are always rejected - Add versionchanged:: next markers for the behavioral change - Update canonical= description for ascii85 to mention z-abbreviation - Update canonical= description for base85 Co-Authored-By: Claude Opus 4.6 (1M context) --- Doc/library/base64.rst | 6 ++++++ Doc/library/binascii.rst | 35 +++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 2b22683fe33510..a02fdc1e8a0080 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -345,6 +345,8 @@ Refer to the documentation of the individual functions for more information. .. versionchanged:: next Added the *canonical* parameter. + Single-character final groups are now always rejected as encoding + violations. .. function:: b85encode(b, pad=False, *, wrapcol=0) @@ -381,6 +383,8 @@ Refer to the documentation of the individual functions for more information. .. versionchanged:: next Added the *ignorechars* and *canonical* parameters. + Single-character final groups are now always rejected as encoding + violations. .. function:: z85encode(s, pad=False, *, wrapcol=0) @@ -421,6 +425,8 @@ Refer to the documentation of the individual functions for more information. .. versionchanged:: next Added the *ignorechars* and *canonical* parameters. + Single-character final groups are now always rejected as encoding + violations. .. _base64-legacy: diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 5c7888012e27cf..154ff770f73710 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -119,11 +119,12 @@ The :mod:`!binascii` module defines the following functions: Convert Ascii85 data back to binary and return the binary data. Valid Ascii85 data contains characters from the Ascii85 alphabet in groups - of five (except for the final group, which may have from two to five + of five (except for the final group, which may have from two to four characters). Each group encodes 32 bits of binary data in the range from ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is accepted as a short form of the group ``!!!!!``, which encodes four - consecutive null bytes. + consecutive null bytes. A single-character final group is always rejected + as an encoding violation. *foldspaces* is a flag that specifies whether the 'y' short sequence should be accepted as shorthand for 4 consecutive spaces (ASCII 0x20). @@ -136,15 +137,20 @@ The :mod:`!binascii` module defines the following functions: to ignore from the input. This should only contain whitespace characters. - If *canonical* is true, non-canonical encodings in the final group are - rejected with :exc:`binascii.Error`. This includes single-character - final groups (which no conforming encoder produces) and final groups whose - padding digits are not what the encoder would produce. + If *canonical* is true, non-canonical encodings are rejected with + :exc:`binascii.Error`. This enforces that the ``z`` abbreviation is used + for all-zero groups (rather than ``!!!!!``), and that partial final groups + use the same padding digits the encoder would produce. Invalid Ascii85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 + .. versionchanged:: next + Single-character final groups are now always rejected as encoding + violations. Previously they were silently ignored, producing no output + bytes. + .. function:: b2a_ascii85(data, /, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -175,9 +181,10 @@ The :mod:`!binascii` module defines the following functions: More than one line may be passed at a time. Valid Base85 data contains characters from the Base85 alphabet in groups - of five (except for the final group, which may have from two to five + of five (except for the final group, which may have from two to four characters). Each group encodes 32 bits of binary data in the range from - ``0`` to ``2 ** 32 - 1``, inclusive. + ``0`` to ``2 ** 32 - 1``, inclusive. A single-character final group is + always rejected as an encoding violation. Optional *alphabet* must be a :class:`bytes` object of length 85 which specifies an alternative alphabet. @@ -185,15 +192,19 @@ The :mod:`!binascii` module defines the following functions: *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. - If *canonical* is true, non-canonical encodings in the final group are - rejected with :exc:`binascii.Error`. This includes single-character - final groups (which no conforming encoder produces) and final groups whose - padding digits are not what the encoder would produce. + If *canonical* is true, non-canonical encodings in partial final groups + are rejected with :exc:`binascii.Error`. This enforces that the padding + digits match what the encoder would produce. Invalid Base85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 + .. versionchanged:: next + Single-character final groups are now always rejected as encoding + violations. Previously they were silently ignored, producing no output + bytes. + .. function:: b2a_base85(data, /, *, alphabet=BASE85_ALPHABET, wrapcol=0, pad=False)