diff --git a/Doc/library/array.rst b/Doc/library/array.rst index ca7c055285aa82..603081bcd19716 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -22,9 +22,7 @@ defined: +-----------+--------------------+-------------------+-----------------------+-------+ | ``'B'`` | unsigned char | int | 1 | | +-----------+--------------------+-------------------+-----------------------+-------+ -| ``'u'`` | wchar_t | Unicode character | 2 | \(1) | -+-----------+--------------------+-------------------+-----------------------+-------+ -| ``'w'`` | Py_UCS4 | Unicode character | 4 | \(2) | +| ``'w'`` | Py_UCS4 | Unicode character | 4 | \(1) | +-----------+--------------------+-------------------+-----------------------+-------+ | ``'h'`` | signed short | int | 2 | | +-----------+--------------------+-------------------+-----------------------+-------+ @@ -42,35 +40,24 @@ defined: +-----------+--------------------+-------------------+-----------------------+-------+ | ``'Q'`` | unsigned long long | int | 8 | | +-----------+--------------------+-------------------+-----------------------+-------+ -| ``'e'`` | _Float16 | float | 2 | \(3) | +| ``'e'`` | _Float16 | float | 2 | \(2) | +-----------+--------------------+-------------------+-----------------------+-------+ | ``'f'`` | float | float | 4 | | +-----------+--------------------+-------------------+-----------------------+-------+ | ``'d'`` | double | float | 8 | | +-----------+--------------------+-------------------+-----------------------+-------+ -| ``'Zf'`` | float complex | complex | 8 | \(4) | +| ``'Zf'`` | float complex | complex | 8 | \(3) | +-----------+--------------------+-------------------+-----------------------+-------+ -| ``'Zd'`` | double complex | complex | 16 | \(4) | +| ``'Zd'`` | double complex | complex | 16 | \(3) | +-----------+--------------------+-------------------+-----------------------+-------+ Notes: (1) - It can be 16 bits or 32 bits depending on the platform. - - .. 
versionchanged:: 3.9 - ``array('u')`` now uses :c:type:`wchar_t` as C type instead of deprecated - ``Py_UNICODE``. This change doesn't affect its behavior because - ``Py_UNICODE`` is alias of :c:type:`wchar_t` since Python 3.3. - - .. deprecated-removed:: 3.3 3.16 - Please migrate to ``'w'`` typecode. - -(2) .. versionadded:: 3.13 -(3) +(2) The IEEE 754 binary16 "half precision" type was introduced in the 2008 revision of the `IEEE 754 standard `_. This type is not widely supported by C compilers. It's available @@ -79,7 +66,7 @@ Notes: .. versionadded:: 3.15 -(4) +(3) Complex types (``Zf`` and ``Zd``) are available unconditionally, regardless on support for complex types (the Annex G of the C11 standard) by the C compiler. @@ -220,7 +207,7 @@ The module defines the following type: .. method:: fromunicode(ustr, /) Extends this array with data from the given Unicode string. - The array must have type code ``'u'`` or ``'w'``; otherwise a :exc:`ValueError` is raised. + The array must have type code ``'w'``; otherwise a :exc:`ValueError` is raised. Use ``array.frombytes(unicodestring.encode(enc))`` to append Unicode data to an array of some other type. @@ -288,7 +275,7 @@ The module defines the following type: .. method:: tounicode() - Convert the array to a Unicode string. The array must have a type ``'u'`` or ``'w'``; + Convert the array to a Unicode string. The array must have a type ``'w'``; otherwise a :exc:`ValueError` is raised. Use ``array.tobytes().decode(enc)`` to obtain a Unicode string from an array of some other type. @@ -296,7 +283,7 @@ The module defines the following type: The string representation of array objects has the form ``array(typecode, initializer)``. The *initializer* is omitted if the array is empty, otherwise it is -a Unicode string if the *typecode* is ``'u'`` or ``'w'``, otherwise it is +a Unicode string if the *typecode* is ``'w'``, otherwise it is a list of numbers. 
The string representation is guaranteed to be able to be converted back to an array with the same type and value using :func:`eval`, so long as the diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst index 98a8644884a8d7..59639481f8ad1a 100644 --- a/Doc/whatsnew/3.16.rst +++ b/Doc/whatsnew/3.16.rst @@ -114,6 +114,12 @@ annotationlib Use :meth:`annotationlib.ForwardRef.evaluate` or :func:`typing.evaluate_forward_ref` instead. +array +----- + +* Remove the ``'u'`` type code (:c:type:`wchar_t`), deprecated in documentation + since Python 3.3 and at runtime since Python 3.13. Use ``'w'`` instead. + sysconfig --------- diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index f7fa56a6e4bfa0..cff28e97df0339 100755 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -14,15 +14,10 @@ import operator import struct import sys -import warnings import array from array import _array_reconstructor as array_reconstructor -with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - sizeof_wchar = array.array('u').itemsize - class ArraySubclass(array.array): pass @@ -32,7 +27,7 @@ def __init__(self, typecode, newarg=None): array.array.__init__(self) typecodes = ( - 'u', 'w', 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', + 'w', 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd', 'q', 'Q', 'e', 'Zf', 'Zd') @@ -135,14 +130,6 @@ def test_typecodes(self): class ArrayReconstructorTest(unittest.TestCase): - def setUp(self): - self.enterContext(warnings.catch_warnings()) - warnings.filterwarnings( - "ignore", - message="The 'u' type code is deprecated and " - "will be removed in Python 3.16", - category=DeprecationWarning) - def test_error(self): self.assertRaises(TypeError, array_reconstructor, "", "b", 0, b"") @@ -242,12 +229,11 @@ def test_unicode(self): ) for testcase in testcases: mformat_code, encoding = testcase - for c in 'uw': - a = array.array(c, teststr) - b = array_reconstructor( - array.array, c, mformat_code, teststr.encode(encoding)) - 
self.assertEqual(a, b, - msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase)) + a = array.array('w', teststr) + b = array_reconstructor( + array.array, 'w', mformat_code, teststr.encode(encoding)) + self.assertEqual(a, b, + msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase)) class BaseTest: @@ -259,14 +245,6 @@ class BaseTest: # outside: An entry that is not in example # minitemsize: the minimum guaranteed itemsize - def setUp(self): - self.enterContext(warnings.catch_warnings()) - warnings.filterwarnings( - "ignore", - message="The 'u' type code is deprecated and " - "will be removed in Python 3.16", - category=DeprecationWarning) - def assertEntryEqual(self, entry1, entry2): self.assertEqual(entry1, entry2) @@ -299,7 +277,7 @@ def test_buffer_info(self): self.assertEqual(bi[1], len(a)) def test_byteswap(self): - if self.typecode in ('u', 'w'): + if self.typecode == 'w': example = '\U00100100' else: example = self.example @@ -1167,7 +1145,7 @@ def test_buffer(self): self.assertEqual(m.tobytes(), expected) self.assertRaises(BufferError, a.frombytes, a.tobytes()) self.assertEqual(m.tobytes(), expected) - if self.typecode in ('u', 'w'): + if self.typecode == 'w': self.assertRaises(BufferError, a.fromunicode, a.tounicode()) self.assertEqual(m.tobytes(), expected) self.assertRaises(BufferError, operator.imul, a, 2) @@ -1223,7 +1201,7 @@ def test_sizeof_without_buffer(self): support.check_sizeof(self, a, basesize) def test_initialize_with_unicode(self): - if self.typecode not in ('u', 'w'): + if self.typecode != 'w': with self.assertRaises(TypeError) as cm: a = array.array(self.typecode, 'foo') self.assertIn("cannot use a str", str(cm.exception)) @@ -1232,7 +1210,6 @@ def test_initialize_with_unicode(self): self.assertIn("cannot use a unicode array", str(cm.exception)) else: a = array.array(self.typecode, "foo") - a = array.array(self.typecode, array.array('u', 'foo')) a = array.array(self.typecode, array.array('w', 'foo')) @support.cpython_only @@ 
-1258,12 +1235,12 @@ def test_setitem(self): self.assertRaises(TypeError, a.__setitem__, 0, self.example[:2]) class UnicodeTest(StringTest, unittest.TestCase): - typecode = 'u' + typecode = 'w' example = '\x01\u263a\x00\ufeff' smallerexample = '\x01\u263a\x00\ufefe' biggerexample = '\x01\u263a\x01\ufeff' outside = str('\x33') - minitemsize = sizeof_wchar + minitemsize = 4 def test_unicode(self): self.assertRaises(TypeError, array.array, 'b', 'foo') @@ -1285,36 +1262,6 @@ def test_unicode(self): self.assertRaises(TypeError, a.fromunicode) - def test_issue17223(self): - if self.typecode == 'u' and sizeof_wchar == 2: - # PyUnicode_FromUnicode() cannot fail with 16-bit wchar_t - self.skipTest("specific to 32-bit wchar_t") - - # this used to crash - # U+FFFFFFFF is an invalid code point in Unicode 6.0 - invalid_str = b'\xff\xff\xff\xff' - - a = array.array(self.typecode, invalid_str) - self.assertRaises(ValueError, a.tounicode) - self.assertRaises(ValueError, str, a) - - def test_typecode_u_deprecation(self): - with self.assertWarns(DeprecationWarning): - array.array("u") - - def test_empty_string_mem_leak_gh140474(self): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - for _ in range(1000): - a = array.array('u', '') - self.assertEqual(len(a), 0) - self.assertEqual(a.typecode, 'u') - - -class UCS4Test(UnicodeTest): - typecode = 'w' - minitemsize = 4 - class NumberTest(BaseTest): diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py index 7454c8a15391e9..3213a475127343 100644 --- a/Lib/test/test_buffer.py +++ b/Lib/test/test_buffer.py @@ -24,7 +24,6 @@ import sys, array, io, os from decimal import Decimal from fractions import Fraction -from test.support import warnings_helper try: from _testbuffer import * @@ -3261,15 +3260,6 @@ class BEPoint(ctypes.BigEndianStructure): self.assertNotEqual(point, a) self.assertRaises(NotImplementedError, a.tolist) - @warnings_helper.ignore_warnings(category=DeprecationWarning) # 
gh-80480 array('u') - def test_memoryview_compare_special_cases_deprecated_u_type_code(self): - - # Depends on issue #15625: the struct module does not understand 'u'. - a = array.array('u', 'xyz') - v = memoryview(a) - self.assertNotEqual(a, v) - self.assertNotEqual(v, a) - def test_memoryview_compare_ndim_zero(self): nd1 = ndarray(1729, shape=[], format='@L') diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 5e19307b815a1b..701f793b75c7fe 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,7 +1,7 @@ from test.support import (gc_collect, bigmemtest, _2G, cpython_only, captured_stdout, check_disallow_instantiation, linked_to_musl, - warnings_helper, SHORT_TIMEOUT, Stopwatch, requires_resource) + SHORT_TIMEOUT, Stopwatch, requires_resource) import locale import re import string @@ -1780,11 +1780,10 @@ def test_bug_6561(self): for x in not_decimal_digits: self.assertIsNone(re.match(r'^\d$', x)) - @warnings_helper.ignore_warnings(category=DeprecationWarning) # gh-80480 array('u') def test_empty_array(self): # SF buf 1647541 import array - for typecode in 'bBhuwHiIlLfd': + for typecode in 'bBhwHiIlLfd': a = array.array(typecode) self.assertIsNone(re.compile(b"bla").match(a)) self.assertEqual(re.compile(b"").match(a).groups(), ()) diff --git a/Misc/NEWS.d/next/Library/2026-05-08-07-51-54.gh-issue-80480.nnA4p1.rst b/Misc/NEWS.d/next/Library/2026-05-08-07-51-54.gh-issue-80480.nnA4p1.rst new file mode 100644 index 00000000000000..837131fa0db992 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-08-07-51-54.gh-issue-80480.nnA4p1.rst @@ -0,0 +1,2 @@ +Remove deprecated ``'u'`` type code (:c:type:`wchar_t`) for the :mod:`array` +module. 
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 472c59ea8c9882..4e7a927112fab1 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -292,49 +292,6 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) return 0; } -static PyObject * -u_getitem(arrayobject *ap, Py_ssize_t i) -{ - return PyUnicode_FromOrdinal(((wchar_t *) ap->ob_item)[i]); -} - -static int -u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) -{ - if (!PyUnicode_Check(v)) { - PyErr_Format(PyExc_TypeError, - "array item must be a unicode character, not %T", - v); - return -1; - } - - Py_ssize_t len = PyUnicode_AsWideChar(v, NULL, 0); - if (len != 2) { - if (PyUnicode_GET_LENGTH(v) != 1) { - PyErr_Format(PyExc_TypeError, - "array item must be a unicode character, " - "not a string of length %zd", - PyUnicode_GET_LENGTH(v)); - } - else { - PyErr_Format(PyExc_TypeError, - "string %A cannot be converted to " - "a single wchar_t character", - v); - } - return -1; - } - - wchar_t w; - len = PyUnicode_AsWideChar(v, &w, 1); - assert(len == 1); - - if (i >= 0) { - ((wchar_t *)ap->ob_item)[i] = w; - } - return 0; -} - static PyObject * w_getitem(arrayobject *ap, Py_ssize_t i) { @@ -750,7 +707,6 @@ cd_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) DEFINE_COMPAREITEMS(b, signed char) DEFINE_COMPAREITEMS(BB, unsigned char) -DEFINE_COMPAREITEMS(u, wchar_t) DEFINE_COMPAREITEMS(w, Py_UCS4) DEFINE_COMPAREITEMS(h, short) DEFINE_COMPAREITEMS(HH, unsigned short) @@ -769,7 +725,6 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long) static const struct arraydescr descriptors[] = { {"b", 1, b_getitem, b_setitem, b_compareitems, 1, 1}, {"B", 1, BB_getitem, BB_setitem, BB_compareitems, 1, 0}, - {"u", sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, 0, 0}, {"w", sizeof(Py_UCS4), w_getitem, w_setitem, w_compareitems, 0, 0,}, {"h", sizeof(short), h_getitem, h_setitem, h_compareitems, 1, 1}, {"H", sizeof(short), HH_getitem, HH_setitem, HH_compareitems, 1, 0}, @@ -1985,47 +1940,30 @@ 
array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) /*[clinic end generated code: output=24359f5e001a7f2b input=158d47c302f27ca1]*/ { const char *typecode = self->ob_descr->typecode; - if (strcmp(typecode, "u") != 0 && strcmp(typecode, "w") != 0) { + if (strcmp(typecode, "w") != 0) { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " - "unicode type arrays ('u' or 'w')"); + "unicode type ('w') arrays"); return NULL; } - if (strcmp(typecode, "u") == 0) { - Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0); - assert(ustr_length > 0); - if (ustr_length > 1) { - ustr_length--; /* trim trailing NUL character */ - Py_ssize_t old_size = Py_SIZE(self); - if (array_resize(self, old_size + ustr_length) == -1) { - return NULL; - } + Py_ssize_t ustr_length = PyUnicode_GetLength(ustr); + Py_ssize_t old_size = Py_SIZE(self); + Py_ssize_t new_size = old_size + ustr_length; - // must not fail - PyUnicode_AsWideChar( - ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); - } + if (new_size < 0 || (size_t)new_size > PY_SSIZE_T_MAX / sizeof(Py_UCS4)) { + return PyErr_NoMemory(); } - else { // typecode == "w" - Py_ssize_t ustr_length = PyUnicode_GetLength(ustr); - Py_ssize_t old_size = Py_SIZE(self); - Py_ssize_t new_size = old_size + ustr_length; - - if (new_size < 0 || (size_t)new_size > PY_SSIZE_T_MAX / sizeof(Py_UCS4)) { - return PyErr_NoMemory(); - } - if (array_resize(self, new_size) == -1) { - return NULL; - } - - // must not fail - Py_UCS4 *u = PyUnicode_AsUCS4(ustr, ((Py_UCS4*)self->ob_item) + old_size, - ustr_length, 0); - assert(u != NULL); - (void)u; // Suppress unused_variable warning. + if (array_resize(self, new_size) == -1) { + return NULL; } + // must not fail + Py_UCS4 *u = PyUnicode_AsUCS4(ustr, ((Py_UCS4*)self->ob_item) + old_size, + ustr_length, 0); + assert(u != NULL); + (void)u; // Suppress unused_variable warning. 
+ Py_RETURN_NONE; } @@ -2045,19 +1983,14 @@ array_array_tounicode_impl(arrayobject *self) /*[clinic end generated code: output=08e442378336e1ef input=6690997213d219db]*/ { const char *typecode = self->ob_descr->typecode; - if (strcmp(typecode, "u") != 0 && strcmp(typecode, "w") != 0) { + if (strcmp(typecode, "w") != 0) { PyErr_SetString(PyExc_ValueError, - "tounicode() may only be called on unicode type arrays ('u' or 'w')"); + "tounicode() may only be called on unicode type ('w') arrays"); return NULL; } - if (strcmp(typecode, "u") == 0) { - return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self)); - } - else { // typecode == "w" - int byteorder = 0; // native byteorder - return PyUnicode_DecodeUTF32((const char *) self->ob_item, Py_SIZE(self) * 4, - NULL, &byteorder); - } + int byteorder = 0; // native byteorder + return PyUnicode_DecodeUTF32((const char *) self->ob_item, Py_SIZE(self) * 4, + NULL, &byteorder); } /*[clinic input] @@ -2133,15 +2066,6 @@ typecode_to_mformat_code(const char *typecode) case 'B': return UNSIGNED_INT8; - case 'u': - if (sizeof(wchar_t) == 2) { - return UTF16_LE + is_big_endian; - } - if (sizeof(wchar_t) == 4) { - return UTF32_LE + is_big_endian; - } - return UNKNOWN_FORMAT; - case 'w': return UTF32_LE + is_big_endian; @@ -2696,7 +2620,7 @@ array_repr(PyObject *op) return PyUnicode_FromFormat("%s('%s')", _PyType_Name(Py_TYPE(a)), typecode); } - if (strcmp(typecode, "u") == 0 || strcmp(typecode, "w") == 0) { + if (strcmp(typecode, "w") == 0) { v = array_array_tounicode_impl(a); } else { v = array_array_tolist_impl(a); @@ -2966,9 +2890,6 @@ array_buffer_getbuf(PyObject *op, Py_buffer *view, int flags) view->internal = NULL; if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { view->format = (char *)self->ob_descr->typecode; - if (sizeof(wchar_t) >= 4 && strcmp(self->ob_descr->typecode, "u") == 0) { - view->format = "w"; - } } self->ob_exports++; @@ -3003,16 +2924,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 
return NULL; } - if (strcmp(s, "u") == 0) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "The 'u' type code is deprecated and " - "will be removed in Python 3.16", - 1)) { - return NULL; - } - } - - bool is_unicode = (strcmp(s, "u") == 0 || strcmp(s, "w") == 0); + bool is_unicode = (strcmp(s, "w") == 0); if (initial && !is_unicode) { if (PyUnicode_Check(initial)) { @@ -3022,7 +2934,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } else if (array_Check(initial, state)) { const char *is = ((arrayobject*)initial)->ob_descr->typecode; - if (strcmp(is, "u") == 0 || strcmp(is, "w") == 0) { + if (strcmp(is, "w") == 0) { PyErr_Format(PyExc_TypeError, "cannot use a unicode array to " "initialize an array with typecode '%s'", s); return NULL; @@ -3098,43 +3010,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(v); } else if (initial != NULL && PyUnicode_Check(initial)) { - if (strcmp(s, "u") == 0) { - Py_ssize_t n; - wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n); - if (ustr == NULL) { - Py_DECREF(a); - Py_XDECREF(it); - return NULL; - } - - if (n > 0) { - arrayobject *self = (arrayobject *)a; - // self->ob_item may be NULL but it is safe. - PyMem_Free(self->ob_item); - self->ob_item = (char *)ustr; - Py_SET_SIZE(self, n); - self->allocated = n; - } - else { - PyMem_Free(ustr); - } + Py_ssize_t n = PyUnicode_GET_LENGTH(initial); + Py_UCS4 *ustr = PyUnicode_AsUCS4Copy(initial); + if (ustr == NULL) { + Py_DECREF(a); + Py_XDECREF(it); + return NULL; } - else { // s == "w" - Py_ssize_t n = PyUnicode_GET_LENGTH(initial); - Py_UCS4 *ustr = PyUnicode_AsUCS4Copy(initial); - if (ustr == NULL) { - Py_DECREF(a); - Py_XDECREF(it); - return NULL; - } - arrayobject *self = (arrayobject *)a; - // self->ob_item may be NULL but it is safe. - PyMem_Free(self->ob_item); - self->ob_item = (char *)ustr; - Py_SET_SIZE(self, n); - self->allocated = n; - } + arrayobject *self = (arrayobject *)a; + // self->ob_item may be NULL but it is safe. 
+ PyMem_Free(self->ob_item); + self->ob_item = (char *)ustr; + Py_SET_SIZE(self, n); + self->allocated = n; } else if (initial != NULL && array_Check(initial, state) && len > 0) { arrayobject *self = (arrayobject *)a; @@ -3180,7 +3069,6 @@ The following type codes are defined:\n\ Type code C Type Minimum size in bytes\n\ 'b' signed integer 1\n\ 'B' unsigned integer 1\n\ - 'u' Unicode character 2 (see note)\n\ 'h' signed integer 2\n\ 'H' unsigned integer 2\n\ 'i' signed integer 2\n\ @@ -3195,9 +3083,6 @@ The following type codes are defined:\n\ 'Zf' float complex 8\n\ 'Zd' double complex 16\n\ \n\ -NOTE: The 'u' typecode corresponds to Python's unicode character. On\n\ -narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\ -\n\ NOTE: The 'q' and 'Q' type codes are only available if the platform\n\ C compiler used to build Python supports 'long long', or, on Windows,\n\ '__int64'.\n\