diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py index a893932169a089..8be5bf8aa4f546 100644 --- a/Lib/test/audit-tests.py +++ b/Lib/test/audit-tests.py @@ -208,6 +208,16 @@ def rl(name): else: return None + try: + import _remote_debugging + except ImportError: + _remote_debugging = None + + def rd(name): + if _remote_debugging: + return getattr(_remote_debugging, name, None) + return None + # Try a range of "open" functions. # All of them should fail with TestHook(raise_on_events={"open"}) as hook: @@ -225,6 +235,8 @@ def rl(name): (rl("append_history_file"), 0, None), (rl("read_init_file"), testfn), (rl("read_init_file"), None), + (rd("BinaryWriter"), testfn, 1000, 0), + (rd("BinaryReader"), testfn), ]: if not fn: continue @@ -258,6 +270,8 @@ def rl(name): ("~/.history", "a") if rl("append_history_file") else None, (testfn, "r") if readline else None, ("", "r") if readline else None, + (testfn, "wb") if rd("BinaryWriter") else None, + (testfn, "rb") if rd("BinaryReader") else None, ] if i is not None ], diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index 9cf706aa2dafee..1fbb4e2d6c6fbb 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -2,6 +2,7 @@ import json import os +import pathlib import random import struct import tempfile @@ -814,6 +815,35 @@ def test_invalid_file_path(self): with BinaryReader("/nonexistent/path/file.bin") as reader: reader.replay_samples(RawCollector()) + def test_path_arguments_round_trip(self): + """Reader and writer accept str, bytes or os.PathLike.""" + with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: + filename = f.name + self.temp_files.append(filename) + + for path_arg in (filename, os.fsencode(filename), pathlib.Path(filename)): + with self.subTest(path_type=type(path_arg).__name__): + 
writer = _remote_debugging.BinaryWriter(path_arg, 1000, 0) + writer.finalize() + reader = _remote_debugging.BinaryReader(path_arg) + info = reader.get_info() + reader.close() + self.assertEqual(info["sample_count"], 0) + + def test_rejects_non_pathlike(self): + """Reader and writer raise TypeError on non-path-like filenames.""" + with self.assertRaises(TypeError): + _remote_debugging.BinaryWriter(123, 1000, 0) + with self.assertRaises(TypeError): + _remote_debugging.BinaryReader(123) + + def test_invalid_path_error_preserves_pathlib(self): + """Missing path: OSError carries the original path object, not a string.""" + missing = pathlib.Path("/i/do/not/exist") + with self.assertRaises(FileNotFoundError) as cm: + _remote_debugging.BinaryReader(missing) + self.assertEqual(os.fspath(cm.exception.filename), os.fspath(missing)) + def test_writer_handles_empty_stack_first_sample(self): """BinaryWriter.write_sample tolerates an empty stack on a fresh thread. diff --git a/Misc/NEWS.d/next/Security/2026-05-08-02-18-54.gh-issue-149474.ujQ-mu.rst b/Misc/NEWS.d/next/Security/2026-05-08-02-18-54.gh-issue-149474.ujQ-mu.rst new file mode 100644 index 00000000000000..48e718b95ebe3a --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-05-08-02-18-54.gh-issue-149474.ujQ-mu.rst @@ -0,0 +1,3 @@ +Fix the binary writer in :mod:`profiling.sampling` not raising the ``open`` +auditing event (:pep:`578`) when creating the output file. The writer and the +reader now accept any path-like object. Patch by Maurycy Pawłowski-Wieroński. 
diff --git a/Modules/_remote_debugging/binary_io.h b/Modules/_remote_debugging/binary_io.h index 87a54371c774f1..d4188335c0b6d0 100644 --- a/Modules/_remote_debugging/binary_io.h +++ b/Modules/_remote_debugging/binary_io.h @@ -253,7 +253,6 @@ typedef struct { /* Main binary writer structure */ typedef struct { FILE *fp; - char *filename; /* Write buffer for batched I/O */ uint8_t *write_buffer; @@ -311,10 +310,7 @@ typedef struct { /* Main binary reader structure */ typedef struct { - char *filename; - #if USE_MMAP - int fd; uint8_t *mapped_data; size_t mapped_size; #else @@ -522,7 +518,7 @@ grow_array_inplace(void **ptr_addr, size_t count, size_t *capacity, size_t elem_ * Create a new binary writer. * * Arguments: - * filename: Path to output file + * path: Path to output file * sample_interval_us: Sampling interval in microseconds * compression_type: COMPRESSION_NONE or COMPRESSION_ZSTD * start_time_us: Start timestamp in microseconds (from time.monotonic() * 1e6) @@ -531,7 +527,7 @@ grow_array_inplace(void **ptr_addr, size_t count, size_t *capacity, size_t elem_ * New BinaryWriter* on success, NULL on failure (PyErr set) */ BinaryWriter *binary_writer_create( - const char *filename, + PyObject *path, uint64_t sample_interval_us, int compression_type, uint64_t start_time_us @@ -583,12 +579,12 @@ void binary_writer_destroy(BinaryWriter *writer); * Open a binary file for reading. * * Arguments: - * filename: Path to input file + * path: Path to input file * * Returns: * New BinaryReader* on success, NULL on failure (PyErr set) */ -BinaryReader *binary_reader_open(const char *filename); +BinaryReader *binary_reader_open(PyObject *path); /* * Replay samples from binary file through a collector. 
diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index 551530b519952c..972b197cfbad86 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -358,7 +358,7 @@ reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_ } BinaryReader * -binary_reader_open(const char *filename) +binary_reader_open(PyObject *path) { BinaryReader *reader = PyMem_Calloc(1, sizeof(BinaryReader)); if (!reader) { @@ -366,29 +366,18 @@ binary_reader_open(const char *filename) return NULL; } -#if USE_MMAP - reader->fd = -1; /* Explicit initialization for cleanup safety */ -#endif - - reader->filename = PyMem_Malloc(strlen(filename) + 1); - if (!reader->filename) { - PyMem_Free(reader); - PyErr_NoMemory(); - return NULL; - } - strcpy(reader->filename, filename); - #if USE_MMAP /* Open with mmap on Unix */ - reader->fd = open(filename, O_RDONLY); - if (reader->fd < 0) { - PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); + FILE *fp = Py_fopen(path, "rb"); + if (!fp) { goto error; } + int fd = fileno(fp); struct stat st; - if (fstat(reader->fd, &st) < 0) { + if (fstat(fd, &st) < 0) { PyErr_SetFromErrno(PyExc_IOError); + Py_fclose(fp); goto error; } reader->mapped_size = st.st_size; @@ -400,14 +389,15 @@ binary_reader_open(const char *filename) */ #ifdef __linux__ reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ, - MAP_PRIVATE | MAP_POPULATE, reader->fd, 0); + MAP_PRIVATE | MAP_POPULATE, fd, 0); #else reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ, - MAP_PRIVATE, reader->fd, 0); + MAP_PRIVATE, fd, 0); #endif if (reader->mapped_data == MAP_FAILED) { reader->mapped_data = NULL; PyErr_SetFromErrno(PyExc_IOError); + Py_fclose(fp); goto error; } @@ -428,19 +418,20 @@ binary_reader_open(const char *filename) /* Add file descriptor-level hints for better kernel I/O scheduling */ #if defined(__linux__) && 
defined(POSIX_FADV_SEQUENTIAL) - (void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_SEQUENTIAL); + (void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); if (reader->mapped_size > (64 * 1024 * 1024)) { - (void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_WILLNEED); + (void)posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED); } #endif + (void)Py_fclose(fp); + uint8_t *data = reader->mapped_data; size_t file_size = reader->mapped_size; #else /* Use stdio on Windows */ - reader->fp = fopen(filename, "rb"); + reader->fp = Py_fopen(path, "rb"); if (!reader->fp) { - PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); goto error; } @@ -1263,8 +1254,6 @@ binary_reader_close(BinaryReader *reader) return; } - PyMem_Free(reader->filename); - #if USE_MMAP if (reader->mapped_data) { munmap(reader->mapped_data, reader->mapped_size); @@ -1274,13 +1263,9 @@ binary_reader_close(BinaryReader *reader) /* Clear sample_data which may point into the now-unmapped region */ reader->sample_data = NULL; reader->sample_data_size = 0; - if (reader->fd >= 0) { - close(reader->fd); - reader->fd = -1; /* Mark as closed */ - } #else if (reader->fp) { - fclose(reader->fp); + Py_fclose(reader->fp); reader->fp = NULL; } if (reader->file_data) { diff --git a/Modules/_remote_debugging/binary_io_writer.c b/Modules/_remote_debugging/binary_io_writer.c index 4cfed7300ac5ab..c31ed7d746466f 100644 --- a/Modules/_remote_debugging/binary_io_writer.c +++ b/Modules/_remote_debugging/binary_io_writer.c @@ -717,7 +717,7 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry, } BinaryWriter * -binary_writer_create(const char *filename, uint64_t sample_interval_us, int compression_type, +binary_writer_create(PyObject *path, uint64_t sample_interval_us, int compression_type, uint64_t start_time_us) { BinaryWriter *writer = PyMem_Calloc(1, sizeof(BinaryWriter)); @@ -726,14 +726,6 @@ binary_writer_create(const char *filename, uint64_t sample_interval_us, int comp return NULL; } - writer->filename = 
PyMem_Malloc(strlen(filename) + 1); - if (!writer->filename) { - PyMem_Free(writer); - PyErr_NoMemory(); - return NULL; - } - strcpy(writer->filename, filename); - writer->start_time_us = start_time_us; writer->sample_interval_us = sample_interval_us; writer->compression_type = compression_type; @@ -799,9 +791,8 @@ binary_writer_create(const char *filename, uint64_t sample_interval_us, int comp } } - writer->fp = fopen(filename, "wb"); + writer->fp = Py_fopen(path, "wb"); if (!writer->fp) { - PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); goto error; } @@ -1193,7 +1184,7 @@ binary_writer_finalize(BinaryWriter *writer) return -1; } - if (fclose(writer->fp) != 0) { + if (Py_fclose(writer->fp) != 0) { writer->fp = NULL; PyErr_SetFromErrno(PyExc_IOError); return -1; @@ -1211,10 +1202,9 @@ binary_writer_destroy(BinaryWriter *writer) } if (writer->fp) { - fclose(writer->fp); + Py_fclose(writer->fp); } - PyMem_Free(writer->filename); PyMem_Free(writer->write_buffer); #ifdef HAVE_ZSTD diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h index 1133db808efaec..d56622fb82ab56 100644 --- a/Modules/_remote_debugging/clinic/module.c.h +++ b/Modules/_remote_debugging/clinic/module.c.h @@ -688,7 +688,7 @@ PyDoc_STRVAR(_remote_debugging_BinaryWriter___init____doc__, static int _remote_debugging_BinaryWriter___init___impl(BinaryWriterObject *self, - const char *filename, + PyObject *filename, unsigned long long sample_interval_us, unsigned long long start_time_us, int compression); @@ -728,7 +728,7 @@ _remote_debugging_BinaryWriter___init__(PyObject *self, PyObject *args, PyObject PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 3; - const char *filename; + PyObject *filename; unsigned long long sample_interval_us; unsigned long long start_time_us; int compression = 0; @@ -738,19 +738,7 @@ _remote_debugging_BinaryWriter___init__(PyObject *self, PyObject *args, PyObject if (!fastargs) { goto exit; } - if (!PyUnicode_Check(fastargs[0])) { - _PyArg_BadArgument("BinaryWriter", "argument 'filename'", "str", fastargs[0]); - goto exit; - } - Py_ssize_t filename_length; - filename = PyUnicode_AsUTF8AndSize(fastargs[0], &filename_length); - if (filename == NULL) { - goto exit; - } - if (strlen(filename) != (size_t)filename_length) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - goto exit; - } + filename = fastargs[0]; if (!_PyLong_UnsignedLongLong_Converter(fastargs[1], &sample_interval_us)) { goto exit; } @@ -1009,7 +997,7 @@ PyDoc_STRVAR(_remote_debugging_BinaryReader___init____doc__, static int _remote_debugging_BinaryReader___init___impl(BinaryReaderObject *self, - const char *filename); + PyObject *filename); static int _remote_debugging_BinaryReader___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -1045,26 +1033,14 @@ _remote_debugging_BinaryReader___init__(PyObject *self, PyObject *args, PyObject PyObject *argsbuf[1]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); - const char *filename; + PyObject *filename; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (!PyUnicode_Check(fastargs[0])) { - _PyArg_BadArgument("BinaryReader", "argument 'filename'", "str", fastargs[0]); - goto exit; - } - Py_ssize_t filename_length; - filename = PyUnicode_AsUTF8AndSize(fastargs[0], &filename_length); - if (filename == NULL) { - goto exit; - } - if (strlen(filename) != (size_t)filename_length) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - goto exit; - } + 
filename = fastargs[0]; return_value = _remote_debugging_BinaryReader___init___impl((BinaryReaderObject *)self, filename); exit: @@ -1564,4 +1540,4 @@ _remote_debugging_get_gc_stats(PyObject *module, PyObject *const *args, Py_ssize exit: return return_value; } -/*[clinic end generated code: output=36674f4cb8a653f3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5e2a29746a0c5d65 input=a9049054013a1b77]*/ diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index 172f8711a2a2a0..efdd2e1a2d7b7a 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -1476,7 +1476,7 @@ class _remote_debugging.BinaryWriter "BinaryWriterObject *" "&PyBinaryWriter_Typ /*[clinic input] @permit_long_docstring_body _remote_debugging.BinaryWriter.__init__ - filename: str + filename: object sample_interval_us: unsigned_long_long start_time_us: unsigned_long_long * @@ -1495,11 +1495,11 @@ Use as a context manager or call finalize() when done. static int _remote_debugging_BinaryWriter___init___impl(BinaryWriterObject *self, - const char *filename, + PyObject *filename, unsigned long long sample_interval_us, unsigned long long start_time_us, int compression) -/*[clinic end generated code: output=014c0306f1bacf4b input=3bdf01c1cc2f5a1d]*/ +/*[clinic end generated code: output=00446656ea2e5986 input=b92f0c77ba4cd274]*/ { if (self->writer) { binary_writer_destroy(self->writer); @@ -1742,7 +1742,7 @@ class _remote_debugging.BinaryReader "BinaryReaderObject *" "&PyBinaryReader_Typ /*[clinic input] _remote_debugging.BinaryReader.__init__ - filename: str + filename: object High-performance binary reader for profiling data. @@ -1754,8 +1754,8 @@ Use as a context manager or call close() when done. 
static int _remote_debugging_BinaryReader___init___impl(BinaryReaderObject *self, - const char *filename) -/*[clinic end generated code: output=9699226f7ae052bb input=4201f9cc500ef2f6]*/ + PyObject *filename) +/*[clinic end generated code: output=f04b33ee5c5e6dbf input=9d7cbe8b4f1a97c9]*/ { if (self->reader) { binary_reader_close(self->reader);