-
-
Notifications
You must be signed in to change notification settings - Fork 34.6k
gh-149474: use Py_fopen in Binary{Reader,Writer} for audit hook and path-like support
#149524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| Fix the binary writer in :mod:`profiling.sampling` not firing the audit | ||
| (:pep:`578`) when creating the output file. The writer and the reader now | ||
| accept any path-like object. Patch by Maurycy Pawłowski-Wieroński. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -358,37 +358,26 @@ reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_ | |
| } | ||
|
|
||
| BinaryReader * | ||
| binary_reader_open(const char *filename) | ||
| binary_reader_open(PyObject *path) | ||
| { | ||
| BinaryReader *reader = PyMem_Calloc(1, sizeof(BinaryReader)); | ||
| if (!reader) { | ||
| PyErr_NoMemory(); | ||
| return NULL; | ||
| } | ||
|
|
||
| #if USE_MMAP | ||
| reader->fd = -1; /* Explicit initialization for cleanup safety */ | ||
| #endif | ||
|
|
||
| reader->filename = PyMem_Malloc(strlen(filename) + 1); | ||
| if (!reader->filename) { | ||
| PyMem_Free(reader); | ||
| PyErr_NoMemory(); | ||
| return NULL; | ||
| } | ||
| strcpy(reader->filename, filename); | ||
|
|
||
| #if USE_MMAP | ||
| /* Open with mmap on Unix */ | ||
| reader->fd = open(filename, O_RDONLY); | ||
| if (reader->fd < 0) { | ||
| PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); | ||
| FILE *fp = Py_fopen(path, "rb"); | ||
| if (!fp) { | ||
| goto error; | ||
| } | ||
| int fd = fileno(fp); | ||
|
|
||
| struct stat st; | ||
| if (fstat(reader->fd, &st) < 0) { | ||
| if (fstat(fd, &st) < 0) { | ||
| PyErr_SetFromErrno(PyExc_IOError); | ||
| Py_fclose(fp); | ||
| goto error; | ||
| } | ||
| reader->mapped_size = st.st_size; | ||
|
|
@@ -400,14 +389,15 @@ binary_reader_open(const char *filename) | |
| */ | ||
| #ifdef __linux__ | ||
| reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ, | ||
| MAP_PRIVATE | MAP_POPULATE, reader->fd, 0); | ||
| MAP_PRIVATE | MAP_POPULATE, fd, 0); | ||
| #else | ||
| reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ, | ||
| MAP_PRIVATE, reader->fd, 0); | ||
| MAP_PRIVATE, fd, 0); | ||
| #endif | ||
| if (reader->mapped_data == MAP_FAILED) { | ||
| reader->mapped_data = NULL; | ||
| PyErr_SetFromErrno(PyExc_IOError); | ||
| Py_fclose(fp); | ||
| goto error; | ||
| } | ||
|
|
||
|
|
@@ -428,19 +418,20 @@ binary_reader_open(const char *filename) | |
|
|
||
| /* Add file descriptor-level hints for better kernel I/O scheduling */ | ||
| #if defined(__linux__) && defined(POSIX_FADV_SEQUENTIAL) | ||
| (void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_SEQUENTIAL); | ||
| (void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question: Do we need to do this here?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don’t think we need it here. These calls are best-effort performance hints and intentionally ignore all
|
||
| if (reader->mapped_size > (64 * 1024 * 1024)) { | ||
| (void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_WILLNEED); | ||
| (void)posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED); | ||
| } | ||
| #endif | ||
|
|
||
| (void)Py_fclose(fp); | ||
|
|
||
| uint8_t *data = reader->mapped_data; | ||
| size_t file_size = reader->mapped_size; | ||
| #else | ||
| /* Use stdio on Windows */ | ||
| reader->fp = fopen(filename, "rb"); | ||
| reader->fp = Py_fopen(path, "rb"); | ||
| if (!reader->fp) { | ||
| PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); | ||
| goto error; | ||
| } | ||
|
|
||
|
|
@@ -1263,8 +1254,6 @@ binary_reader_close(BinaryReader *reader) | |
| return; | ||
| } | ||
|
|
||
| PyMem_Free(reader->filename); | ||
|
|
||
| #if USE_MMAP | ||
| if (reader->mapped_data) { | ||
| munmap(reader->mapped_data, reader->mapped_size); | ||
|
|
@@ -1274,13 +1263,9 @@ binary_reader_close(BinaryReader *reader) | |
| /* Clear sample_data which may point into the now-unmapped region */ | ||
| reader->sample_data = NULL; | ||
| reader->sample_data_size = 0; | ||
| if (reader->fd >= 0) { | ||
| close(reader->fd); | ||
| reader->fd = -1; /* Mark as closed */ | ||
| } | ||
| #else | ||
| if (reader->fp) { | ||
| fclose(reader->fp); | ||
| Py_fclose(reader->fp); | ||
| reader->fp = NULL; | ||
| } | ||
| if (reader->file_data) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -717,7 +717,7 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry, | |
| } | ||
|
|
||
| BinaryWriter * | ||
| binary_writer_create(const char *filename, uint64_t sample_interval_us, int compression_type, | ||
| binary_writer_create(PyObject *path, uint64_t sample_interval_us, int compression_type, | ||
| uint64_t start_time_us) | ||
| { | ||
| BinaryWriter *writer = PyMem_Calloc(1, sizeof(BinaryWriter)); | ||
|
|
@@ -726,14 +726,6 @@ binary_writer_create(const char *filename, uint64_t sample_interval_us, int comp | |
| return NULL; | ||
| } | ||
|
|
||
| writer->filename = PyMem_Malloc(strlen(filename) + 1); | ||
|
pablogsal marked this conversation as resolved.
|
||
| if (!writer->filename) { | ||
| PyMem_Free(writer); | ||
| PyErr_NoMemory(); | ||
| return NULL; | ||
| } | ||
| strcpy(writer->filename, filename); | ||
|
|
||
| writer->start_time_us = start_time_us; | ||
| writer->sample_interval_us = sample_interval_us; | ||
| writer->compression_type = compression_type; | ||
|
|
@@ -799,9 +791,8 @@ binary_writer_create(const char *filename, uint64_t sample_interval_us, int comp | |
| } | ||
| } | ||
|
|
||
| writer->fp = fopen(filename, "wb"); | ||
| writer->fp = Py_fopen(path, "wb"); | ||
| if (!writer->fp) { | ||
| PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); | ||
| goto error; | ||
| } | ||
|
|
||
|
|
@@ -1193,7 +1184,7 @@ binary_writer_finalize(BinaryWriter *writer) | |
| return -1; | ||
| } | ||
|
|
||
| if (fclose(writer->fp) != 0) { | ||
| if (Py_fclose(writer->fp) != 0) { | ||
| writer->fp = NULL; | ||
| PyErr_SetFromErrno(PyExc_IOError); | ||
| return -1; | ||
|
|
@@ -1211,10 +1202,9 @@ binary_writer_destroy(BinaryWriter *writer) | |
| } | ||
|
|
||
| if (writer->fp) { | ||
| fclose(writer->fp); | ||
| Py_fclose(writer->fp); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: why don't we check the return code here? Because the returned type is
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is nothing we can do if close fails, we could log perhaps...
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this case I woudl suggest that we explicitly suppress the returned value and add a comment. We can also raise an unraisable exception (which I do in curses) |
||
| } | ||
|
|
||
| PyMem_Free(writer->filename); | ||
| PyMem_Free(writer->write_buffer); | ||
|
|
||
| #ifdef HAVE_ZSTD | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.