diff --git a/src/blockingqueue.hpp b/src/blockingqueue.hpp index 4da839a..e1d41bc 100644 --- a/src/blockingqueue.hpp +++ b/src/blockingqueue.hpp @@ -47,7 +47,7 @@ class BlockingQueue totalSize -= item.size; lock.unlock(); - itemsNotFull.notify_all(); + itemsNotFull.notify_one(); } return std::move(item.value); diff --git a/src/build.cpp b/src/build.cpp index 669676a..900acd1 100644 --- a/src/build.cpp +++ b/src/build.cpp @@ -22,6 +22,10 @@ #include #include #include +#include +#include +#include +#include #include @@ -162,10 +166,14 @@ static size_t normalizeEOL(char* data, size_t size) return result; } -static std::vector readFile(FileStream& in) +static std::vector readFile(FileStream& in, size_t sizeHint = 0) { std::vector result; + // pre-allocate if size hint is available to avoid O(n^2) reallocation + if (sizeHint > 0) + result.reserve(sizeHint); + // read file as is char buffer[65536]; size_t readsize; @@ -646,18 +654,33 @@ void buildAppendFilePart(BuildContext* context, const char* path, unsigned int s bool buildAppendFile(BuildContext* context, const char* path, uint64_t timeStamp, uint64_t fileSize) { - FileStream in(path, "rb"); - if (!in) - { - context->output->error("Error reading file %s\n", path); - return false; - } - try { - std::vector contents = convertToUTF8(readFile(in)); + // Try optimized Windows read first (uses FILE_FLAG_SEQUENTIAL_SCAN for better prefetching) + std::vector contents = readFileOptimized(path); - appendFilePart(context, path, 0, contents.empty() ? 0 : &contents[0], contents.size(), timeStamp, fileSize, &contents); + // Fallback to FileStream if optimized read failed (e.g., on non-Windows or special files) + if (contents.empty() && fileSize > 0) + { + FileStream in(path, "rb"); + if (!in) + { + context->output->error("Error reading file %s\n", path); + return false; + } + contents = readFile(in, static_cast(fileSize)); + } + + // Normalize EOL and convert to UTF8 + if (!contents.empty()) + { + size_t size = normalizeEOL(&contents[0], contents.size()); + contents.resize(size); + } + + contents = convertToUTF8(std::move(contents)); + + appendFilePart(context, path, 0, contents.empty() ? nullptr : &contents[0], contents.size(), timeStamp, fileSize, &contents); return true; } @@ -739,6 +762,102 @@ unsigned int buildFinish(BuildContext* context) return result; } +// Parallel file reader for overlapping I/O with processing +struct ReadAheadBuffer +{ + struct ReadFile + { + std::string path; + std::vector contents; + uint64_t timeStamp; + uint64_t fileSize; + bool ready; + bool error; + + ReadFile() : timeStamp(0), fileSize(0), ready(false), error(false) {} + }; + + std::vector files; + std::atomic nextToRead; + std::atomic nextToConsume; + std::mutex mutex; + std::condition_variable cv; + std::atomic done; + size_t windowSize; + + ReadAheadBuffer(size_t fileCount, size_t window = 64) + : files(fileCount), nextToRead(0), nextToConsume(0), done(false), windowSize(window) + { + } + + void readerThread() + { + while (!done) + { + size_t idx = nextToRead.fetch_add(1); + if (idx >= files.size()) + break; + + // Wait if we're too far ahead of consumer + while (idx >= nextToConsume + windowSize && !done) + std::this_thread::yield(); + + if (done) break; + + ReadFile& rf = files[idx]; + + // Read file + std::vector contents = readFileOptimized(rf.path.c_str()); + if (contents.empty() && rf.fileSize > 0) + { + // Fallback + FileStream in(rf.path.c_str(), "rb"); + if (in) + contents = readFile(in, static_cast(rf.fileSize)); + } + + // Normalize EOL + if (!contents.empty()) + { + size_t size = normalizeEOL(&contents[0], contents.size()); + contents.resize(size); + } + + // Convert to UTF8 + contents = convertToUTF8(std::move(contents)); + + { + std::lock_guard lock(mutex); + rf.contents = std::move(contents); + rf.ready = true; + } + cv.notify_all(); + } + } + + bool getFile(size_t idx, std::vector& contents) + { + if (idx >= files.size()) return false; + + ReadFile& rf = files[idx]; + + std::unique_lock lock(mutex); + cv.wait(lock, [&] { return rf.ready || done; }); + + if (!rf.ready) return false; + + contents = std::move(rf.contents); + nextToConsume = idx + 1; + return !rf.error; + } + + void stop() + { + done = true; + cv.notify_all(); + } +}; + void buildProject(Output* output, const char* path) { output->print("Building %s:\n", path); @@ -765,11 +884,39 @@ void buildProject(Output* output, const char* path) BuildContext* builder = buildStart(output, tempPath.c_str(), files.size()); if (!builder) return; - for (auto& f: files) + // Use parallel read-ahead for better I/O throughput + unsigned int numReaders = std::max(2u, std::thread::hardware_concurrency() / 2); + ReadAheadBuffer readAhead(files.size(), numReaders * 8); + + // Initialize file info + for (size_t i = 0; i < files.size(); ++i) { - buildAppendFile(builder, f.path.c_str(), f.timeStamp, f.fileSize); + readAhead.files[i].path = files[i].path; + readAhead.files[i].timeStamp = files[i].timeStamp; + readAhead.files[i].fileSize = files[i].fileSize; } + // Start reader threads + std::vector readers; + for (unsigned int i = 0; i < numReaders; ++i) + readers.emplace_back(&ReadAheadBuffer::readerThread, &readAhead); + + // Consume files in order + for (size_t i = 0; i < files.size(); ++i) + { + std::vector contents; + if (readAhead.getFile(i, contents)) + { + appendFilePart(builder, files[i].path.c_str(), 0, + contents.empty() ? nullptr : &contents[0], contents.size(), + files[i].timeStamp, files[i].fileSize, &contents); + } + } + + readAhead.stop(); + for (auto& t : readers) + t.join(); + buildFinish(builder); } diff --git a/src/fileutil.hpp b/src/fileutil.hpp index 664e36b..b3fe59f 100644 --- a/src/fileutil.hpp +++ b/src/fileutil.hpp @@ -2,6 +2,7 @@ #pragma once #include +#include #include #include @@ -28,3 +29,7 @@ bool getFileAttributes(const char* path, uint64_t* mtime, uint64_t* size); FILE* openFile(const char* path, const char* mode); bool watchDirectory(const char* path, const std::function& callback); + +// Read file directly into vector using optimized Windows API (FILE_FLAG_SEQUENTIAL_SCAN) +// Returns empty vector on failure +std::vector readFileOptimized(const char* path); diff --git a/src/fileutil_posix.cpp b/src/fileutil_posix.cpp index 9148ad2..14fe3bf 100644 --- a/src/fileutil_posix.cpp +++ b/src/fileutil_posix.cpp @@ -281,6 +281,13 @@ static bool watchDirectoryFSEvent(const char* path, const std::function readFileOptimized(const char* path) +{ + (void)path; + return std::vector(); +} + bool watchDirectory(const char* path, const std::function& callback) { #if defined(__linux__) diff --git a/src/fileutil_win.cpp b/src/fileutil_win.cpp index ba5a66d..0f12ddd 100644 --- a/src/fileutil_win.cpp +++ b/src/fileutil_win.cpp @@ -162,6 +162,55 @@ FILE* openFile(const char* path, const char* mode) return _wfopen(wpath.c_str(), wmode); } +// Read file directly into vector using Windows API with FILE_FLAG_SEQUENTIAL_SCAN +std::vector readFileOptimized(const char* path) +{ + std::vector result; + + // Get full path with long path prefix + std::wstring wpath = fromUtf8(isFullPath(path) ? path : normalizePath(getCurrentDirectory().c_str(), path).c_str()); + wpath.insert(0, L"\\\\?\\"); + std::replace(wpath.begin(), wpath.end(), '/', '\\'); + + // Open file with sequential scan hint for better prefetching + HANDLE hFile = CreateFileW(wpath.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, + OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL); + + if (hFile == INVALID_HANDLE_VALUE) + return result; + + // Get file size + LARGE_INTEGER fileSize; + if (!GetFileSizeEx(hFile, &fileSize)) + { + CloseHandle(hFile); + return result; + } + + // Handle empty files + if (fileSize.QuadPart == 0) + { + CloseHandle(hFile); + return result; + } + + // Pre-allocate and read directly into vector + size_t totalSize = static_cast(fileSize.QuadPart); + result.resize(totalSize); + + DWORD bytesRead = 0; + BOOL success = ReadFile(hFile, &result[0], static_cast(totalSize), &bytesRead, NULL); + CloseHandle(hFile); + + if (!success || bytesRead != totalSize) + { + result.clear(); + return result; + } + + return result; +} + bool watchDirectory(const char* path, const std::function& callback) { HANDLE h = CreateFileW(fromUtf8(path).c_str(), FILE_LIST_DIRECTORY, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); diff --git a/src/project.cpp b/src/project.cpp index 9d89ad4..0c260d7 100644 --- a/src/project.cpp +++ b/src/project.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include static std::string getHomePath() @@ -147,7 +147,7 @@ static bool extractSuffix(const std::string& str, const char* prefix, std::strin return false; } -static std::shared_ptr createRegexCached(const std::string& query, std::map>& regexCache) +static std::shared_ptr createRegexCached(const std::string& query, std::unordered_map>& regexCache) { auto p = regexCache.insert(std::make_pair(query, std::shared_ptr())); @@ -157,7 +157,7 @@ static std::shared_ptr createRegexCached(const std::string& query, std::m return p.first->second; } -static std::shared_ptr createOrRegexCached(const std::vector& list, std::map>& regexCache) +static std::shared_ptr createOrRegexCached(const std::vector& list, std::unordered_map>& regexCache) { if (list.empty()) return std::shared_ptr(); @@ -170,7 +170,7 @@ static std::shared_ptr createOrRegexCached(const std::vector } static std::unique_ptr buildGroup(std::unique_ptr group, const std::vector& include, const std::vector& exclude, - std::map>& regexCache) + std::unordered_map>& regexCache) { group->include = createOrRegexCached(include, regexCache); group->exclude = createOrRegexCached(exclude, regexCache); @@ -179,7 +179,7 @@ static std::unique_ptr buildGroup(std::unique_ptr gr } static std::unique_ptr parseGroup(std::ifstream& in, const char* file, unsigned int& lineId, ProjectGroup* parent, - std::map>& regexCache, const char* pathBase) + std::unordered_map>& regexCache, const char* pathBase) { std::string line, suffix; std::vector include, exclude; @@ -251,7 +251,7 @@ std::unique_ptr parseProject(Output* output, const char* file) std::string pathBase = normalizePath(getCurrentDirectory().c_str(), (std::string(file) + "/..").c_str()); unsigned int line = 0; - std::map> regexCache; + std::unordered_map> regexCache; try { diff --git a/src/stringutil.hpp b/src/stringutil.hpp index 24fc731..908edf9 100644 --- a/src/stringutil.hpp +++ b/src/stringutil.hpp @@ -26,11 +26,8 @@ inline const char* findLineStart(const char* begin, const char* pos) inline const char* findLineEnd(const char* pos, const char* end) { - for (const char* s = pos; s != end; ++s) - if (*s == '\n') - return s; - - return end; + const char* nl = static_cast(memchr(pos, '\n', end - pos)); + return nl ? nl : end; } inline unsigned int countLines(const char* begin, const char* end)