第24章 文件I/O操作
文件I/O基础
1. 标准文件流
C++标准库提供了三种基本的文件流类:
- std::ifstream:用于读取文件
- std::ofstream:用于写入文件
- std::fstream:用于读写文件
这些类都继承自相应的标准流类(std::istream、std::ostream、std::iostream),因此支持相同的输入输出操作。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
| #include <fstream> #include <iostream> #include <string>
// Prints every remaining line of `source` to standard output, one per line.
static void dumpLines(std::istream& source) {
    for (std::string text; std::getline(source, text);) {
        std::cout << text << std::endl;
    }
}

// Tours the three standard file stream classes on "example.txt":
// write with std::ofstream, read back with std::ifstream, then append and
// re-read through a single std::fstream. Each step is skipped silently when
// its stream fails to open.
int main() {
    // Step 1: create (or truncate) the file and write two lines.
    std::ofstream writer("example.txt");
    if (writer.is_open()) {
        writer << "Hello, File I/O!" << std::endl;
        writer << "This is a test." << std::endl;
        writer.close();
    }

    // Step 2: read the file back line by line.
    std::ifstream reader("example.txt");
    if (reader.is_open()) {
        dumpLines(reader);
        reader.close();
    }

    // Step 3: append through a read/write stream, then rewind the read
    // position and print the whole file.
    std::fstream appender("example.txt",
                          std::ios::in | std::ios::out | std::ios::app);
    if (appender.is_open()) {
        appender << "Appending a line." << std::endl;
        appender.seekg(0, std::ios::beg);
        dumpLines(appender);
        appender.close();
    }

    return 0;
}
|
2. 文件打开模式
文件流的构造函数和open()方法接受一个打开模式参数,用于指定文件的打开方式:
| 模式标志 | 描述 |
|---|---|
| std::ios::in | 以输入模式打开文件 |
| std::ios::out | 以输出模式打开文件 |
| std::ios::app | 以追加模式打开文件 |
| std::ios::ate | 打开文件并定位到文件末尾 |
| std::ios::trunc | 如果文件存在,截断为零长度 |
| std::ios::binary | 以二进制模式打开文件 |
| std::ios::noreplace | C++23: 如果文件已存在,打开失败 |

注意:标准库中并没有 std::ios::create 标志;以 std::ios::out(不同时指定 std::ios::in)或 std::ios::app 打开时,如果文件不存在会自动创建。
这些标志可以使用位或运算符(|)组合使用。
高级文件操作
1. 二进制文件操作
二进制文件操作允许直接读写二进制数据,适用于存储结构化数据:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
| #include <fstream> #include <iostream>
// Fixed-size record used by the binary I/O example: objects of this type are
// written to and read from "people.bin" as raw bytes, so the member order and
// sizes below are also the on-disk layout.
struct Person {
    char name[50];   // character buffer; trailing elements are zero when brace-initialised from a shorter literal
    int age;
    double height;
};
int main() { std::ofstream outFile("people.bin", std::ios::binary); if (outFile) { Person person1 = {"John Doe", 30, 1.75}; Person person2 = {"Jane Smith", 25, 1.65}; outFile.write(reinterpret_cast<const char*>(&person1), sizeof(person1)); outFile.write(reinterpret_cast<const char*>(&person2), sizeof(person2)); outFile.close(); } std::ifstream inFile("people.bin", std::ios::binary); if (inFile) { Person person; while (inFile.read(reinterpret_cast<char*>(&person), sizeof(person))) { std::cout << "Name: " << person.name << ", Age: " << person.age << ", Height: " << person.height << std::endl; } inFile.close(); } return 0; }
|
2. 文件定位与随机访问
文件流提供了定位功能,允许随机访问文件中的数据:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
| #include <fstream> #include <iostream>
// Demonstrates seeking inside a read/write stream: fills
// "random_access.txt" with ten lines, jumps to byte offset 50, reads the rest
// of the line found there, reports the resulting read position, and finally
// appends a line at the end of the file.
int main() {
    std::fstream stream("random_access.txt",
                        std::ios::in | std::ios::out | std::ios::trunc);
    if (!stream) {
        return 0;  // nothing to demonstrate when the file cannot be opened
    }

    int index = 0;
    while (index < 10) {
        stream << "Line " << index << std::endl;
        ++index;
    }

    // 5 * 10 is a plain byte offset from the start of the file; it is not
    // aligned to a line boundary.
    stream.seekg(5 * 10, std::ios::beg);

    std::string fragment;
    std::getline(stream, fragment);
    std::cout << "Line at position 50: " << fragment << std::endl;

    const std::streampos where = stream.tellg();
    std::cout << "Current position: " << where << std::endl;

    // Move the write position to the end before switching back to output.
    stream.seekp(0, std::ios::end);
    stream << "End of file." << std::endl;

    stream.close();
    return 0;
}
|
3. 文件错误处理
文件操作可能会失败,需要进行适当的错误处理:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
| #include <fstream> #include <iostream> #include <system_error>
// Tries to open "nonexistent.txt" and, when that fails, prints the message
// associated with the current errno value and exits with status 1.
int main() {
    std::ifstream input("nonexistent.txt");
    if (input) {
        input.close();
        return 0;
    }

    // NOTE(review): the C++ standard does not promise that a failed stream
    // open sets errno; this relies on the underlying implementation doing so.
    const std::error_code failure(errno, std::generic_category());
    std::cout << "Error opening file: " << failure.message() << std::endl;
    return 1;
}
|
内存映射文件
内存映射文件是一种将文件内容映射到内存的技术,允许直接通过内存操作来读写文件,具有更高的性能:
1. 使用操作系统API
在Windows上,可以使用CreateFileMapping和MapViewOfFile:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
| #include <windows.h> #include <iostream>
int main() { HANDLE hFile = CreateFile( "large_file.bin", GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ); if (hFile == INVALID_HANDLE_VALUE) { std::cout << "Error opening file: " << GetLastError() << std::endl; return 1; } LARGE_INTEGER fileSize; GetFileSizeEx(hFile, &fileSize); HANDLE hMap = CreateFileMapping( hFile, NULL, PAGE_READWRITE, 0, fileSize.LowPart, NULL ); if (hMap == NULL) { std::cout << "Error creating file mapping: " << GetLastError() << std::endl; CloseHandle(hFile); return 1; } LPVOID lpMapAddress = MapViewOfFile( hMap, FILE_MAP_ALL_ACCESS, 0, 0, 0 ); if (lpMapAddress == NULL) { std::cout << "Error mapping view: " << GetLastError() << std::endl; CloseHandle(hMap); CloseHandle(hFile); return 1; } char* data = static_cast<char*>(lpMapAddress); for (DWORD i = 0; i < fileSize.LowPart; ++i) { data[i] = 'A' + (i % 26); } UnmapViewOfFile(lpMapAddress); CloseHandle(hMap); CloseHandle(hFile); return 0; }
|
在POSIX系统上,可以使用mmap:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
| #include <sys/mman.h> #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> #include <iostream>
int main() { int fd = open("large_file.bin", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); if (fd == -1) { perror("Error opening file"); return 1; } off_t size = 1024 * 1024; if (ftruncate(fd, size) == -1) { perror("Error setting file size"); close(fd); return 1; } char* data = static_cast<char*>(mmap( NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 )); if (data == MAP_FAILED) { perror("Error mapping file"); close(fd); return 1; } for (off_t i = 0; i < size; ++i) { data[i] = 'A' + (i % 26); } if (msync(data, size, MS_SYNC) == -1) { perror("Error syncing to disk"); } munmap(data, size); close(fd); return 0; }
|
2. 使用Boost.Iostreams
Boost.Iostreams库提供了跨平台的内存映射文件支持:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| #include <boost/iostreams/device/mapped_file.hpp> #include <iostream>
int main() { boost::iostreams::mapped_file_params params; params.path = "boost_mapped_file.bin"; params.new_file_size = 1024 * 1024; params.flags = boost::iostreams::mapped_file::readwrite; boost::iostreams::mapped_file_sink file(params); if (file.is_open()) { char* data = file.data(); for (size_t i = 0; i < file.size(); ++i) { data[i] = 'A' + (i % 26); } file.close(); } boost::iostreams::mapped_file_source file2("boost_mapped_file.bin"); if (file2.is_open()) { const char* data = file2.data(); std::cout << "First 100 bytes: " << std::endl; for (size_t i = 0; i < 100; ++i) { std::cout << data[i]; } std::cout << std::endl; file2.close(); } return 0; }
|
异步I/O
异步I/O允许程序在等待I/O操作完成的同时执行其他任务,提高程序的响应性和吞吐量:
1. 使用C++17标准库
C++17引入了std::filesystem库,但它只负责路径、目录等文件系统操作;标准库(包括C++17及之后的版本)并没有提供异步文件I/O接口,要实现异步I/O,仍然需要使用操作系统API或第三方库。
2. 使用Boost.Asio
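Boost.Asio库提供了跨平台的异步I/O支持。注意:下面的示例使用asio::posix::stream_descriptor,只能在POSIX系统上编译;此外,在Linux上基于epoll的实现通常不支持对普通磁盘文件做这种异步读取,该方式更适合管道、终端等描述符: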
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
| #include <boost/asio.hpp> #include <iostream> #include <vector>
namespace asio = boost::asio; using asio::ip::tcp;
int main() { asio::io_context io_context; asio::posix::stream_descriptor file(io_context); int fd = open("async_file.txt", O_RDONLY); if (fd == -1) { perror("Error opening file"); return 1; } file.assign(fd); std::vector<char> buffer(1024); file.async_read_some( asio::buffer(buffer), [&](const boost::system::error_code& ec, std::size_t bytes_transferred) { if (!ec) { std::cout << "Read " << bytes_transferred << " bytes: " << std::endl; std::cout.write(buffer.data(), bytes_transferred); std::cout << std::endl; } else { std::cout << "Error reading file: " << ec.message() << std::endl; } close(fd); } ); io_context.run(); return 0; }
|
文件系统操作
C++17引入了std::filesystem库,提供了跨平台的文件系统操作功能:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
| #include <filesystem> #include <iostream> #include <vector>
namespace fs = std::filesystem;
int main() { fs::path dirPath = "test_directory"; if (!fs::exists(dirPath)) { if (fs::create_directory(dirPath)) { std::cout << "Directory created: " << dirPath << std::endl; } } std::cout << "\nDirectory contents: " << std::endl; for (const auto& entry : fs::directory_iterator(".")) { std::cout << entry.path().filename(); if (fs::is_directory(entry.status())) { std::cout << " (directory)"; } std::cout << std::endl; } fs::path filePath = "example.txt"; if (fs::exists(filePath)) { std::cout << "\nFile " << filePath << " exists." << std::endl; std::cout << "Size: " << fs::file_size(filePath) << " bytes" << std::endl; std::cout << "Last write time: " << fs::last_write_time(filePath) << std::endl; } fs::path destPath = "example_copy.txt"; try { fs::copy_file(filePath, destPath, fs::copy_options::overwrite_existing); std::cout << "\nFile copied to: " << destPath << std::endl; } catch (const fs::filesystem_error& e) { std::cout << "Error copying file: " << e.what() << std::endl; } if (fs::exists(destPath)) { fs::remove(destPath); std::cout << "File deleted: " << destPath << std::endl; } if (fs::exists(dirPath)) { fs::remove(dirPath); std::cout << "Directory deleted: " << dirPath << std::endl; } return 0; }
|
序列化与反序列化
序列化是将对象转换为可存储或传输的格式的过程,反序列化则是将其恢复为对象的过程:
1. 基本序列化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
| #include <fstream> #include <iostream> #include <string>
// Simple aggregate with hand-written binary (de)serialization.
//
// On-disk layout, in order: name length (size_t), name bytes (no terminator),
// age (int), height (double), all in the writing machine's native
// representation. Because size_t, int and double are written raw, files are
// only portable between builds with the same sizes and byte order.
class Person {
public:
    std::string name;
    int age;
    double height;

    // Writes this object to `out` using the layout described above.
    // Stream errors are left in `out`'s state for the caller to inspect.
    void serialize(std::ofstream& out) const {
        const size_t nameLength = name.size();
        out.write(reinterpret_cast<const char*>(&nameLength), sizeof(nameLength));
        out.write(name.data(), static_cast<std::streamsize>(nameLength));
        out.write(reinterpret_cast<const char*>(&age), sizeof(age));
        out.write(reinterpret_cast<const char*>(&height), sizeof(height));
    }

    // Reads one record from `in`.
    //
    // On success the members are replaced with the decoded values. On any
    // failure (truncated input, or a length prefix larger than the rest of the
    // file) the object is left unchanged and `in` is left in a failed state,
    // so callers can test the stream afterwards.
    void deserialize(std::ifstream& in) {
        size_t nameLength = 0;
        if (!in.read(reinterpret_cast<char*>(&nameLength), sizeof(nameLength))) {
            return;  // read() has already set failbit
        }

        // Validate the length prefix against the bytes that are actually left,
        // so a corrupt file cannot trigger a huge allocation.
        const std::streampos here = in.tellg();
        in.seekg(0, std::ios::end);
        const std::streampos end = in.tellg();
        in.seekg(here);
        if (!in) {
            return;
        }
        const std::streamoff remaining = end - here;
        if (remaining < 0 ||
            static_cast<unsigned long long>(remaining) < nameLength) {
            in.setstate(std::ios::failbit);
            return;
        }

        // Decode into temporaries and commit only when every read succeeded.
        std::string newName(nameLength, '\0');
        int newAge = 0;
        double newHeight = 0.0;
        in.read(&newName[0], static_cast<std::streamsize>(nameLength));
        in.read(reinterpret_cast<char*>(&newAge), sizeof(newAge));
        in.read(reinterpret_cast<char*>(&newHeight), sizeof(newHeight));
        if (!in) {
            return;
        }

        name.swap(newName);
        age = newAge;
        height = newHeight;
    }
};
int main() { Person person1 = {"John Doe", 30, 1.75}; std::ofstream outFile("person.bin", std::ios::binary); if (outFile) { person1.serialize(outFile); outFile.close(); } Person person2; std::ifstream inFile("person.bin", std::ios::binary); if (inFile) { person2.deserialize(inFile); inFile.close(); std::cout << "Name: " << person2.name << ", Age: " << person2.age << ", Height: " << person2.height << std::endl; } return 0; }
|
2. 使用第三方库
对于复杂对象的序列化,可以使用第三方库如Boost.Serialization或Protocol Buffers:
Boost.Serialization
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
| #include <boost/serialization/serialization.hpp> #include <boost/archive/binary_oarchive.hpp> #include <boost/archive/binary_iarchive.hpp> #include <fstream> #include <iostream> #include <vector>
// Aggregate serialized through Boost.Serialization. The single serialize()
// template is used for both saving and loading; the archive's operator&
// dispatches to the right direction.
//
// NOTE(review): serializing the std::vector member normally requires
// <boost/serialization/vector.hpp>, which this snippet's include line does
// not list - confirm and add it if the example fails to compile.
class Person {
public:
    std::string name;
    int age;
    double height;
    std::vector<std::string> hobbies;

private:
    // Lets the library call the private serialize() below.
    friend class boost::serialization::access;

    // Streams every member to/from `ar` in declaration order.
    // `version` is the class version supplied by the library; it is unused.
    template <typename Archive>
    void serialize(Archive& ar, const unsigned int version) {
        ar & name & age & height & hobbies;
    }
};
int main() { Person person1 = {"John Doe", 30, 1.75, {"Reading", "Hiking", "Coding"}}; std::ofstream outFile("boost_person.bin", std::ios::binary); if (outFile) { boost::archive::binary_oarchive archive(outFile); archive << person1; outFile.close(); } Person person2; std::ifstream inFile("boost_person.bin", std::ios::binary); if (inFile) { boost::archive::binary_iarchive archive(inFile); archive >> person2; inFile.close(); std::cout << "Name: " << person2.name << ", Age: " << person2.age << ", Height: " << person2.height << std::endl; std::cout << "Hobbies: "; for (const auto& hobby : person2.hobbies) { std::cout << hobby << " "; } std::cout << std::endl; } return 0; }
|
性能优化
1. 缓冲区优化
使用适当大小的缓冲区可以显著提高文件I/O性能:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
| #include <fstream> #include <iostream>
// Shows how to give file streams a larger, caller-supplied buffer.
//
// Fixes compared with calling pubsetbuf() on an already-open stream:
//  * pubsetbuf() is called BEFORE open(): whether a buffer can still be
//    installed after the file is open is implementation-defined, and some
//    standard libraries silently ignore the late call;
//  * each stream gets its own buffer, declared before the stream so it
//    outlives it;
//  * lines are terminated with '\n' instead of std::endl, because std::endl
//    flushes on every line and defeats the buffer being demonstrated.
int main() {
    const size_t bufferSize = 65536;
    char inBuffer[bufferSize];
    char outBuffer[bufferSize];

    std::ifstream inFile;
    inFile.rdbuf()->pubsetbuf(inBuffer, bufferSize);
    inFile.open("large_file.txt");
    if (inFile) {
        std::string line;
        while (std::getline(inFile, line)) {
            // Per-line processing would go here.
        }
        inFile.close();
    }

    std::ofstream outFile;
    outFile.rdbuf()->pubsetbuf(outBuffer, bufferSize);
    outFile.open("output.txt");
    if (outFile) {
        for (int i = 0; i < 1000000; ++i) {
            outFile << "Line " << i << '\n';
        }
        outFile.close();  // flushes whatever is still buffered
    }

    return 0;
}
|
2. 文件I/O性能比较
| 操作类型 | 性能 | 适用场景 |
|---|---|---|
| 标准文件流 | 中等 | 一般用途,跨平台 |
| 内存映射文件 | 高 | 大文件处理,随机访问 |
| 原始系统调用 | 很高 | 高性能需求,平台特定 |
| 异步I/O | 高 | 高并发场景 |
3. 并行文件I/O
对于大文件处理,可以使用多线程并行读写:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
| #include <fstream> #include <iostream> #include <thread> #include <vector>
// Reads up to `size` bytes of `filename` starting at byte offset `start` and
// reports the range that was actually read on standard output.
//
// Each call opens its own stream, so concurrent calls do not share stream
// state. The report is built as one string and written with a single
// insertion so that lines from different threads are not interleaved
// piecewise.
//
// @param filename  file to read (opened in binary mode)
// @param start     byte offset of the first byte of the chunk
// @param size      maximum number of bytes to read
//
// If the file cannot be opened, a message is written to std::cerr and nothing
// is reported on std::cout. If fewer than `size` bytes are available, the
// reported end offset reflects the bytes really read.
void processChunk(const std::string& filename, size_t start, size_t size) {
    std::ifstream file(filename, std::ios::binary);
    if (!file) {
        std::cerr << ("Error opening file: " + filename + "\n");
        return;
    }

    file.seekg(static_cast<std::streamoff>(start));

    std::vector<char> buffer(size);
    file.read(buffer.data(), static_cast<std::streamsize>(size));
    // gcount() is the number of bytes the last read() actually delivered.
    const size_t bytesRead = static_cast<size_t>(file.gcount());

    // Real per-chunk processing of buffer[0, bytesRead) would go here.

    const std::string report = "Thread processing chunk from " +
                               std::to_string(start) + " to " +
                               std::to_string(start + bytesRead) + "\n";
    std::cout << report << std::flush;
}
// Splits "large_file.bin" into 1 MiB chunks and processes them with
// processChunk() on a bounded number of threads, joining each batch before
// starting the next.
//
// Fixes:
//  * std::thread::hardware_concurrency() may return 0 when the value cannot
//    be determined; the batch size is clamped to at least 1;
//  * the batch size is unsigned, avoiding a signed/unsigned comparison with
//    threads.size();
//  * a failing tellg() (-1) is detected instead of being converted to a huge
//    size_t;
//  * the chunk size is computed without std::min, which this snippet never
//    included <algorithm> for.
//
// Returns 1 if the file cannot be opened or measured, 0 otherwise.
int main() {
    const std::string filename = "large_file.bin";

    std::ifstream file(filename, std::ios::binary);
    if (!file) {
        std::cout << "Error opening file" << std::endl;
        return 1;
    }

    file.seekg(0, std::ios::end);
    const std::streamoff endOffset = file.tellg();
    file.close();
    if (endOffset < 0) {
        std::cout << "Error determining file size" << std::endl;
        return 1;
    }
    const size_t fileSize = static_cast<size_t>(endOffset);

    const size_t chunkSize = 1024 * 1024;
    const unsigned int hardwareThreads = std::thread::hardware_concurrency();
    const size_t numThreads = hardwareThreads == 0 ? 1 : hardwareThreads;

    std::vector<std::thread> threads;
    threads.reserve(numThreads);

    for (size_t i = 0; i < fileSize; i += chunkSize) {
        const size_t remaining = fileSize - i;
        const size_t currentChunkSize = remaining < chunkSize ? remaining : chunkSize;
        threads.emplace_back(processChunk, filename, i, currentChunkSize);

        // Batch is full: wait for all of it before launching more threads.
        if (threads.size() >= numThreads) {
            for (auto& thread : threads) {
                thread.join();
            }
            threads.clear();
        }
    }

    // Join whatever is left from the last, partially filled batch.
    for (auto& thread : threads) {
        thread.join();
    }

    return 0;
}
|
跨平台文件I/O
1. 路径处理
不同平台的路径分隔符不同,需要使用跨平台的路径处理方法:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
| #include <filesystem> #include <iostream>
namespace fs = std::filesystem;
// Demonstrates portable path handling with std::filesystem::path: joining
// components with operator/, normalising, and decomposing a path.
//
// Fix: fs::canonical() requires the path to exist and throws
// fs::filesystem_error otherwise; the example path is never created, so the
// program terminated with an uncaught exception. fs::weakly_canonical()
// performs the same normalisation but accepts components that do not exist.
int main() {
    fs::path path1 = "directory";
    fs::path path2 = "subdirectory";
    fs::path path3 = "file.txt";

    // operator/ inserts the platform's preferred separator as needed.
    fs::path fullPath = path1 / path2 / path3;
    std::cout << "Full path: " << fullPath << std::endl;

    std::cout << "Normalized path: " << fs::weakly_canonical(fullPath).string() << std::endl;

    std::cout << "Filename: " << fullPath.filename() << std::endl;
    std::cout << "Stem: " << fullPath.stem() << std::endl;
    std::cout << "Extension: " << fullPath.extension() << std::endl;
    std::cout << "Parent path: " << fullPath.parent_path() << std::endl;

    return 0;
}
|
2. 文件权限
不同平台的文件权限模型不同,需要使用跨平台的权限处理方法:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| #include <filesystem>
#include <fstream>
#include <iostream>
namespace fs = std::filesystem;
int main() { fs::path filePath = "test_file.txt"; std::ofstream file(filePath); file << "Test content" << std::endl; file.close(); fs::file_status status = fs::status(filePath); std::cout << "Is readable: " << (status.permissions() & fs::perms::owner_read) << std::endl; std::cout << "Is writable: " << (status.permissions() & fs::perms::owner_write) << std::endl; std::cout << "Is executable: " << (status.permissions() & fs::perms::owner_exec) << std::endl; fs::permissions(filePath, fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read); return 0; }
|
实际项目中的文件I/O策略
1. 分层文件I/O架构
在大型项目中,通常采用分层的文件I/O架构:
- 底层:封装原始文件操作,处理平台差异
- 中层:提供业务相关的文件操作接口
- 顶层:应用层使用中层接口进行文件操作
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
// Bottom layer of the example architecture: whole-file read/write helpers
// that hide the stream handling from higher layers.
//
// Requires <fstream> and <string>.
class FileUtils {
public:
    // Reads the entire file at `path` (binary mode) into `content`.
    //
    // Returns true on success. Returns false, leaving `content` untouched, if
    // the file cannot be opened or the stream reports a read error (badbit).
    // Reading through istream::read lets such errors surface in the stream
    // state, which a streambuf-iterator copy would not do.
    static bool readFile(const std::string& path, std::string& content) {
        std::ifstream file(path, std::ios::binary);
        if (!file) {
            return false;
        }

        std::string loaded;
        char chunk[4096];
        // The final read() is usually short: it fails but still delivers
        // gcount() bytes, which must be kept.
        while (file.read(chunk, sizeof(chunk)) || file.gcount() > 0) {
            loaded.append(chunk, static_cast<std::string::size_type>(file.gcount()));
        }
        if (file.bad()) {
            return false;
        }

        content.swap(loaded);
        return true;
    }

    // Writes `content` to the file at `path` (binary mode), replacing any
    // existing contents.
    //
    // Returns true only if the file was opened and the data was written and
    // flushed without the stream entering an error state.
    static bool writeFile(const std::string& path, const std::string& content) {
        std::ofstream file(path, std::ios::binary);
        if (!file) {
            return false;
        }

        file.write(content.data(), static_cast<std::streamsize>(content.size()));
        file.flush();  // surface buffered-write failures before reporting success
        return file.good();
    }
};
// Middle layer of the example architecture: loads and saves a Config by
// combining FileUtils with (placeholder) parse/serialize steps.
class ConfigurationManager {
public:
    // Reads the file at `path` and parses it into `config`.
    // Returns false if the file cannot be read or parsing fails.
    static bool loadConfig(const std::string& path, Config& config) {
        std::string raw;
        return FileUtils::readFile(path, raw) && parseConfig(raw, config);
    }

    // Serializes `config` and writes the result to the file at `path`.
    // Returns false if serialization or the write fails.
    static bool saveConfig(const std::string& path, const Config& config) {
        std::string rendered;
        return serializeConfig(config, rendered) &&
               FileUtils::writeFile(path, rendered);
    }

private:
    // Placeholder: always reports success and leaves `config` untouched.
    static bool parseConfig(const std::string& content, Config& config) {
        return true;
    }

    // Placeholder: always reports success and leaves `content` untouched.
    static bool serializeConfig(const Config& config, std::string& content) {
        return true;
    }
};
// Top layer of the example architecture: loads "config.json", changes one
// setting and saves the configuration back through ConfigurationManager.
int main() {
    Config appConfig;

    const bool loaded = ConfigurationManager::loadConfig("config.json", appConfig);
    if (loaded) {
        // Intentionally empty: the application would use the loaded
        // configuration here.
    }

    appConfig.setSetting("key", "value");
    ConfigurationManager::saveConfig("config.json", appConfig);

    return 0;
}
|
2. 错误处理策略
文件I/O操作可能会失败,需要制定合理的错误处理策略:
- 错误检测:检查所有文件操作的返回值
- 错误报告:提供详细的错误信息
- 错误恢复:实现适当的错误恢复机制
- 日志记录:记录文件I/O错误以便调试
3. 安全性考虑
文件I/O操作需要考虑安全性:
- 路径遍历攻击:验证文件路径,防止访问预期之外的文件
- 文件权限:确保文件有适当的权限
- 输入验证:验证文件内容,防止注入攻击
- 异常处理:妥善处理文件I/O异常
现代C++中的文件I/O新特性
1. C++20/23中的新特性
- C++20:std::format库,提供了更方便的字符串格式化功能
- C++23:std::filesystem库的增强,包括更多文件系统操作
- C++23:std::stacktrace库,用于获取调用栈信息,有助于调试文件I/O错误
2. 协程与异步文件I/O
C++20引入了协程,为异步文件I/O提供了更简洁的编程模型。注意:下面的示例只演示协程的基本结构(promise类型、挂起与恢复),协程体内的文件读取本身仍然是同步阻塞的;真正的异步I/O还需要配合操作系统API或异步库:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
| #include <coroutine> #include <iostream> #include <fstream> #include <string>
struct AsyncFileReader { struct promise_type { std::string result; std::exception_ptr exception; AsyncFileReader get_return_object() { return AsyncFileReader{std::coroutine_handle<promise_type>::from_promise(*this)}; } std::suspend_always initial_suspend() { return {}; } std::suspend_always final_suspend() noexcept { return {}; } void return_value(std::string value) { result = std::move(value); } void unhandled_exception() { exception = std::current_exception(); } }; std::coroutine_handle<promise_type> handle; ~AsyncFileReader() { if (handle) handle.destroy(); } bool ready() const { return handle && handle.done(); } std::string get() { if (handle->exception) { std::rethrow_exception(handle->exception); } return std::move(handle->result); } };
// Coroutine that reads the whole file at `path` and co_returns its contents.
//
// `path` is taken BY VALUE on purpose. The coroutine is suspended before its
// body runs, and coroutine parameters are copied into the coroutine frame as
// declared: a reference parameter would still refer to the caller's argument,
// which for a call such as readFileAsync("example.txt") is a temporary that
// is gone by the time the coroutine is resumed.
//
// Throws std::runtime_error inside the coroutine body if the file cannot be
// opened; the exception is captured by the promise and rethrown from
// AsyncFileReader::get().
//
// NOTE: the read itself is an ordinary blocking stream read.
AsyncFileReader readFileAsync(std::string path) {
    std::ifstream file(path);
    if (!file) {
        throw std::runtime_error("Failed to open file");
    }

    std::string content;
    content.assign(
        std::istreambuf_iterator<char>(file),
        std::istreambuf_iterator<char>());

    co_return content;
}
// Starts the file-reading coroutine, resumes it once, and prints either the
// file content or the error that the coroutine raised.
int main() {
    auto reader = readFileAsync("example.txt");

    // The coroutine starts suspended; this runs its body.
    reader.handle.resume();

    if (!reader.ready()) {
        return 0;  // not finished: nothing to report
    }

    try {
        const std::string content = reader.get();
        std::cout << "File content: " << content << std::endl;
    } catch (const std::exception& e) {
        std::cout << "Error: " << e.what() << std::endl;
    }

    return 0;
}
|
总结
文件I/O操作是C++编程中的重要组成部分,从基本的文件读写到高级的内存映射和异步I/O,都有广泛的应用场景。通过本章的学习,你应该掌握:
- 文件I/O基础:标准文件流的使用,文件打开模式
- 高级文件操作:二进制文件操作,文件定位与随机访问,文件错误处理
- 内存映射文件:使用操作系统API和Boost.Iostreams进行内存映射
- 异步I/O:使用Boost.Asio进行异步文件操作
- 文件系统操作:使用std::filesystem进行跨平台文件系统操作
- 序列化与反序列化:基本序列化和使用第三方库进行复杂对象序列化
- 性能优化:缓冲区优化,并行文件I/O,性能比较
- 跨平台文件I/O:路径处理,文件权限
- 实际项目中的文件I/O策略:分层架构,错误处理,安全性考虑
- 现代C++中的文件I/O新特性:C++20/23新特性,协程与异步文件I/O
文件I/O操作的性能和可靠性对应用程序至关重要,特别是在处理大文件或高并发场景时。通过选择合适的文件I/O技术和优化策略,可以显著提高应用程序的性能和可靠性。
在实际项目中,应该根据具体需求选择合适的文件I/O方案,同时考虑跨平台兼容性、安全性和可维护性。随着C++标准的不断发展,文件I/O操作也在变得更加便捷和高效,为C++程序员提供了更多强大的工具。