怎样快速读取文件

首先使用 mmap 将文件映射到内存中,得到该文件内容对应的内存首地址(char* data)。

然后基于这块 char* 内存构造一个 std::istream(通过自定义 streambuf 实现,不会把 data 的内容复制到流中)。

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <chrono>
#include <iostream>
#include <istream>
#include <numeric>
#include <streambuf>
#include <string>

// Stream buffer whose get area is an existing, caller-owned byte range.
// Nothing is copied: the three get-area pointers are aimed directly at
// [base, base + size), so that memory must stay valid for as long as the
// buffer is being read.
struct membuf: std::streambuf {
    // base: first byte of the range to expose for reading.
    // size: number of readable bytes starting at base.
    membuf(char const* base, size_t size) {
        // setg() only accepts non-const pointers, hence the const_cast; this
        // class sets up a get area only and installs no put area.
        char* const first = const_cast<char*>(base);
        char* const last = first + size;
        setg(first, first, last);
    }
};
struct imemstream: virtual membuf, std::istream {
imemstream(char const* base, size_t size)
: membuf(base, size)
, std::istream(static_cast<std::streambuf*>(this)) {
}
};

/**
 * Demo: memory-map a file read-only, wrap the mapping in an imemstream and
 * echo its whitespace-separated tokens to stdout, then report how long the
 * tokenizing loop took.
 *
 * At most kMaxTokens tokens are echoed.
 *
 * Returns 0 on success and EXIT_FAILURE if the file cannot be opened,
 * stat'ed, mapped or unmapped, or if it is empty.
 */
int main () {
    // Upper bound on the number of tokens read and printed.
    const unsigned int kMaxTokens = 100000;

    // No mode argument: it is only consulted when O_CREAT is given.
    const int fd = open("../XXXX", O_RDONLY);
    if (fd == -1)
    {
        perror("Error opening file for reading");
        return EXIT_FAILURE;
    }

    struct stat fileInfo = {};
    if (fstat(fd, &fileInfo) == -1)
    {
        // Report first: close() may overwrite errno.
        perror("Error getting the file size");
        close(fd);
        return EXIT_FAILURE;
    }

    // mmap() rejects a zero length, so an empty file is handled up front.
    if (fileInfo.st_size <= 0)
    {
        fprintf(stderr, "Error: File is empty, nothing to do\n");
        close(fd);
        return EXIT_FAILURE;
    }
    printf("File size is %ji\n", (intmax_t)fileInfo.st_size);

    // st_size is a signed off_t; it is known to be positive at this point.
    const size_t length = static_cast<size_t>(fileInfo.st_size);

    void* const mapping = mmap(nullptr, length, PROT_READ, MAP_SHARED, fd, 0);
    if (mapping == MAP_FAILED)
    {
        // Report first: close() may overwrite errno.
        perror("Error mmapping the file");
        close(fd);
        return EXIT_FAILURE;
    }
    const char* const map = static_cast<const char*>(mapping);

    {
        // Start of the actual reading. The stream lives in its own scope so
        // that it is gone before the memory it reads from is unmapped.
        unsigned int inserted = 0;
        std::string line;
        imemstream in(map, length);

        const auto started = std::chrono::steady_clock::now();
        // operator>> treats both spaces and newlines as the end of a token.
        // The cap is tested first so no token is consumed without being
        // printed.
        while (inserted < kMaxTokens && in >> line) {
            // '\n' rather than std::endl: flushing on every token would
            // dominate the time being measured.
            std::cout << line << '\n';
            ++inserted;
        }
        const auto done = std::chrono::steady_clock::now();

        const auto time_in_ms =
            std::chrono::duration_cast<std::chrono::milliseconds>(done - started).count();
        std::cout << "Total " << time_in_ms << " ms" << std::endl;
    }

    // Don't forget to release the mapping.
    if (munmap(mapping, length) == -1)
    {
        // Report first: close() may overwrite errno.
        perror("Error un-mmapping the file");
        close(fd);
        return EXIT_FAILURE;
    }

    // Un-mmapping doesn't close the file, so we still need to do that.
    close(fd);

    return 0;
}

参考

  1. How to read in a file in C++
  2. 探寻C++最快的读取文件的方案
  3. Creating an input stream from constant memory
  4. mapread.c
0%