/src/gdal/port/cpl_userfaultfd.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Name: cpl_userfaultfd.cpp |
4 | | * Project: CPL - Common Portability Library |
5 | | * Purpose: Use userfaultfd and VSIL to service page faults |
6 | | * Author: James McClain, <james.mcclain@gmail.com> |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 2018, Dr. James McClain <james.mcclain@gmail.com> |
10 | | * |
11 | | * SPDX-License-Identifier: MIT |
12 | | ****************************************************************************/ |
13 | | |
14 | | #ifdef ENABLE_UFFD |
15 | | |
#include <atomic>
#include <cinttypes>
#include <cstdlib>
#include <cstring>
#include <string>
20 | | |
21 | | #include <errno.h> |
22 | | #include <fcntl.h> |
23 | | #include <poll.h> |
24 | | #include <pthread.h> |
25 | | #include <sched.h> |
26 | | #include <signal.h> |
27 | | #include <unistd.h> |
28 | | |
29 | | #include <sys/ioctl.h> |
30 | | #include <sys/mman.h> |
31 | | #include <sys/stat.h> |
32 | | #include <sys/syscall.h> |
33 | | #include <sys/types.h> |
34 | | #include <sys/utsname.h> |
35 | | #include <linux/userfaultfd.h> |
36 | | |
37 | | #include "cpl_conv.h" |
38 | | #include "cpl_error.h" |
39 | | #include "cpl_userfaultfd.h" |
40 | | #include "cpl_string.h" |
41 | | #include "cpl_vsi.h" |
42 | | #include "cpl_multiproc.h" |
43 | | |
44 | | #ifndef UFFD_USER_MODE_ONLY |
45 | | // The UFFD_USER_MODE_ONLY flag got added in kernel 5.11 which is the one |
46 | | // used by Ubuntu 20.04, but the linux-libc-dev package corresponds to 5.4 |
47 | 0 | #define UFFD_USER_MODE_ONLY 1 |
48 | | #endif |
49 | | |
50 | 0 | #define BAD_MMAP (reinterpret_cast<void *>(-1)) |
51 | 0 | #define MAX_MESSAGES (0x100) |
52 | | |
53 | | static int64_t get_page_limit(); |
54 | | static void cpl_uffd_fault_handler(void *ptr); |
55 | | static void signal_handler(int signal); |
56 | | static void uffd_cleanup(void *ptr); |
57 | | |
58 | | struct cpl_uffd_context |
59 | | { |
60 | | bool keep_going = false; |
61 | | |
62 | | int uffd = -1; |
63 | | struct uffdio_register uffdio_register = {}; |
64 | | struct uffd_msg uffd_msgs[MAX_MESSAGES]; |
65 | | |
66 | | std::string filename = std::string(""); |
67 | | |
68 | | int64_t page_limit = -1; |
69 | | int64_t pages_used = 0; |
70 | | |
71 | | size_t file_size = 0; |
72 | | size_t page_size = 0; |
73 | | void *page_ptr = nullptr; |
74 | | size_t vma_size = 0; |
75 | | void *vma_ptr = nullptr; |
76 | | CPLJoinableThread *thread = nullptr; |
77 | | }; |
78 | | |
79 | | static void uffd_cleanup(void *ptr) |
80 | 0 | { |
81 | 0 | struct cpl_uffd_context *ctx = static_cast<struct cpl_uffd_context *>(ptr); |
82 | |
|
83 | 0 | if (!ctx) |
84 | 0 | return; |
85 | | |
86 | | // Signal shutdown |
87 | 0 | ctx->keep_going = false; |
88 | 0 | if (ctx->thread) |
89 | 0 | { |
90 | 0 | CPLJoinThread(ctx->thread); |
91 | 0 | ctx->thread = nullptr; |
92 | 0 | } |
93 | |
|
94 | 0 | if (ctx->uffd != -1) |
95 | 0 | { |
96 | 0 | ioctl(ctx->uffd, UFFDIO_UNREGISTER, &ctx->uffdio_register); |
97 | 0 | close(ctx->uffd); |
98 | 0 | ctx->uffd = -1; |
99 | 0 | } |
100 | 0 | if (ctx->page_ptr && ctx->page_size) |
101 | 0 | munmap(ctx->page_ptr, ctx->page_size); |
102 | 0 | if (ctx->vma_ptr && ctx->vma_size) |
103 | 0 | munmap(ctx->vma_ptr, ctx->vma_size); |
104 | 0 | ctx->page_ptr = nullptr; |
105 | 0 | ctx->vma_ptr = nullptr; |
106 | 0 | ctx->page_size = 0; |
107 | 0 | ctx->vma_size = 0; |
108 | 0 | ctx->pages_used = 0; |
109 | 0 | ctx->page_limit = 0; |
110 | |
|
111 | 0 | delete ctx; |
112 | |
|
113 | 0 | return; |
114 | 0 | } |
115 | | |
116 | | #ifdef HAVE_GCC_WARNING_ZERO_AS_NULL_POINTER_CONSTANT |
117 | | #pragma GCC diagnostic push |
118 | | #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" |
119 | | #endif |
120 | | static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; |
121 | | #ifdef HAVE_GCC_WARNING_ZERO_AS_NULL_POINTER_CONSTANT |
122 | | #pragma GCC diagnostic pop |
123 | | #endif |
124 | | |
125 | | static int64_t get_page_limit() |
126 | 0 | { |
127 | 0 | int64_t retval; |
128 | 0 | const char *variable = CPLGetConfigOption(GDAL_UFFD_LIMIT, nullptr); |
129 | |
|
130 | 0 | if (variable && sscanf(variable, "%" PRId64, &retval) == 1) |
131 | 0 | return retval; |
132 | 0 | else |
133 | 0 | return -1; |
134 | 0 | } |
135 | | |
136 | | static void cpl_uffd_fault_handler(void *ptr) |
137 | 0 | { |
138 | 0 | struct cpl_uffd_context *ctx = static_cast<struct cpl_uffd_context *>(ptr); |
139 | 0 | struct uffdio_copy uffdio_copy; |
140 | 0 | struct pollfd pollfd; |
141 | | |
142 | | // Setup pollfd structure |
143 | 0 | pollfd.fd = ctx->uffd; |
144 | 0 | pollfd.events = POLLIN; |
145 | | |
146 | | // Open asset for reading |
147 | 0 | VSILFILE *file = VSIFOpenL(ctx->filename.c_str(), "rb"); |
148 | |
|
149 | 0 | if (!file) |
150 | 0 | return; |
151 | | |
152 | | // Loop until told to stop |
153 | 0 | while (ctx->keep_going) |
154 | 0 | { |
155 | | // Poll for event |
156 | 0 | if (poll(&pollfd, 1, 16) == -1) |
157 | 0 | break; // 60Hz when no demand |
158 | 0 | if ((pollfd.revents & POLLERR) || (pollfd.revents & POLLNVAL)) |
159 | 0 | break; |
160 | 0 | if (!(pollfd.revents & POLLIN)) |
161 | 0 | continue; |
162 | | |
163 | | // Read page fault events |
164 | 0 | ssize_t bytes_read = static_cast<ssize_t>( |
165 | 0 | read(ctx->uffd, ctx->uffd_msgs, MAX_MESSAGES * sizeof(uffd_msg))); |
166 | 0 | if (bytes_read < 1) |
167 | 0 | { |
168 | 0 | if (errno == EWOULDBLOCK) |
169 | 0 | continue; |
170 | 0 | else |
171 | 0 | break; |
172 | 0 | } |
173 | | |
174 | | // If too many pages are in use, evict all pages (evict them from |
175 | | // RAM and swap, not just to swap). It is impossible to control |
176 | | // which/when threads access the VMA, so access to the VMA has to |
177 | | // forbidden while the activity is in progress. |
178 | | // |
179 | | // That is done by (1) installing special handlers for SIGSEGV and |
180 | | // SIGBUS, (2) mprotecting the VMA so that any threads accessing |
181 | | // it receive either SIGSEGV or SIGBUS (which one is apparently a |
182 | | // function of the C library, at least on one non-Linux GNU |
183 | | // system[1]), (3) unregistering the VMA from userfaultfd, |
184 | | // remapping the VMA to evict the pages, registering the VMA |
185 | | // again, (4) making the VMA accessible again, and finally (5) |
186 | | // restoring the previous signal-handling behavior. |
187 | | // |
188 | | // [1] https://lists.debian.org/debian-bsd/2011/05/msg00032.html |
189 | 0 | if (ctx->page_limit > 0) |
190 | 0 | { |
191 | 0 | pthread_mutex_lock(&mutex); |
192 | 0 | if (ctx->pages_used > ctx->page_limit) |
193 | 0 | { |
194 | 0 | struct sigaction segv; |
195 | 0 | struct sigaction old_segv; |
196 | 0 | struct sigaction bus; |
197 | 0 | struct sigaction old_bus; |
198 | |
|
199 | 0 | memset(&segv, 0, sizeof(segv)); |
200 | 0 | memset(&old_segv, 0, sizeof(old_segv)); |
201 | 0 | memset(&bus, 0, sizeof(bus)); |
202 | 0 | memset(&old_bus, 0, sizeof(old_bus)); |
203 | | |
204 | | // Step 1 from the block comment above |
205 | 0 | segv.sa_handler = signal_handler; |
206 | 0 | bus.sa_handler = signal_handler; |
207 | 0 | if (sigaction(SIGSEGV, &segv, &old_segv) == -1) |
208 | 0 | { |
209 | 0 | CPLError( |
210 | 0 | CE_Failure, CPLE_AppDefined, |
211 | 0 | "cpl_uffd_fault_handler: sigaction(SIGSEGV) failed"); |
212 | 0 | pthread_mutex_unlock(&mutex); |
213 | 0 | break; |
214 | 0 | } |
215 | 0 | if (sigaction(SIGBUS, &bus, &old_bus) == -1) |
216 | 0 | { |
217 | 0 | CPLError( |
218 | 0 | CE_Failure, CPLE_AppDefined, |
219 | 0 | "cpl_uffd_fault_handler: sigaction(SIGBUS) failed"); |
220 | 0 | pthread_mutex_unlock(&mutex); |
221 | 0 | break; |
222 | 0 | } |
223 | | |
224 | | // WARNING: LACK OF THREAD-SAFETY. |
225 | | // |
226 | | // For example, if a user program (or another part of the |
227 | | // library) installs a SIGSEGV or SIGBUS handler from another |
228 | | // thread after this one has installed its handlers but before |
229 | | // this one uninstalls its handlers, the intervening handler |
230 | | // will be eliminated. There are other examples, as well, but |
231 | | // there can only be a problems with other threads because the |
232 | | // faulting thread is blocked here. |
233 | | // |
234 | | // This implies that one should not use cpl_virtualmem.h API |
235 | | // while other threads are actively generating faults that use |
236 | | // this mechanism. |
237 | | // |
238 | | // Having multiple active threads that use this mechanism but |
239 | | // with no changes to signal-handling in other threads is NOT a |
240 | | // problem. |
241 | | |
242 | | // Step 2 |
243 | 0 | if (mprotect(ctx->vma_ptr, ctx->vma_size, PROT_NONE) == -1) |
244 | 0 | { |
245 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
246 | 0 | "cpl_uffd_fault_handler: mprotect() failed"); |
247 | 0 | pthread_mutex_unlock(&mutex); |
248 | 0 | break; |
249 | 0 | } |
250 | | |
251 | | // Step 3 |
252 | 0 | if (ioctl(ctx->uffd, UFFDIO_UNREGISTER, &ctx->uffdio_register)) |
253 | 0 | { |
254 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
255 | 0 | "cpl_uffd_fault_handler: ioctl(UFFDIO_UNREGISTER) " |
256 | 0 | "failed"); |
257 | 0 | pthread_mutex_unlock(&mutex); |
258 | 0 | break; |
259 | 0 | } |
260 | 0 | ctx->vma_ptr = |
261 | 0 | mmap(ctx->vma_ptr, ctx->vma_size, PROT_NONE, |
262 | 0 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); |
263 | 0 | if (ctx->vma_ptr == BAD_MMAP) |
264 | 0 | { |
265 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
266 | 0 | "cpl_uffd_fault_handler: mmap() failed"); |
267 | 0 | ctx->vma_ptr = nullptr; |
268 | 0 | pthread_mutex_unlock(&mutex); |
269 | 0 | break; |
270 | 0 | } |
271 | 0 | ctx->pages_used = 0; |
272 | 0 | if (ioctl(ctx->uffd, UFFDIO_REGISTER, &ctx->uffdio_register)) |
273 | 0 | { |
274 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
275 | 0 | "cpl_uffd_fault_handler: ioctl(UFFDIO_REGISTER) " |
276 | 0 | "failed"); |
277 | 0 | pthread_mutex_unlock(&mutex); |
278 | 0 | break; |
279 | 0 | } |
280 | | |
281 | | // Step 4. Problem: A thread might attempt to read here (before |
282 | | // the mprotect) and receive a SIGSEGV or SIGBUS. |
283 | 0 | if (mprotect(ctx->vma_ptr, ctx->vma_size, PROT_READ) == -1) |
284 | 0 | { |
285 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
286 | 0 | "cpl_uffd_fault_handler: mprotect() failed"); |
287 | 0 | pthread_mutex_unlock(&mutex); |
288 | 0 | break; |
289 | 0 | } |
290 | | |
291 | | // Step 5. Solution: Cannot unregister special handlers before |
292 | | // any such threads have been handled by them, so sleep for |
293 | | // 1/100th of a second. |
294 | | // Coverity complains about sleeping under a mutex |
295 | 0 | #ifndef __COVERITY__ |
296 | | // coverity[sleep] |
297 | 0 | usleep(10000); |
298 | 0 | #endif |
299 | 0 | if (sigaction(SIGSEGV, &old_segv, nullptr) == -1) |
300 | 0 | { |
301 | 0 | CPLError( |
302 | 0 | CE_Failure, CPLE_AppDefined, |
303 | 0 | "cpl_uffd_fault_handler: sigaction(SIGSEGV) failed"); |
304 | 0 | pthread_mutex_unlock(&mutex); |
305 | 0 | break; |
306 | 0 | } |
307 | 0 | if (sigaction(SIGBUS, &old_bus, nullptr) == -1) |
308 | 0 | { |
309 | 0 | CPLError( |
310 | 0 | CE_Failure, CPLE_AppDefined, |
311 | 0 | "cpl_uffd_fault_handler: sigaction(SIGBUS) failed"); |
312 | 0 | pthread_mutex_unlock(&mutex); |
313 | 0 | break; |
314 | 0 | } |
315 | 0 | } |
316 | 0 | pthread_mutex_unlock(&mutex); |
317 | 0 | } |
318 | | |
319 | | // Handle page fault events |
320 | 0 | for (int i = 0; i < static_cast<int>(bytes_read / sizeof(uffd_msg)); |
321 | 0 | ++i) |
322 | 0 | { |
323 | 0 | const uintptr_t fault_addr = |
324 | 0 | ctx->uffd_msgs[i].arg.pagefault.address & ~(ctx->page_size - 1); |
325 | 0 | const uintptr_t offset = |
326 | 0 | fault_addr - reinterpret_cast<uintptr_t>(ctx->vma_ptr); |
327 | 0 | size_t bytes_needed = static_cast<size_t>(ctx->file_size - offset); |
328 | 0 | if (bytes_needed > ctx->page_size) |
329 | 0 | bytes_needed = ctx->page_size; |
330 | | |
331 | | // Copy data into page |
332 | 0 | if (VSIFSeekL(file, offset, SEEK_SET) != 0 || |
333 | 0 | VSIFReadL(ctx->page_ptr, bytes_needed, 1, file) != 1) |
334 | 0 | { |
335 | 0 | CPLError(CE_Failure, CPLE_FileIO, |
336 | 0 | "Cannot get %d bytes at offset " CPL_FRMT_GUIB " of " |
337 | 0 | "file %s", |
338 | 0 | static_cast<int>(bytes_needed), |
339 | 0 | static_cast<GUIntBig>(offset), ctx->filename.c_str()); |
340 | 0 | memset(ctx->page_ptr, 0, bytes_needed); |
341 | 0 | } |
342 | 0 | ctx->pages_used++; |
343 | | |
344 | | // Use the page to fulfill the page fault |
345 | 0 | uffdio_copy.src = reinterpret_cast<uintptr_t>(ctx->page_ptr); |
346 | 0 | uffdio_copy.dst = fault_addr; |
347 | 0 | uffdio_copy.len = static_cast<uintptr_t>(ctx->page_size); |
348 | 0 | uffdio_copy.mode = 0; |
349 | 0 | uffdio_copy.copy = 0; |
350 | 0 | if (ioctl(ctx->uffd, UFFDIO_COPY, &uffdio_copy) == -1) |
351 | 0 | { |
352 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
353 | 0 | "ioctl(UFFDIO_COPY) failed"); |
354 | 0 | break; |
355 | 0 | } |
356 | 0 | } |
357 | 0 | } // end of while loop |
358 | | |
359 | | // Return resources |
360 | 0 | VSIFCloseL(file); |
361 | 0 | } |
362 | | |
363 | | static void signal_handler(int signal) |
364 | 0 | { |
365 | 0 | if (signal == SIGSEGV || signal == SIGBUS) |
366 | 0 | sched_yield(); |
367 | 0 | return; |
368 | 0 | } |
369 | | |
370 | | bool CPLIsUserFaultMappingSupported() |
371 | 0 | { |
372 | | // Check the Linux kernel version. Linux 4.3 or newer is needed for |
373 | | // userfaultfd. |
374 | 0 | int major = 0, minor = 0; |
375 | 0 | struct utsname utsname; |
376 | |
|
377 | 0 | if (uname(&utsname)) |
378 | 0 | return false; |
379 | 0 | sscanf(utsname.release, "%d.%d", &major, &minor); |
380 | 0 | if (major < 4) |
381 | 0 | return false; |
382 | 0 | if (major == 4 && minor < 3) |
383 | 0 | return false; |
384 | | |
385 | 0 | static int nEnableUserFaultFD = -1; |
386 | 0 | if (nEnableUserFaultFD < 0) |
387 | 0 | { |
388 | 0 | nEnableUserFaultFD = |
389 | 0 | CPLTestBool(CPLGetConfigOption("CPL_ENABLE_USERFAULTFD", "YES")); |
390 | 0 | } |
391 | 0 | if (!nEnableUserFaultFD) |
392 | 0 | return false; |
393 | | |
394 | | // Since kernel 5.2, raw userfaultfd is disabled since if the fault |
395 | | // originates from the kernel, that could lead to easier exploitation of |
396 | | // kernel bugs. Since kernel 5.11, UFFD_USER_MODE_ONLY can be used to |
397 | | // restrict the mechanism to faults occurring only from user space, which is |
398 | | // likely to be our use case. |
399 | 0 | int uffd = static_cast<int>(syscall( |
400 | 0 | __NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)); |
401 | 0 | if (uffd == -1 && errno == EINVAL) |
402 | 0 | uffd = |
403 | 0 | static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK)); |
404 | 0 | if (uffd == -1) |
405 | 0 | { |
406 | 0 | const int l_errno = errno; |
407 | 0 | if (l_errno == EPERM) |
408 | 0 | { |
409 | | // Since kernel 5.2 |
410 | 0 | CPLDebug( |
411 | 0 | "GDAL", |
412 | 0 | "CPLIsUserFaultMappingSupported(): syscall(__NR_userfaultfd) " |
413 | 0 | "failed: " |
414 | 0 | "insufficient permission. add CAP_SYS_PTRACE capability, or " |
415 | 0 | "set /proc/sys/vm/unprivileged_userfaultfd to 1"); |
416 | 0 | } |
417 | 0 | else |
418 | 0 | { |
419 | 0 | CPLDebug( |
420 | 0 | "GDAL", |
421 | 0 | "CPLIsUserFaultMappingSupported(): syscall(__NR_userfaultfd) " |
422 | 0 | "failed: " |
423 | 0 | "error = %d", |
424 | 0 | l_errno); |
425 | 0 | } |
426 | 0 | nEnableUserFaultFD = false; |
427 | 0 | return false; |
428 | 0 | } |
429 | 0 | close(uffd); |
430 | 0 | nEnableUserFaultFD = true; |
431 | 0 | return true; |
432 | 0 | } |
433 | | |
434 | | /* |
435 | | * Returns nullptr on failure, a valid pointer on success. |
436 | | */ |
437 | | cpl_uffd_context *CPLCreateUserFaultMapping(const char *pszFilename, |
438 | | void **ppVma, uint64_t *pnVmaSize) |
439 | 0 | { |
440 | 0 | VSIStatBufL statbuf; |
441 | 0 | struct cpl_uffd_context *ctx = nullptr; |
442 | |
|
443 | 0 | if (!CPLIsUserFaultMappingSupported()) |
444 | 0 | { |
445 | 0 | CPLError( |
446 | 0 | CE_Failure, CPLE_NotSupported, |
447 | 0 | "CPLCreateUserFaultMapping(): Linux kernel 4.3 or newer needed"); |
448 | 0 | return nullptr; |
449 | 0 | } |
450 | | |
451 | | // Get the size of the asset |
452 | 0 | if (VSIStatL(pszFilename, &statbuf)) |
453 | 0 | return nullptr; |
454 | | |
455 | | // Setup the `cpl_uffd_context` struct |
456 | 0 | ctx = new cpl_uffd_context(); |
457 | 0 | ctx->keep_going = true; |
458 | 0 | ctx->filename = std::string(pszFilename); |
459 | 0 | ctx->page_limit = get_page_limit(); |
460 | 0 | ctx->pages_used = 0; |
461 | 0 | ctx->file_size = static_cast<size_t>(statbuf.st_size); |
462 | 0 | ctx->page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE)); |
463 | 0 | ctx->vma_size = static_cast<size_t>( |
464 | 0 | ((static_cast<vsi_l_offset>(statbuf.st_size) / ctx->page_size) + 1) * |
465 | 0 | ctx->page_size); |
466 | 0 | if (ctx->vma_size < static_cast<vsi_l_offset>(statbuf.st_size)) |
467 | 0 | { // Check for overflow |
468 | 0 | uffd_cleanup(ctx); |
469 | 0 | CPLError( |
470 | 0 | CE_Failure, CPLE_AppDefined, |
471 | 0 | "CPLCreateUserFaultMapping(): File too large for architecture"); |
472 | 0 | return nullptr; |
473 | 0 | } |
474 | | |
475 | | // If the mmap failed, free resources and return |
476 | 0 | ctx->vma_ptr = mmap(nullptr, ctx->vma_size, PROT_READ, |
477 | 0 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
478 | 0 | if (ctx->vma_ptr == BAD_MMAP) |
479 | 0 | { |
480 | 0 | ctx->vma_ptr = nullptr; |
481 | 0 | uffd_cleanup(ctx); |
482 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
483 | 0 | "CPLCreateUserFaultMapping(): mmap() failed"); |
484 | 0 | return nullptr; |
485 | 0 | } |
486 | | |
487 | | // Attempt to acquire a scratch page to use to fulfill requests. |
488 | 0 | ctx->page_ptr = |
489 | 0 | mmap(nullptr, static_cast<size_t>(ctx->page_size), |
490 | 0 | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
491 | 0 | if (ctx->page_ptr == BAD_MMAP) |
492 | 0 | { |
493 | 0 | ctx->page_ptr = nullptr; |
494 | 0 | uffd_cleanup(ctx); |
495 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
496 | 0 | "CPLCreateUserFaultMapping(): mmap() failed"); |
497 | 0 | return nullptr; |
498 | 0 | } |
499 | | |
500 | | // Get userfaultfd |
501 | | |
502 | | // Since kernel 5.2, raw userfaultfd is disabled since if the fault |
503 | | // originates from the kernel, that could lead to easier exploitation of |
504 | | // kernel bugs. Since kernel 5.11, UFFD_USER_MODE_ONLY can be used to |
505 | | // restrict the mechanism to faults occurring only from user space, which is |
506 | | // likely to be our use case. |
507 | 0 | ctx->uffd = static_cast<int>(syscall( |
508 | 0 | __NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)); |
509 | 0 | if (ctx->uffd == -1 && errno == EINVAL) |
510 | 0 | ctx->uffd = |
511 | 0 | static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK)); |
512 | 0 | if (ctx->uffd == -1) |
513 | 0 | { |
514 | 0 | const int l_errno = errno; |
515 | 0 | ctx->uffd = -1; |
516 | 0 | uffd_cleanup(ctx); |
517 | 0 | if (l_errno == EPERM) |
518 | 0 | { |
519 | | // Since kernel 5.2 |
520 | 0 | CPLError( |
521 | 0 | CE_Failure, CPLE_AppDefined, |
522 | 0 | "CPLCreateUserFaultMapping(): syscall(__NR_userfaultfd) " |
523 | 0 | "failed: " |
524 | 0 | "insufficient permission. add CAP_SYS_PTRACE capability, or " |
525 | 0 | "set /proc/sys/vm/unprivileged_userfaultfd to 1"); |
526 | 0 | } |
527 | 0 | else |
528 | 0 | { |
529 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
530 | 0 | "CPLCreateUserFaultMapping(): syscall(__NR_userfaultfd) " |
531 | 0 | "failed: " |
532 | 0 | "error = %d", |
533 | 0 | l_errno); |
534 | 0 | } |
535 | 0 | return nullptr; |
536 | 0 | } |
537 | | |
538 | | // Query API |
539 | 0 | { |
540 | 0 | struct uffdio_api uffdio_api = {}; |
541 | |
|
542 | 0 | uffdio_api.api = UFFD_API; |
543 | 0 | uffdio_api.features = 0; |
544 | |
|
545 | 0 | if (ioctl(ctx->uffd, UFFDIO_API, &uffdio_api) == -1) |
546 | 0 | { |
547 | 0 | uffd_cleanup(ctx); |
548 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
549 | 0 | "CPLCreateUserFaultMapping(): ioctl(UFFDIO_API) failed"); |
550 | 0 | return nullptr; |
551 | 0 | } |
552 | 0 | } |
553 | | |
554 | | // Register memory range |
555 | 0 | ctx->uffdio_register.range.start = |
556 | 0 | reinterpret_cast<uintptr_t>(ctx->vma_ptr); |
557 | 0 | ctx->uffdio_register.range.len = ctx->vma_size; |
558 | 0 | ctx->uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; |
559 | |
|
560 | 0 | if (ioctl(ctx->uffd, UFFDIO_REGISTER, &ctx->uffdio_register) == -1) |
561 | 0 | { |
562 | 0 | uffd_cleanup(ctx); |
563 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
564 | 0 | "CPLCreateUserFaultMapping(): ioctl(UFFDIO_REGISTER) failed"); |
565 | 0 | return nullptr; |
566 | 0 | } |
567 | | |
568 | | // Start handler thread |
569 | 0 | ctx->thread = CPLCreateJoinableThread(cpl_uffd_fault_handler, ctx); |
570 | 0 | if (ctx->thread == nullptr) |
571 | 0 | { |
572 | 0 | CPLError( |
573 | 0 | CE_Failure, CPLE_AppDefined, |
574 | 0 | "CPLCreateUserFaultMapping(): CPLCreateJoinableThread() failed"); |
575 | 0 | uffd_cleanup(ctx); |
576 | 0 | return nullptr; |
577 | 0 | } |
578 | | |
579 | 0 | *ppVma = ctx->vma_ptr; |
580 | 0 | *pnVmaSize = ctx->vma_size; |
581 | 0 | return ctx; |
582 | 0 | } |
583 | | |
584 | | void CPLDeleteUserFaultMapping(cpl_uffd_context *ctx) |
585 | 13.7k | { |
586 | 13.7k | if (ctx) |
587 | 0 | { |
588 | 0 | uffd_cleanup(ctx); |
589 | 0 | } |
590 | 13.7k | } |
591 | | |
592 | | #endif // ENABLE_UFFD |