/src/gdal/port/cpl_userfaultfd.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Name: cpl_userfaultfd.cpp |
4 | | * Project: CPL - Common Portability Library |
5 | | * Purpose: Use userfaultfd and VSIL to service page faults |
6 | | * Author: James McClain, <james.mcclain@gmail.com> |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 2018, Dr. James McClain <james.mcclain@gmail.com> |
10 | | * |
11 | | * SPDX-License-Identifier: MIT |
12 | | ****************************************************************************/ |
13 | | |
14 | | #ifdef ENABLE_UFFD |
15 | | |
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <cstdlib>
#include <cstring>
#include <string>

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <linux/userfaultfd.h>

#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_userfaultfd.h"
#include "cpl_string.h"
#include "cpl_vsi.h"
#include "cpl_multiproc.h"
44 | | |
#ifndef UFFD_USER_MODE_ONLY
// The UFFD_USER_MODE_ONLY flag got added in kernel 5.11 which is the one
// used by Ubuntu 20.04, but the linux-libc-dev package corresponds to 5.4
#define UFFD_USER_MODE_ONLY 1
#endif

// Value the results of mmap() are compared against in this file to detect
// failure.
#define BAD_MMAP (reinterpret_cast<void *>(-1))
// Number of uffd_msg slots in cpl_uffd_context::uffd_msgs, i.e. the largest
// batch of events requested by one read() on the userfaultfd descriptor.
#define MAX_MESSAGES (0x100)

// Forward declarations of the file-local helpers defined further down.
static int64_t get_page_limit();
static void cpl_uffd_fault_handler(void *ptr);
static void signal_handler(int signal);
static void uffd_cleanup(void *ptr);
58 | | |
59 | | struct cpl_uffd_context |
60 | | { |
61 | | bool keep_going = false; |
62 | | |
63 | | int uffd = -1; |
64 | | struct uffdio_register uffdio_register = {}; |
65 | | struct uffd_msg uffd_msgs[MAX_MESSAGES]; |
66 | | |
67 | | std::string filename = std::string(""); |
68 | | |
69 | | int64_t page_limit = -1; |
70 | | int64_t pages_used = 0; |
71 | | |
72 | | size_t file_size = 0; |
73 | | size_t page_size = 0; |
74 | | void *page_ptr = nullptr; |
75 | | size_t vma_size = 0; |
76 | | void *vma_ptr = nullptr; |
77 | | CPLJoinableThread *thread = nullptr; |
78 | | }; |
79 | | |
80 | | static void uffd_cleanup(void *ptr) |
81 | 0 | { |
82 | 0 | struct cpl_uffd_context *ctx = static_cast<struct cpl_uffd_context *>(ptr); |
83 | |
|
84 | 0 | if (!ctx) |
85 | 0 | return; |
86 | | |
87 | | // Signal shutdown |
88 | 0 | ctx->keep_going = false; |
89 | 0 | if (ctx->thread) |
90 | 0 | { |
91 | 0 | CPLJoinThread(ctx->thread); |
92 | 0 | ctx->thread = nullptr; |
93 | 0 | } |
94 | |
|
95 | 0 | if (ctx->uffd != -1) |
96 | 0 | { |
97 | 0 | ioctl(ctx->uffd, UFFDIO_UNREGISTER, &ctx->uffdio_register); |
98 | 0 | close(ctx->uffd); |
99 | 0 | ctx->uffd = -1; |
100 | 0 | } |
101 | 0 | if (ctx->page_ptr && ctx->page_size) |
102 | 0 | munmap(ctx->page_ptr, ctx->page_size); |
103 | 0 | if (ctx->vma_ptr && ctx->vma_size) |
104 | 0 | munmap(ctx->vma_ptr, ctx->vma_size); |
105 | 0 | ctx->page_ptr = nullptr; |
106 | 0 | ctx->vma_ptr = nullptr; |
107 | 0 | ctx->page_size = 0; |
108 | 0 | ctx->vma_size = 0; |
109 | 0 | ctx->pages_used = 0; |
110 | 0 | ctx->page_limit = 0; |
111 | |
|
112 | 0 | delete ctx; |
113 | |
|
114 | 0 | return; |
115 | 0 | } |
116 | | |
// File-wide mutex, shared by all mappings: cpl_uffd_fault_handler() holds it
// while it checks the page budget and runs the page-eviction sequence.
// The pragmas silence -Wzero-as-null-pointer-constant around the
// PTHREAD_MUTEX_INITIALIZER expansion.
#ifdef HAVE_GCC_WARNING_ZERO_AS_NULL_POINTER_CONSTANT
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#ifdef HAVE_GCC_WARNING_ZERO_AS_NULL_POINTER_CONSTANT
#pragma GCC diagnostic pop
#endif
125 | | |
126 | | static int64_t get_page_limit() |
127 | 0 | { |
128 | 0 | int64_t retval; |
129 | 0 | const char *variable = CPLGetConfigOption(GDAL_UFFD_LIMIT, nullptr); |
130 | |
|
131 | 0 | if (variable && sscanf(variable, "%" PRId64, &retval) == 1) |
132 | 0 | return retval; |
133 | 0 | else |
134 | 0 | return -1; |
135 | 0 | } |
136 | | |
137 | | static void cpl_uffd_fault_handler(void *ptr) |
138 | 0 | { |
139 | 0 | struct cpl_uffd_context *ctx = static_cast<struct cpl_uffd_context *>(ptr); |
140 | 0 | struct uffdio_copy uffdio_copy; |
141 | 0 | struct pollfd pollfd; |
142 | | |
143 | | // Setup pollfd structure |
144 | 0 | pollfd.fd = ctx->uffd; |
145 | 0 | pollfd.events = POLLIN; |
146 | | |
147 | | // Open asset for reading |
148 | 0 | VSILFILE *file = VSIFOpenL(ctx->filename.c_str(), "rb"); |
149 | |
|
150 | 0 | if (!file) |
151 | 0 | return; |
152 | | |
153 | | // Loop until told to stop |
154 | 0 | while (ctx->keep_going) |
155 | 0 | { |
156 | | // Poll for event |
157 | 0 | if (poll(&pollfd, 1, 16) == -1) |
158 | 0 | break; // 60Hz when no demand |
159 | 0 | if ((pollfd.revents & POLLERR) || (pollfd.revents & POLLNVAL)) |
160 | 0 | break; |
161 | 0 | if (!(pollfd.revents & POLLIN)) |
162 | 0 | continue; |
163 | | |
164 | | // Read page fault events |
165 | 0 | ssize_t bytes_read = static_cast<ssize_t>( |
166 | 0 | read(ctx->uffd, ctx->uffd_msgs, MAX_MESSAGES * sizeof(uffd_msg))); |
167 | 0 | if (bytes_read < 1) |
168 | 0 | { |
169 | 0 | if (errno == EWOULDBLOCK) |
170 | 0 | continue; |
171 | 0 | else |
172 | 0 | break; |
173 | 0 | } |
174 | | |
175 | | // If too many pages are in use, evict all pages (evict them from |
176 | | // RAM and swap, not just to swap). It is impossible to control |
177 | | // which/when threads access the VMA, so access to the VMA has to |
178 | | // forbidden while the activity is in progress. |
179 | | // |
180 | | // That is done by (1) installing special handlers for SIGSEGV and |
181 | | // SIGBUS, (2) mprotecting the VMA so that any threads accessing |
182 | | // it receive either SIGSEGV or SIGBUS (which one is apparently a |
183 | | // function of the C library, at least on one non-Linux GNU |
184 | | // system[1]), (3) unregistering the VMA from userfaultfd, |
185 | | // remapping the VMA to evict the pages, registering the VMA |
186 | | // again, (4) making the VMA accessible again, and finally (5) |
187 | | // restoring the previous signal-handling behavior. |
188 | | // |
189 | | // [1] https://lists.debian.org/debian-bsd/2011/05/msg00032.html |
190 | 0 | if (ctx->page_limit > 0) |
191 | 0 | { |
192 | 0 | pthread_mutex_lock(&mutex); |
193 | 0 | if (ctx->pages_used > ctx->page_limit) |
194 | 0 | { |
195 | 0 | struct sigaction segv; |
196 | 0 | struct sigaction old_segv; |
197 | 0 | struct sigaction bus; |
198 | 0 | struct sigaction old_bus; |
199 | |
|
200 | 0 | memset(&segv, 0, sizeof(segv)); |
201 | 0 | memset(&old_segv, 0, sizeof(old_segv)); |
202 | 0 | memset(&bus, 0, sizeof(bus)); |
203 | 0 | memset(&old_bus, 0, sizeof(old_bus)); |
204 | | |
205 | | // Step 1 from the block comment above |
206 | 0 | segv.sa_handler = signal_handler; |
207 | 0 | bus.sa_handler = signal_handler; |
208 | 0 | if (sigaction(SIGSEGV, &segv, &old_segv) == -1) |
209 | 0 | { |
210 | 0 | CPLError( |
211 | 0 | CE_Failure, CPLE_AppDefined, |
212 | 0 | "cpl_uffd_fault_handler: sigaction(SIGSEGV) failed"); |
213 | 0 | pthread_mutex_unlock(&mutex); |
214 | 0 | break; |
215 | 0 | } |
216 | 0 | if (sigaction(SIGBUS, &bus, &old_bus) == -1) |
217 | 0 | { |
218 | 0 | CPLError( |
219 | 0 | CE_Failure, CPLE_AppDefined, |
220 | 0 | "cpl_uffd_fault_handler: sigaction(SIGBUS) failed"); |
221 | 0 | pthread_mutex_unlock(&mutex); |
222 | 0 | break; |
223 | 0 | } |
224 | | |
225 | | // WARNING: LACK OF THREAD-SAFETY. |
226 | | // |
227 | | // For example, if a user program (or another part of the |
228 | | // library) installs a SIGSEGV or SIGBUS handler from another |
229 | | // thread after this one has installed its handlers but before |
230 | | // this one uninstalls its handlers, the intervening handler |
231 | | // will be eliminated. There are other examples, as well, but |
232 | | // there can only be a problems with other threads because the |
233 | | // faulting thread is blocked here. |
234 | | // |
235 | | // This implies that one should not use cpl_virtualmem.h API |
236 | | // while other threads are actively generating faults that use |
237 | | // this mechanism. |
238 | | // |
239 | | // Having multiple active threads that use this mechanism but |
240 | | // with no changes to signal-handling in other threads is NOT a |
241 | | // problem. |
242 | | |
243 | | // Step 2 |
244 | 0 | if (mprotect(ctx->vma_ptr, ctx->vma_size, PROT_NONE) == -1) |
245 | 0 | { |
246 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
247 | 0 | "cpl_uffd_fault_handler: mprotect() failed"); |
248 | 0 | pthread_mutex_unlock(&mutex); |
249 | 0 | break; |
250 | 0 | } |
251 | | |
252 | | // Step 3 |
253 | 0 | if (ioctl(ctx->uffd, UFFDIO_UNREGISTER, &ctx->uffdio_register)) |
254 | 0 | { |
255 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
256 | 0 | "cpl_uffd_fault_handler: ioctl(UFFDIO_UNREGISTER) " |
257 | 0 | "failed"); |
258 | 0 | pthread_mutex_unlock(&mutex); |
259 | 0 | break; |
260 | 0 | } |
261 | 0 | ctx->vma_ptr = |
262 | 0 | mmap(ctx->vma_ptr, ctx->vma_size, PROT_NONE, |
263 | 0 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); |
264 | 0 | if (ctx->vma_ptr == BAD_MMAP) |
265 | 0 | { |
266 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
267 | 0 | "cpl_uffd_fault_handler: mmap() failed"); |
268 | 0 | ctx->vma_ptr = nullptr; |
269 | 0 | pthread_mutex_unlock(&mutex); |
270 | 0 | break; |
271 | 0 | } |
272 | 0 | ctx->pages_used = 0; |
273 | 0 | if (ioctl(ctx->uffd, UFFDIO_REGISTER, &ctx->uffdio_register)) |
274 | 0 | { |
275 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
276 | 0 | "cpl_uffd_fault_handler: ioctl(UFFDIO_REGISTER) " |
277 | 0 | "failed"); |
278 | 0 | pthread_mutex_unlock(&mutex); |
279 | 0 | break; |
280 | 0 | } |
281 | | |
282 | | // Step 4. Problem: A thread might attempt to read here (before |
283 | | // the mprotect) and receive a SIGSEGV or SIGBUS. |
284 | 0 | if (mprotect(ctx->vma_ptr, ctx->vma_size, PROT_READ) == -1) |
285 | 0 | { |
286 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
287 | 0 | "cpl_uffd_fault_handler: mprotect() failed"); |
288 | 0 | pthread_mutex_unlock(&mutex); |
289 | 0 | break; |
290 | 0 | } |
291 | | |
292 | | // Step 5. Solution: Cannot unregister special handlers before |
293 | | // any such threads have been handled by them, so sleep for |
294 | | // 1/100th of a second. |
295 | | // Coverity complains about sleeping under a mutex |
296 | 0 | #ifndef __COVERITY__ |
297 | | // coverity[sleep] |
298 | 0 | usleep(10000); |
299 | 0 | #endif |
300 | 0 | if (sigaction(SIGSEGV, &old_segv, nullptr) == -1) |
301 | 0 | { |
302 | 0 | CPLError( |
303 | 0 | CE_Failure, CPLE_AppDefined, |
304 | 0 | "cpl_uffd_fault_handler: sigaction(SIGSEGV) failed"); |
305 | 0 | pthread_mutex_unlock(&mutex); |
306 | 0 | break; |
307 | 0 | } |
308 | 0 | if (sigaction(SIGBUS, &old_bus, nullptr) == -1) |
309 | 0 | { |
310 | 0 | CPLError( |
311 | 0 | CE_Failure, CPLE_AppDefined, |
312 | 0 | "cpl_uffd_fault_handler: sigaction(SIGBUS) failed"); |
313 | 0 | pthread_mutex_unlock(&mutex); |
314 | 0 | break; |
315 | 0 | } |
316 | 0 | } |
317 | 0 | pthread_mutex_unlock(&mutex); |
318 | 0 | } |
319 | | |
320 | | // Handle page fault events |
321 | 0 | for (int i = 0; i < static_cast<int>(bytes_read / sizeof(uffd_msg)); |
322 | 0 | ++i) |
323 | 0 | { |
324 | 0 | const uintptr_t fault_addr = |
325 | 0 | ctx->uffd_msgs[i].arg.pagefault.address & ~(ctx->page_size - 1); |
326 | 0 | const uintptr_t offset = |
327 | 0 | fault_addr - reinterpret_cast<uintptr_t>(ctx->vma_ptr); |
328 | 0 | size_t bytes_needed = static_cast<size_t>(ctx->file_size - offset); |
329 | 0 | if (bytes_needed > ctx->page_size) |
330 | 0 | bytes_needed = ctx->page_size; |
331 | | |
332 | | // Copy data into page |
333 | 0 | if (VSIFSeekL(file, offset, SEEK_SET) != 0 || |
334 | 0 | VSIFReadL(ctx->page_ptr, bytes_needed, 1, file) != 1) |
335 | 0 | { |
336 | 0 | CPLError(CE_Failure, CPLE_FileIO, |
337 | 0 | "Cannot get %d bytes at offset " CPL_FRMT_GUIB " of " |
338 | 0 | "file %s", |
339 | 0 | static_cast<int>(bytes_needed), |
340 | 0 | static_cast<GUIntBig>(offset), ctx->filename.c_str()); |
341 | 0 | memset(ctx->page_ptr, 0, bytes_needed); |
342 | 0 | } |
343 | 0 | ctx->pages_used++; |
344 | | |
345 | | // Use the page to fulfill the page fault |
346 | 0 | uffdio_copy.src = reinterpret_cast<uintptr_t>(ctx->page_ptr); |
347 | 0 | uffdio_copy.dst = fault_addr; |
348 | 0 | uffdio_copy.len = static_cast<uintptr_t>(ctx->page_size); |
349 | 0 | uffdio_copy.mode = 0; |
350 | 0 | uffdio_copy.copy = 0; |
351 | 0 | if (ioctl(ctx->uffd, UFFDIO_COPY, &uffdio_copy) == -1) |
352 | 0 | { |
353 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
354 | 0 | "ioctl(UFFDIO_COPY) failed"); |
355 | 0 | break; |
356 | 0 | } |
357 | 0 | } |
358 | 0 | } // end of while loop |
359 | | |
360 | | // Return resources |
361 | 0 | VSIFCloseL(file); |
362 | 0 | } |
363 | | |
// Temporary SIGSEGV/SIGBUS handler installed by the fault-handler thread
// while it evicts pages: it gives up the processor and returns; any other
// signal number is ignored.
static void signal_handler(int signal)
{
    switch (signal)
    {
        case SIGSEGV:
        case SIGBUS:
            sched_yield();
            break;
        default:
            break;
    }
}
370 | | |
371 | | bool CPLIsUserFaultMappingSupported() |
372 | 0 | { |
373 | | // Check the Linux kernel version. Linux 4.3 or newer is needed for |
374 | | // userfaultfd. |
375 | 0 | int major = 0, minor = 0; |
376 | 0 | struct utsname utsname; |
377 | |
|
378 | 0 | if (uname(&utsname)) |
379 | 0 | return false; |
380 | 0 | sscanf(utsname.release, "%d.%d", &major, &minor); |
381 | 0 | if (major < 4) |
382 | 0 | return false; |
383 | 0 | if (major == 4 && minor < 3) |
384 | 0 | return false; |
385 | | |
386 | 0 | static int nEnableUserFaultFD = -1; |
387 | 0 | if (nEnableUserFaultFD < 0) |
388 | 0 | { |
389 | 0 | nEnableUserFaultFD = |
390 | 0 | CPLTestBool(CPLGetConfigOption("CPL_ENABLE_USERFAULTFD", "YES")); |
391 | 0 | } |
392 | 0 | if (!nEnableUserFaultFD) |
393 | 0 | return false; |
394 | | |
395 | | // Since kernel 5.2, raw userfaultfd is disabled since if the fault |
396 | | // originates from the kernel, that could lead to easier exploitation of |
397 | | // kernel bugs. Since kernel 5.11, UFFD_USER_MODE_ONLY can be used to |
398 | | // restrict the mechanism to faults occurring only from user space, which is |
399 | | // likely to be our use case. |
400 | 0 | int uffd = static_cast<int>(syscall( |
401 | 0 | __NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)); |
402 | 0 | if (uffd == -1 && errno == EINVAL) |
403 | 0 | uffd = |
404 | 0 | static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK)); |
405 | 0 | if (uffd == -1) |
406 | 0 | { |
407 | 0 | const int l_errno = errno; |
408 | 0 | if (l_errno == EPERM) |
409 | 0 | { |
410 | | // Since kernel 5.2 |
411 | 0 | CPLDebug( |
412 | 0 | "GDAL", |
413 | 0 | "CPLIsUserFaultMappingSupported(): syscall(__NR_userfaultfd) " |
414 | 0 | "failed: " |
415 | 0 | "insufficient permission. add CAP_SYS_PTRACE capability, or " |
416 | 0 | "set /proc/sys/vm/unprivileged_userfaultfd to 1"); |
417 | 0 | } |
418 | 0 | else |
419 | 0 | { |
420 | 0 | CPLDebug( |
421 | 0 | "GDAL", |
422 | 0 | "CPLIsUserFaultMappingSupported(): syscall(__NR_userfaultfd) " |
423 | 0 | "failed: " |
424 | 0 | "error = %d", |
425 | 0 | l_errno); |
426 | 0 | } |
427 | 0 | nEnableUserFaultFD = false; |
428 | 0 | return false; |
429 | 0 | } |
430 | 0 | close(uffd); |
431 | 0 | nEnableUserFaultFD = true; |
432 | 0 | return true; |
433 | 0 | } |
434 | | |
435 | | /* |
436 | | * Returns nullptr on failure, a valid pointer on success. |
437 | | */ |
438 | | cpl_uffd_context *CPLCreateUserFaultMapping(const char *pszFilename, |
439 | | void **ppVma, uint64_t *pnVmaSize) |
440 | 0 | { |
441 | 0 | VSIStatBufL statbuf; |
442 | 0 | struct cpl_uffd_context *ctx = nullptr; |
443 | |
|
444 | 0 | if (!CPLIsUserFaultMappingSupported()) |
445 | 0 | { |
446 | 0 | CPLError( |
447 | 0 | CE_Failure, CPLE_NotSupported, |
448 | 0 | "CPLCreateUserFaultMapping(): Linux kernel 4.3 or newer needed"); |
449 | 0 | return nullptr; |
450 | 0 | } |
451 | | |
452 | | // Get the size of the asset |
453 | 0 | if (VSIStatL(pszFilename, &statbuf)) |
454 | 0 | return nullptr; |
455 | | |
456 | | // Setup the `cpl_uffd_context` struct |
457 | 0 | ctx = new cpl_uffd_context(); |
458 | 0 | ctx->keep_going = true; |
459 | 0 | ctx->filename = std::string(pszFilename); |
460 | 0 | ctx->page_limit = get_page_limit(); |
461 | 0 | ctx->pages_used = 0; |
462 | 0 | ctx->file_size = static_cast<size_t>(statbuf.st_size); |
463 | 0 | ctx->page_size = static_cast<size_t>(std::max(1L, sysconf(_SC_PAGESIZE))); |
464 | 0 | ctx->vma_size = static_cast<size_t>( |
465 | 0 | ((static_cast<vsi_l_offset>(statbuf.st_size) / ctx->page_size) + 1) * |
466 | 0 | ctx->page_size); |
467 | 0 | if (ctx->vma_size < static_cast<vsi_l_offset>(statbuf.st_size)) |
468 | 0 | { // Check for overflow |
469 | 0 | uffd_cleanup(ctx); |
470 | 0 | CPLError( |
471 | 0 | CE_Failure, CPLE_AppDefined, |
472 | 0 | "CPLCreateUserFaultMapping(): File too large for architecture"); |
473 | 0 | return nullptr; |
474 | 0 | } |
475 | | |
476 | | // If the mmap failed, free resources and return |
477 | 0 | ctx->vma_ptr = mmap(nullptr, ctx->vma_size, PROT_READ, |
478 | 0 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
479 | 0 | if (ctx->vma_ptr == BAD_MMAP) |
480 | 0 | { |
481 | 0 | ctx->vma_ptr = nullptr; |
482 | 0 | uffd_cleanup(ctx); |
483 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
484 | 0 | "CPLCreateUserFaultMapping(): mmap() failed"); |
485 | 0 | return nullptr; |
486 | 0 | } |
487 | | |
488 | | // Attempt to acquire a scratch page to use to fulfill requests. |
489 | 0 | ctx->page_ptr = |
490 | 0 | mmap(nullptr, static_cast<size_t>(ctx->page_size), |
491 | 0 | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
492 | 0 | if (ctx->page_ptr == BAD_MMAP) |
493 | 0 | { |
494 | 0 | ctx->page_ptr = nullptr; |
495 | 0 | uffd_cleanup(ctx); |
496 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
497 | 0 | "CPLCreateUserFaultMapping(): mmap() failed"); |
498 | 0 | return nullptr; |
499 | 0 | } |
500 | | |
501 | | // Get userfaultfd |
502 | | |
503 | | // Since kernel 5.2, raw userfaultfd is disabled since if the fault |
504 | | // originates from the kernel, that could lead to easier exploitation of |
505 | | // kernel bugs. Since kernel 5.11, UFFD_USER_MODE_ONLY can be used to |
506 | | // restrict the mechanism to faults occurring only from user space, which is |
507 | | // likely to be our use case. |
508 | 0 | ctx->uffd = static_cast<int>(syscall( |
509 | 0 | __NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)); |
510 | 0 | if (ctx->uffd == -1 && errno == EINVAL) |
511 | 0 | ctx->uffd = |
512 | 0 | static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK)); |
513 | 0 | if (ctx->uffd == -1) |
514 | 0 | { |
515 | 0 | const int l_errno = errno; |
516 | 0 | ctx->uffd = -1; |
517 | 0 | uffd_cleanup(ctx); |
518 | 0 | if (l_errno == EPERM) |
519 | 0 | { |
520 | | // Since kernel 5.2 |
521 | 0 | CPLError( |
522 | 0 | CE_Failure, CPLE_AppDefined, |
523 | 0 | "CPLCreateUserFaultMapping(): syscall(__NR_userfaultfd) " |
524 | 0 | "failed: " |
525 | 0 | "insufficient permission. add CAP_SYS_PTRACE capability, or " |
526 | 0 | "set /proc/sys/vm/unprivileged_userfaultfd to 1"); |
527 | 0 | } |
528 | 0 | else |
529 | 0 | { |
530 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
531 | 0 | "CPLCreateUserFaultMapping(): syscall(__NR_userfaultfd) " |
532 | 0 | "failed: " |
533 | 0 | "error = %d", |
534 | 0 | l_errno); |
535 | 0 | } |
536 | 0 | return nullptr; |
537 | 0 | } |
538 | | |
539 | | // Query API |
540 | 0 | { |
541 | 0 | struct uffdio_api uffdio_api = {}; |
542 | |
|
543 | 0 | uffdio_api.api = UFFD_API; |
544 | 0 | uffdio_api.features = 0; |
545 | |
|
546 | 0 | if (ioctl(ctx->uffd, UFFDIO_API, &uffdio_api) == -1) |
547 | 0 | { |
548 | 0 | uffd_cleanup(ctx); |
549 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
550 | 0 | "CPLCreateUserFaultMapping(): ioctl(UFFDIO_API) failed"); |
551 | 0 | return nullptr; |
552 | 0 | } |
553 | 0 | } |
554 | | |
555 | | // Register memory range |
556 | 0 | ctx->uffdio_register.range.start = |
557 | 0 | reinterpret_cast<uintptr_t>(ctx->vma_ptr); |
558 | 0 | ctx->uffdio_register.range.len = ctx->vma_size; |
559 | 0 | ctx->uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; |
560 | |
|
561 | 0 | if (ioctl(ctx->uffd, UFFDIO_REGISTER, &ctx->uffdio_register) == -1) |
562 | 0 | { |
563 | 0 | uffd_cleanup(ctx); |
564 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
565 | 0 | "CPLCreateUserFaultMapping(): ioctl(UFFDIO_REGISTER) failed"); |
566 | 0 | return nullptr; |
567 | 0 | } |
568 | | |
569 | | // Start handler thread |
570 | 0 | ctx->thread = CPLCreateJoinableThread(cpl_uffd_fault_handler, ctx); |
571 | 0 | if (ctx->thread == nullptr) |
572 | 0 | { |
573 | 0 | CPLError( |
574 | 0 | CE_Failure, CPLE_AppDefined, |
575 | 0 | "CPLCreateUserFaultMapping(): CPLCreateJoinableThread() failed"); |
576 | 0 | uffd_cleanup(ctx); |
577 | 0 | return nullptr; |
578 | 0 | } |
579 | | |
580 | 0 | *ppVma = ctx->vma_ptr; |
581 | 0 | *pnVmaSize = ctx->vma_size; |
582 | 0 | return ctx; |
583 | 0 | } |
584 | | |
585 | | void CPLDeleteUserFaultMapping(cpl_uffd_context *ctx) |
586 | 13.5k | { |
587 | 13.5k | if (ctx) |
588 | 0 | { |
589 | 0 | uffd_cleanup(ctx); |
590 | 0 | } |
591 | 13.5k | } |
592 | | |
593 | | #endif // ENABLE_UFFD |