Coverage Report

Created: 2025-11-24 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/unit/src/nxt_isolation.c
Line
Count
Source
1
/*
2
 * Copyright (C) NGINX, Inc.
3
 */
4
5
#include <nxt_main.h>
6
#include <nxt_application.h>
7
#include <nxt_process.h>
8
#include <nxt_isolation.h>
9
#include <nxt_cgroup.h>
10
11
#if (NXT_HAVE_MNTENT_H)
12
#include <mntent.h>
13
#endif
14
15
16
static nxt_int_t nxt_isolation_set(nxt_task_t *task,
17
    nxt_conf_value_t *isolation, nxt_process_t *process);
18
19
#if (NXT_HAVE_CGROUP)
20
static nxt_int_t nxt_isolation_set_cgroup(nxt_task_t *task,
21
    nxt_conf_value_t *isolation, nxt_process_t *process);
22
#endif
23
24
#if (NXT_HAVE_LINUX_NS)
25
static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task,
26
    nxt_conf_value_t *isolation, nxt_process_t *process);
27
static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task,
28
    nxt_conf_value_t *namespaces, nxt_clone_t *clone);
29
#endif
30
31
#if (NXT_HAVE_CLONE_NEWUSER)
32
static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task,
33
    nxt_conf_value_t *isolation, nxt_process_t *process);
34
static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task,
35
    nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
36
    nxt_clone_credential_map_t *map);
37
static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task,
38
    nxt_process_t *process);
39
#endif
40
41
#if (NXT_HAVE_ISOLATION_ROOTFS)
42
static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task,
43
    nxt_conf_value_t *isolation, nxt_process_t *process);
44
static nxt_int_t nxt_isolation_set_automount(nxt_task_t *task,
45
    nxt_conf_value_t *isolation, nxt_process_t *process);
46
static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task,
47
    nxt_process_t *process, nxt_str_t *app_type);
48
static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task,
49
    nxt_process_t *process, nxt_array_t *syspaths);
50
static int nxt_cdecl nxt_isolation_mount_compare(const void *v1,
51
    const void *v2);
52
static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process);
53
54
#if (NXT_HAVE_LINUX_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
55
static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs);
56
static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task,
57
    const char *rootfs);
58
nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root);
59
#endif
60
61
static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path);
62
#endif
63
64
#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
65
static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task,
66
    nxt_conf_value_t *isolation, nxt_process_t *process);
67
#endif
68
69
70
nxt_int_t
71
nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
72
    nxt_mp_t *mp)
73
0
{
74
0
    nxt_int_t              cap_setid;
75
0
    nxt_int_t              ret;
76
0
    nxt_runtime_t          *rt;
77
0
    nxt_common_app_conf_t  *app_conf;
78
79
0
    rt = task->thread->runtime;
80
0
    app_conf = process->data.app;
81
0
    cap_setid = rt->capabilities.setid;
82
83
0
#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
84
0
    process->isolation.new_privs = 1;
85
0
#endif
86
87
0
    if (app_conf->isolation != NULL) {
88
0
        ret = nxt_isolation_set(task, app_conf->isolation, process);
89
0
        if (nxt_slow_path(ret != NXT_OK)) {
90
0
            return ret;
91
0
        }
92
0
    }
93
94
0
#if (NXT_HAVE_CLONE_NEWUSER)
95
0
    if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
96
0
        cap_setid = 1;
97
0
    }
98
0
#endif
99
100
0
    if (cap_setid) {
101
0
        ret = nxt_process_creds_set(task, process, &app_conf->user,
102
0
                                    &app_conf->group);
103
104
0
        if (nxt_slow_path(ret != NXT_OK)) {
105
0
            return ret;
106
0
        }
107
108
0
    } else {
109
0
        if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
110
0
                        nxt_strlen(rt->user_cred.user)))
111
0
        {
112
0
            nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
113
0
                      "missing capabilities", &app_conf->user, &app_conf->name);
114
115
0
            return NXT_ERROR;
116
0
        }
117
118
0
        if (app_conf->group.length > 0
119
0
            && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
120
0
                           nxt_strlen(rt->group)))
121
0
        {
122
0
            nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
123
0
                            "missing capabilities", &app_conf->group,
124
0
                            &app_conf->name);
125
126
0
            return NXT_ERROR;
127
0
        }
128
0
    }
129
130
0
#if (NXT_HAVE_ISOLATION_ROOTFS)
131
0
    if (process->isolation.rootfs != NULL) {
132
0
        nxt_int_t  has_mnt;
133
134
0
        ret = nxt_isolation_set_mounts(task, process, &app_conf->type);
135
0
        if (nxt_slow_path(ret != NXT_OK)) {
136
0
            return ret;
137
0
        }
138
139
0
#if (NXT_HAVE_CLONE_NEWNS)
140
0
        has_mnt = nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS);
141
#else
142
        has_mnt = 0;
143
#endif
144
145
0
        if (process->user_cred->uid == 0 && !has_mnt) {
146
0
            nxt_log(task, NXT_LOG_WARN,
147
0
                    "setting user \"root\" with \"rootfs\" is unsafe without "
148
0
                    "\"mount\" namespace isolation");
149
0
        }
150
0
    }
151
0
#endif
152
153
0
#if (NXT_HAVE_CLONE_NEWUSER)
154
0
    ret = nxt_isolation_vldt_creds(task, process);
155
0
    if (nxt_slow_path(ret != NXT_OK)) {
156
0
        return ret;
157
0
    }
158
0
#endif
159
160
0
    return NXT_OK;
161
0
}
162
163
164
static nxt_int_t
165
nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation,
166
    nxt_process_t *process)
167
0
{
168
#if (NXT_HAVE_CGROUP)
169
    if (nxt_slow_path(nxt_isolation_set_cgroup(task, isolation, process)
170
                      != NXT_OK))
171
    {
172
        return NXT_ERROR;
173
    }
174
#endif
175
176
0
#if (NXT_HAVE_LINUX_NS)
177
0
    if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process)
178
0
                      != NXT_OK))
179
0
    {
180
0
        return NXT_ERROR;
181
0
    }
182
0
#endif
183
184
0
#if (NXT_HAVE_CLONE_NEWUSER)
185
0
    if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process)
186
0
                      != NXT_OK))
187
0
    {
188
0
        return NXT_ERROR;
189
0
    }
190
0
#endif
191
192
0
#if (NXT_HAVE_ISOLATION_ROOTFS)
193
0
    if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process)
194
0
                      != NXT_OK))
195
0
    {
196
0
        return NXT_ERROR;
197
0
    }
198
199
0
    if (nxt_slow_path(nxt_isolation_set_automount(task, isolation, process)
200
0
                      != NXT_OK))
201
0
    {
202
0
        return NXT_ERROR;
203
0
    }
204
0
#endif
205
206
0
#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
207
0
    if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process)
208
0
                      != NXT_OK))
209
0
    {
210
0
        return NXT_ERROR;
211
0
    }
212
0
#endif
213
214
0
    return NXT_OK;
215
0
}
216
217
218
#if (NXT_HAVE_CGROUP)
219
220
static nxt_int_t
221
nxt_isolation_set_cgroup(nxt_task_t *task, nxt_conf_value_t *isolation,
222
    nxt_process_t *process)
223
{
224
    nxt_str_t         str;
225
    nxt_conf_value_t  *obj;
226
227
    static const nxt_str_t  cgname = nxt_string("cgroup");
228
    static const nxt_str_t  path = nxt_string("path");
229
230
    obj = nxt_conf_get_object_member(isolation, &cgname, NULL);
231
    if (obj == NULL) {
232
        return NXT_OK;
233
    }
234
235
    obj = nxt_conf_get_object_member(obj, &path, NULL);
236
    if (obj == NULL) {
237
        return NXT_ERROR;
238
    }
239
240
    nxt_conf_get_string(obj, &str);
241
    process->isolation.cgroup.path = nxt_mp_alloc(process->mem_pool,
242
                                                  str.length + 1);
243
    nxt_memcpy(process->isolation.cgroup.path, str.start, str.length);
244
    process->isolation.cgroup.path[str.length] = '\0';
245
246
    process->isolation.cgroup_cleanup = nxt_cgroup_cleanup;
247
248
    return NXT_OK;
249
}
250
251
#endif
252
253
254
#if (NXT_HAVE_LINUX_NS)
255
256
static nxt_int_t
257
nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
258
    nxt_process_t *process)
259
0
{
260
0
    nxt_int_t         ret;
261
0
    nxt_conf_value_t  *obj;
262
263
0
    static const nxt_str_t  nsname = nxt_string("namespaces");
264
265
0
    obj = nxt_conf_get_object_member(isolation, &nsname, NULL);
266
0
    if (obj != NULL) {
267
0
        ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone);
268
0
        if (nxt_slow_path(ret != NXT_OK)) {
269
0
            return NXT_ERROR;
270
0
        }
271
0
    }
272
273
0
    return NXT_OK;
274
0
}
275
276
#endif
277
278
279
#if (NXT_HAVE_CLONE_NEWUSER)
280
281
static nxt_int_t
282
nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
283
    nxt_process_t *process)
284
0
{
285
0
    nxt_int_t         ret;
286
0
    nxt_clone_t       *clone;
287
0
    nxt_conf_value_t  *array;
288
289
0
    static const nxt_str_t uidname = nxt_string("uidmap");
290
0
    static const nxt_str_t gidname = nxt_string("gidmap");
291
292
0
    clone = &process->isolation.clone;
293
294
0
    array = nxt_conf_get_object_member(isolation, &uidname, NULL);
295
0
    if (array != NULL) {
296
0
        ret = nxt_isolation_credential_map(task, process->mem_pool, array,
297
0
                                           &clone->uidmap);
298
299
0
        if (nxt_slow_path(ret != NXT_OK)) {
300
0
            return NXT_ERROR;
301
0
        }
302
0
    }
303
304
0
    array = nxt_conf_get_object_member(isolation, &gidname, NULL);
305
0
    if (array != NULL) {
306
0
        ret = nxt_isolation_credential_map(task, process->mem_pool, array,
307
0
                                           &clone->gidmap);
308
309
0
        if (nxt_slow_path(ret != NXT_OK)) {
310
0
            return NXT_ERROR;
311
0
        }
312
0
    }
313
314
0
    return NXT_OK;
315
0
}
316
317
318
static nxt_int_t
319
nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
320
    nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
321
0
{
322
0
    nxt_int_t         ret;
323
0
    nxt_uint_t        i;
324
0
    nxt_conf_value_t  *obj;
325
326
0
    static const nxt_conf_map_t  nxt_clone_map_entry_conf[] = {
327
0
        {
328
0
            nxt_string("container"),
329
0
            NXT_CONF_MAP_INT64,
330
0
            offsetof(nxt_clone_map_entry_t, container),
331
0
        },
332
333
0
        {
334
0
            nxt_string("host"),
335
0
            NXT_CONF_MAP_INT64,
336
0
            offsetof(nxt_clone_map_entry_t, host),
337
0
        },
338
339
0
        {
340
0
            nxt_string("size"),
341
0
            NXT_CONF_MAP_INT64,
342
0
            offsetof(nxt_clone_map_entry_t, size),
343
0
        },
344
0
    };
345
346
0
    map->size = nxt_conf_array_elements_count(map_array);
347
348
0
    if (map->size == 0) {
349
0
        return NXT_OK;
350
0
    }
351
352
0
    map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
353
0
    if (nxt_slow_path(map->map == NULL)) {
354
0
        return NXT_ERROR;
355
0
    }
356
357
0
    for (i = 0; i < map->size; i++) {
358
0
        obj = nxt_conf_get_array_element(map_array, i);
359
360
0
        ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf,
361
0
                                  nxt_nitems(nxt_clone_map_entry_conf),
362
0
                                  map->map + i);
363
0
        if (nxt_slow_path(ret != NXT_OK)) {
364
0
            nxt_alert(task, "clone map entry map error");
365
0
            return NXT_ERROR;
366
0
        }
367
0
    }
368
369
0
    return NXT_OK;
370
0
}
371
372
373
static nxt_int_t
374
nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process)
375
0
{
376
0
    nxt_int_t         ret;
377
0
    nxt_clone_t       *clone;
378
0
    nxt_credential_t  *creds;
379
380
0
    clone = &process->isolation.clone;
381
0
    creds = process->user_cred;
382
383
0
    if (clone->uidmap.size == 0 && clone->gidmap.size == 0) {
384
0
        return NXT_OK;
385
0
    }
386
387
0
    if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) {
388
0
        if (nxt_slow_path(clone->uidmap.size > 0)) {
389
0
            nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
390
0
                    "\"isolation.namespaces.credential\" is false or unset");
391
392
0
            return NXT_ERROR;
393
0
        }
394
395
0
        if (nxt_slow_path(clone->gidmap.size > 0)) {
396
0
            nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
397
0
                    "\"isolation.namespaces.credential\" is false or unset");
398
399
0
            return NXT_ERROR;
400
0
        }
401
402
0
        return NXT_OK;
403
0
    }
404
405
0
    ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds);
406
0
    if (nxt_slow_path(ret != NXT_OK)) {
407
0
        return NXT_ERROR;
408
0
    }
409
410
0
    return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds);
411
0
}
412
413
#endif
414
415
416
#if (NXT_HAVE_LINUX_NS)
417
418
static nxt_int_t
419
nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
420
    nxt_clone_t *clone)
421
0
{
422
0
    uint32_t          index;
423
0
    nxt_str_t         name;
424
0
    nxt_int_t         flag;
425
0
    nxt_conf_value_t  *value;
426
427
0
    index = 0;
428
429
0
    for ( ;; ) {
430
0
        value = nxt_conf_next_object_member(namespaces, &name, &index);
431
432
0
        if (value == NULL) {
433
0
            break;
434
0
        }
435
436
0
        flag = 0;
437
438
0
#if (NXT_HAVE_CLONE_NEWUSER)
439
0
        if (nxt_str_eq(&name, "credential", 10)) {
440
0
            flag = CLONE_NEWUSER;
441
0
        }
442
0
#endif
443
444
0
#if (NXT_HAVE_CLONE_NEWPID)
445
0
        if (nxt_str_eq(&name, "pid", 3)) {
446
0
            flag = CLONE_NEWPID;
447
0
        }
448
0
#endif
449
450
0
#if (NXT_HAVE_CLONE_NEWNET)
451
0
        if (nxt_str_eq(&name, "network", 7)) {
452
0
            flag = CLONE_NEWNET;
453
0
        }
454
0
#endif
455
456
0
#if (NXT_HAVE_CLONE_NEWUTS)
457
0
        if (nxt_str_eq(&name, "uname", 5)) {
458
0
            flag = CLONE_NEWUTS;
459
0
        }
460
0
#endif
461
462
0
#if (NXT_HAVE_CLONE_NEWNS)
463
0
        if (nxt_str_eq(&name, "mount", 5)) {
464
0
            flag = CLONE_NEWNS;
465
0
        }
466
0
#endif
467
468
0
#if (NXT_HAVE_CLONE_NEWCGROUP)
469
0
        if (nxt_str_eq(&name, "cgroup", 6)) {
470
0
            flag = CLONE_NEWCGROUP;
471
0
        }
472
0
#endif
473
474
0
        if (!flag) {
475
0
            nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
476
0
            return NXT_ERROR;
477
0
        }
478
479
0
        if (nxt_conf_get_boolean(value)) {
480
0
            clone->flags |= flag;
481
0
        }
482
0
    }
483
484
0
    return NXT_OK;
485
0
}
486
487
#endif
488
489
490
#if (NXT_HAVE_ISOLATION_ROOTFS)
491
492
static nxt_int_t
493
nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
494
    nxt_process_t *process)
495
0
{
496
0
    nxt_str_t         str;
497
0
    nxt_conf_value_t  *obj;
498
499
0
    static const nxt_str_t  rootfs_name = nxt_string("rootfs");
500
501
0
    obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
502
0
    if (obj != NULL) {
503
0
        nxt_conf_get_string(obj, &str);
504
505
0
        if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) {
506
0
            nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
507
0
                    "than \"/\" but given \"%V\"", &str);
508
509
0
            return NXT_ERROR;
510
0
        }
511
512
0
        if (str.start[str.length - 1] == '/') {
513
0
            str.length--;
514
0
        }
515
516
0
        process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
517
0
                                                 str.length + 1);
518
519
0
        if (nxt_slow_path(process->isolation.rootfs == NULL)) {
520
0
            return NXT_ERROR;
521
0
        }
522
523
0
        nxt_memcpy(process->isolation.rootfs, str.start, str.length);
524
525
0
        process->isolation.rootfs[str.length] = '\0';
526
0
    }
527
528
0
    return NXT_OK;
529
0
}
530
531
532
static nxt_int_t
533
nxt_isolation_set_automount(nxt_task_t *task, nxt_conf_value_t *isolation,
534
    nxt_process_t *process)
535
0
{
536
0
    nxt_conf_value_t         *conf, *value;
537
0
    nxt_process_automount_t  *automount;
538
539
0
    static const nxt_str_t  automount_name = nxt_string("automount");
540
0
    static const nxt_str_t  langdeps_name = nxt_string("language_deps");
541
0
    static const nxt_str_t  tmp_name = nxt_string("tmpfs");
542
0
    static const nxt_str_t  proc_name = nxt_string("procfs");
543
544
0
    automount = &process->isolation.automount;
545
546
0
    automount->language_deps = 1;
547
0
    automount->tmpfs = 1;
548
0
    automount->procfs = 1;
549
550
0
    conf = nxt_conf_get_object_member(isolation, &automount_name, NULL);
551
0
    if (conf != NULL) {
552
0
        value = nxt_conf_get_object_member(conf, &langdeps_name, NULL);
553
0
        if (value != NULL) {
554
0
            automount->language_deps = nxt_conf_get_boolean(value);
555
0
        }
556
557
0
        value = nxt_conf_get_object_member(conf, &tmp_name, NULL);
558
0
        if (value != NULL) {
559
0
            automount->tmpfs = nxt_conf_get_boolean(value);
560
0
        }
561
562
0
        value = nxt_conf_get_object_member(conf, &proc_name, NULL);
563
0
        if (value != NULL) {
564
0
            automount->procfs = nxt_conf_get_boolean(value);
565
0
        }
566
0
    }
567
568
0
    return NXT_OK;
569
0
}
570
571
572
static nxt_int_t
573
nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process,
574
    nxt_str_t *app_type)
575
0
{
576
0
    nxt_int_t              ret, cap_chroot;
577
0
    nxt_runtime_t          *rt;
578
0
    nxt_app_lang_module_t  *lang;
579
580
0
    rt = task->thread->runtime;
581
0
    cap_chroot = rt->capabilities.chroot;
582
0
    lang = nxt_app_lang_module(rt, app_type);
583
584
0
    nxt_assert(lang != NULL);
585
586
0
#if (NXT_HAVE_CLONE_NEWUSER)
587
0
    if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
588
0
        cap_chroot = 1;
589
0
    }
590
0
#endif
591
592
0
    if (!cap_chroot) {
593
0
        nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
594
0
        return NXT_ERROR;
595
0
    }
596
597
0
    ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts);
598
0
    if (nxt_slow_path(ret != NXT_OK)) {
599
0
        return NXT_ERROR;
600
0
    }
601
602
0
    process->isolation.cleanup = nxt_isolation_unmount_all;
603
604
0
    return NXT_OK;
605
0
}
606
607
608
static nxt_int_t
609
nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
610
    nxt_array_t *lang_mounts)
611
0
{
612
0
    u_char          *p;
613
0
    size_t          i, n, rootfs_len, len;
614
0
    nxt_mp_t        *mp;
615
0
    nxt_array_t     *mounts;
616
0
    const u_char    *rootfs;
617
0
    nxt_fs_mount_t  *mnt, *lang_mnt;
618
619
0
    mp = process->mem_pool;
620
621
    /* copy to init mem pool */
622
0
    mounts = nxt_array_copy(mp, NULL, lang_mounts);
623
0
    if (mounts == NULL) {
624
0
        return NXT_ERROR;
625
0
    }
626
627
0
    n = mounts->nelts;
628
0
    mnt = mounts->elts;
629
0
    lang_mnt = lang_mounts->elts;
630
631
0
    rootfs = process->isolation.rootfs;
632
0
    rootfs_len = nxt_strlen(rootfs);
633
634
0
    for (i = 0; i < n; i++) {
635
0
        len = nxt_strlen(lang_mnt[i].dst);
636
637
0
        mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1);
638
0
        if (nxt_slow_path(mnt[i].dst == NULL)) {
639
0
            return NXT_ERROR;
640
0
        }
641
642
0
        p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len);
643
0
        p = nxt_cpymem(p, lang_mnt[i].dst, len);
644
0
        *p = '\0';
645
0
    }
646
647
0
    if (process->isolation.automount.tmpfs) {
648
0
        mnt = nxt_array_add(mounts);
649
0
        if (nxt_slow_path(mnt == NULL)) {
650
0
            return NXT_ERROR;
651
0
        }
652
653
0
        mnt->src = (u_char *) "tmpfs";
654
0
        mnt->name = (u_char *) "tmpfs";
655
0
        mnt->type = NXT_FS_TMP;
656
0
        mnt->flags = (NXT_FS_FLAGS_NOSUID
657
0
                      | NXT_FS_FLAGS_NODEV
658
0
                      | NXT_FS_FLAGS_NOEXEC);
659
0
        mnt->data = (u_char *) "size=1m,mode=1777";
660
0
        mnt->builtin = 1;
661
0
        mnt->deps = 0;
662
663
0
        mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1);
664
0
        if (nxt_slow_path(mnt->dst == NULL)) {
665
0
            return NXT_ERROR;
666
0
        }
667
668
0
        p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
669
0
        p = nxt_cpymem(p, "/tmp", 4);
670
0
        *p = '\0';
671
0
    }
672
673
0
    if (process->isolation.automount.procfs) {
674
0
        mnt = nxt_array_add(mounts);
675
0
        if (nxt_slow_path(mnt == NULL)) {
676
0
            return NXT_ERROR;
677
0
        }
678
679
0
        mnt->name = (u_char *) "proc";
680
0
        mnt->type = NXT_FS_PROC;
681
0
        mnt->src = (u_char *) "none";
682
0
        mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1);
683
0
        if (nxt_slow_path(mnt->dst == NULL)) {
684
0
            return NXT_ERROR;
685
0
        }
686
687
0
        p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
688
0
        p = nxt_cpymem(p, "/proc", 5);
689
0
        *p = '\0';
690
691
0
        mnt->data = (u_char *) "";
692
0
        mnt->flags = NXT_FS_FLAGS_NOEXEC | NXT_FS_FLAGS_NOSUID;
693
0
        mnt->builtin = 1;
694
0
        mnt->deps = 0;
695
0
    }
696
697
0
    qsort(mounts->elts, mounts->nelts, sizeof(nxt_fs_mount_t),
698
0
          nxt_isolation_mount_compare);
699
700
0
    process->isolation.mounts = mounts;
701
702
0
    return NXT_OK;
703
0
}
704
705
706
static int nxt_cdecl
707
nxt_isolation_mount_compare(const void *v1, const void *v2)
708
0
{
709
0
    const nxt_fs_mount_t  *mnt1, *mnt2;
710
711
0
    mnt1 = v1;
712
0
    mnt2 = v2;
713
714
0
    return nxt_strlen(mnt1->src) > nxt_strlen(mnt2->src);
715
0
}
716
717
718
void
719
nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process)
720
0
{
721
0
    size_t                   n;
722
0
    nxt_array_t              *mounts;
723
0
    nxt_runtime_t            *rt;
724
0
    nxt_fs_mount_t           *mnt;
725
0
    nxt_process_automount_t  *automount;
726
727
0
    rt = task->thread->runtime;
728
729
0
    if (!rt->capabilities.setid) {
730
0
        return;
731
0
    }
732
733
0
    nxt_debug(task, "unmount all (%s)", process->name);
734
735
0
    automount = &process->isolation.automount;
736
0
    mounts = process->isolation.mounts;
737
0
    n = mounts->nelts;
738
0
    mnt = mounts->elts;
739
740
0
    while (n > 0) {
741
0
        n--;
742
743
0
        if (mnt[n].deps && !automount->language_deps) {
744
0
            continue;
745
0
        }
746
747
0
        nxt_fs_unmount(mnt[n].dst);
748
0
    }
749
0
}
750
751
752
nxt_int_t
753
nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
754
0
{
755
0
    size_t                   i, n;
756
0
    nxt_int_t                ret;
757
0
    struct stat              st;
758
0
    nxt_array_t              *mounts;
759
0
    const u_char             *dst;
760
0
    nxt_fs_mount_t           *mnt;
761
0
    nxt_process_automount_t  *automount;
762
763
0
    automount = &process->isolation.automount;
764
0
    mounts = process->isolation.mounts;
765
766
0
    n = mounts->nelts;
767
0
    mnt = mounts->elts;
768
769
0
    for (i = 0; i < n; i++) {
770
0
        dst = mnt[i].dst;
771
772
0
        if (mnt[i].deps && !automount->language_deps) {
773
0
            continue;
774
0
        }
775
776
0
        if (nxt_slow_path(mnt[i].type == NXT_FS_BIND
777
0
                          && stat((const char *) mnt[i].src, &st) != 0))
778
0
        {
779
0
            nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
780
0
            continue;
781
0
        }
782
783
0
        ret = nxt_fs_mkdir_p(dst, 0777);
784
0
        if (nxt_slow_path(ret != NXT_OK)) {
785
0
            nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
786
0
            goto undo;
787
0
        }
788
789
0
        ret = nxt_fs_mount(task, &mnt[i]);
790
0
        if (nxt_slow_path(ret != NXT_OK)) {
791
0
            goto undo;
792
0
        }
793
0
    }
794
795
0
    return NXT_OK;
796
797
0
undo:
798
799
0
    n = i + 1;
800
801
0
    for (i = 0; i < n; i++) {
802
0
        nxt_fs_unmount(mnt[i].dst);
803
0
    }
804
805
0
    return NXT_ERROR;
806
0
}
807
808
809
#if (NXT_HAVE_LINUX_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
810
811
nxt_int_t
812
nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
813
0
{
814
0
    char       *rootfs;
815
0
    nxt_int_t  ret;
816
817
0
    rootfs = (char *) process->isolation.rootfs;
818
819
0
    nxt_debug(task, "change root: %s", rootfs);
820
821
0
    if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) {
822
0
        ret = nxt_isolation_pivot_root(task, rootfs);
823
824
0
    } else {
825
0
        ret = nxt_isolation_chroot(task, rootfs);
826
0
    }
827
828
0
    if (nxt_fast_path(ret == NXT_OK)) {
829
0
        if (nxt_slow_path(chdir("/") < 0)) {
830
0
            nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
831
0
            return NXT_ERROR;
832
0
        }
833
0
    }
834
835
0
    return ret;
836
0
}
837
838
839
/*
840
 * pivot_root(2) can only be safely used with containers, otherwise it can
841
 * umount(2) the global root filesystem and screw up the machine.
842
 */
843
844
static nxt_int_t
845
nxt_isolation_pivot_root(nxt_task_t *task, const char *path)
846
0
{
847
    /*
848
     * This implementation makes use of a kernel trick that works for ages
849
     * and now documented in Linux kernel 5.
850
     * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
851
     */
852
853
0
    if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
854
0
        nxt_alert(task, "mount(\"/\", MS_SLAVE|MS_REC) failed: %E", nxt_errno);
855
0
        return NXT_ERROR;
856
0
    }
857
858
0
    if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) {
859
0
        return NXT_ERROR;
860
0
    }
861
862
0
    if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
863
0
        nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
864
0
        return NXT_ERROR;
865
0
    }
866
867
0
    if (nxt_slow_path(chdir(path) != 0)) {
868
0
        nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
869
0
        return NXT_ERROR;
870
0
    }
871
872
0
    if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
873
0
        nxt_alert(task, "failed to pivot_root %E", nxt_errno);
874
0
        return NXT_ERROR;
875
0
    }
876
877
    /*
878
     * Demote the oldroot mount to avoid unmounts getting propagated to
879
     * the host.
880
     */
881
0
    if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
882
0
        nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno);
883
0
        return NXT_ERROR;
884
0
    }
885
886
0
    if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
887
0
        nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
888
0
        return NXT_ERROR;
889
0
    }
890
891
0
    return NXT_OK;
892
0
}
893
894
895
static nxt_int_t
896
nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs)
897
0
{
898
0
    char           *parent_mnt;
899
0
    FILE           *procfile;
900
0
    u_char         **mounts;
901
0
    size_t         len;
902
0
    uint8_t        *shared;
903
0
    nxt_int_t      ret, index, nmounts;
904
0
    struct mntent  *ent;
905
906
0
    static const char  *mount_path = "/proc/self/mounts";
907
908
0
    ret = NXT_ERROR;
909
0
    ent = NULL;
910
0
    shared = NULL;
911
0
    procfile = NULL;
912
0
    parent_mnt = NULL;
913
914
0
    nmounts = 256;
915
916
0
    mounts = nxt_malloc(nmounts * sizeof(uintptr_t));
917
0
    if (nxt_slow_path(mounts == NULL)) {
918
0
        goto fail;
919
0
    }
920
921
0
    shared = nxt_malloc(nmounts);
922
0
    if (nxt_slow_path(shared == NULL)) {
923
0
        goto fail;
924
0
    }
925
926
0
    procfile = setmntent(mount_path, "r");
927
0
    if (nxt_slow_path(procfile == NULL)) {
928
0
        nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
929
930
0
        goto fail;
931
0
    }
932
933
0
    index = 0;
934
935
0
again:
936
937
0
    for ( ; index < nmounts; index++) {
938
0
        ent = getmntent(procfile);
939
0
        if (ent == NULL) {
940
0
            nmounts = index;
941
0
            break;
942
0
        }
943
944
0
        mounts[index] = (u_char *) strdup(ent->mnt_dir);
945
0
        shared[index] = hasmntopt(ent, "shared") != NULL;
946
0
    }
947
948
0
    if (ent != NULL) {
949
        /* there are still entries to be read */
950
951
0
        nmounts *= 2;
952
0
        mounts = nxt_realloc(mounts, nmounts);
953
0
        if (nxt_slow_path(mounts == NULL)) {
954
0
            goto fail;
955
0
        }
956
957
0
        shared = nxt_realloc(shared, nmounts);
958
0
        if (nxt_slow_path(shared == NULL)) {
959
0
            goto fail;
960
0
        }
961
962
0
        goto again;
963
0
    }
964
965
0
    for (index = 0; index < nmounts; index++) {
966
0
        if (nxt_strcmp(mounts[index], rootfs) == 0) {
967
0
            parent_mnt = (char *) rootfs;
968
0
            break;
969
0
        }
970
0
    }
971
972
0
    if (parent_mnt == NULL) {
973
0
        len = nxt_strlen(rootfs);
974
975
0
        parent_mnt = nxt_malloc(len + 1);
976
0
        if (parent_mnt == NULL) {
977
0
            goto fail;
978
0
        }
979
980
0
        nxt_memcpy(parent_mnt, rootfs, len);
981
0
        parent_mnt[len] = '\0';
982
983
0
        if (parent_mnt[len - 1] == '/') {
984
0
            parent_mnt[len - 1] = '\0';
985
0
            len--;
986
0
        }
987
988
0
        for ( ;; ) {
989
0
            for (index = 0; index < nmounts; index++) {
990
0
                if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
991
0
                    goto found;
992
0
                }
993
0
            }
994
995
0
            if (len == 1 && parent_mnt[0] == '/') {
996
0
                nxt_alert(task, "parent mount not found");
997
0
                goto fail;
998
0
            }
999
1000
            /* parent dir */
1001
0
            while (parent_mnt[len - 1] != '/' && len > 0) {
1002
0
                len--;
1003
0
            }
1004
1005
0
            if (nxt_slow_path(len == 0)) {
1006
0
                nxt_alert(task, "parent mount not found");
1007
0
                goto fail;
1008
0
            }
1009
1010
0
            if (len == 1) {
1011
0
                parent_mnt[len] = '\0';     /* / */
1012
0
            } else {
1013
0
                parent_mnt[len - 1] = '\0'; /* /<path> */
1014
0
            }
1015
0
        }
1016
0
    }
1017
1018
0
found:
1019
1020
0
    if (shared[index]) {
1021
0
        if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
1022
0
            nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
1023
0
                      nxt_errno);
1024
1025
0
            goto fail;
1026
0
        }
1027
0
    }
1028
1029
0
    ret = NXT_OK;
1030
1031
0
fail:
1032
1033
0
    if (procfile != NULL) {
1034
0
        endmntent(procfile);
1035
0
    }
1036
1037
0
    if (mounts != NULL) {
1038
0
        for (index = 0; index < nmounts; index++) {
1039
0
            nxt_free(mounts[index]);
1040
0
        }
1041
1042
0
        nxt_free(mounts);
1043
0
    }
1044
1045
0
    if (shared != NULL) {
1046
0
        nxt_free(shared);
1047
0
    }
1048
1049
0
    if (parent_mnt != NULL && parent_mnt != rootfs) {
1050
0
        nxt_free(parent_mnt);
1051
0
    }
1052
1053
0
    return ret;
1054
0
}
1055
1056
1057
/*
 * Thin wrapper that issues the pivot_root system call directly through
 * syscall() and returns its result.
 */
nxt_inline int
nxt_pivot_root(const char *new_root, const char *old_root)
{
    long  rc;

    rc = syscall(SYS_pivot_root, new_root, old_root);

    return rc;
}
1062
1063
1064
#else /* !(NXT_HAVE_LINUX_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */
1065
1066
1067
nxt_int_t
1068
nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
1069
{
1070
    char       *rootfs;
1071
1072
    rootfs = (char *) process->isolation.rootfs;
1073
1074
    nxt_debug(task, "change root: %s", rootfs);
1075
1076
    if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) {
1077
        if (nxt_slow_path(chdir("/") < 0)) {
1078
            nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
1079
            return NXT_ERROR;
1080
        }
1081
1082
        return NXT_OK;
1083
    }
1084
1085
    return NXT_ERROR;
1086
}
1087
1088
#endif
1089
1090
1091
static nxt_int_t
1092
nxt_isolation_chroot(nxt_task_t *task, const char *path)
1093
0
{
1094
0
    if (nxt_slow_path(chroot(path) < 0)) {
1095
0
        nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
1096
0
        return NXT_ERROR;
1097
0
    }
1098
1099
0
    return NXT_OK;
1100
0
}
1101
1102
#endif /* NXT_HAVE_ISOLATION_ROOTFS */
1103
1104
1105
#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
1106
1107
static nxt_int_t
1108
nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
1109
    nxt_process_t *process)
1110
0
{
1111
0
    nxt_conf_value_t  *obj;
1112
1113
0
    static const nxt_str_t  new_privs_name = nxt_string("new_privs");
1114
1115
0
    obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
1116
0
    if (obj != NULL) {
1117
0
        process->isolation.new_privs = nxt_conf_get_boolean(obj);
1118
0
    }
1119
1120
0
    return NXT_OK;
1121
0
}
1122
1123
#endif