Coverage Report

Created: 2026-01-16 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/lxc/src/lxc/criu.c
Line
Count
Source
1
/* SPDX-License-Identifier: LGPL-2.1+ */
2
3
#include "config.h"
4
5
#include <inttypes.h>
6
#include <linux/limits.h>
7
#include <sched.h>
8
#include <stdio.h>
9
#include <stdlib.h>
10
#include <string.h>
11
#include <sys/mount.h>
12
#include <sys/types.h>
13
#include <sys/wait.h>
14
#include <unistd.h>
15
#include <mntent.h>
16
17
#include "attach_options.h"
18
19
#include "cgroup.h"
20
#include "commands.h"
21
#include "conf.h"
22
#include "criu.h"
23
#include "log.h"
24
#include "lxc.h"
25
#include "lxclock.h"
26
#include "memory_utils.h"
27
#include "network.h"
28
#include "storage.h"
29
#include "syscall_wrappers.h"
30
#include "utils.h"
31
32
#if !HAVE_STRLCPY
33
#include "strlcpy.h"
34
#endif
35
36
0
/* Oldest released criu version accepted by criu_version_ok(). */
#define CRIU_VERSION "2.0"

/*
 * Fallback for criu builds from git: the "GitID: v<version>-<patch>" line
 * must be at least this version and this patch level.
 */
#define CRIU_GITID_VERSION "2.0"
#define CRIU_GITID_PATCHLEVEL 0

/* First criu version for which exec_criu() passes --skip-in-flight on dump. */
#define CRIU_IN_FLIGHT_SUPPORT "2.4"
/* From this version on --external veth[..] is used instead of --veth-pair. */
#define CRIU_EXTERNAL_NOT_VETH "2.8"
/* Restoring LXC_NET_PHYS devices via --external netdev[..] needs this version. */
#define CRIU_EXTERNAL_NETDEV "3.15"
44
45
0
lxc_log_define(criu, lxc);
Unexecuted instantiation: criu.c:LXC_ERROR
Unexecuted instantiation: criu.c:LXC_INFO
Unexecuted instantiation: criu.c:LXC_WARN
Unexecuted instantiation: criu.c:LXC_TRACE
46
0
47
0
/* Everything exec_criu() needs to assemble and run one criu invocation. */
struct criu_opts {
	/* Descriptor that criu's stdout and stderr are redirected to. */
	int pipefd;

	/* criu sub-command: exec_criu() accepts "dump", "pre-dump" and "restore". */
	char *action;

	/* The user-provided migrate options relevant to this action. */
	struct migrate_opts *user;

	/* The container being checkpointed or restored. */
	struct lxc_container *c;

	/*
	 * dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]",
	 * handed to criu via --external. An empty string means no console is
	 * exported.
	 */
	char tty_id[32];

	/* NOTE(review): presumably the handler of the restore task — confirm. */
	struct lxc_handler *handler;

	/* restore: descriptor passed to criu as --inherit-fd fd[N]:<tty id>. */
	int console_fd;

	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pty
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console.path = "none".
	 */
	char *console_name;

	/* The detected version of criu. */
	char *criu_version;
};
76
0
77
0
static int load_tty_major_minor(char *directory, char *output, int len)
78
0
{
79
0
  char path[PATH_MAX];
80
0
  ssize_t ret;
81
82
0
  ret = strnprintf(path, sizeof(path), "%s/tty.info", directory);
83
0
  if (ret < 0)
84
0
    return ret_errno(EIO);
85
86
0
  ret = lxc_read_from_file(path, output, len);
87
0
  if (ret < 0) {
88
    /*
89
     * This means we're coming from a liblxc which didn't export
90
     * the tty info. In this case they had to have lxc.console.path
91
     * = * none, so there's no problem restoring.
92
     */
93
0
    if (errno == ENOENT)
94
0
      return 0;
95
96
0
    return log_error_errno(-errno, errno, "Failed to open \"%s\"", path);
97
0
  }
98
99
0
  return 0;
100
0
}
101
102
/*
 * Compare two dotted version strings of the form "major[.minor[.patch]]".
 *
 * Components missing from a string count as -1, so "2.8" sorts before
 * "2.8.0". Returns 1 if v1 is newer than v2, 0 if both are equal and -1 if
 * v1 is older than v2 or if either string does not start with a number.
 */
static int cmp_version(const char *v1, const char *v2)
{
	int lhs[3] = { -1, -1, -1 };
	int rhs[3] = { -1, -1, -1 };

	if (sscanf(v1, "%d.%d.%d", &lhs[0], &lhs[1], &lhs[2]) < 1)
		return -1;

	if (sscanf(v2, "%d.%d.%d", &rhs[0], &rhs[1], &rhs[2]) < 1)
		return -1;

	/* The first component that differs (major, minor, patch) decides. */
	for (int idx = 0; idx < 3; idx++) {
		if (lhs[idx] != rhs[idx])
			return lhs[idx] > rhs[idx] ? 1 : -1;
	}

	return 0;
}
142
143
struct criu_exec_args {
144
  int argc;
145
  char *argv[];
146
};
147
148
static void put_criu_exec_args(struct criu_exec_args *args)
149
0
{
150
0
  if (args) {
151
0
    for (int i = 0; i < args->argc; i++)
152
0
      free_disarm(args->argv[i]);
153
0
    free_disarm(args);
154
0
  }
155
0
}
156
157
define_cleanup_function(struct criu_exec_args *, put_criu_exec_args);
158
159
static int exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
160
         struct criu_opts *opts)
161
0
{
162
0
  call_cleaner(put_criu_exec_args) struct criu_exec_args *args = NULL;
163
0
  __do_fclose FILE *f_mnt = NULL;
164
0
  char log[PATH_MAX];
165
0
  int static_args = 23, ret;
166
0
  int netnr = 0;
167
0
  struct mntent mntent;
168
0
  struct lxc_netdev *netdev;
169
0
  struct string_entry *strentry;
170
171
0
  char buf[4096], ttys[32];
172
173
  /* If we are currently in a cgroup /foo/bar, and the container is in a
174
   * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
175
   * container has an open fd that points to one of the cgroup files
176
   * (systemd always opens its "root" cgroup). So, let's escape to the
177
   * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
178
   * see all cgroups.
179
   */
180
0
  if (!cgroup_ops->criu_escape(cgroup_ops, conf))
181
0
    return log_error_errno(-ENOENT, ENOENT, "Failed to escape to root cgroup");
182
183
  /* The command line always looks like:
184
   * criu $(action) --tcp-established --file-locks --link-remap \
185
   * --manage-cgroups=full --action-script foo.sh -D $(directory) \
186
   * -o $(directory)/$(action).log --ext-mount-map auto
187
   * --enable-external-sharing --enable-external-masters
188
   * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
189
   * +1 for final NULL */
190
191
0
  if (strequal(opts->action, "dump") || strequal(opts->action, "pre-dump")) {
192
    /* -t pid --freeze-cgroup /lxc/ct */
193
0
    static_args += 4;
194
195
    /* --prev-images-dir <path-to-directory-A-relative-to-B> */
196
0
    if (opts->user->predump_dir)
197
0
      static_args += 2;
198
199
    /* --page-server --address <address> --port <port> */
200
0
    if (opts->user->pageserver_address && opts->user->pageserver_port)
201
0
      static_args += 5;
202
203
    /* --leave-running (only for final dump) */
204
0
    if (strequal(opts->action, "dump") && !opts->user->stop)
205
0
      static_args++;
206
207
    /* --external tty[88,4] */
208
0
    if (opts->tty_id[0])
209
0
      static_args += 2;
210
211
    /* --force-irmap */
212
0
    if (!opts->user->preserves_inodes)
213
0
      static_args++;
214
215
    /* --ghost-limit 1024 */
216
0
    if (opts->user->ghost_limit)
217
0
      static_args += 2;
218
0
  } else if (strequal(opts->action, "restore")) {
219
    /* --root $(lxc_mount_point) --restore-detached
220
     * --restore-sibling
221
     * --lsm-profile apparmor:whatever
222
     */
223
0
    static_args += 6;
224
225
0
    ttys[0] = 0;
226
0
    if (load_tty_major_minor(opts->user->directory, ttys, sizeof(ttys)))
227
0
      return log_error_errno(-EINVAL, EINVAL, "Failed to load tty information");
228
229
    /* --inherit-fd fd[%d]:tty[%s] */
230
0
    if (ttys[0])
231
0
      static_args += 2;
232
233
0
    static_args += list_len(netdev, &opts->c->lxc_conf->netdevs, head) * 2;
234
0
  } else {
235
0
    return log_error_errno(-EINVAL, EINVAL, "Invalid criu operation specified");
236
0
  }
237
238
0
  if (cgroup_ops->criu_num_hierarchies(cgroup_ops) > 0)
239
0
    static_args += 2 * cgroup_ops->criu_num_hierarchies(cgroup_ops);
240
241
0
  if (opts->user->verbose)
242
0
    static_args++;
243
244
0
  if (opts->user->action_script)
245
0
    static_args += 2;
246
247
0
  static_args += 2 * list_len(strentry, &opts->c->lxc_conf->mount_entries, head);
248
249
0
  ret = strnprintf(log, sizeof(log), "%s/%s.log", opts->user->directory, opts->action);
250
0
  if (ret < 0)
251
0
    return ret_errno(EIO);
252
253
0
  args = zalloc(sizeof(struct criu_exec_args) + (static_args * sizeof(char **)));
254
0
  if (!args)
255
0
    return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate static arguments");
256
257
0
#define DECLARE_ARG(arg)                                                                 \
258
0
  do {                                                                             \
259
0
    if (arg == NULL)                                                         \
260
0
      return log_error_errno(-EINVAL, EINVAL,                          \
261
0
                 "Got NULL argument for criu");            \
262
0
    args->argv[(args->argc)++] = strdup(arg);                                \
263
0
    if (!args->argv[args->argc - 1])                                         \
264
0
      return log_error_errno(-ENOMEM, ENOMEM,        \
265
0
          "Failed to duplicate argumen %s", arg);          \
266
0
  } while (0)
267
268
0
  args->argv[(args->argc)++] = on_path("criu", NULL);
269
0
  if (!args->argv[args->argc - 1])
270
0
    return log_error_errno(-ENOENT, ENOENT, "Failed to find criu binary");
271
272
0
  DECLARE_ARG(opts->action);
273
0
  DECLARE_ARG("--tcp-established");
274
0
  DECLARE_ARG("--file-locks");
275
0
  DECLARE_ARG("--link-remap");
276
0
  DECLARE_ARG("--manage-cgroups=full");
277
0
  DECLARE_ARG("--ext-mount-map");
278
0
  DECLARE_ARG("auto");
279
0
  DECLARE_ARG("--enable-external-sharing");
280
0
  DECLARE_ARG("--enable-external-masters");
281
0
  DECLARE_ARG("--enable-fs");
282
0
  DECLARE_ARG("hugetlbfs");
283
0
  DECLARE_ARG("--enable-fs");
284
0
  DECLARE_ARG("tracefs");
285
0
  DECLARE_ARG("-D");
286
0
  DECLARE_ARG(opts->user->directory);
287
0
  DECLARE_ARG("-o");
288
0
  DECLARE_ARG(log);
289
290
0
  for (int i = 0; i < cgroup_ops->criu_num_hierarchies(cgroup_ops); i++) {
291
0
    __do_free char *cgroup_base_path = NULL, *controllers;
292
0
    char **controllers_list = NULL;
293
0
    char *tmp;
294
295
0
    if (!cgroup_ops->criu_get_hierarchies(cgroup_ops, i, &controllers_list))
296
0
      return log_error_errno(-ENOENT, ENOENT, "Failed to retrieve cgroup hierarchies %d", i);
297
298
    /*
299
     * If we are in a dump, we have to ask the monitor process what
300
     * the right cgroup is. if this is a restore, we can just use
301
     * the handler the restore task created.
302
     */
303
0
    if (strequal(opts->action, "dump") || strequal(opts->action, "pre-dump")) {
304
0
      cgroup_base_path = lxc_cmd_get_limit_cgroup_path(opts->c->name, opts->c->config_path, controllers_list[0]);
305
0
      if (!cgroup_base_path)
306
0
        return log_error_errno(-ENOENT, ENOENT, "Failed to retrieve limit cgroup path for %s", controllers_list[0] ?: "(null)");
307
0
    } else {
308
0
      const char *p;
309
310
0
      p = cgroup_ops->get_limit_cgroup(cgroup_ops, controllers_list[0]);
311
0
      if (!p)
312
0
        return log_error_errno(-ENOENT, ENOENT, "Failed to retrieve limit cgroup path for %s", controllers_list[0] ?: "(null)");
313
314
0
      cgroup_base_path = strdup(p);
315
0
      if (!cgroup_base_path)
316
0
        return log_error_errno(-ENOMEM, ENOMEM, "Failed to duplicate limit cgroup path");
317
0
    }
318
319
0
    tmp = lxc_path_simplify(cgroup_base_path);
320
0
    if (!tmp)
321
0
      return log_error_errno(-ENOMEM, ENOMEM, "Failed to remove extraneous slashes from \"%s\"", cgroup_base_path);
322
0
    free_move_ptr(cgroup_base_path, tmp);
323
324
0
    if (controllers_list[0]) {
325
0
      controllers = lxc_string_join(",", (const char **)controllers_list, false);
326
0
      if (!controllers)
327
0
        return log_error_errno(-ENOMEM, ENOMEM, "Failed to join controllers");
328
329
0
      ret = sprintf(buf, "%s:%s", controllers, cgroup_base_path);
330
0
    } else {
331
0
      WARN("No cgroup controllers configured in container's cgroup %s", cgroup_base_path);
332
0
      ret = sprintf(buf, "%s", cgroup_base_path);
333
0
    }
334
0
    if (ret < 0 || (size_t)ret >= sizeof(buf))
335
0
      return log_error_errno(-EIO, EIO, "sprintf of cgroup root arg failed");
336
337
0
    DECLARE_ARG("--cgroup-root");
338
0
    DECLARE_ARG(buf);
339
0
  }
340
341
0
  if (opts->user->verbose)
342
0
    DECLARE_ARG("-v4");
343
344
0
  if (opts->user->action_script) {
345
0
    DECLARE_ARG("--action-script");
346
0
    DECLARE_ARG(opts->user->action_script);
347
0
  }
348
349
0
  f_mnt = make_anonymous_mount_file(&opts->c->lxc_conf->mount_entries,
350
0
                                   opts->c->lxc_conf->lsm_aa_allow_nesting);
351
0
  if (!f_mnt)
352
0
    return log_error_errno(-ENOENT, ENOENT, "Failed to create anonymous mount file");
353
354
0
  while (getmntent_r(f_mnt, &mntent, buf, sizeof(buf))) {
355
0
    __do_free char *mnt_options = NULL;
356
0
    unsigned long flags = 0;
357
0
    char arg[2 * PATH_MAX + 2];
358
359
0
    if (parse_mntopts_legacy(mntent.mnt_opts, &flags, &mnt_options) < 0)
360
0
      return log_error_errno(-EINVAL, EINVAL, "Failed to parse mount options");
361
362
    /* only add --ext-mount-map for actual bind mounts */
363
0
    if (!(flags & MS_BIND))
364
0
      continue;
365
366
0
    if (strequal(opts->action, "dump"))
367
0
      ret = strnprintf(arg, sizeof(arg), "/%s:%s", mntent.mnt_dir, mntent.mnt_dir);
368
0
    else
369
0
      ret = strnprintf(arg, sizeof(arg), "%s:%s", mntent.mnt_dir, mntent.mnt_fsname);
370
0
    if (ret < 0)
371
0
      return log_error_errno(-EIO, EIO, "Failed to create mount entry");
372
373
0
    DECLARE_ARG("--ext-mount-map");
374
0
    DECLARE_ARG(arg);
375
0
  }
376
377
0
  if (strequal(opts->action, "dump") || strequal(opts->action, "pre-dump")) {
378
0
    pid_t init_pid;
379
0
    char init_pid_str[INTTYPE_TO_STRLEN(int)];
380
0
    char *freezer_relative;
381
382
0
    init_pid = opts->c->init_pid(opts->c);
383
0
    if (init_pid < 0)
384
0
      return log_error_errno(-ESRCH, ESRCH, "Failed to retrieve init pid of container");
385
386
0
    ret = strnprintf(init_pid_str, sizeof(init_pid_str), "%d", init_pid);
387
0
    if (ret < 0)
388
0
      return log_error_errno(-EIO, EIO, "Failed to create entry for init pid of container");
389
390
0
    DECLARE_ARG("-t");
391
0
    DECLARE_ARG(init_pid_str);
392
393
0
    freezer_relative = lxc_cmd_get_limit_cgroup_path(opts->c->name,
394
0
                 opts->c->config_path,
395
0
                 "freezer");
396
0
    if (!freezer_relative)
397
0
      return log_error_errno(-ENOENT, ENOENT, "Failed getting freezer path");
398
399
0
    if (pure_unified_layout(cgroup_ops))
400
0
      ret = strnprintf(log, sizeof(log), "/sys/fs/cgroup/%s", freezer_relative);
401
0
    else
402
0
      ret = strnprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
403
0
    if (ret < 0)
404
0
      return log_error_errno(-EIO, EIO, "Failed to freezer cgroup entry");
405
406
0
    if (!opts->user->disable_skip_in_flight &&
407
0
        strcmp(opts->criu_version, CRIU_IN_FLIGHT_SUPPORT) >= 0)
408
0
      DECLARE_ARG("--skip-in-flight");
409
410
0
    DECLARE_ARG("--freeze-cgroup");
411
0
    DECLARE_ARG(log);
412
413
0
    if (opts->tty_id[0]) {
414
0
      DECLARE_ARG("--ext-mount-map");
415
0
      DECLARE_ARG("/dev/console:console");
416
417
0
      DECLARE_ARG("--external");
418
0
      DECLARE_ARG(opts->tty_id);
419
0
    }
420
421
0
    if (opts->user->predump_dir) {
422
0
      DECLARE_ARG("--prev-images-dir");
423
0
      DECLARE_ARG(opts->user->predump_dir);
424
0
      DECLARE_ARG("--track-mem");
425
0
    }
426
427
0
    if (opts->user->pageserver_address && opts->user->pageserver_port) {
428
0
      DECLARE_ARG("--page-server");
429
0
      DECLARE_ARG("--address");
430
0
      DECLARE_ARG(opts->user->pageserver_address);
431
0
      DECLARE_ARG("--port");
432
0
      DECLARE_ARG(opts->user->pageserver_port);
433
0
    }
434
435
0
    if (!opts->user->preserves_inodes)
436
0
      DECLARE_ARG("--force-irmap");
437
438
0
    if (opts->user->ghost_limit) {
439
0
      char ghost_limit[32];
440
441
0
      ret = sprintf(ghost_limit, "%"PRIu64, opts->user->ghost_limit);
442
0
      if (ret < 0 || (size_t)ret >= sizeof(ghost_limit))
443
0
        return log_error_errno(-EIO, EIO, "Failed to print ghost limit %"PRIu64, opts->user->ghost_limit);
444
445
0
      DECLARE_ARG("--ghost-limit");
446
0
      DECLARE_ARG(ghost_limit);
447
0
    }
448
449
    /* only for final dump */
450
0
    if (strequal(opts->action, "dump") && !opts->user->stop)
451
0
      DECLARE_ARG("--leave-running");
452
0
  } else if (strequal(opts->action, "restore")) {
453
0
    struct lxc_conf *lxc_conf = opts->c->lxc_conf;
454
455
0
    DECLARE_ARG("--root");
456
0
    DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
457
0
    DECLARE_ARG("--restore-detached");
458
0
    DECLARE_ARG("--restore-sibling");
459
460
0
    if (ttys[0]) {
461
0
      if (opts->console_fd < 0)
462
0
        return log_error_errno(-EINVAL, EINVAL, "lxc.console.path configured on source host but not target");
463
464
0
      ret = strnprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, ttys);
465
0
      if (ret < 0)
466
0
        return log_error_errno(-EIO, EIO, "Failed to create console entry");
467
468
0
      DECLARE_ARG("--inherit-fd");
469
0
      DECLARE_ARG(buf);
470
0
    }
471
0
    if (opts->console_name) {
472
0
      if (strnprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0)
473
0
        return log_error_errno(-EIO, EIO, "Failed to create console entry");
474
475
0
      DECLARE_ARG("--ext-mount-map");
476
0
      DECLARE_ARG(buf);
477
0
    }
478
479
0
    if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
480
481
0
      if (lxc_conf->lsm_aa_profile)
482
0
        ret = strnprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
483
0
      else
484
0
        ret = strnprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
485
0
      if (ret < 0)
486
0
        return log_error_errno(-EIO, EIO, "Failed to create lsm entry");
487
488
0
      DECLARE_ARG("--lsm-profile");
489
0
      DECLARE_ARG(buf);
490
0
    }
491
492
0
    list_for_each_entry(netdev, &opts->c->lxc_conf->netdevs, head) {
493
0
      size_t retlen;
494
0
      char eth[128], *veth;
495
0
      bool external_not_veth;
496
497
0
      if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) {
498
        /* Since criu version 2.8 the usage of --veth-pair
499
         * has been deprecated:
500
         * git tag --contains f2037e6d3445fc400
501
         * v2.8 */
502
0
        external_not_veth = true;
503
0
      } else {
504
0
        external_not_veth = false;
505
0
      }
506
507
0
      if (netdev->name[0] != '\0') {
508
0
        retlen = strlcpy(eth, netdev->name, sizeof(eth));
509
0
        if (retlen >= sizeof(eth))
510
0
          return log_error_errno(-E2BIG, E2BIG, "Failed to append veth device name");
511
0
      } else {
512
0
        ret = strnprintf(eth, sizeof(eth), "eth%d", netnr);
513
0
        if (ret < 0)
514
0
          return log_error_errno(-E2BIG, E2BIG, "Failed to append veth device name");
515
0
      }
516
517
0
      switch (netdev->type) {
518
0
      case LXC_NET_VETH:
519
0
        veth = netdev->priv.veth_attr.pair;
520
0
        if (veth[0] == '\0')
521
0
          veth = netdev->priv.veth_attr.veth1;
522
523
0
        if (netdev->link[0] != '\0') {
524
0
          if (external_not_veth)
525
0
            ret = strnprintf(buf, sizeof(buf), "veth[%s]:%s@%s", eth, veth, netdev->link);
526
0
          else
527
0
            ret = strnprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, netdev->link);
528
0
        } else {
529
0
          if (external_not_veth)
530
0
            ret = strnprintf(buf, sizeof(buf), "veth[%s]:%s", eth, veth);
531
0
          else
532
0
            ret = strnprintf(buf, sizeof(buf), "%s=%s", eth, veth);
533
0
        }
534
0
        if (ret < 0)
535
0
          return log_error_errno(-EIO, EIO, "Failed to append veth device name");
536
537
0
        TRACE("Added veth device entry %s", buf);
538
0
        break;
539
0
      case LXC_NET_MACVLAN:
540
0
        if (netdev->link[0] == '\0')
541
0
          return log_error_errno(-EINVAL, EINVAL, "Failed to find host interface for macvlan %s", netdev->name);
542
543
0
        ret = strnprintf(buf, sizeof(buf), "macvlan[%s]:%s", eth, netdev->link);
544
0
        if (ret < 0)
545
0
          return log_error_errno(-EIO, EIO, "Failed to add macvlan entry");
546
547
0
        TRACE("Added macvlan device entry %s", buf);
548
549
0
        break;
550
0
      case LXC_NET_PHYS:
551
0
        if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NETDEV) < 0)
552
0
          return syserror_set(-EOPNOTSUPP, "Restoring physical network devices not supported");
553
554
0
        if (is_empty_string(netdev->link))
555
0
          return syserror_set(-EINVAL, "Specifying link is required");
556
557
0
        ret = strnprintf(buf, sizeof(buf), "netdev[%s]:%s", eth, netdev->link);
558
0
        if (ret < 0)
559
0
          return syserror_set(-EIO, "Failed to append phys device name");
560
561
0
        TRACE("Added phys device entry %s", buf);
562
0
        break;
563
0
      case LXC_NET_NONE:
564
0
        __fallthrough;
565
0
      case LXC_NET_EMPTY:
566
0
        break;
567
0
      default:
568
        /* we have screened for this earlier... */
569
0
        return log_error_errno(-EINVAL, EINVAL, "Unsupported network type %d", netdev->type);
570
0
      }
571
572
0
      if (external_not_veth)
573
0
        DECLARE_ARG("--external");
574
0
      else
575
0
        DECLARE_ARG("--veth-pair");
576
0
      DECLARE_ARG(buf);
577
0
      netnr++;
578
0
    }
579
580
0
  }
581
582
0
  args->argv[args->argc] = NULL;
583
584
0
  if (lxc_log_trace()) {
585
0
    buf[0] = 0;
586
0
    for (int i = 0, pos = 0; i < args->argc && args->argv[i]; i++) {
587
0
      ret = strnprintf(buf + pos, sizeof(buf) - pos, "%s ", args->argv[i]);
588
0
      if (ret < 0)
589
0
        return log_error_errno(-EIO, EIO, "Failed to reorder entries");
590
0
      else
591
0
        pos += ret;
592
0
    }
593
594
0
    TRACE("Using command line %s", buf);
595
0
  }
596
597
  /* before criu inits its log, it sometimes prints things to stdout/err;
598
   * let's be sure we capture that.
599
   */
600
0
  if (dup2(opts->pipefd, STDOUT_FILENO) < 0)
601
0
    return log_error_errno(-errno, errno, "Failed to duplicate stdout");
602
603
0
  if (dup2(opts->pipefd, STDERR_FILENO) < 0)
604
0
    return log_error_errno(-errno, errno, "Failed to duplicate stderr");
605
606
0
  close(opts->pipefd);
607
608
0
#undef DECLARE_ARG
609
0
  execv(args->argv[0], args->argv);
610
0
  return -ENOEXEC;
611
0
}
612
613
/*
614
 * Function to check if the checks activated in 'features_to_check' are
615
 * available with the current architecture/kernel/criu combination.
616
 *
617
 * Parameter features_to_check is a bit mask of all features that should be
618
 * checked (see feature check defines in lxc/lxccontainer.h).
619
 *
620
 * If the return value is true, all requested features are supported. If
621
 * the return value is false the features_to_check parameter is updated
622
 * to reflect which features are available. '0' means no feature but
623
 * also that something went totally wrong.
624
 *
625
 * Some of the code flow of criu_version_ok() is duplicated and maybe it
626
 * is a good candidate for refactoring.
627
 */
628
bool __criu_check_feature(uint64_t *features_to_check)
629
0
{
630
0
  pid_t pid;
631
0
  uint64_t current_bit = 0;
632
0
  int ret;
633
0
  uint64_t features = *features_to_check;
634
  /* Feature checking is currently always like
635
   * criu check --feature <feature-name>
636
   */
637
0
  char *args[] = { "criu", "check", "--feature", NULL, NULL };
638
639
0
  if ((features & ~FEATURE_MEM_TRACK & ~FEATURE_LAZY_PAGES) != 0) {
640
    /* There are feature bits activated we do not understand.
641
     * Refusing to answer at all */
642
0
    *features_to_check = 0;
643
0
    return false;
644
0
  }
645
646
0
  while (current_bit < (sizeof(uint64_t) * 8 - 1)) {
647
    /* only test requested features */
648
0
    if (!(features & (1ULL << current_bit))) {
649
      /* skip this */
650
0
      current_bit++;
651
0
      continue;
652
0
    }
653
654
0
    pid = fork();
655
0
    if (pid < 0) {
656
0
      SYSERROR("fork() failed");
657
0
      *features_to_check = 0;
658
0
      return false;
659
0
    }
660
661
0
    if (pid == 0) {
662
0
      if ((1ULL << current_bit) == FEATURE_MEM_TRACK)
663
        /* This is needed for pre-dump support, which
664
         * enables pre-copy migration. */
665
0
        args[3] = "mem_dirty_track";
666
0
      else if ((1ULL << current_bit) == FEATURE_LAZY_PAGES)
667
        /* CRIU has two checks for userfaultfd support.
668
         *
669
         * The simpler check is only for 'uffd'. If the
670
         * kernel supports userfaultfd without noncoop
671
         * then only process can be lazily restored
672
         * which do not fork. With 'uffd-noncoop'
673
         * it is also possible to lazily restore processes
674
         * which do fork. For a container runtime like
675
         * LXC checking only for 'uffd' makes not much sense. */
676
0
        args[3] = "uffd-noncoop";
677
0
      else
678
0
        _exit(EXIT_FAILURE);
679
680
0
      null_stdfds();
681
682
0
      execvp("criu", args);
683
0
      SYSERROR("Failed to exec \"criu\"");
684
0
      _exit(EXIT_FAILURE);
685
0
    }
686
687
0
    ret = wait_for_pid(pid);
688
689
0
    if (ret == -1) {
690
      /* It is not known why CRIU failed. Either
691
       * CRIU is not available, the feature check
692
       * does not exist or the feature is not
693
       * supported. */
694
0
      INFO("feature not supported");
695
      /* Clear not supported feature bit */
696
0
      features &= ~(1ULL << current_bit);
697
0
    }
698
699
0
    current_bit++;
700
    /* no more checks requested; exit check loop */
701
0
    if (!(features & ~((1ULL << current_bit)-1)))
702
0
      break;
703
0
  }
704
0
  if (features != *features_to_check) {
705
0
    *features_to_check = features;
706
0
    return false;
707
0
  }
708
0
  return true;
709
0
}
710
711
/*
712
 * Check to see if the criu version is recent enough for all the features we
713
 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
714
 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
715
 * things potentially before a version is released with a particular feature.
716
 *
717
 * The intent is that when criu development slows down, we can drop this, but
718
 * for now we shouldn't attempt to c/r with versions that we know won't work.
719
 *
720
 * Note: If version != NULL criu_version() stores the detected criu version in
721
 * version. Allocates memory for version which must be freed by caller.
722
 */
723
static bool criu_version_ok(char **version)
724
0
{
725
0
  int pipes[2];
726
0
  pid_t pid;
727
728
0
  if (pipe(pipes) < 0) {
729
0
    SYSERROR("pipe() failed");
730
0
    return false;
731
0
  }
732
733
0
  pid = fork();
734
0
  if (pid < 0) {
735
0
    SYSERROR("fork() failed");
736
0
    return false;
737
0
  }
738
739
0
  if (pid == 0) {
740
0
    char *args[] = { "criu", "--version", NULL };
741
0
    char *path;
742
0
    close(pipes[0]);
743
744
0
    close(STDERR_FILENO);
745
0
    if (dup2(pipes[1], STDOUT_FILENO) < 0)
746
0
      _exit(EXIT_FAILURE);
747
748
0
    path = on_path("criu", NULL);
749
0
    if (!path)
750
0
      _exit(EXIT_FAILURE);
751
752
0
    execv(path, args);
753
0
    _exit(EXIT_FAILURE);
754
0
  } else {
755
0
    FILE *f;
756
0
    char *tmp;
757
0
    int patch;
758
759
0
    close(pipes[1]);
760
0
    if (wait_for_pid(pid) < 0) {
761
0
      close(pipes[0]);
762
0
      SYSERROR("execing criu failed, is it installed?");
763
0
      return false;
764
0
    }
765
766
0
    f = fdopen(pipes[0], "re");
767
0
    if (!f) {
768
0
      close(pipes[0]);
769
0
      return false;
770
0
    }
771
772
0
    tmp = malloc(1024);
773
0
    if (!tmp) {
774
0
      fclose(f);
775
0
      return false;
776
0
    }
777
778
0
    if (fscanf(f, "Version: %1023[^\n]s", tmp) != 1)
779
0
      goto version_error;
780
781
0
    if (fgetc(f) != '\n')
782
0
      goto version_error;
783
784
0
    if (strcmp(tmp, CRIU_VERSION) >= 0)
785
0
      goto version_match;
786
787
0
    if (fscanf(f, "GitID: v%1023[^-]s", tmp) != 1)
788
0
      goto version_error;
789
790
0
    if (fgetc(f) != '-')
791
0
      goto version_error;
792
793
0
    if (fscanf(f, "%d", &patch) != 1)
794
0
      goto version_error;
795
796
0
    if (strcmp(tmp, CRIU_GITID_VERSION) < 0)
797
0
      goto version_error;
798
799
0
    if (patch < CRIU_GITID_PATCHLEVEL)
800
0
      goto version_error;
801
802
0
version_match:
803
0
    fclose(f);
804
0
    if (!version)
805
0
      free(tmp);
806
0
    else
807
0
      *version = tmp;
808
0
    return true;
809
810
0
version_error:
811
0
    fclose(f);
812
0
    free(tmp);
813
0
    ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore");
814
0
    return false;
815
0
  }
816
0
}
817
818
/* Check and make sure the container has a configuration that we know CRIU can
819
 * dump. */
820
static bool criu_ok(struct lxc_container *c, char **criu_version)
821
0
{
822
0
  struct lxc_netdev *netdev;
823
824
0
  if (geteuid()) {
825
0
    ERROR("Must be root to checkpoint");
826
0
    return false;
827
0
  }
828
829
0
  if (!criu_version_ok(criu_version))
830
0
    return false;
831
832
  /* We only know how to restore containers with veth networks. */
833
0
  list_for_each_entry(netdev, &c->lxc_conf->netdevs, head) {
834
0
    switch(netdev->type) {
835
0
    case LXC_NET_VETH:
836
0
    case LXC_NET_NONE:
837
0
    case LXC_NET_EMPTY:
838
0
    case LXC_NET_PHYS:
839
0
    case LXC_NET_MACVLAN:
840
0
      break;
841
0
    default:
842
0
      ERROR("Found un-dumpable network: %s (%s)", lxc_net_type_to_str(netdev->type), netdev->name);
843
0
      if (criu_version) {
844
0
        free(*criu_version);
845
0
        *criu_version = NULL;
846
0
      }
847
0
      return false;
848
0
    }
849
0
  }
850
851
0
  return true;
852
0
}
853
854
static bool restore_net_info(struct lxc_container *c)
855
0
{
856
0
  int ret;
857
0
  bool has_error = true;
858
0
  struct lxc_netdev *netdev;
859
860
0
  if (container_mem_lock(c))
861
0
    return false;
862
863
0
  list_for_each_entry(netdev, &c->lxc_conf->netdevs, head) {
864
0
    char template[IFNAMSIZ];
865
866
0
    if (netdev->type != LXC_NET_VETH)
867
0
      continue;
868
869
0
    ret = strnprintf(template, sizeof(template), "vethXXXXXX");
870
0
    if (ret < 0)
871
0
      goto out_unlock;
872
873
0
    if (netdev->priv.veth_attr.pair[0] == '\0' &&
874
0
        netdev->priv.veth_attr.veth1[0] == '\0') {
875
0
      if (!lxc_ifname_alnum_case_sensitive(template))
876
0
        goto out_unlock;
877
878
0
      (void)strlcpy(netdev->priv.veth_attr.veth1, template, IFNAMSIZ);
879
0
    }
880
0
  }
881
882
0
  has_error = false;
883
884
0
out_unlock:
885
0
  container_mem_unlock(c);
886
0
  return !has_error;
887
0
}
888
889
/* do_restore never returns, the calling process is used as the monitor process.
890
 * do_restore calls _exit() if it fails.
891
 */
892
static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts, char *criu_version)
893
0
{
894
0
  int fd, ret;
895
0
  pid_t pid;
896
0
  struct lxc_handler *handler;
897
0
  int status = 0;
898
0
  int pipes[2] = {-1, -1};
899
0
  struct cgroup_ops *cgroup_ops;
900
901
  /* Try to detach from the current controlling tty if it exists.
902
   * Otherwise, lxc_init (via lxc_console) will attach the container's
903
   * console output to the current tty, which is probably not what any
904
   * library user wants, and if they do, they can just manually configure
905
   * it :)
906
   */
907
0
  fd = open("/dev/tty", O_RDWR);
908
0
  if (fd >= 0) {
909
0
    if (ioctl(fd, TIOCNOTTY, NULL) < 0)
910
0
      SYSERROR("couldn't detach from tty");
911
0
    close(fd);
912
0
  }
913
914
0
  handler = lxc_init_handler(NULL, c->name, c->lxc_conf, c->config_path, false);
915
0
  if (!handler)
916
0
    goto out;
917
918
0
  if (lxc_init(c->name, handler) < 0)
919
0
    goto out;
920
0
  cgroup_ops = handler->cgroup_ops;
921
922
0
  if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
923
0
    ERROR("Failed to create monitor cgroup");
924
0
    goto out_fini_handler;
925
0
  }
926
927
0
  if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
928
0
    ERROR("Failed to enter monitor cgroup");
929
0
    goto out_fini_handler;
930
0
  }
931
932
0
  if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
933
0
    ERROR("Failed to delegate controllers to monitor cgroup");
934
0
    goto out_fini_handler;
935
0
  }
936
937
0
  if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
938
0
    ERROR("Failed creating cgroups");
939
0
    goto out_fini_handler;
940
0
  }
941
942
0
  if (!restore_net_info(c)) {
943
0
    ERROR("failed restoring network info");
944
0
    goto out_fini_handler;
945
0
  }
946
947
0
  ret = resolve_clone_flags(handler);
948
0
  if (ret < 0) {
949
0
    SYSERROR("Unsupported clone flag specified");
950
0
    goto out_fini_handler;
951
0
  }
952
953
0
  if (pipe2(pipes, O_CLOEXEC) < 0) {
954
0
    SYSERROR("pipe() failed");
955
0
    goto out_fini_handler;
956
0
  }
957
958
0
  pid = fork();
959
0
  if (pid < 0)
960
0
    goto out_fini_handler;
961
962
0
  if (pid == 0) {
963
0
    struct criu_opts os;
964
0
    struct lxc_rootfs *rootfs;
965
0
    int flags;
966
967
0
    close(status_pipe);
968
0
    status_pipe = -1;
969
970
0
    close(pipes[0]);
971
0
    pipes[0] = -1;
972
973
0
    if (unshare(CLONE_NEWNS))
974
0
      goto out_fini_handler;
975
976
0
    ret = lxc_storage_prepare(c->lxc_conf);
977
0
    if (ret)
978
0
      goto out_fini_handler;
979
980
    /* CRIU needs the lxc root bind mounted so that it is the root of some
981
     * mount. */
982
0
    rootfs = &c->lxc_conf->rootfs;
983
984
0
    if (rootfs_is_blockdev(c->lxc_conf)) {
985
0
      if (lxc_setup_rootfs_prepare_root(c->lxc_conf, c->name,
986
0
                c->config_path) < 0)
987
0
        goto out_fini_handler;
988
0
    } else {
989
0
      if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
990
0
        goto out_fini_handler;
991
992
0
      if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
993
0
        SYSERROR("remount / to private failed");
994
0
        goto out_fini_handler;
995
0
      }
996
997
0
      if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
998
0
        (void)rmdir(rootfs->mount);
999
0
        goto out_fini_handler;
1000
0
      }
1001
0
    }
1002
1003
0
    os.pipefd = pipes[1];
1004
0
    os.action = "restore";
1005
0
    os.user = opts;
1006
0
    os.c = c;
1007
0
    os.console_fd = c->lxc_conf->console.pty;
1008
0
    os.criu_version = criu_version;
1009
0
    os.handler = handler;
1010
1011
0
    if (os.console_fd >= 0) {
1012
      /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
1013
       * via --inherit-fd, so we don't want it to close.
1014
       */
1015
0
      flags = fcntl(os.console_fd, F_GETFD);
1016
0
      if (flags < 0) {
1017
0
        SYSERROR("F_GETFD failed: %d", os.console_fd);
1018
0
        goto out_fini_handler;
1019
0
      }
1020
1021
0
      flags &= ~FD_CLOEXEC;
1022
1023
0
      if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
1024
0
        SYSERROR("F_SETFD failed");
1025
0
        goto out_fini_handler;
1026
0
      }
1027
0
    }
1028
0
    os.console_name = c->lxc_conf->console.name;
1029
1030
    /* exec_criu() returning is an error */
1031
0
    ret = exec_criu(handler->cgroup_ops, c->lxc_conf, &os);
1032
0
    if (ret)
1033
0
      SYSERROR("Failed to execute criu");
1034
0
    umount(rootfs->mount);
1035
0
    (void)rmdir(rootfs->mount);
1036
0
    goto out_fini_handler;
1037
0
  } else {
1038
0
    char title[2048];
1039
1040
0
    close(pipes[1]);
1041
0
    pipes[1] = -1;
1042
1043
0
    pid_t w = waitpid(pid, &status, 0);
1044
0
    if (w == -1) {
1045
0
      SYSERROR("waitpid");
1046
0
      goto out_fini_handler;
1047
0
    }
1048
1049
0
    if (WIFEXITED(status)) {
1050
0
      char buf[4096];
1051
1052
0
      if (WEXITSTATUS(status)) {
1053
0
        int n;
1054
1055
0
        n = lxc_read_nointr(pipes[0], buf, sizeof(buf));
1056
0
        if (n < 0) {
1057
0
          SYSERROR("failed reading from criu stderr");
1058
0
          goto out_fini_handler;
1059
0
        }
1060
1061
0
        if (n == sizeof(buf))
1062
0
          n--;
1063
0
        buf[n] = 0;
1064
1065
0
        ERROR("criu process exited %d, output:\n%s", WEXITSTATUS(status), buf);
1066
0
        goto out_fini_handler;
1067
0
      } else {
1068
0
        ret = strnprintf(buf, sizeof(buf), "/proc/self/task/%lu/children", (unsigned long)syscall(__NR_gettid));
1069
0
        if (ret < 0) {
1070
0
          ERROR("strnprintf'd too many characters: %d", ret);
1071
0
          goto out_fini_handler;
1072
0
        }
1073
1074
0
        FILE *f = fopen(buf, "re");
1075
0
        if (!f) {
1076
0
          SYSERROR("couldn't read restore's children file %s", buf);
1077
0
          goto out_fini_handler;
1078
0
        }
1079
1080
0
        ret = fscanf(f, "%d", (int*) &handler->pid);
1081
0
        fclose(f);
1082
0
        if (ret != 1) {
1083
0
          ERROR("reading restore pid failed");
1084
0
          goto out_fini_handler;
1085
0
        }
1086
1087
0
        if (lxc_set_state(c->name, handler, RUNNING)) {
1088
0
          ERROR("error setting running state after restore");
1089
0
          goto out_fini_handler;
1090
0
        }
1091
0
      }
1092
0
    } else {
1093
0
      ERROR("CRIU was killed with signal %d", WTERMSIG(status));
1094
0
      goto out_fini_handler;
1095
0
    }
1096
1097
0
    close(pipes[0]);
1098
1099
0
    ret = lxc_write_nointr(status_pipe, &status, sizeof(status));
1100
0
    close(status_pipe);
1101
0
    status_pipe = -1;
1102
1103
0
    if (sizeof(status) != ret) {
1104
0
      SYSERROR("failed to write all of status");
1105
0
      goto out_fini_handler;
1106
0
    }
1107
1108
    /*
1109
     * See comment in lxcapi_start; we don't care if these
1110
     * fail because it's just a beauty thing. We just
1111
     * assign the return here to silence potential.
1112
     */
1113
0
    ret = strnprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
1114
0
    if (ret < 0)
1115
0
      INFO("Setting truncated process name");
1116
1117
0
    ret = setproctitle(title);
1118
0
    if (ret < 0)
1119
0
      INFO("Failed to set process name");
1120
1121
0
    ret = lxc_poll(c->name, handler);
1122
0
    if (ret)
1123
0
      lxc_abort(handler);
1124
0
    lxc_end(handler);
1125
0
    _exit(ret);
1126
0
  }
1127
1128
0
out_fini_handler:
1129
0
  if (pipes[0] >= 0)
1130
0
    close(pipes[0]);
1131
0
  if (pipes[1] >= 0)
1132
0
    close(pipes[1]);
1133
1134
0
  lxc_end(handler);
1135
1136
0
out:
1137
0
  if (status_pipe >= 0) {
1138
    /* ensure getting here was a failure, e.g. if we failed to
1139
     * parse the child pid or something, even after a successful
1140
     * restore
1141
     */
1142
0
    if (!status)
1143
0
      status = 1;
1144
1145
0
    if (lxc_write_nointr(status_pipe, &status, sizeof(status)) != sizeof(status))
1146
0
      SYSERROR("writing status failed");
1147
0
    close(status_pipe);
1148
0
  }
1149
1150
0
  _exit(EXIT_FAILURE);
1151
0
}
1152
1153
static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
1154
0
{
1155
0
  FILE *f;
1156
0
  char path[PATH_MAX];
1157
0
  int ret;
1158
0
  struct stat sb;
1159
1160
0
  if (c->lxc_conf->console.path && strequal(c->lxc_conf->console.path, "none")) {
1161
0
    tty_id[0] = 0;
1162
0
    return 0;
1163
0
  }
1164
1165
0
  ret = strnprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
1166
0
  if (ret < 0) {
1167
0
    ERROR("strnprintf'd too many characters: %d", ret);
1168
0
    return -1;
1169
0
  }
1170
1171
0
  ret = stat(path, &sb);
1172
0
  if (ret < 0) {
1173
0
    SYSERROR("stat of %s failed", path);
1174
0
    return -1;
1175
0
  }
1176
1177
0
  ret = strnprintf(path, sizeof(path), "%s/tty.info", directory);
1178
0
  if (ret < 0) {
1179
0
    ERROR("strnprintf'd too many characters: %d", ret);
1180
0
    return -1;
1181
0
  }
1182
1183
0
  ret = strnprintf(tty_id, len, "tty[%llx:%llx]",
1184
0
          (long long unsigned) sb.st_rdev,
1185
0
          (long long unsigned) sb.st_dev);
1186
0
  if (ret < 0) {
1187
0
    ERROR("strnprintf'd too many characters: %d", ret);
1188
0
    return -1;
1189
0
  }
1190
1191
0
  f = fopen(path, "we");
1192
0
  if (!f) {
1193
0
    SYSERROR("failed to open %s", path);
1194
0
    return -1;
1195
0
  }
1196
1197
0
  ret = fprintf(f, "%s", tty_id);
1198
0
  fclose(f);
1199
0
  if (ret < 0)
1200
0
    SYSERROR("failed to write to %s", path);
1201
0
  return ret;
1202
0
}
1203
1204
/* do one of either predump or a regular dump */
1205
static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
1206
0
{
1207
0
  int ret;
1208
0
  pid_t pid;
1209
0
  int criuout[2];
1210
0
  char *criu_version = NULL;
1211
1212
0
  if (!criu_ok(c, &criu_version))
1213
0
    return false;
1214
1215
0
  ret = pipe(criuout);
1216
0
  if (ret < 0) {
1217
0
    SYSERROR("pipe() failed");
1218
0
    free(criu_version);
1219
0
    return false;
1220
0
  }
1221
1222
0
  if (lxc_mkdir_p(opts->directory, 0700) < 0)
1223
0
    goto fail;
1224
1225
0
  pid = fork();
1226
0
  if (pid < 0) {
1227
0
    SYSERROR("fork failed");
1228
0
    goto fail;
1229
0
  }
1230
1231
0
  if (pid == 0) {
1232
0
    struct criu_opts os;
1233
0
    struct cgroup_ops *cgroup_ops;
1234
1235
0
    close(criuout[0]);
1236
1237
0
    cgroup_ops = cgroup_init(c->lxc_conf);
1238
0
    if (!cgroup_ops) {
1239
0
      ERROR("failed to cgroup_init()");
1240
0
      _exit(EXIT_FAILURE);
1241
0
    }
1242
1243
0
    os.pipefd = criuout[1];
1244
0
    os.action = mode;
1245
0
    os.user = opts;
1246
0
    os.c = c;
1247
0
    os.console_name = c->lxc_conf->console.path;
1248
0
    os.criu_version = criu_version;
1249
0
    os.handler = NULL;
1250
1251
0
    ret = save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id));
1252
0
    if (ret < 0) {
1253
0
      free(criu_version);
1254
0
      _exit(EXIT_FAILURE);
1255
0
    }
1256
1257
    /* exec_criu() returning is an error */
1258
0
    ret = exec_criu(cgroup_ops, c->lxc_conf, &os);
1259
0
    if (ret)
1260
0
      SYSERROR("Failed to execute criu");
1261
0
    free(criu_version);
1262
0
    _exit(EXIT_FAILURE);
1263
0
  } else {
1264
0
    int status;
1265
0
    ssize_t n;
1266
0
    char buf[4096];
1267
1268
0
    close(criuout[1]);
1269
1270
0
    pid_t w = waitpid(pid, &status, 0);
1271
0
    if (w == -1) {
1272
0
      SYSERROR("waitpid");
1273
0
      close(criuout[0]);
1274
0
      free(criu_version);
1275
0
      return false;
1276
0
    }
1277
1278
0
    n = lxc_read_nointr(criuout[0], buf, sizeof(buf));
1279
0
    close(criuout[0]);
1280
0
    if (n < 0) {
1281
0
      SYSERROR("read");
1282
0
      n = 0;
1283
0
    }
1284
1285
0
    if (n == sizeof(buf))
1286
0
      buf[n-1] = 0;
1287
0
    else
1288
0
      buf[n] = 0;
1289
1290
0
    if (WIFEXITED(status)) {
1291
0
      if (WEXITSTATUS(status)) {
1292
0
        ERROR("dump failed with %d", WEXITSTATUS(status));
1293
0
        ret = false;
1294
0
      } else {
1295
0
        ret = true;
1296
0
      }
1297
0
    } else if (WIFSIGNALED(status)) {
1298
0
      ERROR("dump signaled with %d", WTERMSIG(status));
1299
0
      ret = false;
1300
0
    } else {
1301
0
      ERROR("unknown dump exit %d", status);
1302
0
      ret = false;
1303
0
    }
1304
1305
0
    if (!ret)
1306
0
      ERROR("criu output: %s", buf);
1307
1308
0
    free(criu_version);
1309
0
    return ret;
1310
0
  }
1311
0
fail:
1312
0
  close(criuout[0]);
1313
0
  close(criuout[1]);
1314
0
  (void)rmdir(opts->directory);
1315
0
  free(criu_version);
1316
0
  return false;
1317
0
}
1318
1319
/* Run do_dump() on @c in "pre-dump" mode with the given @opts and return
 * its result.
 */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
  char *mode = "pre-dump";

  return do_dump(c, mode, opts);
}
1323
1324
bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
1325
0
{
1326
0
  char path[PATH_MAX];
1327
0
  int ret;
1328
1329
0
  ret = strnprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
1330
0
  if (ret < 0)
1331
0
    return false;
1332
1333
0
  if (access(path, F_OK) == 0) {
1334
0
    ERROR("please use a fresh directory for the dump directory");
1335
0
    return false;
1336
0
  }
1337
1338
0
  return do_dump(c, "dump", opts);
1339
0
}
1340
1341
/* Restore container @c using the migrate options in @opts.
 *
 * Requires effective uid 0. A child is forked to run do_restore(), which
 * never returns; it reports one int status over a pipe. The restore is
 * considered successful only if that status describes a normal exit with
 * code 0, in which case the child is left running and is not waited for.
 * In every other case the child is reaped with wait_for_pid().
 *
 * Returns true on success, false otherwise.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
  char *criu_version = NULL;
  int pipefd[2];
  int status, nread;
  pid_t pid;

  if (geteuid()) {
    ERROR("Must be root to restore");
    return false;
  }

  if (pipe(pipefd)) {
    ERROR("failed to create pipe");
    return false;
  }

  if (!criu_ok(c, &criu_version)) {
    close(pipefd[0]);
    close(pipefd[1]);
    return false;
  }

  pid = fork();
  if (pid < 0) {
    close(pipefd[0]);
    close(pipefd[1]);
    free(criu_version);
    return false;
  }

  if (pid == 0) {
    /* Child: keep only the write end; do_restore() does not return. */
    close(pipefd[0]);
    do_restore(c, pipefd[1], opts, criu_version);
  }

  /* Parent: keep only the read end. */
  close(pipefd[1]);
  free(criu_version);

  nread = lxc_read_nointr(pipefd[0], &status, sizeof(status));
  close(pipefd[0]);

  if (sizeof(status) != nread)
    ERROR("reading status from pipe failed");
  else if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
    return true; /* the child carries on as the monitor process */

  /* The criu process was killed or exited nonzero (or no status was
   * received), so the restore process died: reap it.
   */
  if (wait_for_pid(pid))
    ERROR("restore process died");
  return false;
}