Coverage Report

Created: 2026-05-30 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/dovecot/src/lib-index/mail-transaction-log-file.c
Line
Count
Source
1
/* Copyright (c) 2003-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "array.h"
5
#include "ioloop.h"
6
#include "file-dotlock.h"
7
#include "nfs-workarounds.h"
8
#include "read-full.h"
9
#include "write-full.h"
10
#include "mmap-util.h"
11
#include "mail-index-private.h"
12
#include "mail-index-modseq.h"
13
#include "mail-transaction-log-private.h"
14
15
0
#define LOG_PREFETCH IO_BLOCK_SIZE
16
0
#define MEMORY_LOG_NAME "(in-memory transaction log file)"
17
0
#define LOG_NEW_DOTLOCK_SUFFIX ".newlock"
18
19
static int
20
mail_transaction_log_file_sync(struct mail_transaction_log_file *file,
21
             bool *retry_r, const char **reason_r);
22
23
static void
24
log_file_set_syscall_error(struct mail_transaction_log_file *file,
25
         const char *function)
26
0
{
27
0
  mail_index_file_set_syscall_error(file->log->index,
28
0
            file->filepath, function);
29
0
}
30
31
static void
32
mail_transaction_log_mark_corrupted(struct mail_transaction_log_file *file)
33
0
{
34
0
  unsigned int offset =
35
0
    offsetof(struct mail_transaction_log_header, indexid);
36
0
  int flags;
37
38
0
  if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file) ||
39
0
      file->log->index->readonly)
40
0
    return;
41
42
  /* indexid=0 marks the log file as corrupted. we opened the file with
43
     O_APPEND, and now we need to drop it for pwrite() to work (at least
44
     in Linux) */
45
0
  flags = fcntl(file->fd, F_GETFL, 0);
46
0
  if (flags < 0) {
47
0
    mail_index_file_set_syscall_error(file->log->index,
48
0
      file->filepath, "fcntl(F_GETFL)");
49
0
    return;
50
0
  }
51
0
  if (fcntl(file->fd, F_SETFL, flags & ~O_APPEND) < 0) {
52
0
    mail_index_file_set_syscall_error(file->log->index,
53
0
      file->filepath, "fcntl(F_SETFL)");
54
0
    return;
55
0
  }
56
0
  if (pwrite_full(file->fd, &file->hdr.indexid,
57
0
      sizeof(file->hdr.indexid), offset) < 0) {
58
0
    mail_index_file_set_syscall_error(file->log->index,
59
0
              file->filepath, "pwrite()");
60
0
  }
61
0
}
62
63
void
64
mail_transaction_log_file_set_corrupted(struct mail_transaction_log_file *file,
65
          const char *fmt, ...)
66
0
{
67
0
  va_list va;
68
69
0
  file->corrupted = TRUE;
70
0
  file->hdr.indexid = 0;
71
0
  mail_transaction_log_mark_corrupted(file);
72
73
0
  va_start(va, fmt);
74
0
  T_BEGIN {
75
0
    mail_index_set_error(file->log->index,
76
0
      "Corrupted transaction log file %s seq %u: %s "
77
0
      "(sync_offset=%"PRIuUOFF_T")",
78
0
      file->filepath, file->hdr.file_seq,
79
0
      t_strdup_vprintf(fmt, va), file->sync_offset);
80
0
  } T_END;
81
0
  va_end(va);
82
0
}
83
84
struct mail_transaction_log_file *
85
mail_transaction_log_file_alloc(struct mail_transaction_log *log,
86
        const char *path)
87
0
{
88
0
  struct mail_transaction_log_file *file;
89
90
0
  file = i_new(struct mail_transaction_log_file, 1);
91
0
  file->log = log;
92
0
  file->filepath = i_strdup(path);
93
0
  file->fd = -1;
94
0
  return file;
95
0
}
96
97
void mail_transaction_log_file_free(struct mail_transaction_log_file **_file)
98
0
{
99
0
  struct mail_transaction_log_file *file = *_file;
100
0
  struct mail_transaction_log_file **p;
101
0
  int old_errno = errno;
102
103
0
  *_file = NULL;
104
105
0
  i_assert(!file->locked);
106
0
  i_assert(file->refcount == 0);
107
108
0
  for (p = &file->log->files; *p != NULL; p = &(*p)->next) {
109
0
    if (*p == file) {
110
0
      *p = file->next;
111
0
      break;
112
0
    }
113
0
  }
114
115
0
  if (file == file->log->head)
116
0
    file->log->head = NULL;
117
118
0
  buffer_free(&file->buffer);
119
120
0
  if (file->mmap_base != NULL) {
121
0
    if (munmap(file->mmap_base, file->mmap_size) < 0)
122
0
      log_file_set_syscall_error(file, "munmap()");
123
0
  }
124
125
0
  if (file->fd != -1) {
126
0
    if (close(file->fd) < 0)
127
0
      log_file_set_syscall_error(file, "close()");
128
0
  }
129
130
0
  i_free(file->filepath);
131
0
  i_free(file->need_rotate);
132
0
        i_free(file);
133
134
0
        errno = old_errno;
135
0
}
136
137
static void
138
mail_transaction_log_file_skip_to_head(struct mail_transaction_log_file *file)
139
0
{
140
0
  struct mail_transaction_log *log = file->log;
141
0
  struct mail_index_map *map = log->index->map;
142
0
  const struct mail_index_modseq_header *modseq_hdr;
143
0
  uoff_t head_offset;
144
145
0
  if (map == NULL || file->hdr.file_seq != map->hdr.log_file_seq ||
146
0
      map->hdr.log_file_head_offset == 0)
147
0
    return;
148
149
  /* we can get a valid log offset from index file. initialize
150
     sync_offset from it so we don't have to read the whole log
151
     file from beginning. */
152
0
  head_offset = map->hdr.log_file_head_offset;
153
154
0
  modseq_hdr = mail_index_map_get_modseq_header(map);
155
0
  if (head_offset < file->hdr.hdr_size) {
156
0
    mail_index_set_error(log->index,
157
0
             "%s: log_file_head_offset too small",
158
0
             log->index->filepath);
159
0
    file->sync_offset = file->hdr.hdr_size;
160
0
    file->sync_highest_modseq = file->hdr.initial_modseq;
161
0
  } else if (modseq_hdr == NULL && file->hdr.initial_modseq == 0) {
162
    /* modseqs not used yet */
163
0
    file->sync_offset = head_offset;
164
0
    file->sync_highest_modseq = 0;
165
0
  } else if (modseq_hdr == NULL ||
166
0
       modseq_hdr->log_seq != file->hdr.file_seq) {
167
    /* highest_modseq not synced, start from beginning */
168
0
    file->sync_offset = file->hdr.hdr_size;
169
0
    file->sync_highest_modseq = file->hdr.initial_modseq;
170
0
  } else if (modseq_hdr->log_offset > head_offset) {
171
0
    mail_index_set_error(log->index,
172
0
             "%s: modseq_hdr.log_offset too large",
173
0
             log->index->filepath);
174
0
    file->sync_offset = file->hdr.hdr_size;
175
0
    file->sync_highest_modseq = file->hdr.initial_modseq;
176
0
  } else {
177
    /* start from where we last stopped tracking modseqs */
178
0
    file->sync_offset = modseq_hdr->log_offset;
179
0
    file->sync_highest_modseq = modseq_hdr->highest_modseq;
180
0
  }
181
0
  if (file->hdr.file_seq == log->index->map->hdr.log_file_seq) {
182
0
    file->last_read_hdr_tail_offset =
183
0
      log->index->map->hdr.log_file_tail_offset;
184
0
  }
185
0
  if (file->last_read_hdr_tail_offset > file->max_tail_offset)
186
0
    file->max_tail_offset = file->last_read_hdr_tail_offset;
187
0
}
188
189
static void
190
mail_transaction_log_file_add_to_list(struct mail_transaction_log_file *file)
191
0
{
192
0
  struct mail_transaction_log_file **p;
193
0
  const char *reason;
194
0
  bool retry;
195
196
0
  file->sync_offset = file->hdr.hdr_size;
197
0
  file->sync_highest_modseq = file->hdr.initial_modseq;
198
0
  mail_transaction_log_file_skip_to_head(file);
199
200
  /* insert it to correct position */
201
0
  for (p = &file->log->files; *p != NULL; p = &(*p)->next) {
202
0
    if ((*p)->hdr.file_seq > file->hdr.file_seq)
203
0
      break;
204
0
    i_assert((*p)->hdr.file_seq < file->hdr.file_seq);
205
0
  }
206
207
0
  file->next = *p;
208
0
  *p = file;
209
210
0
  if (file->buffer != NULL) {
211
    /* if we read any unfinished data, make sure the buffer gets
212
       truncated. */
213
0
    (void)mail_transaction_log_file_sync(file, &retry, &reason);
214
0
    buffer_set_used_size(file->buffer,
215
0
             file->sync_offset - file->buffer_offset);
216
0
  }
217
0
}
218
219
static int
220
mail_transaction_log_init_hdr(struct mail_transaction_log *log,
221
            struct mail_transaction_log_header *hdr)
222
0
{
223
0
  struct mail_index *index = log->index;
224
0
  struct mail_transaction_log_file *file;
225
226
0
  i_assert(index->indexid != 0);
227
228
0
  i_zero(hdr);
229
0
  hdr->major_version = MAIL_TRANSACTION_LOG_MAJOR_VERSION;
230
0
  hdr->minor_version = MAIL_TRANSACTION_LOG_MINOR_VERSION;
231
0
  hdr->hdr_size = sizeof(struct mail_transaction_log_header);
232
0
  hdr->indexid = log->index->indexid;
233
0
  hdr->create_stamp = ioloop_time32;
234
0
#ifndef WORDS_BIGENDIAN
235
0
  hdr->compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN;
236
0
#endif
237
238
0
  if (index->fd != -1) {
239
    /* not creating index - make sure we have latest header */
240
0
    if (!index->mapping) {
241
0
      if (mail_index_map(index,
242
0
             MAIL_INDEX_SYNC_HANDLER_HEAD) <= 0)
243
0
        return -1;
244
0
    } else {
245
      /* if we got here from mapping, the .log file is
246
         corrupted. use whatever values we got from index
247
         file */
248
0
    }
249
0
  }
250
0
  if (index->map != NULL) {
251
0
    hdr->prev_file_seq = index->map->hdr.log_file_seq;
252
0
    hdr->prev_file_offset = index->map->hdr.log_file_head_offset;
253
0
    hdr->file_seq = index->map->hdr.log_file_seq + 1;
254
0
    hdr->initial_modseq =
255
0
      mail_index_map_modseq_get_highest(index->map);
256
0
  } else {
257
0
    hdr->file_seq = 1;
258
0
  }
259
0
  if (hdr->initial_modseq == 0) {
260
    /* modseq tracking in log files is required for many reasons
261
       nowadays, even if per-message modseqs aren't enabled in
262
       dovecot.index. */
263
0
    hdr->initial_modseq = 1;
264
0
  }
265
266
0
  if (log->head != NULL) {
267
    /* make sure the sequence always increases to avoid crashes
268
       later. this catches the buggy case where two processes
269
       happen to replace the same log file. */
270
0
    for (file = log->head->next; file != NULL; file = file->next) {
271
0
      if (hdr->file_seq <= file->hdr.file_seq)
272
0
        hdr->file_seq = file->hdr.file_seq + 1;
273
0
    }
274
275
0
    if (hdr->file_seq <= log->head->hdr.file_seq) {
276
      /* make sure the sequence grows */
277
0
      hdr->file_seq = log->head->hdr.file_seq+1;
278
0
    }
279
0
    if (hdr->initial_modseq < log->head->sync_highest_modseq) {
280
      /* this should be always up-to-date */
281
0
      hdr->initial_modseq = log->head->sync_highest_modseq;
282
0
    }
283
0
  }
284
0
  return 0;
285
0
}
286
287
struct mail_transaction_log_file *
288
mail_transaction_log_file_alloc_in_memory(struct mail_transaction_log *log)
289
0
{
290
0
  struct mail_transaction_log_file *file;
291
292
0
  file = mail_transaction_log_file_alloc(log, MEMORY_LOG_NAME);
293
0
  if (mail_transaction_log_init_hdr(log, &file->hdr) < 0) {
294
0
    i_free(file);
295
0
    return NULL;
296
0
  }
297
298
0
  file->buffer = buffer_create_dynamic(default_pool, 4096);
299
0
  file->buffer_offset = sizeof(file->hdr);
300
301
0
  mail_transaction_log_file_add_to_list(file);
302
0
  return file;
303
0
}
304
305
static int
306
mail_transaction_log_file_dotlock(struct mail_transaction_log_file *file)
307
0
{
308
0
  struct dotlock_settings dotlock_set;
309
0
  int ret;
310
311
0
  if (file->log->dotlock_refcount > 0)
312
0
    ret = 1;
313
0
  else {
314
0
    i_assert(file->log->dotlock_refcount == 0);
315
0
    mail_transaction_log_get_dotlock_set(file->log, &dotlock_set);
316
0
    ret = file_dotlock_create(&dotlock_set, file->filepath, 0,
317
0
            &file->log->dotlock);
318
0
  }
319
0
  if (ret > 0) {
320
0
    file->log->dotlock_refcount++;
321
0
    file->locked = TRUE;
322
0
    file->lock_create_time = time(NULL);
323
0
    return 0;
324
0
  }
325
0
  if (ret < 0) {
326
0
    log_file_set_syscall_error(file, "file_dotlock_create()");
327
0
    return -1;
328
0
  }
329
330
0
  mail_index_set_error(file->log->index,
331
0
           "Timeout (%us) while waiting for "
332
0
           "dotlock for transaction log file %s",
333
0
           dotlock_set.timeout, file->filepath);
334
0
  return -1;
335
0
}
336
337
static int
338
mail_transaction_log_file_undotlock(struct mail_transaction_log_file *file)
339
0
{
340
0
  int ret;
341
342
0
  i_assert(file->log->dotlock_refcount >= 0);
343
0
  if (--file->log->dotlock_refcount > 0)
344
0
    return 0;
345
346
0
  ret = file_dotlock_delete(&file->log->dotlock);
347
0
  if (ret < 0) {
348
0
    log_file_set_syscall_error(file, "file_dotlock_delete()");
349
0
    return -1;
350
0
  }
351
352
0
  if (ret == 0) {
353
0
    mail_index_set_error(file->log->index,
354
0
      "Dotlock was lost for transaction log file %s",
355
0
      file->filepath);
356
0
    return -1;
357
0
  }
358
0
  return 0;
359
0
}
360
361
int mail_transaction_log_file_lock(struct mail_transaction_log_file *file)
362
0
{
363
0
  struct mail_index *index = file->log->index;
364
0
  unsigned int lock_timeout_secs;
365
0
  int ret;
366
367
0
  if (file->locked)
368
0
    return 0;
369
370
0
  if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
371
0
    file->locked = TRUE;
372
0
    return 0;
373
0
  }
374
375
0
  if (index->readonly) {
376
0
    mail_index_set_error_code(
377
0
      index, MAIL_INDEX_ERROR_CODE_NO_ACCESS,
378
0
      "Index is read-only, can't write-lock %s",
379
0
      file->filepath);
380
0
    return -1;
381
0
  }
382
383
0
  if (index->set.lock_method == FILE_LOCK_METHOD_DOTLOCK)
384
0
    return mail_transaction_log_file_dotlock(file);
385
386
0
  i_assert(file->file_lock == NULL);
387
0
  lock_timeout_secs = I_MIN(MAIL_TRANSACTION_LOG_LOCK_TIMEOUT,
388
0
          index->set.max_lock_timeout_secs);
389
0
  ret = mail_index_lock_fd(index, file->filepath, file->fd,
390
0
         F_WRLCK, lock_timeout_secs,
391
0
         &file->file_lock);
392
0
  if (ret > 0) {
393
0
    file->locked = TRUE;
394
0
    file->lock_create_time = time(NULL);
395
0
    return 0;
396
0
  }
397
0
  if (ret < 0) {
398
0
    log_file_set_syscall_error(file, "mail_index_wait_lock_fd()");
399
0
    return -1;
400
0
  }
401
402
0
  mail_index_set_error(index,
403
0
    "Timeout (%us) while waiting for lock for "
404
0
    "transaction log file %s%s",
405
0
    lock_timeout_secs, file->filepath,
406
0
    file_lock_find(file->fd, index->set.lock_method, F_WRLCK));
407
0
  return -1;
408
0
}
409
410
void mail_transaction_log_file_unlock(struct mail_transaction_log_file *file,
411
              const char *lock_reason)
412
0
{
413
0
  unsigned int lock_time;
414
415
0
  if (!file->locked)
416
0
    return;
417
418
0
  file->locked = FALSE;
419
0
  file->locked_sync_offset_updated = FALSE;
420
421
0
  if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file))
422
0
    return;
423
424
0
  lock_time = time(NULL) - file->lock_create_time;
425
0
  if (lock_time >= MAIL_TRANSACTION_LOG_LOCK_WARN_SECS && lock_reason != NULL) {
426
0
    e_warning(file->log->index->event,
427
0
        "Transaction log file %s was locked for %u seconds (%s)",
428
0
        file->filepath, lock_time, lock_reason);
429
0
  }
430
431
0
  if (file->log->index->set.lock_method == FILE_LOCK_METHOD_DOTLOCK) {
432
0
    (void)mail_transaction_log_file_undotlock(file);
433
0
    return;
434
0
  }
435
436
0
  file_unlock(&file->file_lock);
437
0
}
438
439
static ssize_t
440
mail_transaction_log_file_read_header(struct mail_transaction_log_file *file)
441
0
{
442
0
  void *dest;
443
0
  size_t pos, dest_size;
444
0
  ssize_t ret;
445
446
0
  i_assert(file->buffer == NULL && file->mmap_base == NULL);
447
448
0
  i_zero(&file->hdr);
449
0
  if (file->last_size < mmap_get_page_size() && file->last_size > 0) {
450
    /* just read the entire transaction log to memory.
451
       note that if some of the data hasn't been fully committed
452
       yet (hdr.size=0), the buffer must be truncated later */
453
0
    file->buffer = buffer_create_dynamic(default_pool, 4096);
454
0
    file->buffer_offset = 0;
455
0
    dest_size = file->last_size;
456
0
    dest = buffer_append_space_unsafe(file->buffer, dest_size);
457
0
  } else {
458
    /* read only the header */
459
0
    dest = &file->hdr;
460
0
    dest_size = sizeof(file->hdr);
461
0
  }
462
463
  /* it's not necessarily an error to read less than wanted header size,
464
     since older versions of the log format used smaller headers. */
465
0
        pos = 0;
466
0
  do {
467
0
    ret = pread(file->fd, PTR_OFFSET(dest, pos),
468
0
          dest_size - pos, pos);
469
0
    if (ret > 0)
470
0
      pos += ret;
471
0
  } while (ret > 0 && pos < dest_size);
472
473
0
  if (file->buffer != NULL) {
474
0
    buffer_set_used_size(file->buffer, pos);
475
0
    memcpy(&file->hdr, file->buffer->data,
476
0
           I_MIN(pos, sizeof(file->hdr)));
477
0
  }
478
479
0
  return ret < 0 ? -1 : (ssize_t)pos;
480
0
}
481
482
static int
483
mail_transaction_log_file_fail_dupe(struct mail_transaction_log_file *file)
484
0
{
485
0
  int ret;
486
487
  /* mark the old file corrupted. we can't safely remove
488
     it from the list however, so return failure. */
489
0
  file->hdr.indexid = 0;
490
0
  if (strcmp(file->filepath, file->log->head->filepath) != 0) {
491
    /* only mark .2 corrupted, just to make sure we don't lose any
492
       changes from .log in case we're somehow wrong */
493
0
    mail_transaction_log_mark_corrupted(file);
494
0
    ret = 0;
495
0
  } else {
496
0
    ret = -1;
497
0
  }
498
0
  if (!file->corrupted) {
499
0
    file->corrupted = TRUE;
500
0
    mail_index_set_error(file->log->index,
501
0
             "Transaction log %s: "
502
0
             "duplicate transaction log sequence (%u)",
503
0
             file->filepath, file->hdr.file_seq);
504
0
  }
505
0
  return ret;
506
0
}
507
508
static int
509
mail_transaction_log_file_read_hdr(struct mail_transaction_log_file *file,
510
           bool ignore_estale)
511
0
{
512
0
        struct mail_transaction_log_file *f;
513
0
  int ret;
514
515
0
  i_assert(!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file));
516
517
0
  if (file->corrupted)
518
0
    return 0;
519
520
0
  ret = mail_transaction_log_file_read_header(file);
521
0
  if (ret < 0) {
522
0
                if (errno != ESTALE || !ignore_estale)
523
0
      log_file_set_syscall_error(file, "pread()");
524
0
    return -1;
525
0
  }
526
0
  if (file->hdr.major_version != MAIL_TRANSACTION_LOG_MAJOR_VERSION) {
527
    /* incompatible version - fix silently */
528
0
    return 0;
529
0
  }
530
0
  if (ret < MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE) {
531
0
    mail_transaction_log_file_set_corrupted(file,
532
0
      "unexpected end of file while reading header");
533
0
    return 0;
534
0
  }
535
536
0
  const unsigned int hdr_version =
537
0
    MAIL_TRANSACTION_LOG_HDR_VERSION(&file->hdr);
538
0
  if (MAIL_TRANSACTION_LOG_VERSION_HAVE(hdr_version, COMPAT_FLAGS)) {
539
    /* we have compatibility flags */
540
0
    enum mail_index_header_compat_flags compat_flags = 0;
541
542
0
#ifndef WORDS_BIGENDIAN
543
0
    compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN;
544
0
#endif
545
0
    if (file->hdr.compat_flags != compat_flags) {
546
      /* architecture change */
547
0
      mail_index_set_error(file->log->index,
548
0
               "Rebuilding index file %s: "
549
0
               "CPU architecture changed",
550
0
               file->log->index->filepath);
551
0
      return 0;
552
0
    }
553
0
  }
554
0
  if (file->hdr.hdr_size < MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE) {
555
0
    mail_transaction_log_file_set_corrupted(file,
556
0
      "Header size too small");
557
0
    return 0;
558
0
  }
559
0
  if (file->hdr.hdr_size < sizeof(file->hdr)) {
560
    /* @UNSAFE: smaller than we expected - zero out the fields we
561
       shouldn't have filled */
562
0
    memset(PTR_OFFSET(&file->hdr, file->hdr.hdr_size), 0,
563
0
           sizeof(file->hdr) - file->hdr.hdr_size);
564
0
  }
565
566
0
  if (file->hdr.indexid == 0) {
567
    /* corrupted */
568
0
    file->corrupted = TRUE;
569
0
    mail_index_set_error(file->log->index,
570
0
      "Transaction log file %s: marked corrupted",
571
0
      file->filepath);
572
0
    return 0;
573
0
  }
574
0
  if (file->hdr.indexid != file->log->index->indexid) {
575
0
    if (file->log->index->indexid == 0 ||
576
0
        file->log->index->initial_create)
577
0
      ;
578
0
    else if (strcmp(file->filepath, file->log->filepath2) == 0) {
579
      /* .log.2 has a different indexid. This is rather
580
         unlikely situation to notice. We'll handle it by
581
         deleting the .log.2 so a permanently wrong indexid
582
         gets fixed automatically. Since .log.2 doesn't
583
         contain anything critical, it's not so bad even if
584
         the deletion wasn't really necessary. */
585
0
      mail_transaction_log_file_set_corrupted(file,
586
0
        "indexid changed: %u -> %u - deleting",
587
0
        file->log->index->indexid, file->hdr.indexid);
588
0
      return 0;
589
0
    } else {
590
      /* Index was just rebuilt, possibly because the whole
591
         mailbox was recreated under us. Handle it the same
592
         as if the mailbox directory had been deleted. */
593
0
      e_debug(file->log->index->event,
594
0
        "Transaction log file %s indexid changed: %u -> %u",
595
0
        file->filepath,
596
0
        file->log->index->indexid, file->hdr.indexid);
597
0
      file->log->index->index_deleted = TRUE;
598
0
      errno = ENOENT;
599
0
      return -1;
600
0
    }
601
602
    /* creating index file. since transaction log is created
603
       first, use the indexid in it to create the main index
604
       to avoid races. */
605
0
    file->log->index->indexid = file->hdr.indexid;
606
0
  }
607
608
  /* make sure we already don't have a file with the same sequence
609
     opened. it shouldn't happen unless the old log file was
610
     corrupted. */
611
0
  for (f = file->log->files; f != NULL; f = f->next) {
612
0
    if (f->hdr.file_seq == file->hdr.file_seq) {
613
0
      if (strcmp(f->filepath, f->log->head->filepath) != 0) {
614
        /* old "f" is the .log.2 */
615
0
        return mail_transaction_log_file_fail_dupe(f);
616
0
      } else {
617
        /* new "file" is probably the .log.2 */
618
0
        return mail_transaction_log_file_fail_dupe(file);
619
0
      }
620
0
    }
621
0
  }
622
623
0
  file->sync_highest_modseq = file->hdr.initial_modseq;
624
0
  return 1;
625
0
}
626
627
static int
628
mail_transaction_log_file_stat(struct mail_transaction_log_file *file,
629
             bool ignore_estale)
630
0
{
631
0
  struct stat st;
632
633
0
  if (fstat(file->fd, &st) < 0) {
634
0
                if (!ESTALE_FSTAT(errno) || !ignore_estale)
635
0
      log_file_set_syscall_error(file, "fstat()");
636
0
    return -1;
637
0
  }
638
639
0
  file->st_dev = st.st_dev;
640
0
  file->st_ino = st.st_ino;
641
0
  file->last_mtime = st.st_mtime;
642
0
  file->last_size = st.st_size;
643
0
  return 0;
644
0
}
645
646
static bool
647
mail_transaction_log_file_is_dupe(struct mail_transaction_log_file *file)
648
0
{
649
0
  struct mail_transaction_log_file *tmp;
650
651
0
  for (tmp = file->log->files; tmp != NULL; tmp = tmp->next) {
652
0
    if (tmp->st_ino == file->st_ino &&
653
0
        CMP_DEV_T(tmp->st_dev, file->st_dev))
654
0
      return TRUE;
655
0
  }
656
0
  return FALSE;
657
0
}
658
659
static void log_write_ext_hdr_init_data(struct mail_index *index, buffer_t *buf)
660
0
{
661
0
  const struct mail_index_registered_ext *rext;
662
0
  struct mail_transaction_header *hdr;
663
0
  struct mail_transaction_ext_intro *intro;
664
0
  struct mail_transaction_ext_hdr_update *ext_hdr;
665
0
  unsigned int hdr_offset;
666
667
0
  rext = array_idx(&index->extensions, index->set.ext_hdr_init_id);
668
669
  /* introduce the extension */
670
0
  hdr_offset = buf->used;
671
0
  hdr = buffer_append_space_unsafe(buf, sizeof(*hdr));
672
0
  hdr->type = MAIL_TRANSACTION_EXT_INTRO;
673
674
0
  intro = buffer_append_space_unsafe(buf, sizeof(*intro));
675
0
  intro->ext_id = (uint32_t)-1;
676
0
  intro->hdr_size = rext->hdr_size;
677
0
  intro->record_size = rext->record_size;
678
0
  intro->record_align = rext->record_align;
679
0
  intro->name_size = strlen(rext->name);
680
0
  buffer_append(buf, rext->name, intro->name_size);
681
0
  if (buf->used % 4 != 0)
682
0
    buffer_append_zero(buf, 4 - buf->used % 4);
683
684
0
  hdr = buffer_get_space_unsafe(buf, hdr_offset, sizeof(*hdr));
685
0
  hdr->size = mail_index_uint32_to_offset(buf->used - hdr_offset);
686
687
  /* add the extension header data */
688
0
  hdr_offset = buf->used;
689
0
  hdr = buffer_append_space_unsafe(buf, sizeof(*hdr));
690
0
  hdr->type = MAIL_TRANSACTION_EXT_HDR_UPDATE;
691
692
0
  ext_hdr = buffer_append_space_unsafe(buf, sizeof(*ext_hdr));
693
0
  ext_hdr->size = rext->hdr_size;
694
0
  buffer_append(buf, index->set.ext_hdr_init_data, rext->hdr_size);
695
696
0
  hdr = buffer_get_space_unsafe(buf, hdr_offset, sizeof(*hdr));
697
0
  hdr->size = mail_index_uint32_to_offset(buf->used - hdr_offset);
698
0
}
699
700
static int
701
mail_transaction_log_file_create2(struct mail_transaction_log_file *file,
702
          int new_fd, bool reset,
703
          struct dotlock **dotlock)
704
0
{
705
0
  struct mail_index *index = file->log->index;
706
0
  struct stat st;
707
0
  const char *path2;
708
0
  buffer_t *writebuf;
709
0
  int fd, ret;
710
0
  bool rename_existing, need_lock;
711
712
0
  need_lock = file->log->head != NULL && file->log->head->locked;
713
714
0
  if (fcntl(new_fd, F_SETFL, O_APPEND) < 0) {
715
0
    log_file_set_syscall_error(file, "fcntl(O_APPEND)");
716
0
    return -1;
717
0
  }
718
719
0
  if ((index->flags & MAIL_INDEX_OPEN_FLAG_NFS_FLUSH) != 0) {
720
    /* although we check also mtime and file size below, it's done
721
       only to fix broken log files. we don't bother flushing
722
       attribute cache just for that. */
723
0
    nfs_flush_file_handle_cache(file->filepath);
724
0
  }
725
726
  /* log creation is locked now - see if someone already created it.
727
     note that if we're rotating, we need to keep the log locked until
728
     the file has been rewritten. and because fcntl() locks are stupid,
729
     if we go and open()+close() the file and we had it already opened,
730
     its locks are lost. so we use stat() to check if the file has been
731
     recreated, although it almost never is. */
732
0
  if (reset)
733
0
    rename_existing = FALSE;
734
0
  else if (nfs_safe_stat(file->filepath, &st) < 0) {
735
0
    if (errno != ENOENT) {
736
0
      log_file_set_syscall_error(file, "stat()");
737
0
      return -1;
738
0
    }
739
0
    rename_existing = FALSE;
740
0
  } else if (st.st_ino == file->st_ino &&
741
0
       CMP_DEV_T(st.st_dev, file->st_dev) &&
742
       /* inode/dev checks are enough when we're rotating the file,
743
          but not when we're replacing a broken log file */
744
0
       st.st_mtime == file->last_mtime &&
745
0
       (uoff_t)st.st_size == file->last_size) {
746
    /* no-one else recreated the file */
747
0
    rename_existing = TRUE;
748
0
  } else {
749
    /* recreated. use the file if its header is ok */
750
0
    fd = nfs_safe_open(file->filepath, O_RDWR | O_APPEND);
751
0
    if (fd == -1) {
752
0
      if (errno != ENOENT) {
753
0
        log_file_set_syscall_error(file, "open()");
754
0
        return -1;
755
0
      }
756
0
    } else {
757
0
      file->fd = fd;
758
0
      file->last_size = 0;
759
0
      if (mail_transaction_log_file_read_hdr(file,
760
0
                     FALSE) > 0 &&
761
0
          mail_transaction_log_file_stat(file, FALSE) == 0) {
762
        /* yes, it was ok */
763
0
        file_dotlock_delete(dotlock);
764
0
        mail_transaction_log_file_add_to_list(file);
765
0
        return 0;
766
0
      }
767
0
      file->fd = -1;
768
0
      if (close(fd) < 0)
769
0
        log_file_set_syscall_error(file, "close()");
770
0
    }
771
0
    rename_existing = FALSE;
772
0
  }
773
774
0
  if (index->fd == -1 && !rename_existing) {
775
    /* creating the initial index */
776
0
    reset = TRUE;
777
0
  }
778
779
0
  if (mail_transaction_log_init_hdr(file->log, &file->hdr) < 0)
780
0
    return -1;
781
782
0
  if (reset) {
783
    /* don't reset modseqs. if we're resetting due to rebuilding
784
       indexes we'll probably want to keep uidvalidity and in such
785
       cases we really don't want to shrink modseqs. */
786
0
    file->hdr.prev_file_seq = 0;
787
0
    file->hdr.prev_file_offset = 0;
788
0
  }
789
790
0
  writebuf = t_buffer_create(128);
791
0
  buffer_append(writebuf, &file->hdr, sizeof(file->hdr));
792
793
0
  if (index->set.ext_hdr_init_data != NULL && reset)
794
0
    log_write_ext_hdr_init_data(index, writebuf);
795
0
  if (write_full(new_fd, writebuf->data, writebuf->used) < 0) {
796
0
    log_file_set_syscall_error(file, "write_full()");
797
0
    return -1;
798
0
  }
799
800
0
  if (file->log->index->set.fsync_mode == FSYNC_MODE_ALWAYS) {
801
    /* the header isn't important, so don't bother calling
802
       fdatasync() unless it's required */
803
0
    if (fdatasync(new_fd) < 0) {
804
0
      log_file_set_syscall_error(file, "fdatasync()");
805
0
      return -1;
806
0
    }
807
0
  }
808
809
0
  file->fd = new_fd;
810
0
  ret = mail_transaction_log_file_stat(file, FALSE);
811
812
0
  if (need_lock && ret == 0) {
813
    /* we'll need to preserve the lock */
814
0
    if (mail_transaction_log_file_lock(file) < 0)
815
0
      ret = -1;
816
0
  }
817
818
  /* if we return -1 the dotlock deletion code closes the fd */
819
0
  file->fd = -1;
820
0
  if (ret < 0)
821
0
    return -1;
822
823
  /* keep two log files */
824
0
  if (rename_existing) {
825
    /* rename() would be nice and easy way to do this, except then
826
       there's a race condition between the rename and
827
       file_dotlock_replace(). during that time the log file
828
       doesn't exist, which could cause problems. */
829
0
    path2 = t_strconcat(file->filepath, ".2", NULL);
830
0
    if (i_unlink_if_exists(path2) < 0) {
831
      /* try to link() anyway */
832
0
    }
833
0
    if (nfs_safe_link(file->filepath, path2, FALSE) < 0 &&
834
0
        errno != ENOENT && errno != EEXIST) {
835
0
                        mail_index_set_error(index, "link(%s, %s) failed: %m",
836
0
               file->filepath, path2);
837
      /* ignore the error. we don't care that much about the
838
         second log file and we're going to overwrite this
839
         first one. */
840
0
    }
841
    /* NOTE: here's a race condition where both .log and .log.2
842
       point to the same file. our reading code should ignore that
843
       though by comparing the inodes. */
844
0
  }
845
846
0
  if (file_dotlock_replace(dotlock,
847
0
         DOTLOCK_REPLACE_FLAG_DONT_CLOSE_FD) <= 0) {
848
    /* need to unlock to avoid assert-crash in
849
       mail_transaction_log_file_free() */
850
0
    mail_transaction_log_file_unlock(file, "creation failed");
851
0
    return -1;
852
0
  }
853
854
  /* success */
855
0
  file->fd = new_fd;
856
0
  mail_transaction_log_file_add_to_list(file);
857
858
0
  i_assert(!need_lock || file->locked);
859
0
  return 1;
860
0
}
861
862
int mail_transaction_log_file_create(struct mail_transaction_log_file *file,
863
             bool reset)
864
0
{
865
0
  struct mail_index *index = file->log->index;
866
0
  struct dotlock_settings new_dotlock_set;
867
0
  struct dotlock *dotlock;
868
0
  mode_t old_mask;
869
0
  int fd, ret;
870
871
0
  i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
872
873
0
  if (file->log->index->readonly) {
874
0
    mail_index_set_error(index,
875
0
      "Can't create log file %s: Index is read-only",
876
0
      file->filepath);
877
0
    return -1;
878
0
  }
879
880
0
  if (index->indexid == 0) {
881
0
    mail_index_set_error(index,
882
0
      "Can't create log file %s: Index is marked corrupted",
883
0
      file->filepath);
884
0
    return -1;
885
0
  }
886
887
0
  mail_transaction_log_get_dotlock_set(file->log, &new_dotlock_set);
888
0
  new_dotlock_set.lock_suffix = LOG_NEW_DOTLOCK_SUFFIX;
889
890
  /* With dotlocking we might already have path.lock created, so this
891
     filename has to be different. */
892
0
  old_mask = umask(index->set.mode ^ 0666);
893
0
  fd = file_dotlock_open(&new_dotlock_set, file->filepath, 0, &dotlock);
894
0
  umask(old_mask);
895
896
0
  if (fd == -1) {
897
0
    log_file_set_syscall_error(file, "file_dotlock_open()");
898
0
    return -1;
899
0
  }
900
0
  mail_index_fchown(index, fd, file_dotlock_get_lock_path(dotlock));
901
902
        /* either fd gets used or the dotlock gets deleted and returned fd
903
           is for the existing file */
904
0
  ret = mail_transaction_log_file_create2(file, fd, reset, &dotlock);
905
0
  if (ret < 0) {
906
0
    if (dotlock != NULL)
907
0
      file_dotlock_delete(&dotlock);
908
0
    return -1;
909
0
  }
910
0
  return ret;
911
0
}
912
913
int mail_transaction_log_file_open(struct mail_transaction_log_file *file,
914
           const char **reason_r)
915
0
{
916
0
  struct mail_index *index = file->log->index;
917
0
        unsigned int i;
918
0
  bool ignore_estale;
919
0
  int ret;
920
921
0
        for (i = 0;; i++) {
922
0
    if (!index->readonly) {
923
0
      file->fd = nfs_safe_open(file->filepath,
924
0
             O_RDWR | O_APPEND);
925
0
    } else {
926
0
      file->fd = nfs_safe_open(file->filepath, O_RDONLY);
927
0
    }
928
0
    if (file->fd == -1 && ENOACCESS(errno)) {
929
0
      file->fd = nfs_safe_open(file->filepath, O_RDONLY);
930
0
      index->readonly = TRUE;
931
0
    }
932
0
    if (file->fd == -1) {
933
0
      if (errno == ENOENT) {
934
0
        *reason_r = "File doesn't exist";
935
0
        return 0;
936
0
      }
937
938
0
      log_file_set_syscall_error(file, "open()");
939
0
      *reason_r = t_strdup_printf("open() failed: %m");
940
0
      return -1;
941
0
                }
942
943
0
    ignore_estale = i < MAIL_INDEX_ESTALE_RETRY_COUNT;
944
0
    if (mail_transaction_log_file_stat(file, ignore_estale) < 0)
945
0
      ret = -1;
946
0
    else if (mail_transaction_log_file_is_dupe(file)) {
947
      /* probably our already opened .log file has been
948
         renamed to .log.2 and we're trying to reopen it.
949
         also possible that hit a race condition where .log
950
         and .log.2 are linked. */
951
0
      *reason_r = "File is already open";
952
0
      return 0;
953
0
    } else {
954
0
      ret = mail_transaction_log_file_read_hdr(file,
955
0
                 ignore_estale);
956
0
    }
957
0
    if (ret > 0) {
958
      /* success */
959
0
      break;
960
0
    }
961
962
0
    if (ret == 0) {
963
      /* corrupted */
964
0
      if (index->readonly) {
965
        /* don't delete */
966
0
      } else {
967
0
        i_unlink_if_exists(file->filepath);
968
0
      }
969
0
      *reason_r = "File is corrupted";
970
0
      return 0;
971
0
    }
972
0
    if (errno != ESTALE ||
973
0
        i == MAIL_INDEX_ESTALE_RETRY_COUNT) {
974
      /* syscall error */
975
0
      *reason_r = t_strdup_printf("fstat() failed: %m");
976
0
      return -1;
977
0
    }
978
979
    /* ESTALE - try again */
980
0
    buffer_free(&file->buffer);
981
0
        }
982
983
0
  mail_transaction_log_file_add_to_list(file);
984
0
  return 1;
985
0
}
986
987
static int
988
log_file_track_mailbox_sync_offset_hdr(struct mail_transaction_log_file *file,
989
               const void *data, unsigned int trans_size,
990
               const char **error_r)
991
0
{
992
0
  const struct mail_transaction_header_update *u = data;
993
0
  const struct mail_index_header *ihdr;
994
0
  const unsigned int size = trans_size - sizeof(struct mail_transaction_header);
995
0
  const unsigned int offset_pos =
996
0
    offsetof(struct mail_index_header, log_file_tail_offset);
997
0
  const unsigned int offset_size = sizeof(ihdr->log_file_tail_offset);
998
0
  uint32_t tail_offset;
999
1000
0
  i_assert(offset_size == sizeof(tail_offset));
1001
1002
0
  if (size < sizeof(*u) || size < sizeof(*u) + u->size) {
1003
0
    *error_r = "header update extends beyond record size";
1004
0
    mail_transaction_log_file_set_corrupted(file, "%s", *error_r);
1005
0
    return -1;
1006
0
  }
1007
1008
0
  if (u->offset <= offset_pos &&
1009
0
      u->offset + u->size >= offset_pos + offset_size) {
1010
0
    memcpy(&tail_offset,
1011
0
           CONST_PTR_OFFSET(u + 1, offset_pos - u->offset),
1012
0
           sizeof(tail_offset));
1013
1014
0
    if (tail_offset < file->last_read_hdr_tail_offset) {
1015
      /* ignore shrinking tail offsets */
1016
0
      return 1;
1017
0
    } else if (tail_offset > file->sync_offset + trans_size) {
1018
0
      mail_transaction_log_file_set_corrupted(file,
1019
0
        "log_file_tail_offset %u goes past sync offset %"PRIuUOFF_T,
1020
0
        tail_offset, file->sync_offset + trans_size);
1021
0
    } else {
1022
0
      file->last_read_hdr_tail_offset = tail_offset;
1023
0
      if (tail_offset > file->max_tail_offset)
1024
0
        file->max_tail_offset = tail_offset;
1025
0
      return 1;
1026
0
    }
1027
0
  }
1028
0
  return 0;
1029
0
}
1030
1031
static bool
1032
flag_updates_have_non_internal(const struct mail_transaction_flag_update *u,
1033
             unsigned int count, unsigned int version)
1034
0
{
1035
  /* Hide internal flags from modseqs if the log file's version
1036
     is new enough. This allows upgrading without the modseqs suddenly
1037
     shrinking. */
1038
0
  if (!MAIL_TRANSACTION_LOG_VERSION_HAVE(version, HIDE_INTERNAL_MODSEQS))
1039
0
    return TRUE;
1040
1041
0
  for (unsigned int i = 0; i < count; i++) {
1042
0
    if (!MAIL_TRANSACTION_FLAG_UPDATE_IS_INTERNAL(&u[i]))
1043
0
      return TRUE;
1044
0
  }
1045
0
  return FALSE;
1046
0
}
1047
1048
void mail_transaction_update_modseq(const struct mail_transaction_header *hdr,
1049
            const void *data, uint64_t *cur_modseq,
1050
            unsigned int version)
1051
0
{
1052
0
  uint32_t trans_size;
1053
1054
0
  trans_size = mail_index_offset_to_uint32(hdr->size);
1055
0
  i_assert(trans_size != 0);
1056
1057
0
  if (*cur_modseq != 0) {
1058
    /* tracking modseqs */
1059
0
  } else if ((hdr->type & MAIL_TRANSACTION_TYPE_MASK) ==
1060
0
       MAIL_TRANSACTION_EXT_INTRO) {
1061
    /* modseqs not tracked yet. see if this is a modseq
1062
       extension introduction. */
1063
0
    const struct mail_transaction_ext_intro *intro = data;
1064
0
    const unsigned int modseq_ext_len =
1065
0
      strlen(MAIL_INDEX_MODSEQ_EXT_NAME);
1066
1067
0
    if (intro->name_size == modseq_ext_len &&
1068
0
        memcmp(intro + 1, MAIL_INDEX_MODSEQ_EXT_NAME,
1069
0
         modseq_ext_len) == 0) {
1070
      /* modseq tracking started */
1071
0
      *cur_modseq += 1;
1072
0
    }
1073
0
    return;
1074
0
  } else {
1075
    /* not tracking modseqs */
1076
0
    return;
1077
0
  }
1078
1079
0
  switch (hdr->type & MAIL_TRANSACTION_TYPE_MASK) {
1080
0
  case MAIL_TRANSACTION_EXPUNGE | MAIL_TRANSACTION_EXPUNGE_PROT:
1081
0
  case MAIL_TRANSACTION_EXPUNGE_GUID | MAIL_TRANSACTION_EXPUNGE_PROT:
1082
0
    if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0) {
1083
      /* ignore expunge requests */
1084
0
      break;
1085
0
    }
1086
    /* fall through */
1087
0
  case MAIL_TRANSACTION_APPEND:
1088
0
  case MAIL_TRANSACTION_KEYWORD_UPDATE:
1089
0
  case MAIL_TRANSACTION_KEYWORD_RESET:
1090
0
  case MAIL_TRANSACTION_ATTRIBUTE_UPDATE:
1091
    /* these changes increase modseq */
1092
0
    *cur_modseq += 1;
1093
0
    break;
1094
0
  case MAIL_TRANSACTION_FLAG_UPDATE: {
1095
0
    const struct mail_transaction_flag_update *rec = data;
1096
0
    unsigned int count;
1097
1098
0
    count = (trans_size - sizeof(*hdr)) / sizeof(*rec);
1099
0
    if (flag_updates_have_non_internal(rec, count, version))
1100
0
      *cur_modseq += 1;
1101
0
    break;
1102
0
  }
1103
0
  case MAIL_TRANSACTION_MODSEQ_UPDATE: {
1104
0
    const struct mail_transaction_modseq_update *rec, *end;
1105
1106
0
    end = CONST_PTR_OFFSET(data, trans_size - sizeof(*hdr));
1107
0
    for (rec = data; rec < end; rec++) {
1108
0
      uint64_t modseq = ((uint64_t)rec->modseq_high32 << 32) |
1109
0
        rec->modseq_low32;
1110
0
      if (*cur_modseq < modseq)
1111
0
        *cur_modseq = modseq;
1112
0
    }
1113
0
  }
1114
0
  }
1115
0
}
1116
1117
static int
1118
log_file_track_sync(struct mail_transaction_log_file *file,
1119
        const struct mail_transaction_header *hdr,
1120
        unsigned int trans_size, const char **error_r)
1121
0
{
1122
0
  const void *data = hdr + 1;
1123
0
  int ret;
1124
1125
0
  mail_transaction_update_modseq(hdr, hdr + 1, &file->sync_highest_modseq,
1126
0
    MAIL_TRANSACTION_LOG_HDR_VERSION(&file->hdr));
1127
0
  if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0)
1128
0
    return 1;
1129
1130
  /* external transactions: */
1131
0
  switch (hdr->type & MAIL_TRANSACTION_TYPE_MASK) {
1132
0
  case MAIL_TRANSACTION_HEADER_UPDATE:
1133
    /* see if this updates mailbox_sync_offset */
1134
0
    ret = log_file_track_mailbox_sync_offset_hdr(file, data,
1135
0
                   trans_size, error_r);
1136
0
    if (ret != 0)
1137
0
      return ret < 0 ? -1 : 1;
1138
0
    break;
1139
0
  case MAIL_TRANSACTION_INDEX_DELETED:
1140
0
    if (file->sync_offset < file->index_undeleted_offset ||
1141
0
        file->hdr.file_seq < file->log->index->index_delete_changed_file_seq)
1142
0
      break;
1143
0
    file->log->index->index_deleted = TRUE;
1144
0
    file->log->index->index_delete_requested = FALSE;
1145
0
    file->log->index->index_delete_changed_file_seq = file->hdr.file_seq;
1146
0
    file->index_deleted_offset = file->sync_offset + trans_size;
1147
0
    break;
1148
0
  case MAIL_TRANSACTION_INDEX_UNDELETED:
1149
0
    if (file->sync_offset < file->index_deleted_offset ||
1150
0
        file->hdr.file_seq < file->log->index->index_delete_changed_file_seq)
1151
0
      break;
1152
0
    file->log->index->index_deleted = FALSE;
1153
0
    file->log->index->index_delete_requested = FALSE;
1154
0
    file->log->index->index_delete_changed_file_seq = file->hdr.file_seq;
1155
0
    file->index_undeleted_offset = file->sync_offset + trans_size;
1156
0
    break;
1157
0
  case MAIL_TRANSACTION_BOUNDARY: {
1158
0
    const struct mail_transaction_boundary *boundary =
1159
0
      (const void *)(hdr + 1);
1160
0
    size_t wanted_buffer_size;
1161
1162
0
    wanted_buffer_size = file->sync_offset - file->buffer_offset +
1163
0
      boundary->size;
1164
0
    if (wanted_buffer_size > file->buffer->used) {
1165
      /* the full transaction hasn't been written yet */
1166
0
      return 0;
1167
0
    }
1168
0
    break;
1169
0
  }
1170
0
  }
1171
1172
0
  if (file->max_tail_offset == file->sync_offset) {
1173
    /* external transactions aren't synced to mailbox. we can
1174
       update mailbox sync offset to skip this transaction to
1175
       avoid re-reading it at the next sync. */
1176
0
    file->max_tail_offset += trans_size;
1177
0
  }
1178
0
  return 1;
1179
0
}
1180
1181
static int
1182
mail_transaction_log_file_sync(struct mail_transaction_log_file *file,
1183
             bool *retry_r, const char **reason_r)
1184
0
{
1185
0
        const struct mail_transaction_header *hdr;
1186
0
  const void *data;
1187
0
  struct stat st;
1188
0
  size_t size, avail;
1189
0
  uint32_t trans_size = 0;
1190
0
  int ret;
1191
1192
0
  i_assert(file->sync_offset >= file->buffer_offset);
1193
1194
0
  *retry_r = FALSE;
1195
1196
0
  data = buffer_get_data(file->buffer, &size);
1197
0
  if (file->buffer_offset + size < file->sync_offset) {
1198
0
    *reason_r = t_strdup_printf(
1199
0
      "log file shrank (%"PRIuUOFF_T" < %"PRIuUOFF_T")",
1200
0
      file->buffer_offset + (uoff_t)size, file->sync_offset);
1201
0
    mail_transaction_log_file_set_corrupted(file, "%s", *reason_r);
1202
    /* fix the sync_offset to avoid crashes later on */
1203
0
    file->sync_offset = file->buffer_offset + size;
1204
0
    return 0;
1205
0
  }
1206
0
  while (file->sync_offset - file->buffer_offset + sizeof(*hdr) <= size) {
1207
0
    hdr = CONST_PTR_OFFSET(data, file->sync_offset -
1208
0
               file->buffer_offset);
1209
0
    trans_size = mail_index_offset_to_uint32(hdr->size);
1210
0
    if (trans_size == 0) {
1211
      /* unfinished or corrupted */
1212
0
      break;
1213
0
    }
1214
0
    if (trans_size < sizeof(*hdr)) {
1215
0
      *reason_r = t_strdup_printf(
1216
0
        "hdr.size too small (%u)", trans_size);
1217
0
      mail_transaction_log_file_set_corrupted(file, "%s", *reason_r);
1218
0
      return 0;
1219
0
    }
1220
1221
0
    if (file->sync_offset - file->buffer_offset + trans_size > size)
1222
0
      break;
1223
1224
    /* transaction has been fully written */
1225
0
    if ((ret = log_file_track_sync(file, hdr, trans_size, reason_r)) <= 0) {
1226
0
      if (ret < 0)
1227
0
        return 0;
1228
0
      break;
1229
0
    }
1230
1231
0
    file->sync_offset += trans_size;
1232
0
  }
1233
1234
0
  if (file->mmap_base != NULL && !file->locked) {
1235
    /* Now that all the mmaped pages have page faulted, check if
1236
       the file had changed while doing that. Only after the last
1237
       page has faulted, the size returned by fstat() can be
1238
       trusted. Otherwise it might point to a page boundary while
1239
       the next page is still being written.
1240
1241
       Without this check we might see partial transactions,
1242
       sometimes causing "Extension record updated without intro
1243
       prefix" errors. */
1244
0
    if (fstat(file->fd, &st) < 0) {
1245
0
      log_file_set_syscall_error(file, "fstat()");
1246
0
      *reason_r = t_strdup_printf("fstat() failed: %m");
1247
0
      return -1;
1248
0
    }
1249
0
    if ((uoff_t)st.st_size != file->last_size) {
1250
0
      file->last_size = st.st_size;
1251
0
      *retry_r = TRUE;
1252
0
      *reason_r = "File size changed - retrying";
1253
0
      return 0;
1254
0
    }
1255
0
  }
1256
1257
0
  avail = file->sync_offset - file->buffer_offset;
1258
0
  if (avail != size) {
1259
    /* There's more data than we could sync at the moment. If the
1260
       last record's size wasn't valid, we can't know if it will
1261
       be updated unless we've locked the log. */
1262
0
    if (file->locked) {
1263
0
      *reason_r = "Unexpected garbage at EOF";
1264
0
      mail_transaction_log_file_set_corrupted(file, "%s", *reason_r);
1265
0
      return 0;
1266
0
    }
1267
    /* The size field will be updated soon */
1268
0
    mail_index_flush_read_cache(file->log->index, file->filepath,
1269
0
              file->fd, file->locked);
1270
0
  }
1271
1272
0
  if (file->next != NULL &&
1273
0
      file->hdr.file_seq == file->next->hdr.prev_file_seq &&
1274
0
      file->next->hdr.prev_file_offset != file->sync_offset) {
1275
0
    *reason_r = t_strdup_printf(
1276
0
      "Invalid transaction log size "
1277
0
      "(%"PRIuUOFF_T" vs %u): %s", file->sync_offset,
1278
0
      file->log->head->hdr.prev_file_offset, file->filepath);
1279
0
    mail_transaction_log_file_set_corrupted(file, "%s", *reason_r);
1280
0
    return 0;
1281
0
  }
1282
1283
0
  return 1;
1284
0
}
1285
1286
static int
1287
mail_transaction_log_file_insert_read(struct mail_transaction_log_file *file,
1288
              uoff_t offset, const char **reason_r)
1289
0
{
1290
0
  void *data;
1291
0
  size_t size;
1292
0
  ssize_t ret;
1293
1294
0
  size = file->buffer_offset - offset;
1295
0
  buffer_copy(file->buffer, size, file->buffer, 0, SIZE_MAX);
1296
1297
0
  data = buffer_get_space_unsafe(file->buffer, 0, size);
1298
0
  ret = pread_full(file->fd, data, size, offset);
1299
0
  if (ret > 0) {
1300
    /* success */
1301
0
    file->buffer_offset -= size;
1302
0
    return 1;
1303
0
  }
1304
1305
  /* failure. don't leave ourself to inconsistent state */
1306
0
  buffer_copy(file->buffer, 0, file->buffer, size, SIZE_MAX);
1307
0
  buffer_set_used_size(file->buffer, file->buffer->used - size);
1308
1309
0
  if (ret == 0) {
1310
0
    *reason_r = "file shrank unexpectedly";
1311
0
    mail_transaction_log_file_set_corrupted(file, "%s", *reason_r);
1312
0
    return 0;
1313
0
  } else if (errno == ESTALE) {
1314
    /* log file was deleted in NFS server, fail silently */
1315
0
    *reason_r = t_strdup_printf("read() failed: %m");
1316
0
    return 0;
1317
0
  } else {
1318
0
    log_file_set_syscall_error(file, "pread()");
1319
0
    *reason_r = t_strdup_printf("read() failed: %m");
1320
0
    return -1;
1321
0
  }
1322
0
}
1323
1324
static int
1325
mail_transaction_log_file_read_more(struct mail_transaction_log_file *file,
1326
            const char **reason_r)
1327
0
{
1328
0
  void *data;
1329
0
  size_t size;
1330
0
  uint32_t read_offset;
1331
0
  ssize_t ret;
1332
1333
0
  read_offset = file->buffer_offset + file->buffer->used;
1334
1335
0
  do {
1336
0
    data = buffer_append_space_unsafe(file->buffer, LOG_PREFETCH);
1337
0
    ret = pread(file->fd, data, LOG_PREFETCH, read_offset);
1338
0
    if (ret > 0)
1339
0
      read_offset += ret;
1340
1341
0
    size = read_offset - file->buffer_offset;
1342
0
    buffer_set_used_size(file->buffer, size);
1343
0
  } while (ret > 0 || (ret < 0 && errno == EINTR));
1344
1345
0
  file->last_size = read_offset;
1346
1347
0
  if (ret < 0) {
1348
0
    *reason_r = t_strdup_printf("pread() failed: %m");
1349
0
    if (errno == ESTALE) {
1350
      /* log file was deleted in NFS server, fail silently */
1351
0
      return 0;
1352
0
    }
1353
0
    log_file_set_syscall_error(file, "pread()");
1354
0
    return -1;
1355
0
  }
1356
0
  return 1;
1357
0
}
1358
1359
static bool
1360
mail_transaction_log_file_need_nfs_flush(struct mail_transaction_log_file *file)
1361
0
{
1362
0
  const struct mail_index_header *hdr = &file->log->index->map->hdr;
1363
0
  uoff_t max_offset = file->last_size;
1364
1365
0
  if (file->next != NULL &&
1366
0
      file->hdr.file_seq == file->next->hdr.prev_file_seq &&
1367
0
      file->next->hdr.prev_file_offset != max_offset) {
1368
    /* we already have a newer log file which says that we haven't
1369
       synced the entire file. */
1370
0
    return TRUE;
1371
0
  }
1372
1373
0
  if (file->hdr.file_seq == hdr->log_file_seq &&
1374
0
      max_offset < hdr->log_file_head_offset)
1375
0
    return TRUE;
1376
1377
0
  return FALSE;
1378
0
}
1379
1380
static int
1381
mail_transaction_log_file_read(struct mail_transaction_log_file *file,
1382
             uoff_t start_offset, bool nfs_flush,
1383
             const char **reason_r)
1384
0
{
1385
0
  bool retry;
1386
0
  int ret;
1387
1388
0
  i_assert(file->mmap_base == NULL);
1389
1390
  /* NFS: if file isn't locked, we're optimistic that we can read enough
1391
     data without flushing attribute cache. if after reading we notice
1392
     that we really should have read more, flush the cache and try again.
1393
     if file is locked, the attribute cache was already flushed when
1394
     refreshing the log. */
1395
0
  if (nfs_flush &&
1396
0
      (file->log->index->flags & MAIL_INDEX_OPEN_FLAG_NFS_FLUSH) != 0) {
1397
0
    if (!file->locked)
1398
0
      nfs_flush_attr_cache_unlocked(file->filepath);
1399
0
    else
1400
0
      nfs_flush_attr_cache_fd_locked(file->filepath, file->fd);
1401
0
  }
1402
1403
0
  if (file->buffer != NULL && file->buffer_offset > start_offset) {
1404
    /* we have to insert missing data to beginning of buffer */
1405
0
    ret = mail_transaction_log_file_insert_read(file, start_offset, reason_r);
1406
0
    if (ret <= 0)
1407
0
      return ret;
1408
0
  }
1409
1410
0
  if (file->buffer == NULL) {
1411
0
    file->buffer =
1412
0
      buffer_create_dynamic(default_pool, LOG_PREFETCH);
1413
0
    file->buffer_offset = start_offset;
1414
0
  }
1415
1416
0
  if ((ret = mail_transaction_log_file_read_more(file, reason_r)) <= 0)
1417
0
    ;
1418
0
  else if (!nfs_flush &&
1419
0
     (file->log->index->flags & MAIL_INDEX_OPEN_FLAG_NFS_FLUSH) != 0 &&
1420
0
     mail_transaction_log_file_need_nfs_flush(file)) {
1421
    /* we didn't read enough data. flush and try again. */
1422
0
    return mail_transaction_log_file_read(file, start_offset, TRUE, reason_r);
1423
0
  } else if ((ret = mail_transaction_log_file_sync(file, &retry, reason_r)) == 0) {
1424
0
    i_assert(!retry); /* retry happens only with mmap */
1425
0
  }
1426
0
  i_assert(file->sync_offset >= file->buffer_offset);
1427
0
  buffer_set_used_size(file->buffer,
1428
0
           file->sync_offset - file->buffer_offset);
1429
0
  return ret;
1430
0
}
1431
1432
static bool
1433
log_file_map_check_offsets(struct mail_transaction_log_file *file,
1434
         uoff_t start_offset, uoff_t end_offset,
1435
         const char **reason_r)
1436
0
{
1437
0
  struct stat st, st2;
1438
1439
0
  if (start_offset > file->sync_offset) {
1440
    /* broken start offset */
1441
0
    if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
1442
0
      *reason_r = t_strdup_printf(
1443
0
        "%s: start_offset (%"PRIuUOFF_T") > "
1444
0
        "current sync_offset (%"PRIuUOFF_T")",
1445
0
        file->filepath, start_offset, file->sync_offset);
1446
0
      return FALSE;
1447
0
    }
1448
1449
0
    if (fstat(file->fd, &st) < 0) {
1450
0
      log_file_set_syscall_error(file, "fstat()");
1451
0
      st.st_size = -1;
1452
0
    }
1453
0
    *reason_r = t_strdup_printf(
1454
0
      "%s: start_offset (%"PRIuUOFF_T") > "
1455
0
      "current sync_offset (%"PRIuUOFF_T"), file size=%"PRIuUOFF_T,
1456
0
      file->filepath, start_offset, file->sync_offset,
1457
0
      st.st_size);
1458
0
    if (stat(file->filepath, &st2) == 0) {
1459
0
      if (st.st_ino != st2.st_ino) {
1460
0
        *reason_r = t_strdup_printf(
1461
0
          "%s, file unexpectedly replaced", *reason_r);
1462
0
      }
1463
0
    } else if (errno == ENOENT) {
1464
0
      *reason_r = t_strdup_printf(
1465
0
        "%s, file unexpectedly deleted", *reason_r);
1466
0
    } else {
1467
0
      log_file_set_syscall_error(file, "stat()");
1468
0
    }
1469
0
    return FALSE;
1470
0
  }
1471
0
  if (end_offset != UOFF_T_MAX && end_offset > file->sync_offset) {
1472
0
    *reason_r = t_strdup_printf(
1473
0
      "%s: end_offset (%"PRIuUOFF_T") > "
1474
0
      "current sync_offset (%"PRIuUOFF_T")",
1475
0
      file->filepath, start_offset, file->sync_offset);
1476
0
    return FALSE;
1477
0
  }
1478
1479
0
  return TRUE;
1480
0
}
1481
1482
static int
1483
mail_transaction_log_file_mmap(struct mail_transaction_log_file *file,
1484
             const char **reason_r)
1485
0
{
1486
  /* we may have switched to mmaping */
1487
0
  buffer_free(&file->buffer);
1488
1489
0
  file->mmap_size = file->last_size;
1490
0
  file->mmap_base = mmap(NULL, file->mmap_size, PROT_READ, MAP_SHARED,
1491
0
             file->fd, 0);
1492
0
  if (file->mmap_base == MAP_FAILED) {
1493
0
    file->mmap_base = NULL;
1494
0
    if (ioloop_time != file->last_mmap_error_time) {
1495
0
      file->last_mmap_error_time = ioloop_time;
1496
0
      log_file_set_syscall_error(file, t_strdup_printf(
1497
0
        "mmap(size=%zu)", file->mmap_size));
1498
0
    }
1499
0
    *reason_r = t_strdup_printf("mmap(size=%zu) failed: %m",
1500
0
              file->mmap_size);
1501
0
    file->mmap_size = 0;
1502
0
    return -1;
1503
0
  }
1504
1505
0
  if (file->mmap_size > mmap_get_page_size()) {
1506
0
    errno = posix_madvise(file->mmap_base, file->mmap_size,
1507
0
          POSIX_MADV_SEQUENTIAL);
1508
0
    if (errno != 0)
1509
0
      log_file_set_syscall_error(file, "posix_madvise()");
1510
0
  }
1511
1512
0
  buffer_create_from_const_data(&file->mmap_buffer,
1513
0
              file->mmap_base, file->mmap_size);
1514
0
  file->buffer = &file->mmap_buffer;
1515
0
  file->buffer_offset = 0;
1516
0
  return 0;
1517
0
}
1518
1519
static void
1520
mail_transaction_log_file_munmap(struct mail_transaction_log_file *file)
1521
0
{
1522
0
  if (file->mmap_base == NULL)
1523
0
    return;
1524
1525
0
  i_assert(file->buffer != NULL);
1526
0
  if (munmap(file->mmap_base, file->mmap_size) < 0)
1527
0
    log_file_set_syscall_error(file, "munmap()");
1528
0
  file->mmap_base = NULL;
1529
0
  file->mmap_size = 0;
1530
0
  buffer_free(&file->buffer);
1531
0
}
1532
1533
static int
1534
mail_transaction_log_file_map_mmap(struct mail_transaction_log_file *file,
1535
           uoff_t start_offset, const char **reason_r)
1536
0
{
1537
0
  struct stat st;
1538
0
  bool retry;
1539
0
  int ret;
1540
1541
  /* we are going to mmap() this file, but it's not necessarily
1542
     mmaped currently. */
1543
0
  i_assert(file->buffer_offset == 0 || file->mmap_base == NULL);
1544
0
  i_assert(file->mmap_size == 0 || file->mmap_base != NULL);
1545
1546
0
  if (fstat(file->fd, &st) < 0) {
1547
0
    log_file_set_syscall_error(file, "fstat()");
1548
0
    *reason_r = t_strdup_printf("fstat() failed: %m");
1549
0
    return -1;
1550
0
  }
1551
0
  file->last_size = st.st_size;
1552
1553
0
  if ((uoff_t)st.st_size < file->sync_offset) {
1554
0
    *reason_r = t_strdup_printf(
1555
0
      "file size shrank (%"PRIuUOFF_T" < %"PRIuUOFF_T")",
1556
0
      (uoff_t)st.st_size, file->sync_offset);
1557
0
    mail_transaction_log_file_set_corrupted(file, "%s", *reason_r);
1558
0
    return 0;
1559
0
  }
1560
1561
0
  if (file->buffer != NULL && file->buffer_offset <= start_offset &&
1562
0
      (uoff_t)st.st_size == file->buffer_offset + file->buffer->used) {
1563
    /* we already have the whole file mapped */
1564
0
    if ((ret = mail_transaction_log_file_sync(file, &retry, reason_r)) != 0 ||
1565
0
        !retry)
1566
0
      return ret;
1567
    /* size changed, re-mmap */
1568
0
  }
1569
1570
0
  do {
1571
0
    mail_transaction_log_file_munmap(file);
1572
1573
0
    if (file->last_size - start_offset < mmap_get_page_size()) {
1574
      /* just reading the file is probably faster */
1575
0
      return mail_transaction_log_file_read(file,
1576
0
                    start_offset,
1577
0
                    FALSE, reason_r);
1578
0
    }
1579
1580
0
    if (mail_transaction_log_file_mmap(file, reason_r) < 0)
1581
0
      return -1;
1582
0
    ret = mail_transaction_log_file_sync(file, &retry, reason_r);
1583
0
  } while (retry);
1584
1585
0
  return ret;
1586
0
}
1587
1588
int mail_transaction_log_file_map(struct mail_transaction_log_file *file,
1589
          uoff_t start_offset, uoff_t end_offset,
1590
          const char **reason_r)
1591
0
{
1592
0
  uoff_t map_start_offset = start_offset;
1593
0
  size_t size;
1594
0
  int ret;
1595
1596
0
  if (file->hdr.indexid == 0) {
1597
    /* corrupted */
1598
0
    *reason_r = "corrupted, indexid=0";
1599
0
    return 0;
1600
0
  }
1601
1602
0
  i_assert(start_offset >= file->hdr.hdr_size);
1603
0
  i_assert(start_offset <= end_offset);
1604
0
  i_assert(file->buffer == NULL || file->mmap_base != NULL ||
1605
0
     file->sync_offset >= file->buffer_offset + file->buffer->used);
1606
1607
0
  if (file->locked_sync_offset_updated && file == file->log->head &&
1608
0
      end_offset == UOFF_T_MAX) {
1609
    /* we're not interested of going further than sync_offset */
1610
0
    if (!log_file_map_check_offsets(file, start_offset,
1611
0
            end_offset, reason_r))
1612
0
      return 0;
1613
0
    i_assert(start_offset <= file->sync_offset);
1614
0
    end_offset = file->sync_offset;
1615
0
  }
1616
1617
0
  if (file->buffer != NULL && file->buffer_offset <= start_offset) {
1618
    /* see if we already have it */
1619
0
    size = file->buffer->used;
1620
0
    if (file->buffer_offset + size >= end_offset)
1621
0
      return 1;
1622
0
  }
1623
1624
0
  if (file->locked) {
1625
    /* set this only when we've synced to end of file while locked
1626
       (either end_offset=UOFF_T_MAX or we had to read anyway) */
1627
0
    file->locked_sync_offset_updated = TRUE;
1628
0
  }
1629
1630
0
  if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
1631
0
    if (start_offset < file->buffer_offset || file->buffer == NULL) {
1632
      /* we had moved the log to memory but failed to read
1633
         the beginning of the log file */
1634
0
      *reason_r = "Beginning of the log isn't available";
1635
0
      return 0;
1636
0
    }
1637
0
    return log_file_map_check_offsets(file, start_offset,
1638
0
              end_offset, reason_r) ? 1 : 0;
1639
0
  }
1640
1641
0
  if (start_offset > file->sync_offset)
1642
0
    mail_transaction_log_file_skip_to_head(file);
1643
0
  if (start_offset > file->sync_offset) {
1644
    /* although we could just skip over the unwanted data, we have
1645
       to sync everything so that modseqs are calculated
1646
       correctly */
1647
0
    map_start_offset = file->sync_offset;
1648
0
  }
1649
1650
0
  if ((file->log->index->flags & MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE) == 0)
1651
0
    ret = mail_transaction_log_file_map_mmap(file, map_start_offset, reason_r);
1652
0
  else {
1653
0
    mail_transaction_log_file_munmap(file);
1654
0
    ret = mail_transaction_log_file_read(file, map_start_offset, FALSE, reason_r);
1655
0
  }
1656
1657
0
  i_assert(file->buffer == NULL || file->mmap_base != NULL ||
1658
0
     file->sync_offset >= file->buffer_offset + file->buffer->used);
1659
0
  if (ret <= 0)
1660
0
    return ret;
1661
1662
0
  i_assert(file->buffer != NULL);
1663
0
  return log_file_map_check_offsets(file, start_offset, end_offset,
1664
0
            reason_r) ? 1 : 0;
1665
0
}
1666
1667
int mail_transaction_log_file_move_to_memory(struct mail_transaction_log_file *file)
1668
0
{
1669
0
  const char *error;
1670
0
  buffer_t *buf;
1671
0
  int ret = 0;
1672
1673
0
  if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file))
1674
0
    return 0;
1675
1676
0
  if (file->mmap_base != NULL) {
1677
    /* just copy to memory */
1678
0
    i_assert(file->buffer_offset == 0);
1679
1680
0
    buf = buffer_create_dynamic(default_pool, file->mmap_size);
1681
0
    buffer_append(buf, file->mmap_base, file->mmap_size);
1682
0
    buffer_free(&file->buffer);
1683
0
    file->buffer = buf;
1684
1685
    /* and lose the mmap */
1686
0
    if (munmap(file->mmap_base, file->mmap_size) < 0)
1687
0
      log_file_set_syscall_error(file, "munmap()");
1688
0
    file->mmap_base = NULL;
1689
0
  } else if (file->buffer_offset != 0) {
1690
    /* we don't have the full log in the memory. read it. */
1691
0
    ret = mail_transaction_log_file_read(file, 0, FALSE, &error);
1692
0
    if (ret <= 0) {
1693
0
      mail_index_set_error(file->log->index,
1694
0
        "%s: Failed to read into memory: %s", file->filepath, error);
1695
0
    }
1696
0
  }
1697
0
  file->last_size = 0;
1698
1699
0
  if (close(file->fd) < 0)
1700
0
    log_file_set_syscall_error(file, "close()");
1701
0
  file->fd = -1;
1702
1703
0
  i_free(file->filepath);
1704
0
  file->filepath = i_strdup(file->log->filepath);
1705
0
  return ret < 0 ? -1 : 0;
1706
0
}