/src/dovecot/src/lib-index/mail-transaction-log-file.c
Line | Count | Source |
1 | | /* Copyright (c) 2003-2018 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | #include "lib.h" |
4 | | #include "array.h" |
5 | | #include "ioloop.h" |
6 | | #include "file-dotlock.h" |
7 | | #include "nfs-workarounds.h" |
8 | | #include "read-full.h" |
9 | | #include "write-full.h" |
10 | | #include "mmap-util.h" |
11 | | #include "mail-index-private.h" |
12 | | #include "mail-index-modseq.h" |
13 | | #include "mail-transaction-log-private.h" |
14 | | |
15 | 0 | #define LOG_PREFETCH IO_BLOCK_SIZE |
16 | 0 | #define MEMORY_LOG_NAME "(in-memory transaction log file)" |
17 | 0 | #define LOG_NEW_DOTLOCK_SUFFIX ".newlock" |
18 | | |
19 | | static int |
20 | | mail_transaction_log_file_sync(struct mail_transaction_log_file *file, |
21 | | bool *retry_r, const char **reason_r); |
22 | | |
23 | | static void |
24 | | log_file_set_syscall_error(struct mail_transaction_log_file *file, |
25 | | const char *function) |
26 | 0 | { |
27 | 0 | mail_index_file_set_syscall_error(file->log->index, |
28 | 0 | file->filepath, function); |
29 | 0 | } |
30 | | |
31 | | static void |
32 | | mail_transaction_log_mark_corrupted(struct mail_transaction_log_file *file) |
33 | 0 | { |
34 | 0 | unsigned int offset = |
35 | 0 | offsetof(struct mail_transaction_log_header, indexid); |
36 | 0 | int flags; |
37 | |
|
38 | 0 | if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file) || |
39 | 0 | file->log->index->readonly) |
40 | 0 | return; |
41 | | |
42 | | /* indexid=0 marks the log file as corrupted. we opened the file with |
43 | | O_APPEND, and now we need to drop it for pwrite() to work (at least |
44 | | in Linux) */ |
45 | 0 | flags = fcntl(file->fd, F_GETFL, 0); |
46 | 0 | if (flags < 0) { |
47 | 0 | mail_index_file_set_syscall_error(file->log->index, |
48 | 0 | file->filepath, "fcntl(F_GETFL)"); |
49 | 0 | return; |
50 | 0 | } |
51 | 0 | if (fcntl(file->fd, F_SETFL, flags & ~O_APPEND) < 0) { |
52 | 0 | mail_index_file_set_syscall_error(file->log->index, |
53 | 0 | file->filepath, "fcntl(F_SETFL)"); |
54 | 0 | return; |
55 | 0 | } |
56 | 0 | if (pwrite_full(file->fd, &file->hdr.indexid, |
57 | 0 | sizeof(file->hdr.indexid), offset) < 0) { |
58 | 0 | mail_index_file_set_syscall_error(file->log->index, |
59 | 0 | file->filepath, "pwrite()"); |
60 | 0 | } |
61 | 0 | } |
62 | | |
63 | | void |
64 | | mail_transaction_log_file_set_corrupted(struct mail_transaction_log_file *file, |
65 | | const char *fmt, ...) |
66 | 0 | { |
67 | 0 | va_list va; |
68 | |
|
69 | 0 | file->corrupted = TRUE; |
70 | 0 | file->hdr.indexid = 0; |
71 | 0 | mail_transaction_log_mark_corrupted(file); |
72 | |
|
73 | 0 | va_start(va, fmt); |
74 | 0 | T_BEGIN { |
75 | 0 | mail_index_set_error(file->log->index, |
76 | 0 | "Corrupted transaction log file %s seq %u: %s " |
77 | 0 | "(sync_offset=%"PRIuUOFF_T")", |
78 | 0 | file->filepath, file->hdr.file_seq, |
79 | 0 | t_strdup_vprintf(fmt, va), file->sync_offset); |
80 | 0 | } T_END; |
81 | 0 | va_end(va); |
82 | 0 | } |
83 | | |
84 | | struct mail_transaction_log_file * |
85 | | mail_transaction_log_file_alloc(struct mail_transaction_log *log, |
86 | | const char *path) |
87 | 0 | { |
88 | 0 | struct mail_transaction_log_file *file; |
89 | |
|
90 | 0 | file = i_new(struct mail_transaction_log_file, 1); |
91 | 0 | file->log = log; |
92 | 0 | file->filepath = i_strdup(path); |
93 | 0 | file->fd = -1; |
94 | 0 | return file; |
95 | 0 | } |
96 | | |
97 | | void mail_transaction_log_file_free(struct mail_transaction_log_file **_file) |
98 | 0 | { |
99 | 0 | struct mail_transaction_log_file *file = *_file; |
100 | 0 | struct mail_transaction_log_file **p; |
101 | 0 | int old_errno = errno; |
102 | |
|
103 | 0 | *_file = NULL; |
104 | |
|
105 | 0 | i_assert(!file->locked); |
106 | 0 | i_assert(file->refcount == 0); |
107 | | |
108 | 0 | for (p = &file->log->files; *p != NULL; p = &(*p)->next) { |
109 | 0 | if (*p == file) { |
110 | 0 | *p = file->next; |
111 | 0 | break; |
112 | 0 | } |
113 | 0 | } |
114 | |
|
115 | 0 | if (file == file->log->head) |
116 | 0 | file->log->head = NULL; |
117 | |
|
118 | 0 | buffer_free(&file->buffer); |
119 | |
|
120 | 0 | if (file->mmap_base != NULL) { |
121 | 0 | if (munmap(file->mmap_base, file->mmap_size) < 0) |
122 | 0 | log_file_set_syscall_error(file, "munmap()"); |
123 | 0 | } |
124 | |
|
125 | 0 | if (file->fd != -1) { |
126 | 0 | if (close(file->fd) < 0) |
127 | 0 | log_file_set_syscall_error(file, "close()"); |
128 | 0 | } |
129 | |
|
130 | 0 | i_free(file->filepath); |
131 | 0 | i_free(file->need_rotate); |
132 | 0 | i_free(file); |
133 | |
|
134 | 0 | errno = old_errno; |
135 | 0 | } |
136 | | |
137 | | static void |
138 | | mail_transaction_log_file_skip_to_head(struct mail_transaction_log_file *file) |
139 | 0 | { |
140 | 0 | struct mail_transaction_log *log = file->log; |
141 | 0 | struct mail_index_map *map = log->index->map; |
142 | 0 | const struct mail_index_modseq_header *modseq_hdr; |
143 | 0 | uoff_t head_offset; |
144 | |
|
145 | 0 | if (map == NULL || file->hdr.file_seq != map->hdr.log_file_seq || |
146 | 0 | map->hdr.log_file_head_offset == 0) |
147 | 0 | return; |
148 | | |
149 | | /* we can get a valid log offset from index file. initialize |
150 | | sync_offset from it so we don't have to read the whole log |
151 | | file from beginning. */ |
152 | 0 | head_offset = map->hdr.log_file_head_offset; |
153 | |
|
154 | 0 | modseq_hdr = mail_index_map_get_modseq_header(map); |
155 | 0 | if (head_offset < file->hdr.hdr_size) { |
156 | 0 | mail_index_set_error(log->index, |
157 | 0 | "%s: log_file_head_offset too small", |
158 | 0 | log->index->filepath); |
159 | 0 | file->sync_offset = file->hdr.hdr_size; |
160 | 0 | file->sync_highest_modseq = file->hdr.initial_modseq; |
161 | 0 | } else if (modseq_hdr == NULL && file->hdr.initial_modseq == 0) { |
162 | | /* modseqs not used yet */ |
163 | 0 | file->sync_offset = head_offset; |
164 | 0 | file->sync_highest_modseq = 0; |
165 | 0 | } else if (modseq_hdr == NULL || |
166 | 0 | modseq_hdr->log_seq != file->hdr.file_seq) { |
167 | | /* highest_modseq not synced, start from beginning */ |
168 | 0 | file->sync_offset = file->hdr.hdr_size; |
169 | 0 | file->sync_highest_modseq = file->hdr.initial_modseq; |
170 | 0 | } else if (modseq_hdr->log_offset > head_offset) { |
171 | 0 | mail_index_set_error(log->index, |
172 | 0 | "%s: modseq_hdr.log_offset too large", |
173 | 0 | log->index->filepath); |
174 | 0 | file->sync_offset = file->hdr.hdr_size; |
175 | 0 | file->sync_highest_modseq = file->hdr.initial_modseq; |
176 | 0 | } else { |
177 | | /* start from where we last stopped tracking modseqs */ |
178 | 0 | file->sync_offset = modseq_hdr->log_offset; |
179 | 0 | file->sync_highest_modseq = modseq_hdr->highest_modseq; |
180 | 0 | } |
181 | 0 | if (file->hdr.file_seq == log->index->map->hdr.log_file_seq) { |
182 | 0 | file->last_read_hdr_tail_offset = |
183 | 0 | log->index->map->hdr.log_file_tail_offset; |
184 | 0 | } |
185 | 0 | if (file->last_read_hdr_tail_offset > file->max_tail_offset) |
186 | 0 | file->max_tail_offset = file->last_read_hdr_tail_offset; |
187 | 0 | } |
188 | | |
189 | | static void |
190 | | mail_transaction_log_file_add_to_list(struct mail_transaction_log_file *file) |
191 | 0 | { |
192 | 0 | struct mail_transaction_log_file **p; |
193 | 0 | const char *reason; |
194 | 0 | bool retry; |
195 | |
|
196 | 0 | file->sync_offset = file->hdr.hdr_size; |
197 | 0 | file->sync_highest_modseq = file->hdr.initial_modseq; |
198 | 0 | mail_transaction_log_file_skip_to_head(file); |
199 | | |
200 | | /* insert it to correct position */ |
201 | 0 | for (p = &file->log->files; *p != NULL; p = &(*p)->next) { |
202 | 0 | if ((*p)->hdr.file_seq > file->hdr.file_seq) |
203 | 0 | break; |
204 | 0 | i_assert((*p)->hdr.file_seq < file->hdr.file_seq); |
205 | 0 | } |
206 | | |
207 | 0 | file->next = *p; |
208 | 0 | *p = file; |
209 | |
|
210 | 0 | if (file->buffer != NULL) { |
211 | | /* if we read any unfinished data, make sure the buffer gets |
212 | | truncated. */ |
213 | 0 | (void)mail_transaction_log_file_sync(file, &retry, &reason); |
214 | 0 | buffer_set_used_size(file->buffer, |
215 | 0 | file->sync_offset - file->buffer_offset); |
216 | 0 | } |
217 | 0 | } |
218 | | |
219 | | static int |
220 | | mail_transaction_log_init_hdr(struct mail_transaction_log *log, |
221 | | struct mail_transaction_log_header *hdr) |
222 | 0 | { |
223 | 0 | struct mail_index *index = log->index; |
224 | 0 | struct mail_transaction_log_file *file; |
225 | |
|
226 | 0 | i_assert(index->indexid != 0); |
227 | | |
228 | 0 | i_zero(hdr); |
229 | 0 | hdr->major_version = MAIL_TRANSACTION_LOG_MAJOR_VERSION; |
230 | 0 | hdr->minor_version = MAIL_TRANSACTION_LOG_MINOR_VERSION; |
231 | 0 | hdr->hdr_size = sizeof(struct mail_transaction_log_header); |
232 | 0 | hdr->indexid = log->index->indexid; |
233 | 0 | hdr->create_stamp = ioloop_time32; |
234 | 0 | #ifndef WORDS_BIGENDIAN |
235 | 0 | hdr->compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN; |
236 | 0 | #endif |
237 | |
|
238 | 0 | if (index->fd != -1) { |
239 | | /* not creating index - make sure we have latest header */ |
240 | 0 | if (!index->mapping) { |
241 | 0 | if (mail_index_map(index, |
242 | 0 | MAIL_INDEX_SYNC_HANDLER_HEAD) <= 0) |
243 | 0 | return -1; |
244 | 0 | } else { |
245 | | /* if we got here from mapping, the .log file is |
246 | | corrupted. use whatever values we got from index |
247 | | file */ |
248 | 0 | } |
249 | 0 | } |
250 | 0 | if (index->map != NULL) { |
251 | 0 | hdr->prev_file_seq = index->map->hdr.log_file_seq; |
252 | 0 | hdr->prev_file_offset = index->map->hdr.log_file_head_offset; |
253 | 0 | hdr->file_seq = index->map->hdr.log_file_seq + 1; |
254 | 0 | hdr->initial_modseq = |
255 | 0 | mail_index_map_modseq_get_highest(index->map); |
256 | 0 | } else { |
257 | 0 | hdr->file_seq = 1; |
258 | 0 | } |
259 | 0 | if (hdr->initial_modseq == 0) { |
260 | | /* modseq tracking in log files is required for many reasons |
261 | | nowadays, even if per-message modseqs aren't enabled in |
262 | | dovecot.index. */ |
263 | 0 | hdr->initial_modseq = 1; |
264 | 0 | } |
265 | |
|
266 | 0 | if (log->head != NULL) { |
267 | | /* make sure the sequence always increases to avoid crashes |
268 | | later. this catches the buggy case where two processes |
269 | | happen to replace the same log file. */ |
270 | 0 | for (file = log->head->next; file != NULL; file = file->next) { |
271 | 0 | if (hdr->file_seq <= file->hdr.file_seq) |
272 | 0 | hdr->file_seq = file->hdr.file_seq + 1; |
273 | 0 | } |
274 | |
|
275 | 0 | if (hdr->file_seq <= log->head->hdr.file_seq) { |
276 | | /* make sure the sequence grows */ |
277 | 0 | hdr->file_seq = log->head->hdr.file_seq+1; |
278 | 0 | } |
279 | 0 | if (hdr->initial_modseq < log->head->sync_highest_modseq) { |
280 | | /* this should be always up-to-date */ |
281 | 0 | hdr->initial_modseq = log->head->sync_highest_modseq; |
282 | 0 | } |
283 | 0 | } |
284 | 0 | return 0; |
285 | 0 | } |
286 | | |
287 | | struct mail_transaction_log_file * |
288 | | mail_transaction_log_file_alloc_in_memory(struct mail_transaction_log *log) |
289 | 0 | { |
290 | 0 | struct mail_transaction_log_file *file; |
291 | |
|
292 | 0 | file = mail_transaction_log_file_alloc(log, MEMORY_LOG_NAME); |
293 | 0 | if (mail_transaction_log_init_hdr(log, &file->hdr) < 0) { |
294 | 0 | i_free(file); |
295 | 0 | return NULL; |
296 | 0 | } |
297 | | |
298 | 0 | file->buffer = buffer_create_dynamic(default_pool, 4096); |
299 | 0 | file->buffer_offset = sizeof(file->hdr); |
300 | |
|
301 | 0 | mail_transaction_log_file_add_to_list(file); |
302 | 0 | return file; |
303 | 0 | } |
304 | | |
305 | | static int |
306 | | mail_transaction_log_file_dotlock(struct mail_transaction_log_file *file) |
307 | 0 | { |
308 | 0 | struct dotlock_settings dotlock_set; |
309 | 0 | int ret; |
310 | |
|
311 | 0 | if (file->log->dotlock_refcount > 0) |
312 | 0 | ret = 1; |
313 | 0 | else { |
314 | 0 | i_assert(file->log->dotlock_refcount == 0); |
315 | 0 | mail_transaction_log_get_dotlock_set(file->log, &dotlock_set); |
316 | 0 | ret = file_dotlock_create(&dotlock_set, file->filepath, 0, |
317 | 0 | &file->log->dotlock); |
318 | 0 | } |
319 | 0 | if (ret > 0) { |
320 | 0 | file->log->dotlock_refcount++; |
321 | 0 | file->locked = TRUE; |
322 | 0 | file->lock_create_time = time(NULL); |
323 | 0 | return 0; |
324 | 0 | } |
325 | 0 | if (ret < 0) { |
326 | 0 | log_file_set_syscall_error(file, "file_dotlock_create()"); |
327 | 0 | return -1; |
328 | 0 | } |
329 | | |
330 | 0 | mail_index_set_error(file->log->index, |
331 | 0 | "Timeout (%us) while waiting for " |
332 | 0 | "dotlock for transaction log file %s", |
333 | 0 | dotlock_set.timeout, file->filepath); |
334 | 0 | return -1; |
335 | 0 | } |
336 | | |
337 | | static int |
338 | | mail_transaction_log_file_undotlock(struct mail_transaction_log_file *file) |
339 | 0 | { |
340 | 0 | int ret; |
341 | |
|
342 | 0 | i_assert(file->log->dotlock_refcount >= 0); |
343 | 0 | if (--file->log->dotlock_refcount > 0) |
344 | 0 | return 0; |
345 | | |
346 | 0 | ret = file_dotlock_delete(&file->log->dotlock); |
347 | 0 | if (ret < 0) { |
348 | 0 | log_file_set_syscall_error(file, "file_dotlock_delete()"); |
349 | 0 | return -1; |
350 | 0 | } |
351 | | |
352 | 0 | if (ret == 0) { |
353 | 0 | mail_index_set_error(file->log->index, |
354 | 0 | "Dotlock was lost for transaction log file %s", |
355 | 0 | file->filepath); |
356 | 0 | return -1; |
357 | 0 | } |
358 | 0 | return 0; |
359 | 0 | } |
360 | | |
361 | | int mail_transaction_log_file_lock(struct mail_transaction_log_file *file) |
362 | 0 | { |
363 | 0 | struct mail_index *index = file->log->index; |
364 | 0 | unsigned int lock_timeout_secs; |
365 | 0 | int ret; |
366 | |
|
367 | 0 | if (file->locked) |
368 | 0 | return 0; |
369 | | |
370 | 0 | if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) { |
371 | 0 | file->locked = TRUE; |
372 | 0 | return 0; |
373 | 0 | } |
374 | | |
375 | 0 | if (index->readonly) { |
376 | 0 | mail_index_set_error_code( |
377 | 0 | index, MAIL_INDEX_ERROR_CODE_NO_ACCESS, |
378 | 0 | "Index is read-only, can't write-lock %s", |
379 | 0 | file->filepath); |
380 | 0 | return -1; |
381 | 0 | } |
382 | | |
383 | 0 | if (index->set.lock_method == FILE_LOCK_METHOD_DOTLOCK) |
384 | 0 | return mail_transaction_log_file_dotlock(file); |
385 | | |
386 | 0 | i_assert(file->file_lock == NULL); |
387 | 0 | lock_timeout_secs = I_MIN(MAIL_TRANSACTION_LOG_LOCK_TIMEOUT, |
388 | 0 | index->set.max_lock_timeout_secs); |
389 | 0 | ret = mail_index_lock_fd(index, file->filepath, file->fd, |
390 | 0 | F_WRLCK, lock_timeout_secs, |
391 | 0 | &file->file_lock); |
392 | 0 | if (ret > 0) { |
393 | 0 | file->locked = TRUE; |
394 | 0 | file->lock_create_time = time(NULL); |
395 | 0 | return 0; |
396 | 0 | } |
397 | 0 | if (ret < 0) { |
398 | 0 | log_file_set_syscall_error(file, "mail_index_wait_lock_fd()"); |
399 | 0 | return -1; |
400 | 0 | } |
401 | | |
402 | 0 | mail_index_set_error(index, |
403 | 0 | "Timeout (%us) while waiting for lock for " |
404 | 0 | "transaction log file %s%s", |
405 | 0 | lock_timeout_secs, file->filepath, |
406 | 0 | file_lock_find(file->fd, index->set.lock_method, F_WRLCK)); |
407 | 0 | return -1; |
408 | 0 | } |
409 | | |
410 | | void mail_transaction_log_file_unlock(struct mail_transaction_log_file *file, |
411 | | const char *lock_reason) |
412 | 0 | { |
413 | 0 | unsigned int lock_time; |
414 | |
|
415 | 0 | if (!file->locked) |
416 | 0 | return; |
417 | | |
418 | 0 | file->locked = FALSE; |
419 | 0 | file->locked_sync_offset_updated = FALSE; |
420 | |
|
421 | 0 | if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) |
422 | 0 | return; |
423 | | |
424 | 0 | lock_time = time(NULL) - file->lock_create_time; |
425 | 0 | if (lock_time >= MAIL_TRANSACTION_LOG_LOCK_WARN_SECS && lock_reason != NULL) { |
426 | 0 | e_warning(file->log->index->event, |
427 | 0 | "Transaction log file %s was locked for %u seconds (%s)", |
428 | 0 | file->filepath, lock_time, lock_reason); |
429 | 0 | } |
430 | |
|
431 | 0 | if (file->log->index->set.lock_method == FILE_LOCK_METHOD_DOTLOCK) { |
432 | 0 | (void)mail_transaction_log_file_undotlock(file); |
433 | 0 | return; |
434 | 0 | } |
435 | | |
436 | 0 | file_unlock(&file->file_lock); |
437 | 0 | } |
438 | | |
439 | | static ssize_t |
440 | | mail_transaction_log_file_read_header(struct mail_transaction_log_file *file) |
441 | 0 | { |
442 | 0 | void *dest; |
443 | 0 | size_t pos, dest_size; |
444 | 0 | ssize_t ret; |
445 | |
|
446 | 0 | i_assert(file->buffer == NULL && file->mmap_base == NULL); |
447 | | |
448 | 0 | i_zero(&file->hdr); |
449 | 0 | if (file->last_size < mmap_get_page_size() && file->last_size > 0) { |
450 | | /* just read the entire transaction log to memory. |
451 | | note that if some of the data hasn't been fully committed |
452 | | yet (hdr.size=0), the buffer must be truncated later */ |
453 | 0 | file->buffer = buffer_create_dynamic(default_pool, 4096); |
454 | 0 | file->buffer_offset = 0; |
455 | 0 | dest_size = file->last_size; |
456 | 0 | dest = buffer_append_space_unsafe(file->buffer, dest_size); |
457 | 0 | } else { |
458 | | /* read only the header */ |
459 | 0 | dest = &file->hdr; |
460 | 0 | dest_size = sizeof(file->hdr); |
461 | 0 | } |
462 | | |
463 | | /* it's not necessarily an error to read less than wanted header size, |
464 | | since older versions of the log format used smaller headers. */ |
465 | 0 | pos = 0; |
466 | 0 | do { |
467 | 0 | ret = pread(file->fd, PTR_OFFSET(dest, pos), |
468 | 0 | dest_size - pos, pos); |
469 | 0 | if (ret > 0) |
470 | 0 | pos += ret; |
471 | 0 | } while (ret > 0 && pos < dest_size); |
472 | |
|
473 | 0 | if (file->buffer != NULL) { |
474 | 0 | buffer_set_used_size(file->buffer, pos); |
475 | 0 | memcpy(&file->hdr, file->buffer->data, |
476 | 0 | I_MIN(pos, sizeof(file->hdr))); |
477 | 0 | } |
478 | |
|
479 | 0 | return ret < 0 ? -1 : (ssize_t)pos; |
480 | 0 | } |
481 | | |
482 | | static int |
483 | | mail_transaction_log_file_fail_dupe(struct mail_transaction_log_file *file) |
484 | 0 | { |
485 | 0 | int ret; |
486 | | |
487 | | /* mark the old file corrupted. we can't safely remove |
488 | | it from the list however, so return failure. */ |
489 | 0 | file->hdr.indexid = 0; |
490 | 0 | if (strcmp(file->filepath, file->log->head->filepath) != 0) { |
491 | | /* only mark .2 corrupted, just to make sure we don't lose any |
492 | | changes from .log in case we're somehow wrong */ |
493 | 0 | mail_transaction_log_mark_corrupted(file); |
494 | 0 | ret = 0; |
495 | 0 | } else { |
496 | 0 | ret = -1; |
497 | 0 | } |
498 | 0 | if (!file->corrupted) { |
499 | 0 | file->corrupted = TRUE; |
500 | 0 | mail_index_set_error(file->log->index, |
501 | 0 | "Transaction log %s: " |
502 | 0 | "duplicate transaction log sequence (%u)", |
503 | 0 | file->filepath, file->hdr.file_seq); |
504 | 0 | } |
505 | 0 | return ret; |
506 | 0 | } |
507 | | |
508 | | static int |
509 | | mail_transaction_log_file_read_hdr(struct mail_transaction_log_file *file, |
510 | | bool ignore_estale) |
511 | 0 | { |
512 | 0 | struct mail_transaction_log_file *f; |
513 | 0 | int ret; |
514 | |
|
515 | 0 | i_assert(!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)); |
516 | | |
517 | 0 | if (file->corrupted) |
518 | 0 | return 0; |
519 | | |
520 | 0 | ret = mail_transaction_log_file_read_header(file); |
521 | 0 | if (ret < 0) { |
522 | 0 | if (errno != ESTALE || !ignore_estale) |
523 | 0 | log_file_set_syscall_error(file, "pread()"); |
524 | 0 | return -1; |
525 | 0 | } |
526 | 0 | if (file->hdr.major_version != MAIL_TRANSACTION_LOG_MAJOR_VERSION) { |
527 | | /* incompatible version - fix silently */ |
528 | 0 | return 0; |
529 | 0 | } |
530 | 0 | if (ret < MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE) { |
531 | 0 | mail_transaction_log_file_set_corrupted(file, |
532 | 0 | "unexpected end of file while reading header"); |
533 | 0 | return 0; |
534 | 0 | } |
535 | | |
536 | 0 | const unsigned int hdr_version = |
537 | 0 | MAIL_TRANSACTION_LOG_HDR_VERSION(&file->hdr); |
538 | 0 | if (MAIL_TRANSACTION_LOG_VERSION_HAVE(hdr_version, COMPAT_FLAGS)) { |
539 | | /* we have compatibility flags */ |
540 | 0 | enum mail_index_header_compat_flags compat_flags = 0; |
541 | |
|
542 | 0 | #ifndef WORDS_BIGENDIAN |
543 | 0 | compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN; |
544 | 0 | #endif |
545 | 0 | if (file->hdr.compat_flags != compat_flags) { |
546 | | /* architecture change */ |
547 | 0 | mail_index_set_error(file->log->index, |
548 | 0 | "Rebuilding index file %s: " |
549 | 0 | "CPU architecture changed", |
550 | 0 | file->log->index->filepath); |
551 | 0 | return 0; |
552 | 0 | } |
553 | 0 | } |
554 | 0 | if (file->hdr.hdr_size < MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE) { |
555 | 0 | mail_transaction_log_file_set_corrupted(file, |
556 | 0 | "Header size too small"); |
557 | 0 | return 0; |
558 | 0 | } |
559 | 0 | if (file->hdr.hdr_size < sizeof(file->hdr)) { |
560 | | /* @UNSAFE: smaller than we expected - zero out the fields we |
561 | | shouldn't have filled */ |
562 | 0 | memset(PTR_OFFSET(&file->hdr, file->hdr.hdr_size), 0, |
563 | 0 | sizeof(file->hdr) - file->hdr.hdr_size); |
564 | 0 | } |
565 | |
|
566 | 0 | if (file->hdr.indexid == 0) { |
567 | | /* corrupted */ |
568 | 0 | file->corrupted = TRUE; |
569 | 0 | mail_index_set_error(file->log->index, |
570 | 0 | "Transaction log file %s: marked corrupted", |
571 | 0 | file->filepath); |
572 | 0 | return 0; |
573 | 0 | } |
574 | 0 | if (file->hdr.indexid != file->log->index->indexid) { |
575 | 0 | if (file->log->index->indexid == 0 || |
576 | 0 | file->log->index->initial_create) |
577 | 0 | ; |
578 | 0 | else if (strcmp(file->filepath, file->log->filepath2) == 0) { |
579 | | /* .log.2 has a different indexid. This is rather |
580 | | unlikely situation to notice. We'll handle it by |
581 | | deleting the .log.2 so a permanently wrong indexid |
582 | | gets fixed automatically. Since .log.2 doesn't |
583 | | contain anything critical, it's not so bad even if |
584 | | the deletion wasn't really necessary. */ |
585 | 0 | mail_transaction_log_file_set_corrupted(file, |
586 | 0 | "indexid changed: %u -> %u - deleting", |
587 | 0 | file->log->index->indexid, file->hdr.indexid); |
588 | 0 | return 0; |
589 | 0 | } else { |
590 | | /* Index was just rebuilt, possibly because the whole |
591 | | mailbox was recreated under us. Handle it the same |
592 | | as if the mailbox directory had been deleted. */ |
593 | 0 | e_debug(file->log->index->event, |
594 | 0 | "Transaction log file %s indexid changed: %u -> %u", |
595 | 0 | file->filepath, |
596 | 0 | file->log->index->indexid, file->hdr.indexid); |
597 | 0 | file->log->index->index_deleted = TRUE; |
598 | 0 | errno = ENOENT; |
599 | 0 | return -1; |
600 | 0 | } |
601 | | |
602 | | /* creating index file. since transaction log is created |
603 | | first, use the indexid in it to create the main index |
604 | | to avoid races. */ |
605 | 0 | file->log->index->indexid = file->hdr.indexid; |
606 | 0 | } |
607 | | |
608 | | /* make sure we already don't have a file with the same sequence |
609 | | opened. it shouldn't happen unless the old log file was |
610 | | corrupted. */ |
611 | 0 | for (f = file->log->files; f != NULL; f = f->next) { |
612 | 0 | if (f->hdr.file_seq == file->hdr.file_seq) { |
613 | 0 | if (strcmp(f->filepath, f->log->head->filepath) != 0) { |
614 | | /* old "f" is the .log.2 */ |
615 | 0 | return mail_transaction_log_file_fail_dupe(f); |
616 | 0 | } else { |
617 | | /* new "file" is probably the .log.2 */ |
618 | 0 | return mail_transaction_log_file_fail_dupe(file); |
619 | 0 | } |
620 | 0 | } |
621 | 0 | } |
622 | | |
623 | 0 | file->sync_highest_modseq = file->hdr.initial_modseq; |
624 | 0 | return 1; |
625 | 0 | } |
626 | | |
627 | | static int |
628 | | mail_transaction_log_file_stat(struct mail_transaction_log_file *file, |
629 | | bool ignore_estale) |
630 | 0 | { |
631 | 0 | struct stat st; |
632 | |
|
633 | 0 | if (fstat(file->fd, &st) < 0) { |
634 | 0 | if (!ESTALE_FSTAT(errno) || !ignore_estale) |
635 | 0 | log_file_set_syscall_error(file, "fstat()"); |
636 | 0 | return -1; |
637 | 0 | } |
638 | | |
639 | 0 | file->st_dev = st.st_dev; |
640 | 0 | file->st_ino = st.st_ino; |
641 | 0 | file->last_mtime = st.st_mtime; |
642 | 0 | file->last_size = st.st_size; |
643 | 0 | return 0; |
644 | 0 | } |
645 | | |
646 | | static bool |
647 | | mail_transaction_log_file_is_dupe(struct mail_transaction_log_file *file) |
648 | 0 | { |
649 | 0 | struct mail_transaction_log_file *tmp; |
650 | |
|
651 | 0 | for (tmp = file->log->files; tmp != NULL; tmp = tmp->next) { |
652 | 0 | if (tmp->st_ino == file->st_ino && |
653 | 0 | CMP_DEV_T(tmp->st_dev, file->st_dev)) |
654 | 0 | return TRUE; |
655 | 0 | } |
656 | 0 | return FALSE; |
657 | 0 | } |
658 | | |
659 | | static void log_write_ext_hdr_init_data(struct mail_index *index, buffer_t *buf) |
660 | 0 | { |
661 | 0 | const struct mail_index_registered_ext *rext; |
662 | 0 | struct mail_transaction_header *hdr; |
663 | 0 | struct mail_transaction_ext_intro *intro; |
664 | 0 | struct mail_transaction_ext_hdr_update *ext_hdr; |
665 | 0 | unsigned int hdr_offset; |
666 | |
|
667 | 0 | rext = array_idx(&index->extensions, index->set.ext_hdr_init_id); |
668 | | |
669 | | /* introduce the extension */ |
670 | 0 | hdr_offset = buf->used; |
671 | 0 | hdr = buffer_append_space_unsafe(buf, sizeof(*hdr)); |
672 | 0 | hdr->type = MAIL_TRANSACTION_EXT_INTRO; |
673 | |
|
674 | 0 | intro = buffer_append_space_unsafe(buf, sizeof(*intro)); |
675 | 0 | intro->ext_id = (uint32_t)-1; |
676 | 0 | intro->hdr_size = rext->hdr_size; |
677 | 0 | intro->record_size = rext->record_size; |
678 | 0 | intro->record_align = rext->record_align; |
679 | 0 | intro->name_size = strlen(rext->name); |
680 | 0 | buffer_append(buf, rext->name, intro->name_size); |
681 | 0 | if (buf->used % 4 != 0) |
682 | 0 | buffer_append_zero(buf, 4 - buf->used % 4); |
683 | |
|
684 | 0 | hdr = buffer_get_space_unsafe(buf, hdr_offset, sizeof(*hdr)); |
685 | 0 | hdr->size = mail_index_uint32_to_offset(buf->used - hdr_offset); |
686 | | |
687 | | /* add the extension header data */ |
688 | 0 | hdr_offset = buf->used; |
689 | 0 | hdr = buffer_append_space_unsafe(buf, sizeof(*hdr)); |
690 | 0 | hdr->type = MAIL_TRANSACTION_EXT_HDR_UPDATE; |
691 | |
|
692 | 0 | ext_hdr = buffer_append_space_unsafe(buf, sizeof(*ext_hdr)); |
693 | 0 | ext_hdr->size = rext->hdr_size; |
694 | 0 | buffer_append(buf, index->set.ext_hdr_init_data, rext->hdr_size); |
695 | |
|
696 | 0 | hdr = buffer_get_space_unsafe(buf, hdr_offset, sizeof(*hdr)); |
697 | 0 | hdr->size = mail_index_uint32_to_offset(buf->used - hdr_offset); |
698 | 0 | } |
699 | | |
700 | | static int |
701 | | mail_transaction_log_file_create2(struct mail_transaction_log_file *file, |
702 | | int new_fd, bool reset, |
703 | | struct dotlock **dotlock) |
704 | 0 | { |
705 | 0 | struct mail_index *index = file->log->index; |
706 | 0 | struct stat st; |
707 | 0 | const char *path2; |
708 | 0 | buffer_t *writebuf; |
709 | 0 | int fd, ret; |
710 | 0 | bool rename_existing, need_lock; |
711 | |
|
712 | 0 | need_lock = file->log->head != NULL && file->log->head->locked; |
713 | |
|
714 | 0 | if (fcntl(new_fd, F_SETFL, O_APPEND) < 0) { |
715 | 0 | log_file_set_syscall_error(file, "fcntl(O_APPEND)"); |
716 | 0 | return -1; |
717 | 0 | } |
718 | | |
719 | 0 | if ((index->flags & MAIL_INDEX_OPEN_FLAG_NFS_FLUSH) != 0) { |
720 | | /* although we check also mtime and file size below, it's done |
721 | | only to fix broken log files. we don't bother flushing |
722 | | attribute cache just for that. */ |
723 | 0 | nfs_flush_file_handle_cache(file->filepath); |
724 | 0 | } |
725 | | |
726 | | /* log creation is locked now - see if someone already created it. |
727 | | note that if we're rotating, we need to keep the log locked until |
728 | | the file has been rewritten. and because fcntl() locks are stupid, |
729 | | if we go and open()+close() the file and we had it already opened, |
730 | | its locks are lost. so we use stat() to check if the file has been |
731 | | recreated, although it almost never is. */ |
732 | 0 | if (reset) |
733 | 0 | rename_existing = FALSE; |
734 | 0 | else if (nfs_safe_stat(file->filepath, &st) < 0) { |
735 | 0 | if (errno != ENOENT) { |
736 | 0 | log_file_set_syscall_error(file, "stat()"); |
737 | 0 | return -1; |
738 | 0 | } |
739 | 0 | rename_existing = FALSE; |
740 | 0 | } else if (st.st_ino == file->st_ino && |
741 | 0 | CMP_DEV_T(st.st_dev, file->st_dev) && |
742 | | /* inode/dev checks are enough when we're rotating the file, |
743 | | but not when we're replacing a broken log file */ |
744 | 0 | st.st_mtime == file->last_mtime && |
745 | 0 | (uoff_t)st.st_size == file->last_size) { |
746 | | /* no-one else recreated the file */ |
747 | 0 | rename_existing = TRUE; |
748 | 0 | } else { |
749 | | /* recreated. use the file if its header is ok */ |
750 | 0 | fd = nfs_safe_open(file->filepath, O_RDWR | O_APPEND); |
751 | 0 | if (fd == -1) { |
752 | 0 | if (errno != ENOENT) { |
753 | 0 | log_file_set_syscall_error(file, "open()"); |
754 | 0 | return -1; |
755 | 0 | } |
756 | 0 | } else { |
757 | 0 | file->fd = fd; |
758 | 0 | file->last_size = 0; |
759 | 0 | if (mail_transaction_log_file_read_hdr(file, |
760 | 0 | FALSE) > 0 && |
761 | 0 | mail_transaction_log_file_stat(file, FALSE) == 0) { |
762 | | /* yes, it was ok */ |
763 | 0 | file_dotlock_delete(dotlock); |
764 | 0 | mail_transaction_log_file_add_to_list(file); |
765 | 0 | return 0; |
766 | 0 | } |
767 | 0 | file->fd = -1; |
768 | 0 | if (close(fd) < 0) |
769 | 0 | log_file_set_syscall_error(file, "close()"); |
770 | 0 | } |
771 | 0 | rename_existing = FALSE; |
772 | 0 | } |
773 | | |
774 | 0 | if (index->fd == -1 && !rename_existing) { |
775 | | /* creating the initial index */ |
776 | 0 | reset = TRUE; |
777 | 0 | } |
778 | |
|
779 | 0 | if (mail_transaction_log_init_hdr(file->log, &file->hdr) < 0) |
780 | 0 | return -1; |
781 | | |
782 | 0 | if (reset) { |
783 | | /* don't reset modseqs. if we're resetting due to rebuilding |
784 | | indexes we'll probably want to keep uidvalidity and in such |
785 | | cases we really don't want to shrink modseqs. */ |
786 | 0 | file->hdr.prev_file_seq = 0; |
787 | 0 | file->hdr.prev_file_offset = 0; |
788 | 0 | } |
789 | |
|
790 | 0 | writebuf = t_buffer_create(128); |
791 | 0 | buffer_append(writebuf, &file->hdr, sizeof(file->hdr)); |
792 | |
|
793 | 0 | if (index->set.ext_hdr_init_data != NULL && reset) |
794 | 0 | log_write_ext_hdr_init_data(index, writebuf); |
795 | 0 | if (write_full(new_fd, writebuf->data, writebuf->used) < 0) { |
796 | 0 | log_file_set_syscall_error(file, "write_full()"); |
797 | 0 | return -1; |
798 | 0 | } |
799 | | |
800 | 0 | if (file->log->index->set.fsync_mode == FSYNC_MODE_ALWAYS) { |
801 | | /* the header isn't important, so don't bother calling |
802 | | fdatasync() unless it's required */ |
803 | 0 | if (fdatasync(new_fd) < 0) { |
804 | 0 | log_file_set_syscall_error(file, "fdatasync()"); |
805 | 0 | return -1; |
806 | 0 | } |
807 | 0 | } |
808 | | |
809 | 0 | file->fd = new_fd; |
810 | 0 | ret = mail_transaction_log_file_stat(file, FALSE); |
811 | |
|
812 | 0 | if (need_lock && ret == 0) { |
813 | | /* we'll need to preserve the lock */ |
814 | 0 | if (mail_transaction_log_file_lock(file) < 0) |
815 | 0 | ret = -1; |
816 | 0 | } |
817 | | |
818 | | /* if we return -1 the dotlock deletion code closes the fd */ |
819 | 0 | file->fd = -1; |
820 | 0 | if (ret < 0) |
821 | 0 | return -1; |
822 | | |
823 | | /* keep two log files */ |
824 | 0 | if (rename_existing) { |
825 | | /* rename() would be nice and easy way to do this, except then |
826 | | there's a race condition between the rename and |
827 | | file_dotlock_replace(). during that time the log file |
828 | | doesn't exist, which could cause problems. */ |
829 | 0 | path2 = t_strconcat(file->filepath, ".2", NULL); |
830 | 0 | if (i_unlink_if_exists(path2) < 0) { |
831 | | /* try to link() anyway */ |
832 | 0 | } |
833 | 0 | if (nfs_safe_link(file->filepath, path2, FALSE) < 0 && |
834 | 0 | errno != ENOENT && errno != EEXIST) { |
835 | 0 | mail_index_set_error(index, "link(%s, %s) failed: %m", |
836 | 0 | file->filepath, path2); |
837 | | /* ignore the error. we don't care that much about the |
838 | | second log file and we're going to overwrite this |
839 | | first one. */ |
840 | 0 | } |
841 | | /* NOTE: here's a race condition where both .log and .log.2 |
842 | | point to the same file. our reading code should ignore that |
843 | | though by comparing the inodes. */ |
844 | 0 | } |
845 | |
|
846 | 0 | if (file_dotlock_replace(dotlock, |
847 | 0 | DOTLOCK_REPLACE_FLAG_DONT_CLOSE_FD) <= 0) { |
848 | | /* need to unlock to avoid assert-crash in |
849 | | mail_transaction_log_file_free() */ |
850 | 0 | mail_transaction_log_file_unlock(file, "creation failed"); |
851 | 0 | return -1; |
852 | 0 | } |
853 | | |
854 | | /* success */ |
855 | 0 | file->fd = new_fd; |
856 | 0 | mail_transaction_log_file_add_to_list(file); |
857 | |
|
858 | 0 | i_assert(!need_lock || file->locked); |
859 | 0 | return 1; |
860 | 0 | } |
861 | | |
862 | | int mail_transaction_log_file_create(struct mail_transaction_log_file *file, |
863 | | bool reset) |
864 | 0 | { |
865 | 0 | struct mail_index *index = file->log->index; |
866 | 0 | struct dotlock_settings new_dotlock_set; |
867 | 0 | struct dotlock *dotlock; |
868 | 0 | mode_t old_mask; |
869 | 0 | int fd, ret; |
870 | |
|
871 | 0 | i_assert(!MAIL_INDEX_IS_IN_MEMORY(index)); |
872 | | |
873 | 0 | if (file->log->index->readonly) { |
874 | 0 | mail_index_set_error(index, |
875 | 0 | "Can't create log file %s: Index is read-only", |
876 | 0 | file->filepath); |
877 | 0 | return -1; |
878 | 0 | } |
879 | | |
880 | 0 | if (index->indexid == 0) { |
881 | 0 | mail_index_set_error(index, |
882 | 0 | "Can't create log file %s: Index is marked corrupted", |
883 | 0 | file->filepath); |
884 | 0 | return -1; |
885 | 0 | } |
886 | | |
887 | 0 | mail_transaction_log_get_dotlock_set(file->log, &new_dotlock_set); |
888 | 0 | new_dotlock_set.lock_suffix = LOG_NEW_DOTLOCK_SUFFIX; |
889 | | |
890 | | /* With dotlocking we might already have path.lock created, so this |
891 | | filename has to be different. */ |
892 | 0 | old_mask = umask(index->set.mode ^ 0666); |
893 | 0 | fd = file_dotlock_open(&new_dotlock_set, file->filepath, 0, &dotlock); |
894 | 0 | umask(old_mask); |
895 | |
|
896 | 0 | if (fd == -1) { |
897 | 0 | log_file_set_syscall_error(file, "file_dotlock_open()"); |
898 | 0 | return -1; |
899 | 0 | } |
900 | 0 | mail_index_fchown(index, fd, file_dotlock_get_lock_path(dotlock)); |
901 | | |
902 | | /* either fd gets used or the dotlock gets deleted and returned fd |
903 | | is for the existing file */ |
904 | 0 | ret = mail_transaction_log_file_create2(file, fd, reset, &dotlock); |
905 | 0 | if (ret < 0) { |
906 | 0 | if (dotlock != NULL) |
907 | 0 | file_dotlock_delete(&dotlock); |
908 | 0 | return -1; |
909 | 0 | } |
910 | 0 | return ret; |
911 | 0 | } |
912 | | |
913 | | int mail_transaction_log_file_open(struct mail_transaction_log_file *file, |
914 | | const char **reason_r) |
915 | 0 | { |
916 | 0 | struct mail_index *index = file->log->index; |
917 | 0 | unsigned int i; |
918 | 0 | bool ignore_estale; |
919 | 0 | int ret; |
920 | |
|
921 | 0 | for (i = 0;; i++) { |
922 | 0 | if (!index->readonly) { |
923 | 0 | file->fd = nfs_safe_open(file->filepath, |
924 | 0 | O_RDWR | O_APPEND); |
925 | 0 | } else { |
926 | 0 | file->fd = nfs_safe_open(file->filepath, O_RDONLY); |
927 | 0 | } |
928 | 0 | if (file->fd == -1 && ENOACCESS(errno)) { |
929 | 0 | file->fd = nfs_safe_open(file->filepath, O_RDONLY); |
930 | 0 | index->readonly = TRUE; |
931 | 0 | } |
932 | 0 | if (file->fd == -1) { |
933 | 0 | if (errno == ENOENT) { |
934 | 0 | *reason_r = "File doesn't exist"; |
935 | 0 | return 0; |
936 | 0 | } |
937 | | |
938 | 0 | log_file_set_syscall_error(file, "open()"); |
939 | 0 | *reason_r = t_strdup_printf("open() failed: %m"); |
940 | 0 | return -1; |
941 | 0 | } |
942 | | |
943 | 0 | ignore_estale = i < MAIL_INDEX_ESTALE_RETRY_COUNT; |
944 | 0 | if (mail_transaction_log_file_stat(file, ignore_estale) < 0) |
945 | 0 | ret = -1; |
946 | 0 | else if (mail_transaction_log_file_is_dupe(file)) { |
947 | | /* probably our already opened .log file has been |
948 | | renamed to .log.2 and we're trying to reopen it. |
949 | | also possible that hit a race condition where .log |
950 | | and .log.2 are linked. */ |
951 | 0 | *reason_r = "File is already open"; |
952 | 0 | return 0; |
953 | 0 | } else { |
954 | 0 | ret = mail_transaction_log_file_read_hdr(file, |
955 | 0 | ignore_estale); |
956 | 0 | } |
957 | 0 | if (ret > 0) { |
958 | | /* success */ |
959 | 0 | break; |
960 | 0 | } |
961 | | |
962 | 0 | if (ret == 0) { |
963 | | /* corrupted */ |
964 | 0 | if (index->readonly) { |
965 | | /* don't delete */ |
966 | 0 | } else { |
967 | 0 | i_unlink_if_exists(file->filepath); |
968 | 0 | } |
969 | 0 | *reason_r = "File is corrupted"; |
970 | 0 | return 0; |
971 | 0 | } |
972 | 0 | if (errno != ESTALE || |
973 | 0 | i == MAIL_INDEX_ESTALE_RETRY_COUNT) { |
974 | | /* syscall error */ |
975 | 0 | *reason_r = t_strdup_printf("fstat() failed: %m"); |
976 | 0 | return -1; |
977 | 0 | } |
978 | | |
979 | | /* ESTALE - try again */ |
980 | 0 | buffer_free(&file->buffer); |
981 | 0 | } |
982 | | |
983 | 0 | mail_transaction_log_file_add_to_list(file); |
984 | 0 | return 1; |
985 | 0 | } |
986 | | |
987 | | static int |
988 | | log_file_track_mailbox_sync_offset_hdr(struct mail_transaction_log_file *file, |
989 | | const void *data, unsigned int trans_size, |
990 | | const char **error_r) |
991 | 0 | { |
992 | 0 | const struct mail_transaction_header_update *u = data; |
993 | 0 | const struct mail_index_header *ihdr; |
994 | 0 | const unsigned int size = trans_size - sizeof(struct mail_transaction_header); |
995 | 0 | const unsigned int offset_pos = |
996 | 0 | offsetof(struct mail_index_header, log_file_tail_offset); |
997 | 0 | const unsigned int offset_size = sizeof(ihdr->log_file_tail_offset); |
998 | 0 | uint32_t tail_offset; |
999 | |
|
1000 | 0 | i_assert(offset_size == sizeof(tail_offset)); |
1001 | | |
1002 | 0 | if (size < sizeof(*u) || size < sizeof(*u) + u->size) { |
1003 | 0 | *error_r = "header update extends beyond record size"; |
1004 | 0 | mail_transaction_log_file_set_corrupted(file, "%s", *error_r); |
1005 | 0 | return -1; |
1006 | 0 | } |
1007 | | |
1008 | 0 | if (u->offset <= offset_pos && |
1009 | 0 | u->offset + u->size >= offset_pos + offset_size) { |
1010 | 0 | memcpy(&tail_offset, |
1011 | 0 | CONST_PTR_OFFSET(u + 1, offset_pos - u->offset), |
1012 | 0 | sizeof(tail_offset)); |
1013 | |
|
1014 | 0 | if (tail_offset < file->last_read_hdr_tail_offset) { |
1015 | | /* ignore shrinking tail offsets */ |
1016 | 0 | return 1; |
1017 | 0 | } else if (tail_offset > file->sync_offset + trans_size) { |
1018 | 0 | mail_transaction_log_file_set_corrupted(file, |
1019 | 0 | "log_file_tail_offset %u goes past sync offset %"PRIuUOFF_T, |
1020 | 0 | tail_offset, file->sync_offset + trans_size); |
1021 | 0 | } else { |
1022 | 0 | file->last_read_hdr_tail_offset = tail_offset; |
1023 | 0 | if (tail_offset > file->max_tail_offset) |
1024 | 0 | file->max_tail_offset = tail_offset; |
1025 | 0 | return 1; |
1026 | 0 | } |
1027 | 0 | } |
1028 | 0 | return 0; |
1029 | 0 | } |
1030 | | |
1031 | | static bool |
1032 | | flag_updates_have_non_internal(const struct mail_transaction_flag_update *u, |
1033 | | unsigned int count, unsigned int version) |
1034 | 0 | { |
1035 | | /* Hide internal flags from modseqs if the log file's version |
1036 | | is new enough. This allows upgrading without the modseqs suddenly |
1037 | | shrinking. */ |
1038 | 0 | if (!MAIL_TRANSACTION_LOG_VERSION_HAVE(version, HIDE_INTERNAL_MODSEQS)) |
1039 | 0 | return TRUE; |
1040 | | |
1041 | 0 | for (unsigned int i = 0; i < count; i++) { |
1042 | 0 | if (!MAIL_TRANSACTION_FLAG_UPDATE_IS_INTERNAL(&u[i])) |
1043 | 0 | return TRUE; |
1044 | 0 | } |
1045 | 0 | return FALSE; |
1046 | 0 | } |
1047 | | |
1048 | | void mail_transaction_update_modseq(const struct mail_transaction_header *hdr, |
1049 | | const void *data, uint64_t *cur_modseq, |
1050 | | unsigned int version) |
1051 | 0 | { |
1052 | 0 | uint32_t trans_size; |
1053 | |
|
1054 | 0 | trans_size = mail_index_offset_to_uint32(hdr->size); |
1055 | 0 | i_assert(trans_size != 0); |
1056 | | |
1057 | 0 | if (*cur_modseq != 0) { |
1058 | | /* tracking modseqs */ |
1059 | 0 | } else if ((hdr->type & MAIL_TRANSACTION_TYPE_MASK) == |
1060 | 0 | MAIL_TRANSACTION_EXT_INTRO) { |
1061 | | /* modseqs not tracked yet. see if this is a modseq |
1062 | | extension introduction. */ |
1063 | 0 | const struct mail_transaction_ext_intro *intro = data; |
1064 | 0 | const unsigned int modseq_ext_len = |
1065 | 0 | strlen(MAIL_INDEX_MODSEQ_EXT_NAME); |
1066 | |
|
1067 | 0 | if (intro->name_size == modseq_ext_len && |
1068 | 0 | memcmp(intro + 1, MAIL_INDEX_MODSEQ_EXT_NAME, |
1069 | 0 | modseq_ext_len) == 0) { |
1070 | | /* modseq tracking started */ |
1071 | 0 | *cur_modseq += 1; |
1072 | 0 | } |
1073 | 0 | return; |
1074 | 0 | } else { |
1075 | | /* not tracking modseqs */ |
1076 | 0 | return; |
1077 | 0 | } |
1078 | | |
1079 | 0 | switch (hdr->type & MAIL_TRANSACTION_TYPE_MASK) { |
1080 | 0 | case MAIL_TRANSACTION_EXPUNGE | MAIL_TRANSACTION_EXPUNGE_PROT: |
1081 | 0 | case MAIL_TRANSACTION_EXPUNGE_GUID | MAIL_TRANSACTION_EXPUNGE_PROT: |
1082 | 0 | if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0) { |
1083 | | /* ignore expunge requests */ |
1084 | 0 | break; |
1085 | 0 | } |
1086 | | /* fall through */ |
1087 | 0 | case MAIL_TRANSACTION_APPEND: |
1088 | 0 | case MAIL_TRANSACTION_KEYWORD_UPDATE: |
1089 | 0 | case MAIL_TRANSACTION_KEYWORD_RESET: |
1090 | 0 | case MAIL_TRANSACTION_ATTRIBUTE_UPDATE: |
1091 | | /* these changes increase modseq */ |
1092 | 0 | *cur_modseq += 1; |
1093 | 0 | break; |
1094 | 0 | case MAIL_TRANSACTION_FLAG_UPDATE: { |
1095 | 0 | const struct mail_transaction_flag_update *rec = data; |
1096 | 0 | unsigned int count; |
1097 | |
|
1098 | 0 | count = (trans_size - sizeof(*hdr)) / sizeof(*rec); |
1099 | 0 | if (flag_updates_have_non_internal(rec, count, version)) |
1100 | 0 | *cur_modseq += 1; |
1101 | 0 | break; |
1102 | 0 | } |
1103 | 0 | case MAIL_TRANSACTION_MODSEQ_UPDATE: { |
1104 | 0 | const struct mail_transaction_modseq_update *rec, *end; |
1105 | |
|
1106 | 0 | end = CONST_PTR_OFFSET(data, trans_size - sizeof(*hdr)); |
1107 | 0 | for (rec = data; rec < end; rec++) { |
1108 | 0 | uint64_t modseq = ((uint64_t)rec->modseq_high32 << 32) | |
1109 | 0 | rec->modseq_low32; |
1110 | 0 | if (*cur_modseq < modseq) |
1111 | 0 | *cur_modseq = modseq; |
1112 | 0 | } |
1113 | 0 | } |
1114 | 0 | } |
1115 | 0 | } |
1116 | | |
1117 | | static int |
1118 | | log_file_track_sync(struct mail_transaction_log_file *file, |
1119 | | const struct mail_transaction_header *hdr, |
1120 | | unsigned int trans_size, const char **error_r) |
1121 | 0 | { |
1122 | 0 | const void *data = hdr + 1; |
1123 | 0 | int ret; |
1124 | |
|
1125 | 0 | mail_transaction_update_modseq(hdr, hdr + 1, &file->sync_highest_modseq, |
1126 | 0 | MAIL_TRANSACTION_LOG_HDR_VERSION(&file->hdr)); |
1127 | 0 | if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0) |
1128 | 0 | return 1; |
1129 | | |
1130 | | /* external transactions: */ |
1131 | 0 | switch (hdr->type & MAIL_TRANSACTION_TYPE_MASK) { |
1132 | 0 | case MAIL_TRANSACTION_HEADER_UPDATE: |
1133 | | /* see if this updates mailbox_sync_offset */ |
1134 | 0 | ret = log_file_track_mailbox_sync_offset_hdr(file, data, |
1135 | 0 | trans_size, error_r); |
1136 | 0 | if (ret != 0) |
1137 | 0 | return ret < 0 ? -1 : 1; |
1138 | 0 | break; |
1139 | 0 | case MAIL_TRANSACTION_INDEX_DELETED: |
1140 | 0 | if (file->sync_offset < file->index_undeleted_offset || |
1141 | 0 | file->hdr.file_seq < file->log->index->index_delete_changed_file_seq) |
1142 | 0 | break; |
1143 | 0 | file->log->index->index_deleted = TRUE; |
1144 | 0 | file->log->index->index_delete_requested = FALSE; |
1145 | 0 | file->log->index->index_delete_changed_file_seq = file->hdr.file_seq; |
1146 | 0 | file->index_deleted_offset = file->sync_offset + trans_size; |
1147 | 0 | break; |
1148 | 0 | case MAIL_TRANSACTION_INDEX_UNDELETED: |
1149 | 0 | if (file->sync_offset < file->index_deleted_offset || |
1150 | 0 | file->hdr.file_seq < file->log->index->index_delete_changed_file_seq) |
1151 | 0 | break; |
1152 | 0 | file->log->index->index_deleted = FALSE; |
1153 | 0 | file->log->index->index_delete_requested = FALSE; |
1154 | 0 | file->log->index->index_delete_changed_file_seq = file->hdr.file_seq; |
1155 | 0 | file->index_undeleted_offset = file->sync_offset + trans_size; |
1156 | 0 | break; |
1157 | 0 | case MAIL_TRANSACTION_BOUNDARY: { |
1158 | 0 | const struct mail_transaction_boundary *boundary = |
1159 | 0 | (const void *)(hdr + 1); |
1160 | 0 | size_t wanted_buffer_size; |
1161 | |
|
1162 | 0 | wanted_buffer_size = file->sync_offset - file->buffer_offset + |
1163 | 0 | boundary->size; |
1164 | 0 | if (wanted_buffer_size > file->buffer->used) { |
1165 | | /* the full transaction hasn't been written yet */ |
1166 | 0 | return 0; |
1167 | 0 | } |
1168 | 0 | break; |
1169 | 0 | } |
1170 | 0 | } |
1171 | | |
1172 | 0 | if (file->max_tail_offset == file->sync_offset) { |
1173 | | /* external transactions aren't synced to mailbox. we can |
1174 | | update mailbox sync offset to skip this transaction to |
1175 | | avoid re-reading it at the next sync. */ |
1176 | 0 | file->max_tail_offset += trans_size; |
1177 | 0 | } |
1178 | 0 | return 1; |
1179 | 0 | } |
1180 | | |
1181 | | static int |
1182 | | mail_transaction_log_file_sync(struct mail_transaction_log_file *file, |
1183 | | bool *retry_r, const char **reason_r) |
1184 | 0 | { |
1185 | 0 | const struct mail_transaction_header *hdr; |
1186 | 0 | const void *data; |
1187 | 0 | struct stat st; |
1188 | 0 | size_t size, avail; |
1189 | 0 | uint32_t trans_size = 0; |
1190 | 0 | int ret; |
1191 | |
|
1192 | 0 | i_assert(file->sync_offset >= file->buffer_offset); |
1193 | | |
1194 | 0 | *retry_r = FALSE; |
1195 | |
|
1196 | 0 | data = buffer_get_data(file->buffer, &size); |
1197 | 0 | if (file->buffer_offset + size < file->sync_offset) { |
1198 | 0 | *reason_r = t_strdup_printf( |
1199 | 0 | "log file shrank (%"PRIuUOFF_T" < %"PRIuUOFF_T")", |
1200 | 0 | file->buffer_offset + (uoff_t)size, file->sync_offset); |
1201 | 0 | mail_transaction_log_file_set_corrupted(file, "%s", *reason_r); |
1202 | | /* fix the sync_offset to avoid crashes later on */ |
1203 | 0 | file->sync_offset = file->buffer_offset + size; |
1204 | 0 | return 0; |
1205 | 0 | } |
1206 | 0 | while (file->sync_offset - file->buffer_offset + sizeof(*hdr) <= size) { |
1207 | 0 | hdr = CONST_PTR_OFFSET(data, file->sync_offset - |
1208 | 0 | file->buffer_offset); |
1209 | 0 | trans_size = mail_index_offset_to_uint32(hdr->size); |
1210 | 0 | if (trans_size == 0) { |
1211 | | /* unfinished or corrupted */ |
1212 | 0 | break; |
1213 | 0 | } |
1214 | 0 | if (trans_size < sizeof(*hdr)) { |
1215 | 0 | *reason_r = t_strdup_printf( |
1216 | 0 | "hdr.size too small (%u)", trans_size); |
1217 | 0 | mail_transaction_log_file_set_corrupted(file, "%s", *reason_r); |
1218 | 0 | return 0; |
1219 | 0 | } |
1220 | | |
1221 | 0 | if (file->sync_offset - file->buffer_offset + trans_size > size) |
1222 | 0 | break; |
1223 | | |
1224 | | /* transaction has been fully written */ |
1225 | 0 | if ((ret = log_file_track_sync(file, hdr, trans_size, reason_r)) <= 0) { |
1226 | 0 | if (ret < 0) |
1227 | 0 | return 0; |
1228 | 0 | break; |
1229 | 0 | } |
1230 | | |
1231 | 0 | file->sync_offset += trans_size; |
1232 | 0 | } |
1233 | | |
1234 | 0 | if (file->mmap_base != NULL && !file->locked) { |
1235 | | /* Now that all the mmaped pages have page faulted, check if |
1236 | | the file had changed while doing that. Only after the last |
1237 | | page has faulted, the size returned by fstat() can be |
1238 | | trusted. Otherwise it might point to a page boundary while |
1239 | | the next page is still being written. |
1240 | | |
1241 | | Without this check we might see partial transactions, |
1242 | | sometimes causing "Extension record updated without intro |
1243 | | prefix" errors. */ |
1244 | 0 | if (fstat(file->fd, &st) < 0) { |
1245 | 0 | log_file_set_syscall_error(file, "fstat()"); |
1246 | 0 | *reason_r = t_strdup_printf("fstat() failed: %m"); |
1247 | 0 | return -1; |
1248 | 0 | } |
1249 | 0 | if ((uoff_t)st.st_size != file->last_size) { |
1250 | 0 | file->last_size = st.st_size; |
1251 | 0 | *retry_r = TRUE; |
1252 | 0 | *reason_r = "File size changed - retrying"; |
1253 | 0 | return 0; |
1254 | 0 | } |
1255 | 0 | } |
1256 | | |
1257 | 0 | avail = file->sync_offset - file->buffer_offset; |
1258 | 0 | if (avail != size) { |
1259 | | /* There's more data than we could sync at the moment. If the |
1260 | | last record's size wasn't valid, we can't know if it will |
1261 | | be updated unless we've locked the log. */ |
1262 | 0 | if (file->locked) { |
1263 | 0 | *reason_r = "Unexpected garbage at EOF"; |
1264 | 0 | mail_transaction_log_file_set_corrupted(file, "%s", *reason_r); |
1265 | 0 | return 0; |
1266 | 0 | } |
1267 | | /* The size field will be updated soon */ |
1268 | 0 | mail_index_flush_read_cache(file->log->index, file->filepath, |
1269 | 0 | file->fd, file->locked); |
1270 | 0 | } |
1271 | | |
1272 | 0 | if (file->next != NULL && |
1273 | 0 | file->hdr.file_seq == file->next->hdr.prev_file_seq && |
1274 | 0 | file->next->hdr.prev_file_offset != file->sync_offset) { |
1275 | 0 | *reason_r = t_strdup_printf( |
1276 | 0 | "Invalid transaction log size " |
1277 | 0 | "(%"PRIuUOFF_T" vs %u): %s", file->sync_offset, |
1278 | 0 | file->log->head->hdr.prev_file_offset, file->filepath); |
1279 | 0 | mail_transaction_log_file_set_corrupted(file, "%s", *reason_r); |
1280 | 0 | return 0; |
1281 | 0 | } |
1282 | | |
1283 | 0 | return 1; |
1284 | 0 | } |
1285 | | |
1286 | | static int |
1287 | | mail_transaction_log_file_insert_read(struct mail_transaction_log_file *file, |
1288 | | uoff_t offset, const char **reason_r) |
1289 | 0 | { |
1290 | 0 | void *data; |
1291 | 0 | size_t size; |
1292 | 0 | ssize_t ret; |
1293 | |
|
1294 | 0 | size = file->buffer_offset - offset; |
1295 | 0 | buffer_copy(file->buffer, size, file->buffer, 0, SIZE_MAX); |
1296 | |
|
1297 | 0 | data = buffer_get_space_unsafe(file->buffer, 0, size); |
1298 | 0 | ret = pread_full(file->fd, data, size, offset); |
1299 | 0 | if (ret > 0) { |
1300 | | /* success */ |
1301 | 0 | file->buffer_offset -= size; |
1302 | 0 | return 1; |
1303 | 0 | } |
1304 | | |
1305 | | /* failure. don't leave ourself to inconsistent state */ |
1306 | 0 | buffer_copy(file->buffer, 0, file->buffer, size, SIZE_MAX); |
1307 | 0 | buffer_set_used_size(file->buffer, file->buffer->used - size); |
1308 | |
|
1309 | 0 | if (ret == 0) { |
1310 | 0 | *reason_r = "file shrank unexpectedly"; |
1311 | 0 | mail_transaction_log_file_set_corrupted(file, "%s", *reason_r); |
1312 | 0 | return 0; |
1313 | 0 | } else if (errno == ESTALE) { |
1314 | | /* log file was deleted in NFS server, fail silently */ |
1315 | 0 | *reason_r = t_strdup_printf("read() failed: %m"); |
1316 | 0 | return 0; |
1317 | 0 | } else { |
1318 | 0 | log_file_set_syscall_error(file, "pread()"); |
1319 | 0 | *reason_r = t_strdup_printf("read() failed: %m"); |
1320 | 0 | return -1; |
1321 | 0 | } |
1322 | 0 | } |
1323 | | |
1324 | | static int |
1325 | | mail_transaction_log_file_read_more(struct mail_transaction_log_file *file, |
1326 | | const char **reason_r) |
1327 | 0 | { |
1328 | 0 | void *data; |
1329 | 0 | size_t size; |
1330 | 0 | uint32_t read_offset; |
1331 | 0 | ssize_t ret; |
1332 | |
|
1333 | 0 | read_offset = file->buffer_offset + file->buffer->used; |
1334 | |
|
1335 | 0 | do { |
1336 | 0 | data = buffer_append_space_unsafe(file->buffer, LOG_PREFETCH); |
1337 | 0 | ret = pread(file->fd, data, LOG_PREFETCH, read_offset); |
1338 | 0 | if (ret > 0) |
1339 | 0 | read_offset += ret; |
1340 | |
|
1341 | 0 | size = read_offset - file->buffer_offset; |
1342 | 0 | buffer_set_used_size(file->buffer, size); |
1343 | 0 | } while (ret > 0 || (ret < 0 && errno == EINTR)); |
1344 | |
|
1345 | 0 | file->last_size = read_offset; |
1346 | |
|
1347 | 0 | if (ret < 0) { |
1348 | 0 | *reason_r = t_strdup_printf("pread() failed: %m"); |
1349 | 0 | if (errno == ESTALE) { |
1350 | | /* log file was deleted in NFS server, fail silently */ |
1351 | 0 | return 0; |
1352 | 0 | } |
1353 | 0 | log_file_set_syscall_error(file, "pread()"); |
1354 | 0 | return -1; |
1355 | 0 | } |
1356 | 0 | return 1; |
1357 | 0 | } |
1358 | | |
1359 | | static bool |
1360 | | mail_transaction_log_file_need_nfs_flush(struct mail_transaction_log_file *file) |
1361 | 0 | { |
1362 | 0 | const struct mail_index_header *hdr = &file->log->index->map->hdr; |
1363 | 0 | uoff_t max_offset = file->last_size; |
1364 | |
|
1365 | 0 | if (file->next != NULL && |
1366 | 0 | file->hdr.file_seq == file->next->hdr.prev_file_seq && |
1367 | 0 | file->next->hdr.prev_file_offset != max_offset) { |
1368 | | /* we already have a newer log file which says that we haven't |
1369 | | synced the entire file. */ |
1370 | 0 | return TRUE; |
1371 | 0 | } |
1372 | | |
1373 | 0 | if (file->hdr.file_seq == hdr->log_file_seq && |
1374 | 0 | max_offset < hdr->log_file_head_offset) |
1375 | 0 | return TRUE; |
1376 | | |
1377 | 0 | return FALSE; |
1378 | 0 | } |
1379 | | |
1380 | | static int |
1381 | | mail_transaction_log_file_read(struct mail_transaction_log_file *file, |
1382 | | uoff_t start_offset, bool nfs_flush, |
1383 | | const char **reason_r) |
1384 | 0 | { |
1385 | 0 | bool retry; |
1386 | 0 | int ret; |
1387 | |
|
1388 | 0 | i_assert(file->mmap_base == NULL); |
1389 | | |
1390 | | /* NFS: if file isn't locked, we're optimistic that we can read enough |
1391 | | data without flushing attribute cache. if after reading we notice |
1392 | | that we really should have read more, flush the cache and try again. |
1393 | | if file is locked, the attribute cache was already flushed when |
1394 | | refreshing the log. */ |
1395 | 0 | if (nfs_flush && |
1396 | 0 | (file->log->index->flags & MAIL_INDEX_OPEN_FLAG_NFS_FLUSH) != 0) { |
1397 | 0 | if (!file->locked) |
1398 | 0 | nfs_flush_attr_cache_unlocked(file->filepath); |
1399 | 0 | else |
1400 | 0 | nfs_flush_attr_cache_fd_locked(file->filepath, file->fd); |
1401 | 0 | } |
1402 | |
|
1403 | 0 | if (file->buffer != NULL && file->buffer_offset > start_offset) { |
1404 | | /* we have to insert missing data to beginning of buffer */ |
1405 | 0 | ret = mail_transaction_log_file_insert_read(file, start_offset, reason_r); |
1406 | 0 | if (ret <= 0) |
1407 | 0 | return ret; |
1408 | 0 | } |
1409 | | |
1410 | 0 | if (file->buffer == NULL) { |
1411 | 0 | file->buffer = |
1412 | 0 | buffer_create_dynamic(default_pool, LOG_PREFETCH); |
1413 | 0 | file->buffer_offset = start_offset; |
1414 | 0 | } |
1415 | |
|
1416 | 0 | if ((ret = mail_transaction_log_file_read_more(file, reason_r)) <= 0) |
1417 | 0 | ; |
1418 | 0 | else if (!nfs_flush && |
1419 | 0 | (file->log->index->flags & MAIL_INDEX_OPEN_FLAG_NFS_FLUSH) != 0 && |
1420 | 0 | mail_transaction_log_file_need_nfs_flush(file)) { |
1421 | | /* we didn't read enough data. flush and try again. */ |
1422 | 0 | return mail_transaction_log_file_read(file, start_offset, TRUE, reason_r); |
1423 | 0 | } else if ((ret = mail_transaction_log_file_sync(file, &retry, reason_r)) == 0) { |
1424 | 0 | i_assert(!retry); /* retry happens only with mmap */ |
1425 | 0 | } |
1426 | 0 | i_assert(file->sync_offset >= file->buffer_offset); |
1427 | 0 | buffer_set_used_size(file->buffer, |
1428 | 0 | file->sync_offset - file->buffer_offset); |
1429 | 0 | return ret; |
1430 | 0 | } |
1431 | | |
1432 | | static bool |
1433 | | log_file_map_check_offsets(struct mail_transaction_log_file *file, |
1434 | | uoff_t start_offset, uoff_t end_offset, |
1435 | | const char **reason_r) |
1436 | 0 | { |
1437 | 0 | struct stat st, st2; |
1438 | |
|
1439 | 0 | if (start_offset > file->sync_offset) { |
1440 | | /* broken start offset */ |
1441 | 0 | if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) { |
1442 | 0 | *reason_r = t_strdup_printf( |
1443 | 0 | "%s: start_offset (%"PRIuUOFF_T") > " |
1444 | 0 | "current sync_offset (%"PRIuUOFF_T")", |
1445 | 0 | file->filepath, start_offset, file->sync_offset); |
1446 | 0 | return FALSE; |
1447 | 0 | } |
1448 | | |
1449 | 0 | if (fstat(file->fd, &st) < 0) { |
1450 | 0 | log_file_set_syscall_error(file, "fstat()"); |
1451 | 0 | st.st_size = -1; |
1452 | 0 | } |
1453 | 0 | *reason_r = t_strdup_printf( |
1454 | 0 | "%s: start_offset (%"PRIuUOFF_T") > " |
1455 | 0 | "current sync_offset (%"PRIuUOFF_T"), file size=%"PRIuUOFF_T, |
1456 | 0 | file->filepath, start_offset, file->sync_offset, |
1457 | 0 | st.st_size); |
1458 | 0 | if (stat(file->filepath, &st2) == 0) { |
1459 | 0 | if (st.st_ino != st2.st_ino) { |
1460 | 0 | *reason_r = t_strdup_printf( |
1461 | 0 | "%s, file unexpectedly replaced", *reason_r); |
1462 | 0 | } |
1463 | 0 | } else if (errno == ENOENT) { |
1464 | 0 | *reason_r = t_strdup_printf( |
1465 | 0 | "%s, file unexpectedly deleted", *reason_r); |
1466 | 0 | } else { |
1467 | 0 | log_file_set_syscall_error(file, "stat()"); |
1468 | 0 | } |
1469 | 0 | return FALSE; |
1470 | 0 | } |
1471 | 0 | if (end_offset != UOFF_T_MAX && end_offset > file->sync_offset) { |
1472 | 0 | *reason_r = t_strdup_printf( |
1473 | 0 | "%s: end_offset (%"PRIuUOFF_T") > " |
1474 | 0 | "current sync_offset (%"PRIuUOFF_T")", |
1475 | 0 | file->filepath, start_offset, file->sync_offset); |
1476 | 0 | return FALSE; |
1477 | 0 | } |
1478 | | |
1479 | 0 | return TRUE; |
1480 | 0 | } |
1481 | | |
1482 | | static int |
1483 | | mail_transaction_log_file_mmap(struct mail_transaction_log_file *file, |
1484 | | const char **reason_r) |
1485 | 0 | { |
1486 | | /* we may have switched to mmaping */ |
1487 | 0 | buffer_free(&file->buffer); |
1488 | |
|
1489 | 0 | file->mmap_size = file->last_size; |
1490 | 0 | file->mmap_base = mmap(NULL, file->mmap_size, PROT_READ, MAP_SHARED, |
1491 | 0 | file->fd, 0); |
1492 | 0 | if (file->mmap_base == MAP_FAILED) { |
1493 | 0 | file->mmap_base = NULL; |
1494 | 0 | if (ioloop_time != file->last_mmap_error_time) { |
1495 | 0 | file->last_mmap_error_time = ioloop_time; |
1496 | 0 | log_file_set_syscall_error(file, t_strdup_printf( |
1497 | 0 | "mmap(size=%zu)", file->mmap_size)); |
1498 | 0 | } |
1499 | 0 | *reason_r = t_strdup_printf("mmap(size=%zu) failed: %m", |
1500 | 0 | file->mmap_size); |
1501 | 0 | file->mmap_size = 0; |
1502 | 0 | return -1; |
1503 | 0 | } |
1504 | | |
1505 | 0 | if (file->mmap_size > mmap_get_page_size()) { |
1506 | 0 | errno = posix_madvise(file->mmap_base, file->mmap_size, |
1507 | 0 | POSIX_MADV_SEQUENTIAL); |
1508 | 0 | if (errno != 0) |
1509 | 0 | log_file_set_syscall_error(file, "posix_madvise()"); |
1510 | 0 | } |
1511 | |
|
1512 | 0 | buffer_create_from_const_data(&file->mmap_buffer, |
1513 | 0 | file->mmap_base, file->mmap_size); |
1514 | 0 | file->buffer = &file->mmap_buffer; |
1515 | 0 | file->buffer_offset = 0; |
1516 | 0 | return 0; |
1517 | 0 | } |
1518 | | |
1519 | | static void |
1520 | | mail_transaction_log_file_munmap(struct mail_transaction_log_file *file) |
1521 | 0 | { |
1522 | 0 | if (file->mmap_base == NULL) |
1523 | 0 | return; |
1524 | | |
1525 | 0 | i_assert(file->buffer != NULL); |
1526 | 0 | if (munmap(file->mmap_base, file->mmap_size) < 0) |
1527 | 0 | log_file_set_syscall_error(file, "munmap()"); |
1528 | 0 | file->mmap_base = NULL; |
1529 | 0 | file->mmap_size = 0; |
1530 | 0 | buffer_free(&file->buffer); |
1531 | 0 | } |
1532 | | |
1533 | | static int |
1534 | | mail_transaction_log_file_map_mmap(struct mail_transaction_log_file *file, |
1535 | | uoff_t start_offset, const char **reason_r) |
1536 | 0 | { |
1537 | 0 | struct stat st; |
1538 | 0 | bool retry; |
1539 | 0 | int ret; |
1540 | | |
1541 | | /* we are going to mmap() this file, but it's not necessarily |
1542 | | mmaped currently. */ |
1543 | 0 | i_assert(file->buffer_offset == 0 || file->mmap_base == NULL); |
1544 | 0 | i_assert(file->mmap_size == 0 || file->mmap_base != NULL); |
1545 | | |
1546 | 0 | if (fstat(file->fd, &st) < 0) { |
1547 | 0 | log_file_set_syscall_error(file, "fstat()"); |
1548 | 0 | *reason_r = t_strdup_printf("fstat() failed: %m"); |
1549 | 0 | return -1; |
1550 | 0 | } |
1551 | 0 | file->last_size = st.st_size; |
1552 | |
|
1553 | 0 | if ((uoff_t)st.st_size < file->sync_offset) { |
1554 | 0 | *reason_r = t_strdup_printf( |
1555 | 0 | "file size shrank (%"PRIuUOFF_T" < %"PRIuUOFF_T")", |
1556 | 0 | (uoff_t)st.st_size, file->sync_offset); |
1557 | 0 | mail_transaction_log_file_set_corrupted(file, "%s", *reason_r); |
1558 | 0 | return 0; |
1559 | 0 | } |
1560 | | |
1561 | 0 | if (file->buffer != NULL && file->buffer_offset <= start_offset && |
1562 | 0 | (uoff_t)st.st_size == file->buffer_offset + file->buffer->used) { |
1563 | | /* we already have the whole file mapped */ |
1564 | 0 | if ((ret = mail_transaction_log_file_sync(file, &retry, reason_r)) != 0 || |
1565 | 0 | !retry) |
1566 | 0 | return ret; |
1567 | | /* size changed, re-mmap */ |
1568 | 0 | } |
1569 | | |
1570 | 0 | do { |
1571 | 0 | mail_transaction_log_file_munmap(file); |
1572 | |
|
1573 | 0 | if (file->last_size - start_offset < mmap_get_page_size()) { |
1574 | | /* just reading the file is probably faster */ |
1575 | 0 | return mail_transaction_log_file_read(file, |
1576 | 0 | start_offset, |
1577 | 0 | FALSE, reason_r); |
1578 | 0 | } |
1579 | | |
1580 | 0 | if (mail_transaction_log_file_mmap(file, reason_r) < 0) |
1581 | 0 | return -1; |
1582 | 0 | ret = mail_transaction_log_file_sync(file, &retry, reason_r); |
1583 | 0 | } while (retry); |
1584 | | |
1585 | 0 | return ret; |
1586 | 0 | } |
1587 | | |
1588 | | int mail_transaction_log_file_map(struct mail_transaction_log_file *file, |
1589 | | uoff_t start_offset, uoff_t end_offset, |
1590 | | const char **reason_r) |
1591 | 0 | { |
1592 | 0 | uoff_t map_start_offset = start_offset; |
1593 | 0 | size_t size; |
1594 | 0 | int ret; |
1595 | |
|
1596 | 0 | if (file->hdr.indexid == 0) { |
1597 | | /* corrupted */ |
1598 | 0 | *reason_r = "corrupted, indexid=0"; |
1599 | 0 | return 0; |
1600 | 0 | } |
1601 | | |
1602 | 0 | i_assert(start_offset >= file->hdr.hdr_size); |
1603 | 0 | i_assert(start_offset <= end_offset); |
1604 | 0 | i_assert(file->buffer == NULL || file->mmap_base != NULL || |
1605 | 0 | file->sync_offset >= file->buffer_offset + file->buffer->used); |
1606 | | |
1607 | 0 | if (file->locked_sync_offset_updated && file == file->log->head && |
1608 | 0 | end_offset == UOFF_T_MAX) { |
1609 | | /* we're not interested of going further than sync_offset */ |
1610 | 0 | if (!log_file_map_check_offsets(file, start_offset, |
1611 | 0 | end_offset, reason_r)) |
1612 | 0 | return 0; |
1613 | 0 | i_assert(start_offset <= file->sync_offset); |
1614 | 0 | end_offset = file->sync_offset; |
1615 | 0 | } |
1616 | | |
1617 | 0 | if (file->buffer != NULL && file->buffer_offset <= start_offset) { |
1618 | | /* see if we already have it */ |
1619 | 0 | size = file->buffer->used; |
1620 | 0 | if (file->buffer_offset + size >= end_offset) |
1621 | 0 | return 1; |
1622 | 0 | } |
1623 | | |
1624 | 0 | if (file->locked) { |
1625 | | /* set this only when we've synced to end of file while locked |
1626 | | (either end_offset=UOFF_T_MAX or we had to read anyway) */ |
1627 | 0 | file->locked_sync_offset_updated = TRUE; |
1628 | 0 | } |
1629 | |
|
1630 | 0 | if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) { |
1631 | 0 | if (start_offset < file->buffer_offset || file->buffer == NULL) { |
1632 | | /* we had moved the log to memory but failed to read |
1633 | | the beginning of the log file */ |
1634 | 0 | *reason_r = "Beginning of the log isn't available"; |
1635 | 0 | return 0; |
1636 | 0 | } |
1637 | 0 | return log_file_map_check_offsets(file, start_offset, |
1638 | 0 | end_offset, reason_r) ? 1 : 0; |
1639 | 0 | } |
1640 | | |
1641 | 0 | if (start_offset > file->sync_offset) |
1642 | 0 | mail_transaction_log_file_skip_to_head(file); |
1643 | 0 | if (start_offset > file->sync_offset) { |
1644 | | /* although we could just skip over the unwanted data, we have |
1645 | | to sync everything so that modseqs are calculated |
1646 | | correctly */ |
1647 | 0 | map_start_offset = file->sync_offset; |
1648 | 0 | } |
1649 | |
|
1650 | 0 | if ((file->log->index->flags & MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE) == 0) |
1651 | 0 | ret = mail_transaction_log_file_map_mmap(file, map_start_offset, reason_r); |
1652 | 0 | else { |
1653 | 0 | mail_transaction_log_file_munmap(file); |
1654 | 0 | ret = mail_transaction_log_file_read(file, map_start_offset, FALSE, reason_r); |
1655 | 0 | } |
1656 | |
|
1657 | 0 | i_assert(file->buffer == NULL || file->mmap_base != NULL || |
1658 | 0 | file->sync_offset >= file->buffer_offset + file->buffer->used); |
1659 | 0 | if (ret <= 0) |
1660 | 0 | return ret; |
1661 | | |
1662 | 0 | i_assert(file->buffer != NULL); |
1663 | 0 | return log_file_map_check_offsets(file, start_offset, end_offset, |
1664 | 0 | reason_r) ? 1 : 0; |
1665 | 0 | } |
1666 | | |
1667 | | int mail_transaction_log_file_move_to_memory(struct mail_transaction_log_file *file) |
1668 | 0 | { |
1669 | 0 | const char *error; |
1670 | 0 | buffer_t *buf; |
1671 | 0 | int ret = 0; |
1672 | |
|
1673 | 0 | if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) |
1674 | 0 | return 0; |
1675 | | |
1676 | 0 | if (file->mmap_base != NULL) { |
1677 | | /* just copy to memory */ |
1678 | 0 | i_assert(file->buffer_offset == 0); |
1679 | | |
1680 | 0 | buf = buffer_create_dynamic(default_pool, file->mmap_size); |
1681 | 0 | buffer_append(buf, file->mmap_base, file->mmap_size); |
1682 | 0 | buffer_free(&file->buffer); |
1683 | 0 | file->buffer = buf; |
1684 | | |
1685 | | /* and lose the mmap */ |
1686 | 0 | if (munmap(file->mmap_base, file->mmap_size) < 0) |
1687 | 0 | log_file_set_syscall_error(file, "munmap()"); |
1688 | 0 | file->mmap_base = NULL; |
1689 | 0 | } else if (file->buffer_offset != 0) { |
1690 | | /* we don't have the full log in the memory. read it. */ |
1691 | 0 | ret = mail_transaction_log_file_read(file, 0, FALSE, &error); |
1692 | 0 | if (ret <= 0) { |
1693 | 0 | mail_index_set_error(file->log->index, |
1694 | 0 | "%s: Failed to read into memory: %s", file->filepath, error); |
1695 | 0 | } |
1696 | 0 | } |
1697 | 0 | file->last_size = 0; |
1698 | |
|
1699 | 0 | if (close(file->fd) < 0) |
1700 | 0 | log_file_set_syscall_error(file, "close()"); |
1701 | 0 | file->fd = -1; |
1702 | |
|
1703 | 0 | i_free(file->filepath); |
1704 | 0 | file->filepath = i_strdup(file->log->filepath); |
1705 | 0 | return ret < 0 ? -1 : 0; |
1706 | 0 | } |