/src/postgres/src/backend/storage/smgr/smgr.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * smgr.c |
4 | | * public interface routines to storage manager switch. |
5 | | * |
6 | | * All file system operations on relations dispatch through these routines. |
7 | | * An SMgrRelation represents physical on-disk relation files that are open |
8 | | * for reading and writing. |
9 | | * |
10 | | * When a relation is first accessed through the relation cache, the |
11 | | * corresponding SMgrRelation entry is opened by calling smgropen(), and the |
12 | | * reference is stored in the relation cache entry. |
13 | | * |
14 | | * Accesses that don't go through the relation cache open the SMgrRelation |
15 | | * directly. That includes flushing buffers from the buffer cache, as well as |
16 | | * all accesses in auxiliary processes like the checkpointer or the WAL redo |
17 | | * in the startup process. |
18 | | * |
19 | | * Operations like CREATE, DROP, ALTER TABLE also hold SMgrRelation references |
20 | | * independent of the relation cache. They need to prepare the physical files |
21 | | * before updating the relation cache. |
22 | | * |
23 | | * There is a hash table that holds all the SMgrRelation entries in the |
24 | | * backend. If you call smgropen() twice for the same rel locator, you get a |
25 | | * reference to the same SMgrRelation. The reference is valid until the end of |
26 | | * transaction. This makes repeated access to the same relation efficient, |
27 | | * and allows caching things like the relation size in the SMgrRelation entry. |
28 | | * |
29 | | * At end of transaction, all SMgrRelation entries that haven't been pinned |
30 | | * are removed. An SMgrRelation can hold kernel file system descriptors for |
31 | | * the underlying files, and we'd like to close those reasonably soon if the |
32 | | * file gets deleted. The SMgrRelations references held by the relcache are |
33 | | * pinned to prevent them from being closed. |
34 | | * |
35 | | * There is another mechanism to close file descriptors early: |
36 | | * PROCSIGNAL_BARRIER_SMGRRELEASE. It is a request to immediately close all |
37 | | * file descriptors. Upon receiving that signal, the backend closes all file |
38 | | * descriptors held open by SMgrRelations, but because it can happen in the |
39 | | * middle of a transaction, we cannot destroy the SMgrRelation objects |
40 |  | * themselves, as there could be pointers to them in active use. See |
41 | | * smgrrelease() and smgrreleaseall(). |
42 | | * |
43 | | * NB: We need to hold interrupts across most of the functions in this file, |
44 | | * as otherwise interrupt processing, e.g. due to a < ERROR elog/ereport, can |
45 | | * trigger procsignal processing, which in turn can trigger |
46 | | * smgrreleaseall(). Most of the relevant code is not reentrant. It seems |
47 | | * better to put the HOLD_INTERRUPTS()/RESUME_INTERRUPTS() here, instead of |
48 | | * trying to push them down to md.c where possible: For one, every smgr |
49 | | * implementation would be vulnerable, for another, a good bit of smgr.c code |
50 | | * itself is affected too. Eventually we might want a more targeted solution, |
51 | | * allowing e.g. a networked smgr implementation to be interrupted, but many |
52 | | * other, more complicated, problems would need to be fixed for that to be |
53 | | * viable (e.g. smgr.c is often called with interrupts already held). |
54 | | * |
55 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
56 | | * Portions Copyright (c) 1994, Regents of the University of California |
57 | | * |
58 | | * |
59 | | * IDENTIFICATION |
60 | | * src/backend/storage/smgr/smgr.c |
61 | | * |
62 | | *------------------------------------------------------------------------- |
63 | | */ |
64 | | #include "postgres.h" |
65 | | |
66 | | #include "access/xlogutils.h" |
67 | | #include "lib/ilist.h" |
68 | | #include "miscadmin.h" |
69 | | #include "storage/aio.h" |
70 | | #include "storage/bufmgr.h" |
71 | | #include "storage/ipc.h" |
72 | | #include "storage/md.h" |
73 | | #include "storage/smgr.h" |
74 | | #include "utils/hsearch.h" |
75 | | #include "utils/inval.h" |
76 | | |
77 | | |
78 | | /* |
79 | | * This struct of function pointers defines the API between smgr.c and |
80 | | * any individual storage manager module. Note that smgr subfunctions are |
81 | | * generally expected to report problems via elog(ERROR). An exception is |
82 | | * that smgr_unlink should use elog(WARNING), rather than erroring out, |
83 | | * because we normally unlink relations during post-commit/abort cleanup, |
84 | | * and so it's too late to raise an error. Also, various conditions that |
85 | | * would normally be errors should be allowed during bootstrap and/or WAL |
86 | | * recovery --- see comments in md.c for details. |
87 | | */ |
88 | | typedef struct f_smgr |
89 | | { |
90 | | void (*smgr_init) (void); /* may be NULL */ |
91 | | void (*smgr_shutdown) (void); /* may be NULL */ |
92 | | void (*smgr_open) (SMgrRelation reln); |
93 | | void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); |
94 | | void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, |
95 | | bool isRedo); |
96 | | bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); |
97 | | void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, |
98 | | bool isRedo); |
99 | | void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, |
100 | | BlockNumber blocknum, const void *buffer, bool skipFsync); |
101 | | void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, |
102 | | BlockNumber blocknum, int nblocks, bool skipFsync); |
103 | | bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, |
104 | | BlockNumber blocknum, int nblocks); |
105 | | uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, |
106 | | BlockNumber blocknum); |
107 | | void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, |
108 | | BlockNumber blocknum, |
109 | | void **buffers, BlockNumber nblocks); |
110 | | void (*smgr_startreadv) (PgAioHandle *ioh, |
111 | | SMgrRelation reln, ForkNumber forknum, |
112 | | BlockNumber blocknum, |
113 | | void **buffers, BlockNumber nblocks); |
114 | | void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, |
115 | | BlockNumber blocknum, |
116 | | const void **buffers, BlockNumber nblocks, |
117 | | bool skipFsync); |
118 | | void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, |
119 | | BlockNumber blocknum, BlockNumber nblocks); |
120 | | BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); |
121 | | void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, |
122 | | BlockNumber old_blocks, BlockNumber nblocks); |
123 | | void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); |
124 | | void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); |
125 | | int (*smgr_fd) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off); |
126 | | } f_smgr; |
127 | | |
/*
 * Table of known storage manager implementations, indexed by
 * SMgrRelation.smgr_which.  md.c (slot 0) is currently the only entry.
 */
static const f_smgr smgrsw[] = {
	/* magnetic disk */
	{
		.smgr_init = mdinit,
		.smgr_shutdown = NULL,
		.smgr_open = mdopen,
		.smgr_close = mdclose,
		.smgr_create = mdcreate,
		.smgr_exists = mdexists,
		.smgr_unlink = mdunlink,
		.smgr_extend = mdextend,
		.smgr_zeroextend = mdzeroextend,
		.smgr_prefetch = mdprefetch,
		.smgr_maxcombine = mdmaxcombine,
		.smgr_readv = mdreadv,
		.smgr_startreadv = mdstartreadv,
		.smgr_writev = mdwritev,
		.smgr_writeback = mdwriteback,
		.smgr_nblocks = mdnblocks,
		.smgr_truncate = mdtruncate,
		.smgr_immedsync = mdimmedsync,
		.smgr_registersync = mdregistersync,
		.smgr_fd = mdfd,
	}
};

/* number of entries in smgrsw[] */
static const int NSmgr = lengthof(smgrsw);
155 | | |
/*
 * Each backend has a hashtable that stores all extant SMgrRelation objects.
 * In addition, "unpinned" SMgrRelation objects are chained together in a list.
 */
/* backend-local hash of all SMgrRelations, keyed by RelFileLocatorBackend */
static HTAB *SMgrRelationHash = NULL;

/* list of SMgrRelations with pincount == 0; reaped by smgrdestroyall() */
static dlist_head unpinned_relns;

/* local function prototypes */
static void smgrshutdown(int code, Datum arg);
static void smgrdestroy(SMgrRelation reln);

/* AIO target callbacks (see aio_smgr_target_info below) */
static void smgr_aio_reopen(PgAioHandle *ioh);
static char *smgr_aio_describe_identity(const PgAioTargetData *sd);


/* Hooks the AIO subsystem uses to reopen/describe smgr-targeted IOs */
const PgAioTargetInfo aio_smgr_target_info = {
	.name = "smgr",
	.reopen = smgr_aio_reopen,
	.describe_identity = smgr_aio_describe_identity,
};
177 | | |
178 | | |
179 | | /* |
180 | | * smgrinit(), smgrshutdown() -- Initialize or shut down storage |
181 | | * managers. |
182 | | * |
183 | | * Note: smgrinit is called during backend startup (normal or standalone |
184 | | * case), *not* during postmaster start. Therefore, any resources created |
185 | | * here or destroyed in smgrshutdown are backend-local. |
186 | | */ |
187 | | void |
188 | | smgrinit(void) |
189 | 0 | { |
190 | 0 | int i; |
191 | |
|
192 | 0 | HOLD_INTERRUPTS(); |
193 | |
|
194 | 0 | for (i = 0; i < NSmgr; i++) |
195 | 0 | { |
196 | 0 | if (smgrsw[i].smgr_init) |
197 | 0 | smgrsw[i].smgr_init(); |
198 | 0 | } |
199 | |
|
200 | 0 | RESUME_INTERRUPTS(); |
201 | | |
202 | | /* register the shutdown proc */ |
203 | 0 | on_proc_exit(smgrshutdown, 0); |
204 | 0 | } |
205 | | |
206 | | /* |
207 | | * on_proc_exit hook for smgr cleanup during backend shutdown |
208 | | */ |
209 | | static void |
210 | | smgrshutdown(int code, Datum arg) |
211 | 0 | { |
212 | 0 | int i; |
213 | |
|
214 | 0 | HOLD_INTERRUPTS(); |
215 | |
|
216 | 0 | for (i = 0; i < NSmgr; i++) |
217 | 0 | { |
218 | 0 | if (smgrsw[i].smgr_shutdown) |
219 | 0 | smgrsw[i].smgr_shutdown(); |
220 | 0 | } |
221 | |
|
222 | 0 | RESUME_INTERRUPTS(); |
223 | 0 | } |
224 | | |
/*
 * smgropen() -- Return an SMgrRelation object, creating it if need be.
 *
 * In versions of PostgreSQL prior to 17, this function returned an object
 * with no defined lifetime.  Now, however, the object remains valid for the
 * lifetime of the transaction, up to the point where AtEOXact_SMgr() is
 * called, making it much easier for callers to know for how long they can
 * hold on to a pointer to the returned object.  If this function is called
 * outside of a transaction, the object remains valid until smgrdestroy() or
 * smgrdestroyall() is called.  Background processes that use smgr but not
 * transactions typically do this once per checkpoint cycle.
 *
 * This does not attempt to actually open the underlying files.
 */
SMgrRelation
smgropen(RelFileLocator rlocator, ProcNumber backend)
{
	RelFileLocatorBackend brlocator;
	SMgrRelation reln;
	bool		found;

	Assert(RelFileNumberIsValid(rlocator.relNumber));

	/*
	 * Hold interrupts: procsignal processing could invoke smgrreleaseall()
	 * while we are manipulating the hash table (see file header comment).
	 */
	HOLD_INTERRUPTS();

	if (SMgrRelationHash == NULL)
	{
		/* First time through: initialize the hash table */
		HASHCTL		ctl;

		ctl.keysize = sizeof(RelFileLocatorBackend);
		ctl.entrysize = sizeof(SMgrRelationData);
		SMgrRelationHash = hash_create("smgr relation table", 400,
									   &ctl, HASH_ELEM | HASH_BLOBS);
		dlist_init(&unpinned_relns);
	}

	/* Look up or create an entry */
	brlocator.locator = rlocator;
	brlocator.backend = backend;
	reln = (SMgrRelation) hash_search(SMgrRelationHash,
									  &brlocator,
									  HASH_ENTER, &found);

	/* Initialize it if not present before */
	if (!found)
	{
		/* hash_search already filled in the lookup key */
		reln->smgr_targblock = InvalidBlockNumber;
		/* no fork sizes are known yet */
		for (int i = 0; i <= MAX_FORKNUM; ++i)
			reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
		reln->smgr_which = 0;	/* we only have md.c at present */

		/* it is not pinned yet */
		reln->pincount = 0;
		dlist_push_tail(&unpinned_relns, &reln->node);

		/* implementation-specific initialization */
		smgrsw[reln->smgr_which].smgr_open(reln);
	}

	RESUME_INTERRUPTS();

	return reln;
}
290 | | |
291 | | /* |
292 | | * smgrpin() -- Prevent an SMgrRelation object from being destroyed at end of |
293 | | * transaction |
294 | | */ |
295 | | void |
296 | | smgrpin(SMgrRelation reln) |
297 | 0 | { |
298 | 0 | if (reln->pincount == 0) |
299 | 0 | dlist_delete(&reln->node); |
300 | 0 | reln->pincount++; |
301 | 0 | } |
302 | | |
303 | | /* |
304 | | * smgrunpin() -- Allow an SMgrRelation object to be destroyed at end of |
305 | | * transaction |
306 | | * |
307 | | * The object remains valid, but if there are no other pins on it, it is moved |
308 | | * to the unpinned list where it will be destroyed by AtEOXact_SMgr(). |
309 | | */ |
310 | | void |
311 | | smgrunpin(SMgrRelation reln) |
312 | 0 | { |
313 | 0 | Assert(reln->pincount > 0); |
314 | 0 | reln->pincount--; |
315 | 0 | if (reln->pincount == 0) |
316 | 0 | dlist_push_tail(&unpinned_relns, &reln->node); |
317 | 0 | } |
318 | | |
/*
 * smgrdestroy() -- Delete an SMgrRelation object.
 *
 * Closes all forks at the implementation level, unlinks the object from the
 * unpinned list, and removes its hashtable entry.  Only unpinned objects may
 * be destroyed.
 */
static void
smgrdestroy(SMgrRelation reln)
{
	ForkNumber	forknum;

	Assert(reln->pincount == 0);

	/* interrupts held: smgr_close and the list/hash updates must not race */
	HOLD_INTERRUPTS();

	for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
		smgrsw[reln->smgr_which].smgr_close(reln, forknum);

	/* remove from the unpinned list before freeing the entry */
	dlist_delete(&reln->node);

	if (hash_search(SMgrRelationHash,
					&(reln->smgr_rlocator),
					HASH_REMOVE, NULL) == NULL)
		elog(ERROR, "SMgrRelation hashtable corrupted");

	RESUME_INTERRUPTS();
}
343 | | |
344 | | /* |
345 | | * smgrrelease() -- Release all resources used by this object. |
346 | | * |
347 | | * The object remains valid. |
348 | | */ |
349 | | void |
350 | | smgrrelease(SMgrRelation reln) |
351 | 0 | { |
352 | 0 | HOLD_INTERRUPTS(); |
353 | |
|
354 | 0 | for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
355 | 0 | { |
356 | 0 | smgrsw[reln->smgr_which].smgr_close(reln, forknum); |
357 | 0 | reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; |
358 | 0 | } |
359 | 0 | reln->smgr_targblock = InvalidBlockNumber; |
360 | |
|
361 | 0 | RESUME_INTERRUPTS(); |
362 | 0 | } |
363 | | |
/*
 * smgrclose() -- Close an SMgrRelation object.
 *
 * The SMgrRelation reference should not be used after this call.  However,
 * because we don't keep track of the references returned by smgropen(), we
 * don't know if there are other references still pointing to the same object,
 * so we cannot remove the SMgrRelation object yet.  Therefore, this is just a
 * synonym for smgrrelease() at the moment.
 */
void
smgrclose(SMgrRelation reln)
{
	smgrrelease(reln);
}
378 | | |
/*
 * smgrdestroyall() -- Release resources used by all unpinned objects.
 *
 * It must be known that there are no pointers to SMgrRelations, other than
 * those pinned with smgrpin().
 */
void
smgrdestroyall(void)
{
	dlist_mutable_iter iter;

	/* seems unsafe to accept interrupts while in a dlist_foreach_modify() */
	HOLD_INTERRUPTS();

	/*
	 * Zap all unpinned SMgrRelations.  We rely on smgrdestroy() to remove
	 * each one from the list.
	 */
	dlist_foreach_modify(iter, &unpinned_relns)
	{
		SMgrRelation rel = dlist_container(SMgrRelationData, node,
										   iter.cur);

		smgrdestroy(rel);
	}

	RESUME_INTERRUPTS();
}
407 | | |
408 | | /* |
409 | | * smgrreleaseall() -- Release resources used by all objects. |
410 | | */ |
411 | | void |
412 | | smgrreleaseall(void) |
413 | 0 | { |
414 | 0 | HASH_SEQ_STATUS status; |
415 | 0 | SMgrRelation reln; |
416 | | |
417 | | /* Nothing to do if hashtable not set up */ |
418 | 0 | if (SMgrRelationHash == NULL) |
419 | 0 | return; |
420 | | |
421 | | /* seems unsafe to accept interrupts while iterating */ |
422 | 0 | HOLD_INTERRUPTS(); |
423 | |
|
424 | 0 | hash_seq_init(&status, SMgrRelationHash); |
425 | |
|
426 | 0 | while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) |
427 | 0 | { |
428 | 0 | smgrrelease(reln); |
429 | 0 | } |
430 | |
|
431 | 0 | RESUME_INTERRUPTS(); |
432 | 0 | } |
433 | | |
434 | | /* |
435 | | * smgrreleaserellocator() -- Release resources for given RelFileLocator, if |
436 | | * it's open. |
437 | | * |
438 | | * This has the same effects as smgrrelease(smgropen(rlocator)), but avoids |
439 | | * uselessly creating a hashtable entry only to drop it again when no |
440 | | * such entry exists already. |
441 | | */ |
442 | | void |
443 | | smgrreleaserellocator(RelFileLocatorBackend rlocator) |
444 | 0 | { |
445 | 0 | SMgrRelation reln; |
446 | | |
447 | | /* Nothing to do if hashtable not set up */ |
448 | 0 | if (SMgrRelationHash == NULL) |
449 | 0 | return; |
450 | | |
451 | 0 | reln = (SMgrRelation) hash_search(SMgrRelationHash, |
452 | 0 | &rlocator, |
453 | 0 | HASH_FIND, NULL); |
454 | 0 | if (reln != NULL) |
455 | 0 | smgrrelease(reln); |
456 | 0 | } |
457 | | |
458 | | /* |
459 | | * smgrexists() -- Does the underlying file for a fork exist? |
460 | | */ |
461 | | bool |
462 | | smgrexists(SMgrRelation reln, ForkNumber forknum) |
463 | 0 | { |
464 | 0 | bool ret; |
465 | |
|
466 | 0 | HOLD_INTERRUPTS(); |
467 | 0 | ret = smgrsw[reln->smgr_which].smgr_exists(reln, forknum); |
468 | 0 | RESUME_INTERRUPTS(); |
469 | |
|
470 | 0 | return ret; |
471 | 0 | } |
472 | | |
473 | | /* |
474 | | * smgrcreate() -- Create a new relation. |
475 | | * |
476 | | * Given an already-created (but presumably unused) SMgrRelation, |
477 | | * cause the underlying disk file or other storage for the fork |
478 | | * to be created. |
479 | | */ |
480 | | void |
481 | | smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) |
482 | 0 | { |
483 | 0 | HOLD_INTERRUPTS(); |
484 | 0 | smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); |
485 | 0 | RESUME_INTERRUPTS(); |
486 | 0 | } |
487 | | |
488 | | /* |
489 | | * smgrdosyncall() -- Immediately sync all forks of all given relations |
490 | | * |
491 | | * All forks of all given relations are synced out to the store. |
492 | | * |
493 | | * This is equivalent to FlushRelationBuffers() for each smgr relation, |
494 | | * then calling smgrimmedsync() for all forks of each relation, but it's |
495 | | * significantly quicker so should be preferred when possible. |
496 | | */ |
497 | | void |
498 | | smgrdosyncall(SMgrRelation *rels, int nrels) |
499 | 0 | { |
500 | 0 | int i = 0; |
501 | 0 | ForkNumber forknum; |
502 | |
|
503 | 0 | if (nrels == 0) |
504 | 0 | return; |
505 | | |
506 | 0 | FlushRelationsAllBuffers(rels, nrels); |
507 | |
|
508 | 0 | HOLD_INTERRUPTS(); |
509 | | |
510 | | /* |
511 | | * Sync the physical file(s). |
512 | | */ |
513 | 0 | for (i = 0; i < nrels; i++) |
514 | 0 | { |
515 | 0 | int which = rels[i]->smgr_which; |
516 | |
|
517 | 0 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
518 | 0 | { |
519 | 0 | if (smgrsw[which].smgr_exists(rels[i], forknum)) |
520 | 0 | smgrsw[which].smgr_immedsync(rels[i], forknum); |
521 | 0 | } |
522 | 0 | } |
523 | |
|
524 | 0 | RESUME_INTERRUPTS(); |
525 | 0 | } |
526 | | |
/*
 * smgrdounlinkall() -- Immediately unlink all forks of all given relations
 *
 * All forks of all given relations are removed from the store.  This
 * should not be used during transactional operations, since it can't be
 * undone.
 *
 * If isRedo is true, it is okay for the underlying file(s) to be gone
 * already.
 */
void
smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
{
	int			i = 0;
	RelFileLocatorBackend *rlocators;
	ForkNumber	forknum;

	if (nrels == 0)
		return;

	/*
	 * It would be unsafe to process interrupts between DropRelationBuffers()
	 * and unlinking the underlying files.  This probably should be a critical
	 * section, but we're not there yet.
	 */
	HOLD_INTERRUPTS();

	/*
	 * Get rid of any remaining buffers for the relations.  bufmgr will just
	 * drop them without bothering to write the contents.
	 */
	DropRelationsAllBuffers(rels, nrels);

	/*
	 * create an array which contains all relations to be dropped, and close
	 * each relation's forks at the smgr level while at it
	 */
	rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
	for (i = 0; i < nrels; i++)
	{
		RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
		int			which = rels[i]->smgr_which;

		rlocators[i] = rlocator;

		/* Close the forks at smgr level */
		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
			smgrsw[which].smgr_close(rels[i], forknum);
	}

	/*
	 * Send a shared-inval message to force other backends to close any
	 * dangling smgr references they may have for these rels.  We should do
	 * this before starting the actual unlinking, in case we fail partway
	 * through that step.  Note that the sinval messages will eventually come
	 * back to this backend, too, and thereby provide a backstop that we
	 * closed our own smgr rel.
	 */
	for (i = 0; i < nrels; i++)
		CacheInvalidateSmgr(rlocators[i]);

	/*
	 * Delete the physical file(s).
	 *
	 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
	 * ERROR, because we've already decided to commit or abort the current
	 * xact.
	 */

	for (i = 0; i < nrels; i++)
	{
		int			which = rels[i]->smgr_which;

		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
			smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
	}

	pfree(rlocators);

	RESUME_INTERRUPTS();
}
608 | | |
609 | | |
610 | | /* |
611 | | * smgrextend() -- Add a new block to a file. |
612 | | * |
613 | | * The semantics are nearly the same as smgrwrite(): write at the |
614 | | * specified position. However, this is to be used for the case of |
615 | | * extending a relation (i.e., blocknum is at or beyond the current |
616 | | * EOF). Note that we assume writing a block beyond current EOF |
617 | | * causes intervening file space to become filled with zeroes. |
618 | | */ |
619 | | void |
620 | | smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
621 | | const void *buffer, bool skipFsync) |
622 | 0 | { |
623 | 0 | HOLD_INTERRUPTS(); |
624 | |
|
625 | 0 | smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, |
626 | 0 | buffer, skipFsync); |
627 | | |
628 | | /* |
629 | | * Normally we expect this to increase nblocks by one, but if the cached |
630 | | * value isn't as expected, just invalidate it so the next call asks the |
631 | | * kernel. |
632 | | */ |
633 | 0 | if (reln->smgr_cached_nblocks[forknum] == blocknum) |
634 | 0 | reln->smgr_cached_nblocks[forknum] = blocknum + 1; |
635 | 0 | else |
636 | 0 | reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; |
637 | |
|
638 | 0 | RESUME_INTERRUPTS(); |
639 | 0 | } |
640 | | |
641 | | /* |
642 | | * smgrzeroextend() -- Add new zeroed out blocks to a file. |
643 | | * |
644 | | * Similar to smgrextend(), except the relation can be extended by |
645 | | * multiple blocks at once and the added blocks will be filled with |
646 | | * zeroes. |
647 | | */ |
648 | | void |
649 | | smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
650 | | int nblocks, bool skipFsync) |
651 | 0 | { |
652 | 0 | HOLD_INTERRUPTS(); |
653 | |
|
654 | 0 | smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum, |
655 | 0 | nblocks, skipFsync); |
656 | | |
657 | | /* |
658 | | * Normally we expect this to increase the fork size by nblocks, but if |
659 | | * the cached value isn't as expected, just invalidate it so the next call |
660 | | * asks the kernel. |
661 | | */ |
662 | 0 | if (reln->smgr_cached_nblocks[forknum] == blocknum) |
663 | 0 | reln->smgr_cached_nblocks[forknum] = blocknum + nblocks; |
664 | 0 | else |
665 | 0 | reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; |
666 | |
|
667 | 0 | RESUME_INTERRUPTS(); |
668 | 0 | } |
669 | | |
670 | | /* |
671 | | * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation. |
672 | | * |
673 | | * In recovery only, this can return false to indicate that a file |
674 | | * doesn't exist (presumably it has been dropped by a later WAL |
675 | | * record). |
676 | | */ |
677 | | bool |
678 | | smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
679 | | int nblocks) |
680 | 0 | { |
681 | 0 | bool ret; |
682 | |
|
683 | 0 | HOLD_INTERRUPTS(); |
684 | 0 | ret = smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks); |
685 | 0 | RESUME_INTERRUPTS(); |
686 | |
|
687 | 0 | return ret; |
688 | 0 | } |
689 | | |
690 | | /* |
691 | | * smgrmaxcombine() - Return the maximum number of total blocks that can be |
692 | | * combined with an IO starting at blocknum. |
693 | | * |
694 | | * The returned value includes the IO for blocknum itself. |
695 | | */ |
696 | | uint32 |
697 | | smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, |
698 | | BlockNumber blocknum) |
699 | 0 | { |
700 | 0 | uint32 ret; |
701 | |
|
702 | 0 | HOLD_INTERRUPTS(); |
703 | 0 | ret = smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum); |
704 | 0 | RESUME_INTERRUPTS(); |
705 | |
|
706 | 0 | return ret; |
707 | 0 | } |
708 | | |
709 | | /* |
710 | | * smgrreadv() -- read a particular block range from a relation into the |
711 | | * supplied buffers. |
712 | | * |
713 | | * This routine is called from the buffer manager in order to |
714 | | * instantiate pages in the shared buffer cache. All storage managers |
715 | | * return pages in the format that POSTGRES expects. |
716 | | * |
717 | | * If more than one block is intended to be read, callers need to use |
718 | | * smgrmaxcombine() to check how many blocks can be combined into one IO. |
719 | | */ |
720 | | void |
721 | | smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
722 | | void **buffers, BlockNumber nblocks) |
723 | 0 | { |
724 | 0 | HOLD_INTERRUPTS(); |
725 | 0 | smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers, |
726 | 0 | nblocks); |
727 | 0 | RESUME_INTERRUPTS(); |
728 | 0 | } |
729 | | |
730 | | /* |
731 | | * smgrstartreadv() -- asynchronous version of smgrreadv() |
732 | | * |
733 | | * This starts an asynchronous readv IO using the IO handle `ioh`. Other than |
734 | | * `ioh` all parameters are the same as smgrreadv(). |
735 | | * |
736 | | * Completion callbacks above smgr will be passed the result as the number of |
737 | | * successfully read blocks if the read [partially] succeeds (Buffers for |
738 | | * blocks not successfully read might bear unspecified modifications, up to |
739 | | * the full nblocks). This maintains the abstraction that smgr operates on the |
740 | | * level of blocks, rather than bytes. |
741 | | * |
742 | | * Compared to smgrreadv(), more responsibilities fall on the caller: |
743 | | * - Partial reads need to be handled by the caller re-issuing IO for the |
744 | | * unread blocks |
745 | | * - smgr will ereport(LOG_SERVER_ONLY) some problems, but higher layers are |
746 | | * responsible for pgaio_result_report() to mirror that news to the user (if |
747 | | * the IO results in PGAIO_RS_WARNING) or abort the (sub)transaction (if |
748 | | * PGAIO_RS_ERROR). |
749 | | * - Under Valgrind, the "buffers" memory may or may not change status to |
750 | | * DEFINED, depending on io_method and concurrent activity. |
751 | | */ |
752 | | void |
753 | | smgrstartreadv(PgAioHandle *ioh, |
754 | | SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
755 | | void **buffers, BlockNumber nblocks) |
756 | 0 | { |
757 | 0 | HOLD_INTERRUPTS(); |
758 | 0 | smgrsw[reln->smgr_which].smgr_startreadv(ioh, |
759 | 0 | reln, forknum, blocknum, buffers, |
760 | 0 | nblocks); |
761 | 0 | RESUME_INTERRUPTS(); |
762 | 0 | } |
763 | | |
764 | | /* |
765 | | * smgrwritev() -- Write the supplied buffers out. |
766 | | * |
767 | | * This is to be used only for updating already-existing blocks of a |
768 | | * relation (ie, those before the current EOF). To extend a relation, |
769 | | * use smgrextend(). |
770 | | * |
771 | | * This is not a synchronous write -- the block is not necessarily |
772 | | * on disk at return, only dumped out to the kernel. However, |
773 | | * provisions will be made to fsync the write before the next checkpoint. |
774 | | * |
775 | | * NB: The mechanism to ensure fsync at next checkpoint assumes that there is |
776 | | * something that prevents a concurrent checkpoint from "racing ahead" of the |
777 | | * write. One way to prevent that is by holding a lock on the buffer; the |
778 | | * buffer manager's writes are protected by that. The bulk writer facility |
779 | | * in bulk_write.c checks the redo pointer and calls smgrimmedsync() if a |
780 | | * checkpoint happened; that relies on the fact that no other backend can be |
781 | | * concurrently modifying the page. |
782 | | * |
783 | | * skipFsync indicates that the caller will make other provisions to |
784 | | * fsync the relation, so we needn't bother. Temporary relations also |
785 | | * do not require fsync. |
786 | | * |
787 | | * If more than one block is intended to be read, callers need to use |
788 | | * smgrmaxcombine() to check how many blocks can be combined into one IO. |
789 | | */ |
790 | | void |
791 | | smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
792 | | const void **buffers, BlockNumber nblocks, bool skipFsync) |
793 | 0 | { |
794 | 0 | HOLD_INTERRUPTS(); |
795 | 0 | smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum, |
796 | 0 | buffers, nblocks, skipFsync); |
797 | 0 | RESUME_INTERRUPTS(); |
798 | 0 | } |
799 | | |
800 | | /* |
801 | | * smgrwriteback() -- Trigger kernel writeback for the supplied range of |
802 | | * blocks. |
803 | | */ |
804 | | void |
805 | | smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
806 | | BlockNumber nblocks) |
807 | 0 | { |
808 | 0 | HOLD_INTERRUPTS(); |
809 | 0 | smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, |
810 | 0 | nblocks); |
811 | 0 | RESUME_INTERRUPTS(); |
812 | 0 | } |
813 | | |
814 | | /* |
815 | | * smgrnblocks() -- Calculate the number of blocks in the |
816 | | * supplied relation. |
817 | | */ |
818 | | BlockNumber |
819 | | smgrnblocks(SMgrRelation reln, ForkNumber forknum) |
820 | 0 | { |
821 | 0 | BlockNumber result; |
822 | | |
823 | | /* Check and return if we get the cached value for the number of blocks. */ |
824 | 0 | result = smgrnblocks_cached(reln, forknum); |
825 | 0 | if (result != InvalidBlockNumber) |
826 | 0 | return result; |
827 | | |
828 | 0 | HOLD_INTERRUPTS(); |
829 | |
|
830 | 0 | result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); |
831 | |
|
832 | 0 | reln->smgr_cached_nblocks[forknum] = result; |
833 | |
|
834 | 0 | RESUME_INTERRUPTS(); |
835 | |
|
836 | 0 | return result; |
837 | 0 | } |
838 | | |
839 | | /* |
840 | | * smgrnblocks_cached() -- Get the cached number of blocks in the supplied |
841 | | * relation. |
842 | | * |
 * Returns InvalidBlockNumber when not in recovery, or when the relation
 * fork size is not cached.
845 | | */ |
846 | | BlockNumber |
847 | | smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum) |
848 | 0 | { |
849 | | /* |
850 | | * For now, this function uses cached values only in recovery due to lack |
851 | | * of a shared invalidation mechanism for changes in file size. Code |
852 | | * elsewhere reads smgr_cached_nblocks and copes with stale data. |
853 | | */ |
854 | 0 | if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber) |
855 | 0 | return reln->smgr_cached_nblocks[forknum]; |
856 | | |
857 | 0 | return InvalidBlockNumber; |
858 | 0 | } |
859 | | |
860 | | /* |
861 | | * smgrtruncate() -- Truncate the given forks of supplied relation to |
862 | | * each specified numbers of blocks |
863 | | * |
864 | | * The truncation is done immediately, so this can't be rolled back. |
865 | | * |
866 | | * The caller must hold AccessExclusiveLock on the relation, to ensure that |
867 | | * other backends receive the smgr invalidation event that this function sends |
868 | | * before they access any forks of the relation again. The current size of |
869 | | * the forks should be provided in old_nblocks. This function should normally |
870 | | * be called in a critical section, but the current size must be checked |
871 | | * outside the critical section, and no interrupts or smgr functions relating |
872 | | * to this relation should be called in between. |
873 | | */ |
void
smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
			 BlockNumber *old_nblocks, BlockNumber *nblocks)
{
	int			i;

	/*
	 * Get rid of any buffers for the about-to-be-deleted blocks.  bufmgr
	 * will just drop them without bothering to write the contents.
	 */
	DropRelationBuffers(reln, forknum, nforks, nblocks);

	/*
	 * Send a shared-inval message to force other backends to close any smgr
	 * references they may have for this rel.  This is useful because they
	 * might have open file pointers to segments that got removed, and/or
	 * smgr_targblock variables pointing past the new rel end.  (The inval
	 * message will come back to our backend, too, causing a
	 * probably-unnecessary local smgr flush.  But we don't expect that this
	 * is a performance-critical path.)  As in the unlink code, we want to be
	 * sure the message is sent before we start changing things on-disk.
	 */
	CacheInvalidateSmgr(reln->smgr_rlocator);

	/* Do the truncation, one fork at a time. */
	for (i = 0; i < nforks; i++)
	{
		/* Mark the cached size as invalid, in case we hit an error below. */
		reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;

		smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i],
											   old_nblocks[i], nblocks[i]);

		/*
		 * We might as well update the local smgr_cached_nblocks values. The
		 * smgr cache inval message that this function sent will cause other
		 * backends to invalidate their copies of smgr_cached_nblocks, and
		 * these ones too at the next command boundary. But ensure they aren't
		 * outright wrong until then.
		 */
		reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
	}
}
917 | | |
918 | | /* |
919 | | * smgrregistersync() -- Request a relation to be sync'd at next checkpoint |
920 | | * |
921 | | * This can be used after calling smgrwrite() or smgrextend() with skipFsync = |
922 | | * true, to register the fsyncs that were skipped earlier. |
923 | | * |
924 | | * Note: be mindful that a checkpoint could already have happened between the |
925 | | * smgrwrite or smgrextend calls and this! In that case, the checkpoint |
926 | | * already missed fsyncing this relation, and you should use smgrimmedsync |
927 | | * instead. Most callers should use the bulk loading facility in bulk_write.c |
928 | | * which handles all that. |
929 | | */ |
void
smgrregistersync(SMgrRelation reln, ForkNumber forknum)
{
	/* Interrupts are held off so the FDs in use can't be closed under us. */
	HOLD_INTERRUPTS();
	/* Dispatch to the relation's storage manager implementation. */
	smgrsw[reln->smgr_which].smgr_registersync(reln, forknum);
	RESUME_INTERRUPTS();
}
937 | | |
938 | | /* |
939 | | * smgrimmedsync() -- Force the specified relation to stable storage. |
940 | | * |
941 | | * Synchronously force all previous writes to the specified relation |
942 | | * down to disk. |
943 | | * |
944 | | * This is useful for building completely new relations (eg, new |
945 | | * indexes). Instead of incrementally WAL-logging the index build |
946 | | * steps, we can just write completed index pages to disk with smgrwrite |
947 | | * or smgrextend, and then fsync the completed index file before |
948 | | * committing the transaction. (This is sufficient for purposes of |
949 | | * crash recovery, since it effectively duplicates forcing a checkpoint |
950 | | * for the completed index. But it is *not* sufficient if one wishes |
951 | | * to use the WAL log for PITR or replication purposes: in that case |
952 | | * we have to make WAL entries as well.) |
953 | | * |
954 | | * The preceding writes should specify skipFsync = true to avoid |
955 | | * duplicative fsyncs. |
956 | | * |
957 | | * Note that you need to do FlushRelationBuffers() first if there is |
958 | | * any possibility that there are dirty buffers for the relation; |
959 | | * otherwise the sync is not very meaningful. |
960 | | * |
961 | | * Most callers should use the bulk loading facility in bulk_write.c |
962 | | * instead of calling this directly. |
963 | | */ |
void
smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
{
	/* Interrupts are held off so the FDs in use can't be closed under us. */
	HOLD_INTERRUPTS();
	/* Dispatch to the relation's storage manager implementation. */
	smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
	RESUME_INTERRUPTS();
}
971 | | |
972 | | /* |
973 | | * Return fd for the specified block number and update *off to the appropriate |
974 | | * position. |
975 | | * |
976 | | * This is only to be used for when AIO needs to perform the IO in a different |
977 | | * process than where it was issued (e.g. in an IO worker). |
978 | | */ |
979 | | static int |
980 | | smgrfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off) |
981 | 0 | { |
982 | 0 | int fd; |
983 | | |
984 | | /* |
985 | | * The caller needs to prevent interrupts from being processed, otherwise |
986 | | * the FD could be closed prematurely. |
987 | | */ |
988 | 0 | Assert(!INTERRUPTS_CAN_BE_PROCESSED()); |
989 | |
|
990 | 0 | fd = smgrsw[reln->smgr_which].smgr_fd(reln, forknum, blocknum, off); |
991 | |
|
992 | 0 | return fd; |
993 | 0 | } |
994 | | |
995 | | /* |
996 | | * AtEOXact_SMgr |
997 | | * |
998 | | * This routine is called during transaction commit or abort (it doesn't |
999 | | * particularly care which). All unpinned SMgrRelation objects are destroyed. |
1000 | | * |
1001 | | * We do this as a compromise between wanting transient SMgrRelations to |
1002 | | * live awhile (to amortize the costs of blind writes of multiple blocks) |
1003 | | * and needing them to not live forever (since we're probably holding open |
1004 | | * a kernel file descriptor for the underlying file, and we need to ensure |
1005 | | * that gets closed reasonably soon if the file gets deleted). |
1006 | | */ |
void
AtEOXact_SMgr(void)
{
	/* Destroy every SMgrRelation that nobody has pinned. */
	smgrdestroyall();
}
1012 | | |
1013 | | /* |
1014 | | * This routine is called when we are ordered to release all open files by a |
1015 | | * ProcSignalBarrier. |
1016 | | */ |
bool
ProcessBarrierSmgrRelease(void)
{
	/* Release the open files held by all SMgrRelations in this backend. */
	smgrreleaseall();
	/* Report that this barrier was absorbed successfully. */
	return true;
}
1023 | | |
1024 | | /* |
1025 | | * Set target of the IO handle to be smgr and initialize all the relevant |
1026 | | * pieces of data. |
1027 | | */ |
1028 | | void |
1029 | | pgaio_io_set_target_smgr(PgAioHandle *ioh, |
1030 | | SMgrRelationData *smgr, |
1031 | | ForkNumber forknum, |
1032 | | BlockNumber blocknum, |
1033 | | int nblocks, |
1034 | | bool skip_fsync) |
1035 | 0 | { |
1036 | 0 | PgAioTargetData *sd = pgaio_io_get_target_data(ioh); |
1037 | |
|
1038 | 0 | pgaio_io_set_target(ioh, PGAIO_TID_SMGR); |
1039 | | |
1040 | | /* backend is implied via IO owner */ |
1041 | 0 | sd->smgr.rlocator = smgr->smgr_rlocator.locator; |
1042 | 0 | sd->smgr.forkNum = forknum; |
1043 | 0 | sd->smgr.blockNum = blocknum; |
1044 | 0 | sd->smgr.nblocks = nblocks; |
1045 | 0 | sd->smgr.is_temp = SmgrIsTemp(smgr); |
1046 | | /* Temp relations should never be fsync'd */ |
1047 | 0 | sd->smgr.skip_fsync = skip_fsync && !SmgrIsTemp(smgr); |
1048 | 0 | } |
1049 | | |
1050 | | /* |
1051 | | * Callback for the smgr AIO target, to reopen the file (e.g. because the IO |
1052 | | * is executed in a worker). |
1053 | | */ |
1054 | | static void |
1055 | | smgr_aio_reopen(PgAioHandle *ioh) |
1056 | 0 | { |
1057 | 0 | PgAioTargetData *sd = pgaio_io_get_target_data(ioh); |
1058 | 0 | PgAioOpData *od = pgaio_io_get_op_data(ioh); |
1059 | 0 | SMgrRelation reln; |
1060 | 0 | ProcNumber procno; |
1061 | 0 | uint32 off; |
1062 | | |
1063 | | /* |
1064 | | * The caller needs to prevent interrupts from being processed, otherwise |
1065 | | * the FD could be closed again before we get to executing the IO. |
1066 | | */ |
1067 | 0 | Assert(!INTERRUPTS_CAN_BE_PROCESSED()); |
1068 | |
|
1069 | 0 | if (sd->smgr.is_temp) |
1070 | 0 | procno = pgaio_io_get_owner(ioh); |
1071 | 0 | else |
1072 | 0 | procno = INVALID_PROC_NUMBER; |
1073 | |
|
1074 | 0 | reln = smgropen(sd->smgr.rlocator, procno); |
1075 | 0 | switch (pgaio_io_get_op(ioh)) |
1076 | 0 | { |
1077 | 0 | case PGAIO_OP_INVALID: |
1078 | 0 | pg_unreachable(); |
1079 | 0 | break; |
1080 | 0 | case PGAIO_OP_READV: |
1081 | 0 | od->read.fd = smgrfd(reln, sd->smgr.forkNum, sd->smgr.blockNum, &off); |
1082 | 0 | Assert(off == od->read.offset); |
1083 | 0 | break; |
1084 | 0 | case PGAIO_OP_WRITEV: |
1085 | 0 | od->write.fd = smgrfd(reln, sd->smgr.forkNum, sd->smgr.blockNum, &off); |
1086 | 0 | Assert(off == od->write.offset); |
1087 | 0 | break; |
1088 | 0 | } |
1089 | 0 | } |
1090 | | |
1091 | | /* |
1092 | | * Callback for the smgr AIO target, describing the target of the IO. |
1093 | | */ |
1094 | | static char * |
1095 | | smgr_aio_describe_identity(const PgAioTargetData *sd) |
1096 | 0 | { |
1097 | 0 | RelPathStr path; |
1098 | 0 | char *desc; |
1099 | |
|
1100 | 0 | path = relpathbackend(sd->smgr.rlocator, |
1101 | 0 | sd->smgr.is_temp ? |
1102 | 0 | MyProcNumber : INVALID_PROC_NUMBER, |
1103 | 0 | sd->smgr.forkNum); |
1104 | |
|
1105 | 0 | if (sd->smgr.nblocks == 0) |
1106 | 0 | desc = psprintf(_("file \"%s\""), path.str); |
1107 | 0 | else if (sd->smgr.nblocks == 1) |
1108 | 0 | desc = psprintf(_("block %u in file \"%s\""), |
1109 | 0 | sd->smgr.blockNum, |
1110 | 0 | path.str); |
1111 | 0 | else |
1112 | 0 | desc = psprintf(_("blocks %u..%u in file \"%s\""), |
1113 | 0 | sd->smgr.blockNum, |
1114 | 0 | sd->smgr.blockNum + sd->smgr.nblocks - 1, |
1115 | 0 | path.str); |
1116 | |
|
1117 | 0 | return desc; |
1118 | 0 | } |