/*-------------------------------------------------------------------------
 *
 * dsm.c
 *    manage dynamic shared memory segments
 *
 * This file provides a set of services to make programming with dynamic
 * shared memory segments more convenient.  Unlike the low-level
 * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
 * created using this module will be cleaned up automatically.  Mappings
 * will be removed when the resource owner under which they were created
 * is cleaned up, unless dsm_pin_mapping() is used, in which case they
 * have session lifespan.  Segments will be removed when there are no
 * remaining mappings, or at postmaster shutdown in any case.  After a
 * hard postmaster crash, remaining segments will be removed, if they
 * still exist, at the next postmaster startup.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/storage/ipc/dsm.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <fcntl.h>
#include <unistd.h>
#ifndef WIN32
#include <sys/mman.h>
#endif
#include <sys/stat.h>

#include "common/pg_prng.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "storage/dsm.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pg_shmem.h"
#include "storage/shmem.h"
#include "utils/freepage.h"
#include "utils/memutils.h"
#include "utils/resowner.h"

#define PG_DYNSHMEM_CONTROL_MAGIC       0x9a503d32

#define PG_DYNSHMEM_FIXED_SLOTS         64
#define PG_DYNSHMEM_SLOTS_PER_BACKEND   5

#define INVALID_CONTROL_SLOT            ((uint32) -1)

/* Backend-local tracking for on-detach callbacks. */
typedef struct dsm_segment_detach_callback
{
    on_dsm_detach_callback function;
    Datum       arg;
    slist_node  node;
} dsm_segment_detach_callback;

/* Backend-local state for a dynamic shared memory segment. */
struct dsm_segment
{
    dlist_node  node;           /* List link in dsm_segment_list. */
    ResourceOwner resowner;     /* Resource owner. */
    dsm_handle  handle;         /* Segment name. */
    uint32      control_slot;   /* Slot in control segment. */
    void       *impl_private;   /* Implementation-specific private data. */
    void       *mapped_address; /* Mapping address, or NULL if unmapped. */
    Size        mapped_size;    /* Size of our mapping. */
    slist_head  on_detach;      /* On-detach callbacks. */
};

/* Shared-memory state for a dynamic shared memory segment. */
typedef struct dsm_control_item
{
    dsm_handle  handle;
    uint32      refcnt;         /* 2+ = active, 1 = moribund, 0 = gone */
    size_t      first_page;
    size_t      npages;
    void       *impl_private_pm_handle; /* only needed on Windows */
    bool        pinned;
} dsm_control_item;

/* Layout of the dynamic shared memory control segment. */
typedef struct dsm_control_header
{
    uint32      magic;
    uint32      nitems;
    uint32      maxitems;
    dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
} dsm_control_header;

static void dsm_cleanup_for_mmap(void);
static void dsm_postmaster_shutdown(int code, Datum arg);
static dsm_segment *dsm_create_descriptor(void);
static bool dsm_control_segment_sane(dsm_control_header *control,
                                     Size mapped_size);
static uint64 dsm_control_bytes_needed(uint32 nitems);
static inline dsm_handle make_main_region_dsm_handle(int slot);
static inline bool is_main_region_dsm_handle(dsm_handle handle);

/* Has this backend initialized the dynamic shared memory system yet? */
static bool dsm_init_done = false;

/* Preallocated DSM space in the main shared memory region. */
static void *dsm_main_space_begin = NULL;

/*
 * List of dynamic shared memory segments used by this backend.
 *
 * At process exit time, we must decrement the reference count of each
 * segment we have attached; this list makes it possible to find all such
 * segments.
 *
 * This list should always be empty in the postmaster.  We could probably
 * allow the postmaster to map dynamic shared memory segments before it
 * begins to start child processes, provided that each process adjusted
 * the reference counts for those segments in the control segment at
 * startup time, but there's no obvious need for such a facility, which
 * would also be complex to handle in the EXEC_BACKEND case.  Once the
 * postmaster has begun spawning children, there's an additional problem:
 * each new mapping would require an update to the control segment,
 * which requires locking, in which the postmaster must not be involved.
 */
static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);

/*
 * Control segment information.
 *
 * Unlike ordinary shared memory segments, the control segment is not
 * reference counted; instead, it lasts for the postmaster's entire
 * life cycle.  For simplicity, it doesn't have a dsm_segment object either.
 */
static dsm_handle dsm_control_handle;
static dsm_control_header *dsm_control;
static Size dsm_control_mapped_size = 0;
static void *dsm_control_impl_private = NULL;


/* ResourceOwner callbacks to hold DSM segments */
static void ResOwnerReleaseDSM(Datum res);
static char *ResOwnerPrintDSM(Datum res);

static const ResourceOwnerDesc dsm_resowner_desc =
{
    .name = "dynamic shared memory segment",
    .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
    .release_priority = RELEASE_PRIO_DSMS,
    .ReleaseResource = ResOwnerReleaseDSM,
    .DebugPrint = ResOwnerPrintDSM
};

/* Convenience wrappers over ResourceOwnerRemember/Forget */
static inline void
ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
{
    ResourceOwnerRemember(owner, PointerGetDatum(seg), &dsm_resowner_desc);
}
static inline void
ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
{
    ResourceOwnerForget(owner, PointerGetDatum(seg), &dsm_resowner_desc);
}

/*
 * Start up the dynamic shared memory system.
 *
 * This is called just once during each cluster lifetime, at postmaster
 * startup time.
 */
void
dsm_postmaster_startup(PGShmemHeader *shim)
{
    void       *dsm_control_address = NULL;
    uint32      maxitems;
    Size        segsize;

    Assert(!IsUnderPostmaster);

    /*
     * If we're using the mmap implementations, clean up any leftovers.
     * Cleanup isn't needed on Windows, and happens earlier in startup for
     * POSIX and System V shared memory, via a direct call to
     * dsm_cleanup_using_control_segment.
     */
    if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
        dsm_cleanup_for_mmap();

    /* Determine size for new control segment. */
    maxitems = PG_DYNSHMEM_FIXED_SLOTS
        + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
    elog(DEBUG2, "dynamic shared memory system will support %u segments",
         maxitems);
    segsize = dsm_control_bytes_needed(maxitems);

    /*
     * Loop until we find an unused identifier for the new control segment.
     * We sometimes use DSM_HANDLE_INVALID as a sentinel value indicating
     * "no control segment", so avoid generating that value for a real
     * handle.
     */
    for (;;)
    {
        Assert(dsm_control_address == NULL);
        Assert(dsm_control_mapped_size == 0);
        /* Use even numbers only */
        dsm_control_handle = pg_prng_uint32(&pg_global_prng_state) << 1;
        if (dsm_control_handle == DSM_HANDLE_INVALID)
            continue;
        if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
                        &dsm_control_impl_private, &dsm_control_address,
                        &dsm_control_mapped_size, ERROR))
            break;
    }
    dsm_control = dsm_control_address;
    on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
    elog(DEBUG2,
         "created dynamic shared memory control segment %u (%zu bytes)",
         dsm_control_handle, segsize);
    shim->dsm_control = dsm_control_handle;

    /* Initialize control segment. */
    dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
    dsm_control->nitems = 0;
    dsm_control->maxitems = maxitems;
}

/*
 * Determine whether the control segment from the previous postmaster
 * invocation still exists.  If so, remove the dynamic shared memory
 * segments to which it refers, and then the control segment itself.
 */
void
dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
{
    void       *mapped_address = NULL;
    void       *junk_mapped_address = NULL;
    void       *impl_private = NULL;
    void       *junk_impl_private = NULL;
    Size        mapped_size = 0;
    Size        junk_mapped_size = 0;
    uint32      nitems;
    uint32      i;
    dsm_control_header *old_control;

    /*
     * Try to attach the segment.  If this fails, it probably just means that
     * the operating system has been rebooted and the segment no longer
     * exists, or an unrelated process has used the same shm ID.  So just
     * fall out quietly.
     */
    if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
                     &mapped_address, &mapped_size, DEBUG1))
        return;

    /*
     * We've managed to reattach it, but the contents might not be sane.  If
     * they aren't, we disregard the segment after all.
     */
    old_control = (dsm_control_header *) mapped_address;
    if (!dsm_control_segment_sane(old_control, mapped_size))
    {
        dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
                    &mapped_address, &mapped_size, LOG);
        return;
    }

    /*
     * OK, the control segment looks basically valid, so we can use it to get
     * a list of segments that need to be removed.
     */
    nitems = old_control->nitems;
    for (i = 0; i < nitems; ++i)
    {
        dsm_handle  handle;
        uint32      refcnt;

        /* If the reference count is 0, the slot is actually unused. */
        refcnt = old_control->item[i].refcnt;
        if (refcnt == 0)
            continue;

        /* If it was using the main shmem area, there is nothing to do. */
        handle = old_control->item[i].handle;
        if (is_main_region_dsm_handle(handle))
            continue;

        /* Log debugging information. */
        elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
             handle, refcnt);

        /* Destroy the referenced segment. */
        dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
                    &junk_mapped_address, &junk_mapped_size, LOG);
    }

    /* Destroy the old control segment, too. */
    elog(DEBUG2,
         "cleaning up dynamic shared memory control segment with ID %u",
         old_control_handle);
    dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
                &mapped_address, &mapped_size, LOG);
}

/*
 * When we're using the mmap shared memory implementation, "shared memory"
 * segments might even manage to survive an operating system reboot.
 * But there's no guarantee as to exactly what will survive: some segments
 * may survive, and others may not, and the contents of some may be out
 * of date.  In particular, the control segment may be out of date, so we
 * can't rely on it to figure out what to remove.  However, since we know
 * what directory contains the files we used as shared memory, we can simply
 * scan the directory and blow everything away that shouldn't be there.
 */
static void
dsm_cleanup_for_mmap(void)
{
    DIR        *dir;
    struct dirent *dent;

    /* Scan the directory for something with a name of the correct format. */
    dir = AllocateDir(PG_DYNSHMEM_DIR);

    while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
    {
        if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
                    strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
        {
            char        buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];

            snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);

            elog(DEBUG2, "removing file \"%s\"", buf);

            /* We found a matching file; so remove it. */
            if (unlink(buf) != 0)
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not remove file \"%s\": %m", buf)));
        }
    }

    /* Cleanup complete. */
    FreeDir(dir);
}

/*
 * At shutdown time, we iterate over the control segment and remove all
 * remaining dynamic shared memory segments.  We avoid throwing errors here;
 * the postmaster is shutting down either way, and this is just non-critical
 * resource cleanup.
 */
static void
dsm_postmaster_shutdown(int code, Datum arg)
{
    uint32      nitems;
    uint32      i;
    void       *dsm_control_address;
    void       *junk_mapped_address = NULL;
    void       *junk_impl_private = NULL;
    Size        junk_mapped_size = 0;
    PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);

    /*
     * If some other backend exited uncleanly, it might have corrupted the
     * control segment while it was dying.  In that case, we warn and ignore
     * the contents of the control segment.  This may end up leaving behind
     * stray shared memory segments, but there's not much we can do about
     * that if the metadata is gone.
     */
    nitems = dsm_control->nitems;
    if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
    {
        ereport(LOG,
                (errmsg("dynamic shared memory control segment is corrupt")));
        return;
    }

    /* Remove any remaining segments. */
    for (i = 0; i < nitems; ++i)
    {
        dsm_handle  handle;

        /* If the reference count is 0, the slot is actually unused. */
        if (dsm_control->item[i].refcnt == 0)
            continue;

        handle = dsm_control->item[i].handle;
        if (is_main_region_dsm_handle(handle))
            continue;

        /* Log debugging information. */
        elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
             handle);

        /* Destroy the segment. */
        dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
                    &junk_mapped_address, &junk_mapped_size, LOG);
    }

    /* Remove the control segment itself. */
    elog(DEBUG2,
         "cleaning up dynamic shared memory control segment with ID %u",
         dsm_control_handle);
    dsm_control_address = dsm_control;
    dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
                &dsm_control_impl_private, &dsm_control_address,
                &dsm_control_mapped_size, LOG);
    dsm_control = dsm_control_address;
    shim->dsm_control = 0;
}

/*
 * Prepare this backend for dynamic shared memory usage.  Under EXEC_BACKEND,
 * we must reread the state file and map the control segment; in other cases,
 * we'll have inherited the postmaster's mapping and global variables.
 */
static void
dsm_backend_startup(void)
{
#ifdef EXEC_BACKEND
    if (IsUnderPostmaster)
    {
        void       *control_address = NULL;

        /* Attach control segment. */
        Assert(dsm_control_handle != 0);
        dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
                    &dsm_control_impl_private, &control_address,
                    &dsm_control_mapped_size, ERROR);
        dsm_control = control_address;
        /* If control segment doesn't look sane, something is badly wrong. */
        if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
        {
            dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
                        &dsm_control_impl_private, &control_address,
                        &dsm_control_mapped_size, WARNING);
            ereport(FATAL,
                    (errcode(ERRCODE_INTERNAL_ERROR),
                     errmsg("dynamic shared memory control segment is not valid")));
        }
    }
#endif

    dsm_init_done = true;
}

#ifdef EXEC_BACKEND
/*
 * When running under EXEC_BACKEND, we get a callback here when the main
 * shared memory segment is re-attached, so that we can record the control
 * handle retrieved from it.
 */
void
dsm_set_control_handle(dsm_handle h)
{
    Assert(dsm_control_handle == 0 && h != 0);
    dsm_control_handle = h;
}
#endif

/*
 * Reserve some space in the main shared memory segment for DSM segments.
 */
size_t
dsm_estimate_size(void)
{
    return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
}

/*
 * Initialize space in the main shared memory segment for DSM segments.
 */
void
dsm_shmem_init(void)
{
    size_t      size = dsm_estimate_size();
    bool        found;

    if (size == 0)
        return;

    dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
    if (!found)
    {
        FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin;
        size_t      first_page = 0;
        size_t      pages;

        /* Reserve space for the FreePageManager. */
        while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
            ++first_page;

        /* Initialize it and give it all the rest of the space. */
        FreePageManagerInitialize(fpm, dsm_main_space_begin);
        pages = (size / FPM_PAGE_SIZE) - first_page;
        FreePageManagerPut(fpm, first_page, pages);
    }
}

/*
 * Create a new dynamic shared memory segment.
 *
 * If there is a non-NULL CurrentResourceOwner, the new segment is associated
 * with it and must be detached before the resource owner releases, or a
 * warning will be logged.  If CurrentResourceOwner is NULL, the segment
 * remains attached until explicitly detached or the session ends.
 * Creating with a NULL CurrentResourceOwner is equivalent to creating
 * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
 */
dsm_segment *
dsm_create(Size size, int flags)
{
    dsm_segment *seg;
    uint32      i;
    uint32      nitems;
    size_t      npages = 0;
    size_t      first_page = 0;
    FreePageManager *dsm_main_space_fpm = dsm_main_space_begin;
    bool        using_main_dsm_region = false;

    /*
     * Unsafe in postmaster.  It might seem pointless to allow use of dsm in
     * single user mode, but otherwise some subsystems will need dedicated
     * single user mode code paths.
     */
    Assert(IsUnderPostmaster || !IsPostmasterEnvironment);

    if (!dsm_init_done)
        dsm_backend_startup();

    /* Create a new segment descriptor. */
    seg = dsm_create_descriptor();

    /*
     * Lock the control segment while we try to allocate from the main shared
     * memory area, if configured.
     */
    if (dsm_main_space_fpm)
    {
        npages = size / FPM_PAGE_SIZE;
        if (size % FPM_PAGE_SIZE > 0)
            ++npages;

        LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
        if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
        {
            /* We can carve out a piece of the main shared memory segment. */
            seg->mapped_address = (char *) dsm_main_space_begin +
                first_page * FPM_PAGE_SIZE;
            seg->mapped_size = npages * FPM_PAGE_SIZE;
            using_main_dsm_region = true;
            /* We'll choose a handle below. */
        }
    }

    if (!using_main_dsm_region)
    {
        /*
         * We need to create a new memory segment.  Loop until we find an
         * unused segment identifier.
         */
        if (dsm_main_space_fpm)
            LWLockRelease(DynamicSharedMemoryControlLock);
        for (;;)
        {
            Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
            /* Use even numbers only */
            seg->handle = pg_prng_uint32(&pg_global_prng_state) << 1;
            if (seg->handle == DSM_HANDLE_INVALID)  /* Reserve sentinel */
                continue;
            if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
                            &seg->mapped_address, &seg->mapped_size, ERROR))
                break;
        }
        LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
    }

    /* Search the control segment for an unused slot. */
    nitems = dsm_control->nitems;
    for (i = 0; i < nitems; ++i)
    {
        if (dsm_control->item[i].refcnt == 0)
        {
            if (using_main_dsm_region)
            {
                seg->handle = make_main_region_dsm_handle(i);
                dsm_control->item[i].first_page = first_page;
                dsm_control->item[i].npages = npages;
            }
            else
                Assert(!is_main_region_dsm_handle(seg->handle));
            dsm_control->item[i].handle = seg->handle;
            /* refcnt of 1 triggers destruction, so start at 2 */
            dsm_control->item[i].refcnt = 2;
            dsm_control->item[i].impl_private_pm_handle = NULL;
            dsm_control->item[i].pinned = false;
            seg->control_slot = i;
            LWLockRelease(DynamicSharedMemoryControlLock);
            return seg;
        }
    }

    /* Verify that we can support an additional mapping. */
    if (nitems >= dsm_control->maxitems)
    {
        if (using_main_dsm_region)
            FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
        LWLockRelease(DynamicSharedMemoryControlLock);
        if (!using_main_dsm_region)
            dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
                        &seg->mapped_address, &seg->mapped_size, WARNING);
        if (seg->resowner != NULL)
            ResourceOwnerForgetDSM(seg->resowner, seg);
        dlist_delete(&seg->node);
        pfree(seg);

        if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
            return NULL;
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                 errmsg("too many dynamic shared memory segments")));
    }

    /* Enter the handle into a new array slot. */
    if (using_main_dsm_region)
    {
        seg->handle = make_main_region_dsm_handle(nitems);
        dsm_control->item[i].first_page = first_page;
        dsm_control->item[i].npages = npages;
    }
    dsm_control->item[nitems].handle = seg->handle;
    /* refcnt of 1 triggers destruction, so start at 2 */
    dsm_control->item[nitems].refcnt = 2;
    dsm_control->item[nitems].impl_private_pm_handle = NULL;
    dsm_control->item[nitems].pinned = false;
    seg->control_slot = nitems;
    dsm_control->nitems++;
    LWLockRelease(DynamicSharedMemoryControlLock);

    return seg;
}
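
/*
 * Usage sketch (illustrative only, not part of this file): a caller that
 * prefers a soft failure when all control-segment slots are taken can pass
 * DSM_CREATE_NULL_IF_MAXSEGMENTS and test for NULL.  "MySharedState" is a
 * hypothetical caller-defined struct.
 *
 *      dsm_segment *seg;
 *      MySharedState *state;
 *
 *      seg = dsm_create(sizeof(MySharedState), DSM_CREATE_NULL_IF_MAXSEGMENTS);
 *      if (seg == NULL)
 *          ereport(ERROR,
 *                  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
 *                   errmsg("out of dynamic shared memory segments")));
 *      state = (MySharedState *) dsm_segment_address(seg);
 *
 * With flags == 0, dsm_create itself raises the "too many dynamic shared
 * memory segments" ERROR instead of returning NULL.
 */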

/*
 * Attach a dynamic shared memory segment.
 *
 * See comments for dsm_segment_handle() for an explanation of how this
 * is intended to be used.
 *
 * This function will return NULL if the segment isn't known to the system.
 * This can happen if we're asked to attach the segment, but then everyone
 * else detaches it (causing it to be destroyed) before we get around to
 * attaching it.
 *
 * If there is a non-NULL CurrentResourceOwner, the attached segment is
 * associated with it and must be detached before the resource owner
 * releases, or a warning will be logged.  Otherwise the segment remains
 * attached until explicitly detached or the session ends.  See the note
 * atop dsm_create().
 */
dsm_segment *
dsm_attach(dsm_handle h)
{
    dsm_segment *seg;
    dlist_iter  iter;
    uint32      i;
    uint32      nitems;

    /* Unsafe in postmaster (and pointless in a stand-alone backend). */
    Assert(IsUnderPostmaster);

    if (!dsm_init_done)
        dsm_backend_startup();

    /*
     * Since this is just a debugging cross-check, we could leave it out
     * altogether, or include it only in assert-enabled builds.  But since
     * the list of attached segments should normally be very short, let's
     * include it always for right now.
     *
     * If you're hitting this error, you probably want to attempt to find an
     * existing mapping via dsm_find_mapping() before calling dsm_attach() to
     * create a new one.
     */
    dlist_foreach(iter, &dsm_segment_list)
    {
        seg = dlist_container(dsm_segment, node, iter.cur);
        if (seg->handle == h)
            elog(ERROR, "can't attach the same segment more than once");
    }

    /* Create a new segment descriptor. */
    seg = dsm_create_descriptor();
    seg->handle = h;

    /* Bump reference count for this segment in shared memory. */
    LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
    nitems = dsm_control->nitems;
    for (i = 0; i < nitems; ++i)
    {
        /*
         * If the reference count is 0, the slot is actually unused.  If the
         * reference count is 1, the slot is still in use, but the segment is
         * in the process of going away; even if the handle matches, another
         * slot may already have started using the same handle value by
         * coincidence so we have to keep searching.
         */
        if (dsm_control->item[i].refcnt <= 1)
            continue;

        /* If the handle doesn't match, it's not the slot we want. */
        if (dsm_control->item[i].handle != seg->handle)
            continue;

        /* Otherwise we've found a match. */
        dsm_control->item[i].refcnt++;
        seg->control_slot = i;
        if (is_main_region_dsm_handle(seg->handle))
        {
            seg->mapped_address = (char *) dsm_main_space_begin +
                dsm_control->item[i].first_page * FPM_PAGE_SIZE;
            seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE;
        }
        break;
    }
    LWLockRelease(DynamicSharedMemoryControlLock);

    /*
     * If we didn't find the handle we're looking for in the control segment,
     * it probably means that everyone else who had it mapped, including the
     * original creator, died before we got to this point.  It's up to the
     * caller to decide what to do about that.
     */
    if (seg->control_slot == INVALID_CONTROL_SLOT)
    {
        dsm_detach(seg);
        return NULL;
    }

    /* Here's where we actually try to map the segment. */
    if (!is_main_region_dsm_handle(seg->handle))
        dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
                    &seg->mapped_address, &seg->mapped_size, ERROR);

    return seg;
}
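
/*
 * Usage sketch (illustrative only): a NULL return from dsm_attach must be
 * handled, since the segment can vanish between handle transfer and attach.
 * The handle variable "h" is assumed to have arrived from the creating
 * process.
 *
 *      dsm_segment *seg = dsm_attach(h);
 *
 *      if (seg == NULL)
 *          ereport(ERROR,
 *                  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 *                   errmsg("could not map dynamic shared memory segment")));
 */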

/*
 * At backend shutdown time, detach any segments that are still attached.
 * (This is similar to dsm_detach_all, except that there's no reason to
 * unmap the control segment before exiting, so we don't bother.)
 */
void
dsm_backend_shutdown(void)
{
    while (!dlist_is_empty(&dsm_segment_list))
    {
        dsm_segment *seg;

        seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
        dsm_detach(seg);
    }
}

/*
 * Detach all shared memory segments, including the control segment.  This
 * should be called, along with PGSharedMemoryDetach, in processes that
 * might inherit mappings but are not intended to be connected to dynamic
 * shared memory.
 */
void
dsm_detach_all(void)
{
    void       *control_address = dsm_control;

    while (!dlist_is_empty(&dsm_segment_list))
    {
        dsm_segment *seg;

        seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
        dsm_detach(seg);
    }

    if (control_address != NULL)
        dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
                    &dsm_control_impl_private, &control_address,
                    &dsm_control_mapped_size, ERROR);
}

/*
 * Detach from a shared memory segment, destroying the segment if we
 * remove the last reference.
 *
 * This function should never fail.  It will often be invoked when aborting
 * a transaction, and a further error won't serve any purpose.  It's not a
 * complete disaster if we fail to unmap or destroy the segment; it means a
 * resource leak, but that doesn't necessarily preclude further operations.
 */
void
dsm_detach(dsm_segment *seg)
{
    /*
     * Invoke registered callbacks.  Just in case one of those callbacks
     * throws a further error that brings us back here, pop the callback
     * before invoking it, to avoid infinite error recursion.  Don't allow
     * interrupts while running the individual callbacks in non-error code
     * paths, to avoid leaving cleanup work unfinished if we're interrupted
     * by a statement timeout or similar.
     */
    HOLD_INTERRUPTS();
    while (!slist_is_empty(&seg->on_detach))
    {
        slist_node *node;
        dsm_segment_detach_callback *cb;
        on_dsm_detach_callback function;
        Datum       arg;

        node = slist_pop_head_node(&seg->on_detach);
        cb = slist_container(dsm_segment_detach_callback, node, node);
        function = cb->function;
        arg = cb->arg;
        pfree(cb);

        function(seg, arg);
    }
    RESUME_INTERRUPTS();

    /*
     * Try to remove the mapping, if one exists.  Normally, there will be,
     * but maybe not, if we failed partway through a create or attach
     * operation.  We remove the mapping before decrementing the reference
     * count so that the process that sees a zero reference count can be
     * certain that no remaining mappings exist.  Even if this fails, we
     * pretend that it works, because retrying is likely to fail in the same
     * way.
     */
    if (seg->mapped_address != NULL)
    {
        if (!is_main_region_dsm_handle(seg->handle))
            dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
                        &seg->mapped_address, &seg->mapped_size, WARNING);
        seg->impl_private = NULL;
        seg->mapped_address = NULL;
        seg->mapped_size = 0;
    }

    /* Reduce reference count, if we previously increased it. */
    if (seg->control_slot != INVALID_CONTROL_SLOT)
    {
        uint32      refcnt;
        uint32      control_slot = seg->control_slot;

        LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
        Assert(dsm_control->item[control_slot].handle == seg->handle);
        Assert(dsm_control->item[control_slot].refcnt > 1);
        refcnt = --dsm_control->item[control_slot].refcnt;
        seg->control_slot = INVALID_CONTROL_SLOT;
        LWLockRelease(DynamicSharedMemoryControlLock);

        /* If new reference count is 1, try to destroy the segment. */
        if (refcnt == 1)
        {
            /* A pinned segment should never reach 1. */
            Assert(!dsm_control->item[control_slot].pinned);

            /*
             * If we fail to destroy the segment here, or are killed before
             * we finish doing so, the reference count will remain at 1,
             * which will mean that nobody else can attach to the segment.
             * At postmaster shutdown time, or when a new postmaster is
             * started after a hard kill, another attempt will be made to
             * remove the segment.
             *
             * The main case we're worried about here is being killed by a
             * signal before we can finish removing the segment.  In that
             * case, it's important to be sure that the segment still gets
             * removed.  If we actually fail to remove the segment for some
             * other reason, the postmaster may not have any better luck
             * than we did.  There's not much we can do about that, though.
             */
            if (is_main_region_dsm_handle(seg->handle) ||
                dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
                            &seg->mapped_address, &seg->mapped_size, WARNING))
            {
                LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
                if (is_main_region_dsm_handle(seg->handle))
                    FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
                                       dsm_control->item[control_slot].first_page,
                                       dsm_control->item[control_slot].npages);
                Assert(dsm_control->item[control_slot].handle == seg->handle);
                Assert(dsm_control->item[control_slot].refcnt == 1);
                dsm_control->item[control_slot].refcnt = 0;
                LWLockRelease(DynamicSharedMemoryControlLock);
            }
        }
    }

    /* Clean up our remaining backend-private data structures. */
    if (seg->resowner != NULL)
        ResourceOwnerForgetDSM(seg->resowner, seg);
    dlist_delete(&seg->node);
    pfree(seg);
}

/*
 * Keep a dynamic shared memory mapping until end of session.
 *
 * By default, mappings are owned by the current resource owner, which
 * typically means they stick around for the duration of the current query
 * only.
 */
void
dsm_pin_mapping(dsm_segment *seg)
{
    if (seg->resowner != NULL)
    {
        ResourceOwnerForgetDSM(seg->resowner, seg);
        seg->resowner = NULL;
    }
}
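
/*
 * Usage sketch (illustrative only): retaining a mapping for the rest of the
 * session rather than for the current resource owner's lifetime.
 * "request_size" is a hypothetical caller-supplied value.
 *
 *      dsm_segment *seg = dsm_create(request_size, 0);
 *
 *      dsm_pin_mapping(seg);
 *
 * Without the dsm_pin_mapping() call, resource-owner cleanup at end of
 * query would release the mapping, logging a leak warning if the segment
 * was still attached.
 */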

/*
 * Arrange to remove a dynamic shared memory mapping at cleanup time.
 *
 * dsm_pin_mapping() can be used to preserve a mapping for the entire
 * lifetime of a process; this function reverses that decision, making
 * the segment owned by the current resource owner.  This may be useful
 * just before performing some operation that will invalidate the segment
 * for future use by this backend.
 */
void
dsm_unpin_mapping(dsm_segment *seg)
{
    Assert(seg->resowner == NULL);
    ResourceOwnerEnlarge(CurrentResourceOwner);
    seg->resowner = CurrentResourceOwner;
    ResourceOwnerRememberDSM(seg->resowner, seg);
}

/*
 * Keep a dynamic shared memory segment until postmaster shutdown, or until
 * dsm_unpin_segment is called.
 *
 * This function should not be called more than once per segment, unless the
 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
 *
 * Note that this function does not arrange for the current process to
 * keep the segment mapped indefinitely; if that behavior is desired,
 * dsm_pin_mapping() should be used from each process that needs to
 * retain the mapping.
 */
void
dsm_pin_segment(dsm_segment *seg)
{
    void       *handle = NULL;

    /*
     * Bump reference count for this segment in shared memory.  This will
     * ensure that even if there is no session which is attached to this
     * segment, it will remain until postmaster shutdown or an explicit call
     * to unpin.
     */
    LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
    if (dsm_control->item[seg->control_slot].pinned)
        elog(ERROR, "cannot pin a segment that is already pinned");
    if (!is_main_region_dsm_handle(seg->handle))
        dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
    dsm_control->item[seg->control_slot].pinned = true;
    dsm_control->item[seg->control_slot].refcnt++;
    dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
    LWLockRelease(DynamicSharedMemoryControlLock);
}
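
/*
 * Usage sketch (illustrative only): pinning lets a segment outlive every
 * mapping; a later dsm_unpin_segment by handle drops the extra reference.
 * "request_size" is hypothetical.
 *
 *      dsm_segment *seg = dsm_create(request_size, 0);
 *      dsm_handle  h = dsm_segment_handle(seg);
 *
 *      dsm_pin_segment(seg);
 *      dsm_detach(seg);        (segment survives with no backend attached)
 *      ...
 *      dsm_unpin_segment(h);   (destroyed here if no references remain)
 */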

/*
 * Unpin a dynamic shared memory segment that was previously pinned with
 * dsm_pin_segment.  This function should not be called unless
 * dsm_pin_segment was previously called for this segment.
 *
 * The argument is a dsm_handle rather than a dsm_segment in case you want
 * to unpin a segment to which you haven't attached.  This turns out to be
 * useful if, for example, a reference to one shared memory segment is stored
 * within another shared memory segment.  You might want to unpin the
 * referenced segment before destroying the referencing segment.
 */
void
dsm_unpin_segment(dsm_handle handle)
{
    uint32      control_slot = INVALID_CONTROL_SLOT;
    bool        destroy = false;
    uint32      i;

    /* Find the control slot for the given handle. */
    LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
    for (i = 0; i < dsm_control->nitems; ++i)
    {
        /* Skip unused slots and segments that are concurrently going away. */
        if (dsm_control->item[i].refcnt <= 1)
            continue;

        /* If we've found our handle, we can stop searching. */
        if (dsm_control->item[i].handle == handle)
        {
            control_slot = i;
            break;
        }
    }

    /*
     * We should definitely have found the slot, and it should not already be
     * in the process of going away, because this function should only be
     * called on a segment which is pinned.
     */
    if (control_slot == INVALID_CONTROL_SLOT)
        elog(ERROR, "cannot unpin unknown segment handle");
    if (!dsm_control->item[control_slot].pinned)
        elog(ERROR, "cannot unpin a segment that is not pinned");
    Assert(dsm_control->item[control_slot].refcnt > 1);

    /*
     * Allow implementation-specific code to run.  We have to do this before
     * releasing the lock, because impl_private_pm_handle may get modified by
     * dsm_impl_unpin_segment.
     */
    if (!is_main_region_dsm_handle(handle))
        dsm_impl_unpin_segment(handle,
                               &dsm_control->item[control_slot].impl_private_pm_handle);

    /* Note that 1 means no references (0 means unused slot). */
    if (--dsm_control->item[control_slot].refcnt == 1)
        destroy = true;
    dsm_control->item[control_slot].pinned = false;

    /* Now we can release the lock. */
    LWLockRelease(DynamicSharedMemoryControlLock);

    /* Clean up resources if that was the last reference. */
    if (destroy)
    {
        void       *junk_impl_private = NULL;
        void       *junk_mapped_address = NULL;
        Size        junk_mapped_size = 0;

        /*
         * For an explanation of how error handling works in this case, see
         * comments in dsm_detach.  Note that if we reach this point, the
         * current process certainly does not have the segment mapped,
         * because if it did, the reference count would have still been
         * greater than 1 even after releasing the reference count held by
         * the pin.  The fact that there can't be a dsm_segment for this
         * handle makes it OK to pass the mapped size, mapped address, and
         * private data as NULL here.
         */
        if (is_main_region_dsm_handle(handle) ||
            dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
                        &junk_mapped_address, &junk_mapped_size, WARNING))
        {
            LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
            if (is_main_region_dsm_handle(handle))
                FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
                                   dsm_control->item[control_slot].first_page,
                                   dsm_control->item[control_slot].npages);
            Assert(dsm_control->item[control_slot].handle == handle);
            Assert(dsm_control->item[control_slot].refcnt == 1);
            dsm_control->item[control_slot].refcnt = 0;
            LWLockRelease(DynamicSharedMemoryControlLock);
        }
    }
}

/*
 * Find an existing mapping for a shared memory segment, if there is one.
 */
dsm_segment *
dsm_find_mapping(dsm_handle handle)
{
    dlist_iter  iter;
    dsm_segment *seg;

    dlist_foreach(iter, &dsm_segment_list)
    {
        seg = dlist_container(dsm_segment, node, iter.cur);
        if (seg->handle == handle)
            return seg;
    }

    return NULL;
}

/*
 * Get the address at which a dynamic shared memory segment is mapped.
 */
void *
dsm_segment_address(dsm_segment *seg)
{
    Assert(seg->mapped_address != NULL);
    return seg->mapped_address;
}

/*
 * Get the size of a mapping.
 */
Size
dsm_segment_map_length(dsm_segment *seg)
{
    Assert(seg->mapped_address != NULL);
    return seg->mapped_size;
}

/*
 * Get a handle for a mapping.
 *
 * To establish communication via dynamic shared memory between two backends,
 * one of them should first call dsm_create() to establish a new shared
 * memory mapping.  That process should then call dsm_segment_handle() to
 * obtain a handle for the mapping, and pass that handle to the
 * coordinating backend via some means (e.g. bgw_main_arg, or via the
 * main shared memory segment).  The recipient, once in possession of the
 * handle, should call dsm_attach().
 */
dsm_handle
dsm_segment_handle(dsm_segment *seg)
{
    return seg->handle;
}
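
/*
 * Sketch of the handoff described above (illustrative only).  "shared" is a
 * hypothetical structure visible to both processes, e.g. placed in the main
 * shared memory segment, and the helper functions are stand-ins.
 *
 * Creating backend:
 *
 *      dsm_segment *seg = dsm_create(request_size, 0);
 *
 *      initialize_contents(dsm_segment_address(seg));
 *      shared->chunk_handle = dsm_segment_handle(seg);
 *
 * Receiving backend:
 *
 *      dsm_segment *seg = dsm_attach(shared->chunk_handle);
 *
 *      if (seg != NULL)
 *          use_contents(dsm_segment_address(seg));
 */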

/*
 * Register an on-detach callback for a dynamic shared memory segment.
 */
void
on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
{
    dsm_segment_detach_callback *cb;

    cb = MemoryContextAlloc(TopMemoryContext,
                            sizeof(dsm_segment_detach_callback));
    cb->function = function;
    cb->arg = arg;
    slist_push_head(&seg->on_detach, &cb->node);
}
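
/*
 * Usage sketch (illustrative only): "my_cleanup" and "my_state" are
 * hypothetical.  Because registration pushes onto the head of the list,
 * callbacks run in reverse order of registration when the segment is
 * detached.
 *
 *      static void
 *      my_cleanup(dsm_segment *seg, Datum arg)
 *      {
 *          release_resources(DatumGetPointer(arg));
 *      }
 *
 *      on_dsm_detach(seg, my_cleanup, PointerGetDatum(my_state));
 */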

/*
 * Unregister an on-detach callback for a dynamic shared memory segment.
 */
void
cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
                     Datum arg)
{
    slist_mutable_iter iter;

    slist_foreach_modify(iter, &seg->on_detach)
    {
        dsm_segment_detach_callback *cb;

        cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
        if (cb->function == function && cb->arg == arg)
        {
            slist_delete_current(&iter);
            pfree(cb);
            break;
        }
    }
}

/*
 * Discard all registered on-detach callbacks without executing them.
 */
void
reset_on_dsm_detach(void)
{
    dlist_iter  iter;

    dlist_foreach(iter, &dsm_segment_list)
    {
        dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);

        /* Throw away explicit on-detach actions one by one. */
        while (!slist_is_empty(&seg->on_detach))
        {
            slist_node *node;
            dsm_segment_detach_callback *cb;

            node = slist_pop_head_node(&seg->on_detach);
            cb = slist_container(dsm_segment_detach_callback, node, node);
            pfree(cb);
        }

        /*
         * Decrementing the reference count is a sort of implicit on-detach
         * action; make sure we don't do that, either.
         */
        seg->control_slot = INVALID_CONTROL_SLOT;
    }
}

/*
 * Create a segment descriptor.
 */
static dsm_segment *
dsm_create_descriptor(void)
{
    dsm_segment *seg;

    if (CurrentResourceOwner)
        ResourceOwnerEnlarge(CurrentResourceOwner);

    seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
    dlist_push_head(&dsm_segment_list, &seg->node);

    /* seg->handle must be initialized by the caller */
    seg->control_slot = INVALID_CONTROL_SLOT;
    seg->impl_private = NULL;
    seg->mapped_address = NULL;
    seg->mapped_size = 0;

    seg->resowner = CurrentResourceOwner;
    if (CurrentResourceOwner)
        ResourceOwnerRememberDSM(CurrentResourceOwner, seg);

    slist_init(&seg->on_detach);

    return seg;
}

/*
 * Sanity check a control segment.
 *
 * The goal here isn't to detect everything that could possibly be wrong with
 * the control segment; there's not enough information for that.  Rather, the
 * goal is to make sure that someone can iterate over the items in the
 * segment without overrunning the end of the mapping and crashing.  We also
 * check the magic number since, if that's messed up, this may not even be
 * one of our segments at all.
 */
static bool
dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
{
    if (mapped_size < offsetof(dsm_control_header, item))
        return false;           /* Mapped size too short to read header. */
    if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
        return false;           /* Magic number doesn't match. */
    if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
        return false;           /* Max item count won't fit in map. */
    if (control->nitems > control->maxitems)
        return false;           /* Overfull. */
    return true;
}

/*
 * Compute the number of control-segment bytes needed to store a given
 * number of items.
 */
static uint64
dsm_control_bytes_needed(uint32 nitems)
{
    return offsetof(dsm_control_header, item)
        + sizeof(dsm_control_item) * (uint64) nitems;
}
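
/*
 * Worked example (illustrative; sizes assume a typical 64-bit platform on
 * which sizeof(dsm_control_item) is 40 and offsetof(dsm_control_header,
 * item) is 16): with MaxBackends = 128, maxitems is 64 + 5 * 128 = 704, so
 * the control segment needs 16 + 40 * 704 = 28176 bytes.  The uint64
 * arithmetic keeps the multiplication from overflowing for very large
 * nitems values.
 */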

static inline dsm_handle
make_main_region_dsm_handle(int slot)
{
    dsm_handle  handle;

    /*
     * We need to create a handle that doesn't collide with any existing
     * extra segment created by dsm_impl_op(), so we'll make it odd.  It also
     * mustn't collide with any other main area pseudo-segment, so we'll
     * include the slot number in some of the bits.  We also want to make an
     * effort to keep newly created and recently destroyed handles from being
     * confused, so we'll make the rest of the bits random.
     */
    handle = 1;
    handle |= slot << 1;
    handle |= pg_prng_uint32(&pg_global_prng_state) << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1);
    return handle;
}
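
/*
 * Illustrative example: with maxitems = 704, pg_leftmost_one_pos32(704) is
 * 9, so the random bits are shifted left by 10.  A handle for slot 5 is
 * then (random << 10) | (5 << 1) | 1: odd (so it cannot collide with the
 * even handles generated for ordinary segments above), slot-specific in
 * the low bits, and randomized in the high bits across create/destroy
 * cycles.
 */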

static inline bool
is_main_region_dsm_handle(dsm_handle handle)
{
    return handle & 1;
}

/* ResourceOwner callbacks */

static void
ResOwnerReleaseDSM(Datum res)
{
    dsm_segment *seg = (dsm_segment *) DatumGetPointer(res);

    seg->resowner = NULL;
    dsm_detach(seg);
}
static char *
ResOwnerPrintDSM(Datum res)
{
    dsm_segment *seg = (dsm_segment *) DatumGetPointer(res);

    return psprintf("dynamic shared memory segment %u",
                    dsm_segment_handle(seg));
}