/src/postgres/src/backend/storage/ipc/pmsignal.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * pmsignal.c |
4 | | * routines for signaling between the postmaster and its child processes |
5 | | * |
6 | | * |
7 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
8 | | * Portions Copyright (c) 1994, Regents of the University of California |
9 | | * |
10 | | * IDENTIFICATION |
11 | | * src/backend/storage/ipc/pmsignal.c |
12 | | * |
13 | | *------------------------------------------------------------------------- |
14 | | */ |
15 | | #include "postgres.h" |
16 | | |
17 | | #include <signal.h> |
18 | | #include <unistd.h> |
19 | | |
20 | | #ifdef HAVE_SYS_PRCTL_H |
21 | | #include <sys/prctl.h> |
22 | | #endif |
23 | | |
24 | | #include "miscadmin.h" |
25 | | #include "postmaster/postmaster.h" |
26 | | #include "replication/walsender.h" |
27 | | #include "storage/ipc.h" |
28 | | #include "storage/pmsignal.h" |
29 | | #include "storage/shmem.h" |
30 | | #include "utils/memutils.h" |
31 | | |
32 | | |
33 | | /* |
34 | | * The postmaster is signaled by its children by sending SIGUSR1. The |
35 | | * specific reason is communicated via flags in shared memory. We keep |
36 | | * a boolean flag for each possible "reason", so that different reasons |
37 | | * can be signaled by different backends at the same time. (However, |
38 | | * if the same reason is signaled more than once simultaneously, the |
39 | | * postmaster will observe it only once.) |
40 | | * |
41 | | * The flags are actually declared as "volatile sig_atomic_t" for maximum |
42 | | * portability. This should ensure that loads and stores of the flag |
43 | | * values are atomic, allowing us to dispense with any explicit locking. |
44 | | * |
45 | | * In addition to the per-reason flags, we store a set of per-child-process |
46 | | * flags that are currently used only for detecting whether a backend has |
47 | | * exited without performing proper shutdown. The per-child-process flags |
48 | | * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is |
49 | | * available for assignment. An ASSIGNED slot is associated with a postmaster |
50 | | * child process, but either the process has not touched shared memory yet, or |
51 | | * it has successfully cleaned up after itself. An ACTIVE slot means the |
52 | | * process is actively using shared memory. The slots are assigned to child |
53 | | * processes by postmaster, and pmchild.c is responsible for tracking which |
54 | | * one goes with which PID. |
55 | | * |
56 | | * Actually there is a fourth state, WALSENDER. This is just like ACTIVE, |
57 | | * but carries the extra information that the child is a WAL sender. |
58 | | * WAL senders too start in ACTIVE state, but switch to WALSENDER once they |
59 | | * start streaming the WAL (and they never go back to ACTIVE after that). |
60 | | * |
61 | | * We also have a shared-memory field that is used for communication in |
62 | | * the opposite direction, from postmaster to children: it tells why the |
63 | | * postmaster has broadcasted SIGQUIT signals, if indeed it has done so. |
64 | | */ |
65 | | |
/*
 * Per-child slot states.  Every value must be representable in a
 * sig_atomic_t, since that is the element type of PMChildFlags[].
 */
#define PM_CHILD_UNUSED		0	/* slot is free and may be assigned */
#define PM_CHILD_ASSIGNED	1	/* slot belongs to a child that is not
								 * (or no longer) using shared memory */
#define PM_CHILD_ACTIVE		2	/* child is actively using shared memory */
#define PM_CHILD_WALSENDER	3	/* like ACTIVE, and the child is a WAL
								 * sender */
70 | | |
71 | | /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */ |
72 | | struct PMSignalData |
73 | | { |
74 | | /* per-reason flags for signaling the postmaster */ |
75 | | sig_atomic_t PMSignalFlags[NUM_PMSIGNALS]; |
76 | | /* global flags for signals from postmaster to children */ |
77 | | QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */ |
78 | | /* per-child-process flags */ |
79 | | int num_child_flags; /* # of entries in PMChildFlags[] */ |
80 | | sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER]; |
81 | | }; |
82 | | |
83 | | /* PMSignalState pointer is valid in both postmaster and child processes */ |
84 | | NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL; |
85 | | |
86 | | /* |
87 | | * Local copy of PMSignalState->num_child_flags, only valid in the |
88 | | * postmaster. Postmaster keeps a local copy so that it doesn't need to |
89 | | * trust the value in shared memory. |
90 | | */ |
91 | | static int num_child_flags; |
92 | | |
/*
 * Support for being notified by signal when the postmaster dies.
 */
#ifdef USE_POSTMASTER_DEATH_SIGNAL
volatile sig_atomic_t postmaster_possibly_dead = false;

/*
 * Signal handler: just remember that the postmaster may be gone.  The
 * actual liveness check is done later by PostmasterIsAliveInternal().
 */
static void
postmaster_death_handler(SIGNAL_ARGS)
{
	postmaster_possibly_dead = true;
}

/*
 * Pick the signal used for parent-death notification.  Which signals exist
 * depends on the OS, and SIGUSR1/SIGUSR2 are already taken for other
 * purposes, so prefer SIGINFO and fall back to SIGPWR.
 *
 * We assume that any platform modern enough to offer a parent-death
 * signaling mechanism also offers one of these signals; otherwise the
 * build fails.
 */
#ifdef SIGINFO
#define POSTMASTER_DEATH_SIGNAL SIGINFO
#elif defined(SIGPWR)
#define POSTMASTER_DEATH_SIGNAL SIGPWR
#else
#error "cannot find a signal to use for postmaster death"
#endif

#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
122 | | |
123 | | static void MarkPostmasterChildInactive(int code, Datum arg); |
124 | | |
125 | | /* |
126 | | * PMSignalShmemSize |
127 | | * Compute space needed for pmsignal.c's shared memory |
128 | | */ |
129 | | Size |
130 | | PMSignalShmemSize(void) |
131 | 0 | { |
132 | 0 | Size size; |
133 | |
|
134 | 0 | size = offsetof(PMSignalData, PMChildFlags); |
135 | 0 | size = add_size(size, mul_size(MaxLivePostmasterChildren(), |
136 | 0 | sizeof(sig_atomic_t))); |
137 | |
|
138 | 0 | return size; |
139 | 0 | } |
140 | | |
141 | | /* |
142 | | * PMSignalShmemInit - initialize during shared-memory creation |
143 | | */ |
144 | | void |
145 | | PMSignalShmemInit(void) |
146 | 0 | { |
147 | 0 | bool found; |
148 | |
|
149 | 0 | PMSignalState = (PMSignalData *) |
150 | 0 | ShmemInitStruct("PMSignalState", PMSignalShmemSize(), &found); |
151 | |
|
152 | 0 | if (!found) |
153 | 0 | { |
154 | | /* initialize all flags to zeroes */ |
155 | 0 | MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize()); |
156 | 0 | num_child_flags = MaxLivePostmasterChildren(); |
157 | 0 | PMSignalState->num_child_flags = num_child_flags; |
158 | 0 | } |
159 | 0 | } |
160 | | |
161 | | /* |
162 | | * SendPostmasterSignal - signal the postmaster from a child process |
163 | | */ |
164 | | void |
165 | | SendPostmasterSignal(PMSignalReason reason) |
166 | 0 | { |
167 | | /* If called in a standalone backend, do nothing */ |
168 | 0 | if (!IsUnderPostmaster) |
169 | 0 | return; |
170 | | /* Atomically set the proper flag */ |
171 | 0 | PMSignalState->PMSignalFlags[reason] = true; |
172 | | /* Send signal to postmaster */ |
173 | 0 | kill(PostmasterPid, SIGUSR1); |
174 | 0 | } |
175 | | |
176 | | /* |
177 | | * CheckPostmasterSignal - check to see if a particular reason has been |
178 | | * signaled, and clear the signal flag. Should be called by postmaster |
179 | | * after receiving SIGUSR1. |
180 | | */ |
181 | | bool |
182 | | CheckPostmasterSignal(PMSignalReason reason) |
183 | 0 | { |
184 | | /* Careful here --- don't clear flag if we haven't seen it set */ |
185 | 0 | if (PMSignalState->PMSignalFlags[reason]) |
186 | 0 | { |
187 | 0 | PMSignalState->PMSignalFlags[reason] = false; |
188 | 0 | return true; |
189 | 0 | } |
190 | 0 | return false; |
191 | 0 | } |
192 | | |
193 | | /* |
194 | | * SetQuitSignalReason - broadcast the reason for a system shutdown. |
195 | | * Should be called by postmaster before sending SIGQUIT to children. |
196 | | * |
197 | | * Note: in a crash-and-restart scenario, the "reason" field gets cleared |
198 | | * as a part of rebuilding shared memory; the postmaster need not do it |
199 | | * explicitly. |
200 | | */ |
201 | | void |
202 | | SetQuitSignalReason(QuitSignalReason reason) |
203 | 0 | { |
204 | 0 | PMSignalState->sigquit_reason = reason; |
205 | 0 | } |
206 | | |
207 | | /* |
208 | | * GetQuitSignalReason - obtain the reason for a system shutdown. |
209 | | * Called by child processes when they receive SIGQUIT. |
210 | | * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT. |
211 | | */ |
212 | | QuitSignalReason |
213 | | GetQuitSignalReason(void) |
214 | 0 | { |
215 | | /* This is called in signal handlers, so be extra paranoid. */ |
216 | 0 | if (!IsUnderPostmaster || PMSignalState == NULL) |
217 | 0 | return PMQUIT_NOT_SENT; |
218 | 0 | return PMSignalState->sigquit_reason; |
219 | 0 | } |
220 | | |
221 | | |
222 | | /* |
223 | | * MarkPostmasterChildSlotAssigned - mark the given slot as ASSIGNED for a |
224 | | * new postmaster child process. |
225 | | * |
226 | | * Only the postmaster is allowed to execute this routine, so we need no |
227 | | * special locking. |
228 | | */ |
229 | | void |
230 | | MarkPostmasterChildSlotAssigned(int slot) |
231 | 0 | { |
232 | 0 | Assert(slot > 0 && slot <= num_child_flags); |
233 | 0 | slot--; |
234 | |
|
235 | 0 | if (PMSignalState->PMChildFlags[slot] != PM_CHILD_UNUSED) |
236 | 0 | elog(FATAL, "postmaster child slot is already in use"); |
237 | | |
238 | 0 | PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; |
239 | 0 | } |
240 | | |
241 | | /* |
242 | | * MarkPostmasterChildSlotUnassigned - release a slot after death of a |
243 | | * postmaster child process. This must be called in the postmaster process. |
244 | | * |
245 | | * Returns true if the slot had been in ASSIGNED state (the expected case), |
246 | | * false otherwise (implying that the child failed to clean itself up). |
247 | | */ |
248 | | bool |
249 | | MarkPostmasterChildSlotUnassigned(int slot) |
250 | 0 | { |
251 | 0 | bool result; |
252 | |
|
253 | 0 | Assert(slot > 0 && slot <= num_child_flags); |
254 | 0 | slot--; |
255 | | |
256 | | /* |
257 | | * Note: the slot state might already be unused, because the logic in |
258 | | * postmaster.c is such that this might get called twice when a child |
259 | | * crashes. So we don't try to Assert anything about the state. |
260 | | */ |
261 | 0 | result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED); |
262 | 0 | PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED; |
263 | 0 | return result; |
264 | 0 | } |
265 | | |
266 | | /* |
267 | | * IsPostmasterChildWalSender - check if given slot is in use by a |
268 | | * walsender process. This is called only by the postmaster. |
269 | | */ |
270 | | bool |
271 | | IsPostmasterChildWalSender(int slot) |
272 | 0 | { |
273 | 0 | Assert(slot > 0 && slot <= num_child_flags); |
274 | 0 | slot--; |
275 | |
|
276 | 0 | if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER) |
277 | 0 | return true; |
278 | 0 | else |
279 | 0 | return false; |
280 | 0 | } |
281 | | |
282 | | /* |
283 | | * RegisterPostmasterChildActive - mark a postmaster child as about to begin |
284 | | * actively using shared memory. This is called in the child process. |
285 | | * |
286 | | * This register an shmem exit hook to mark us as inactive again when the |
287 | | * process exits normally. |
288 | | */ |
289 | | void |
290 | | RegisterPostmasterChildActive(void) |
291 | 0 | { |
292 | 0 | int slot = MyPMChildSlot; |
293 | |
|
294 | 0 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
295 | 0 | slot--; |
296 | 0 | Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED); |
297 | 0 | PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE; |
298 | | |
299 | | /* Arrange to clean up at exit. */ |
300 | 0 | on_shmem_exit(MarkPostmasterChildInactive, 0); |
301 | 0 | } |
302 | | |
303 | | /* |
304 | | * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender |
305 | | * process. This is called in the child process, sometime after marking the |
306 | | * child as active. |
307 | | */ |
308 | | void |
309 | | MarkPostmasterChildWalSender(void) |
310 | 0 | { |
311 | 0 | int slot = MyPMChildSlot; |
312 | |
|
313 | 0 | Assert(am_walsender); |
314 | |
|
315 | 0 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
316 | 0 | slot--; |
317 | 0 | Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE); |
318 | 0 | PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER; |
319 | 0 | } |
320 | | |
321 | | /* |
322 | | * MarkPostmasterChildInactive - mark a postmaster child as done using |
323 | | * shared memory. This is called in the child process. |
324 | | */ |
325 | | static void |
326 | | MarkPostmasterChildInactive(int code, Datum arg) |
327 | 0 | { |
328 | 0 | int slot = MyPMChildSlot; |
329 | |
|
330 | 0 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
331 | 0 | slot--; |
332 | 0 | Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE || |
333 | 0 | PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER); |
334 | 0 | PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; |
335 | 0 | } |
336 | | |
337 | | |
338 | | /* |
339 | | * PostmasterIsAliveInternal - check whether postmaster process is still alive |
340 | | * |
341 | | * This is the slow path of PostmasterIsAlive(), where the caller has already |
342 | | * checked 'postmaster_possibly_dead'. (On platforms that don't support |
343 | | * a signal for parent death, PostmasterIsAlive() is just an alias for this.) |
344 | | */ |
345 | | bool |
346 | | PostmasterIsAliveInternal(void) |
347 | 0 | { |
348 | 0 | #ifdef USE_POSTMASTER_DEATH_SIGNAL |
349 | | /* |
350 | | * Reset the flag before checking, so that we don't miss a signal if |
351 | | * postmaster dies right after the check. If postmaster was indeed dead, |
352 | | * we'll re-arm it before returning to caller. |
353 | | */ |
354 | 0 | postmaster_possibly_dead = false; |
355 | 0 | #endif |
356 | |
|
357 | 0 | #ifndef WIN32 |
358 | 0 | { |
359 | 0 | char c; |
360 | 0 | ssize_t rc; |
361 | |
|
362 | 0 | rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1); |
363 | | |
364 | | /* |
365 | | * In the usual case, the postmaster is still alive, and there is no |
366 | | * data in the pipe. |
367 | | */ |
368 | 0 | if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) |
369 | 0 | return true; |
370 | 0 | else |
371 | 0 | { |
372 | | /* |
373 | | * Postmaster is dead, or something went wrong with the read() |
374 | | * call. |
375 | | */ |
376 | |
|
377 | 0 | #ifdef USE_POSTMASTER_DEATH_SIGNAL |
378 | 0 | postmaster_possibly_dead = true; |
379 | 0 | #endif |
380 | |
|
381 | 0 | if (rc < 0) |
382 | 0 | elog(FATAL, "read on postmaster death monitoring pipe failed: %m"); |
383 | 0 | else if (rc > 0) |
384 | 0 | elog(FATAL, "unexpected data in postmaster death monitoring pipe"); |
385 | | |
386 | 0 | return false; |
387 | 0 | } |
388 | 0 | } |
389 | |
|
390 | | #else /* WIN32 */ |
391 | | if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT) |
392 | | return true; |
393 | | else |
394 | | { |
395 | | #ifdef USE_POSTMASTER_DEATH_SIGNAL |
396 | | postmaster_possibly_dead = true; |
397 | | #endif |
398 | | return false; |
399 | | } |
400 | | #endif /* WIN32 */ |
401 | 0 | } |
402 | | |
/*
 * PostmasterDeathSignalInit - ask to be signaled when the postmaster dies,
 * on platforms where that is possible.
 *
 * Does nothing unless USE_POSTMASTER_DEATH_SIGNAL is defined.  Otherwise it
 * installs postmaster_death_handler for POSTMASTER_DEATH_SIGNAL and asks the
 * kernel to deliver that signal on parent exit; failure of that request is
 * reported with elog(ERROR).
 */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			death_signal = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before requesting delivery of the signal. */
	pqsignal(death_signal, postmaster_death_handler);

	/* Now ask for the signal to be sent when our parent exits. */
#if defined(PR_SET_PDEATHSIG)
	if (prctl(PR_SET_PDEATHSIG, death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#elif defined(PROC_PDEATHSIG_CTL)
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#endif

	/*
	 * The parent might have vanished before the request took effect, in
	 * which case no signal will ever arrive.  Force the first liveness
	 * check to take the slow path so that case is caught.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}