/src/postgres/src/backend/postmaster/walwriter.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * walwriter.c |
4 | | * |
5 | | * The WAL writer background process is new as of Postgres 8.3. It attempts |
6 | | * to keep regular backends from having to write out (and fsync) WAL pages. |
7 | | * Also, it guarantees that transaction commit records that weren't synced |
8 | | * to disk immediately upon commit (ie, were "asynchronously committed") |
9 | | * will reach disk within a knowable time --- which, as it happens, is at |
10 | | * most three times the wal_writer_delay cycle time. |
11 | | * |
12 | | * Note that as with the bgwriter for shared buffers, regular backends are |
13 | | * still empowered to issue WAL writes and fsyncs when the walwriter doesn't |
14 | | * keep up. This means that the walwriter is not an essential process and |
15 | | * can shut down quickly when requested. |
16 | | * |
17 | | * Because the walwriter's cycle is directly linked to the maximum delay |
18 | | * before async-commit transactions are guaranteed committed, it's probably |
19 | | * unwise to load additional functionality onto it. For instance, if you've |
20 | | * got a yen to create xlog segments further in advance, that'd be better done |
21 | | * in bgwriter than in walwriter. |
22 | | * |
23 | | * The walwriter is started by the postmaster as soon as the startup subprocess |
24 | | * finishes. It remains alive until the postmaster commands it to terminate. |
25 | | * Normal termination is by SIGTERM, which instructs the walwriter to exit(0). |
26 | | * Emergency termination is by SIGQUIT; like any backend, the walwriter will |
27 | | * simply abort and exit on SIGQUIT. |
28 | | * |
29 | | * If the walwriter exits unexpectedly, the postmaster treats that the same |
30 | | * as a backend crash: shared memory may be corrupted, so remaining backends |
31 | | * should be killed by SIGQUIT and then a recovery cycle started. |
32 | | * |
33 | | * |
34 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
35 | | * |
36 | | * |
37 | | * IDENTIFICATION |
38 | | * src/backend/postmaster/walwriter.c |
39 | | * |
40 | | *------------------------------------------------------------------------- |
41 | | */ |
42 | | #include "postgres.h" |
43 | | |
44 | | #include <signal.h> |
45 | | #include <unistd.h> |
46 | | |
47 | | #include "access/xlog.h" |
48 | | #include "libpq/pqsignal.h" |
49 | | #include "miscadmin.h" |
50 | | #include "pgstat.h" |
51 | | #include "postmaster/auxprocess.h" |
52 | | #include "postmaster/interrupt.h" |
53 | | #include "postmaster/walwriter.h" |
54 | | #include "storage/aio_subsys.h" |
55 | | #include "storage/bufmgr.h" |
56 | | #include "storage/condition_variable.h" |
57 | | #include "storage/fd.h" |
58 | | #include "storage/lwlock.h" |
59 | | #include "storage/proc.h" |
60 | | #include "storage/procsignal.h" |
61 | | #include "storage/smgr.h" |
62 | | #include "utils/hsearch.h" |
63 | | #include "utils/memutils.h" |
64 | | #include "utils/resowner.h" |
65 | | |
66 | | |
67 | | /* |
68 | | * GUC parameters (WalWriterDelay is the main loop's base sleep time, in ms) |
69 | | */ |
70 | | int WalWriterDelay = 200; |
71 | | int WalWriterFlushAfter = DEFAULT_WAL_WRITER_FLUSH_AFTER; |
72 | | |
73 | | /* |
74 | | * Number of do-nothing loops before lengthening the delay time, and the |
75 | | * multiplier to apply to WalWriterDelay when we do decide to hibernate. |
76 | | * (Perhaps these need to be configurable?) |
77 | | */ |
78 | 0 | #define LOOPS_UNTIL_HIBERNATE 50 /* idle cycles before the long sleep */ |
79 | 0 | #define HIBERNATE_FACTOR 25 /* long sleep = WalWriterDelay * this */ |
80 | | |
81 | | /* |
82 | | * Main entry point for walwriter process (takes no startup data) |
83 | | * |
84 | | * This is invoked from AuxiliaryProcessMain, which has already created the |
85 | | * basic execution environment, but not enabled signals yet. |
86 | | */ |
87 | | void |
88 | | WalWriterMain(const void *startup_data, size_t startup_data_len) |
89 | 0 | { |
90 | 0 | sigjmp_buf local_sigjmp_buf; |
91 | 0 | MemoryContext walwriter_context; |
92 | 0 | int left_till_hibernate; |
93 | 0 | bool hibernating; |
94 | |
|
95 | 0 | Assert(startup_data_len == 0); |
96 | |
|
97 | 0 | MyBackendType = B_WAL_WRITER; |
98 | 0 | AuxiliaryProcessMainCommon(); |
99 | | |
100 | | /* |
101 | | * Properly accept or ignore signals the postmaster might send us |
102 | | * |
103 | | * We have no particular use for SIGINT at the moment, but it seems |
104 | | * reasonable to treat it like SIGTERM. |
105 | | */ |
106 | 0 | pqsignal(SIGHUP, SignalHandlerForConfigReload); |
107 | 0 | pqsignal(SIGINT, SignalHandlerForShutdownRequest); |
108 | 0 | pqsignal(SIGTERM, SignalHandlerForShutdownRequest); |
109 | | /* SIGQUIT handler was already set up by InitPostmasterChild */ |
110 | 0 | pqsignal(SIGALRM, SIG_IGN); |
111 | 0 | pqsignal(SIGPIPE, SIG_IGN); |
112 | 0 | pqsignal(SIGUSR1, procsignal_sigusr1_handler); |
113 | 0 | pqsignal(SIGUSR2, SIG_IGN); /* not used */ |
114 | | |
115 | | /* |
116 | | * Reset some signals that are accepted by postmaster but not here |
117 | | */ |
118 | 0 | pqsignal(SIGCHLD, SIG_DFL); |
119 | | |
120 | | /* |
121 | | * Create a memory context that we will do all our work in. We do this so |
122 | | * that we can reset the context during error recovery and thereby avoid |
123 | | * possible memory leaks. Formerly this code just ran in |
124 | | * TopMemoryContext, but resetting that would be a really bad idea. |
125 | | */ |
126 | 0 | walwriter_context = AllocSetContextCreate(TopMemoryContext, |
127 | 0 | "Wal Writer", |
128 | 0 | ALLOCSET_DEFAULT_SIZES); |
129 | 0 | MemoryContextSwitchTo(walwriter_context); |
130 | | |
131 | | /* |
132 | | * If an exception is encountered, processing resumes here. |
133 | | * |
134 | | * You might wonder why this isn't coded as an infinite loop around a |
135 | | * PG_TRY construct. The reason is that this is the bottom of the |
136 | | * exception stack, and so with PG_TRY there would be no exception handler |
137 | | * in force at all during the CATCH part. By leaving the outermost setjmp |
138 | | * always active, we have at least some chance of recovering from an error |
139 | | * during error recovery. (If we get into an infinite loop thereby, it |
140 | | * will soon be stopped by overflow of elog.c's internal state stack.) |
141 | | * |
142 | | * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask |
143 | | * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus, |
144 | | * signals other than SIGQUIT will be blocked until we complete error |
145 | | * recovery. It might seem that this policy makes the HOLD_INTERRUPTS() |
146 | | * call redundant, but it is not since InterruptPending might be set |
147 | | * already. |
148 | | */ |
149 | 0 | if (sigsetjmp(local_sigjmp_buf, 1) != 0) |
150 | 0 | { |
151 | | /* Since not using PG_TRY, must reset error stack by hand */ |
152 | 0 | error_context_stack = NULL; |
153 | | |
154 | | /* Prevent interrupts while cleaning up */ |
155 | 0 | HOLD_INTERRUPTS(); |
156 | | |
157 | | /* Report the error to the server log */ |
158 | 0 | EmitErrorReport(); |
159 | | |
160 | | /* |
161 | | * These operations are really just a minimal subset of |
162 | | * AbortTransaction(). We don't have very many resources to worry |
163 | | * about in walwriter, but we do have LWLocks, and perhaps buffers? |
164 | | */ |
165 | 0 | LWLockReleaseAll(); |
166 | 0 | ConditionVariableCancelSleep(); |
167 | 0 | pgstat_report_wait_end(); |
168 | 0 | pgaio_error_cleanup(); |
169 | 0 | UnlockBuffers(); |
170 | 0 | ReleaseAuxProcessResources(false); |
171 | 0 | AtEOXact_Buffers(false); |
172 | 0 | AtEOXact_SMgr(); |
173 | 0 | AtEOXact_Files(false); |
174 | 0 | AtEOXact_HashTables(false); |
175 | | |
176 | | /* |
177 | | * Now return to normal top-level context and clear ErrorContext for |
178 | | * next time. |
179 | | */ |
180 | 0 | MemoryContextSwitchTo(walwriter_context); |
181 | 0 | FlushErrorState(); |
182 | | |
183 | | /* Flush any leaked data in the top-level context */ |
184 | 0 | MemoryContextReset(walwriter_context); |
185 | | |
186 | | /* Now we can allow interrupts again */ |
187 | 0 | RESUME_INTERRUPTS(); |
188 | | |
189 | | /* |
190 | | * Sleep at least 1 second after any error. A write error is likely |
191 | | * to be repeated, and we don't want to be filling the error logs as |
192 | | * fast as we can. |
193 | | */ |
194 | 0 | pg_usleep(1000000L); /* 1000000 us = 1 s */ |
195 | 0 | } |
196 | | |
197 | | /* We can now handle ereport(ERROR) */ |
198 | 0 | PG_exception_stack = &local_sigjmp_buf; |
199 | | |
200 | | /* |
201 | | * Unblock signals (they were blocked when the postmaster forked us) |
202 | | */ |
203 | 0 | sigprocmask(SIG_SETMASK, &UnBlockSig, NULL); |
204 | | |
205 | | /* |
206 | | * Reset hibernation state after any error. |
207 | | */ |
208 | 0 | left_till_hibernate = LOOPS_UNTIL_HIBERNATE; |
209 | 0 | hibernating = false; |
210 | 0 | SetWalWriterSleeping(false); |
211 | | |
212 | | /* |
213 | | * Advertise our proc number that backends can use to wake us up while |
214 | | * we're sleeping. |
215 | | */ |
216 | 0 | ProcGlobal->walwriterProc = MyProcNumber; |
217 | | |
218 | | /* |
219 | | * Loop forever |
220 | | */ |
221 | 0 | for (;;) |
222 | 0 | { |
223 | 0 | long cur_timeout; |
224 | | |
225 | | /* |
226 | | * Advertise whether we might hibernate in this cycle. We do this |
227 | | * before resetting the latch to ensure that any async commits will |
228 | | * see the flag set if they might possibly need to wake us up, and |
229 | | * that we won't miss any signal they send us. (If we discover work |
230 | | * to do in the last cycle before we would hibernate, the global flag |
231 | | * will be set unnecessarily, but little harm is done.) But avoid |
232 | | * touching the global flag if it doesn't need to change. |
233 | | */ |
234 | 0 | if (hibernating != (left_till_hibernate <= 1)) |
235 | 0 | { |
236 | 0 | hibernating = (left_till_hibernate <= 1); |
237 | 0 | SetWalWriterSleeping(hibernating); |
238 | 0 | } |
239 | | |
240 | | /* Clear any already-pending wakeups */ |
241 | 0 | ResetLatch(MyLatch); |
242 | | |
243 | | /* Process any signals received recently */ |
244 | 0 | ProcessMainLoopInterrupts(); |
245 | | |
246 | | /* |
247 | | * Do what we're here for; then, if XLogBackgroundFlush() found useful |
248 | | * work to do, reset hibernation counter. |
249 | | */ |
250 | 0 | if (XLogBackgroundFlush()) |
251 | 0 | left_till_hibernate = LOOPS_UNTIL_HIBERNATE; |
252 | 0 | else if (left_till_hibernate > 0) |
253 | 0 | left_till_hibernate--; /* stops at 0, which selects the long sleep below */ |
254 | | |
255 | | /* report pending statistics to the cumulative stats system */ |
256 | 0 | pgstat_report_wal(false); |
257 | | |
258 | | /* |
259 | | * Sleep until we are signaled or WalWriterDelay has elapsed. If we |
260 | | * haven't done anything useful for quite some time, lengthen the |
261 | | * sleep time so as to reduce the server's idle power consumption. |
262 | | */ |
263 | 0 | if (left_till_hibernate > 0) |
264 | 0 | cur_timeout = WalWriterDelay; /* in ms */ |
265 | 0 | else |
266 | 0 | cur_timeout = WalWriterDelay * HIBERNATE_FACTOR; /* also in ms */ |
267 | |
|
268 | 0 | (void) WaitLatch(MyLatch, |
269 | 0 | WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, |
270 | 0 | cur_timeout, |
271 | 0 | WAIT_EVENT_WAL_WRITER_MAIN); |
272 | 0 | } |
273 | 0 | } |