Coverage Report

Created: 2025-09-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/storage/lmgr/proc.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * proc.c
4
 *    routines to manage per-process shared memory data structure
5
 *
6
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7
 * Portions Copyright (c) 1994, Regents of the University of California
8
 *
9
 *
10
 * IDENTIFICATION
11
 *    src/backend/storage/lmgr/proc.c
12
 *
13
 *-------------------------------------------------------------------------
14
 */
15
/*
16
 * Interface (a):
17
 *    JoinWaitQueue(), ProcSleep(), ProcWakeup()
18
 *
19
 * Waiting for a lock causes the backend to be put to sleep.  Whoever releases
20
 * the lock wakes the process up again (and gives it an error code so it knows
21
 * whether it was awoken on an error condition).
22
 *
23
 * Interface (b):
24
 *
25
 * ProcReleaseLocks -- frees the locks associated with current transaction
26
 *
27
 * ProcKill -- destroys the shared memory state (and locks)
28
 * associated with the process.
29
 */
30
#include "postgres.h"
31
32
#include <signal.h>
33
#include <unistd.h>
34
#include <sys/time.h>
35
36
#include "access/transam.h"
37
#include "access/twophase.h"
38
#include "access/xlogutils.h"
39
#include "miscadmin.h"
40
#include "pgstat.h"
41
#include "postmaster/autovacuum.h"
42
#include "replication/slotsync.h"
43
#include "replication/syncrep.h"
44
#include "storage/condition_variable.h"
45
#include "storage/ipc.h"
46
#include "storage/lmgr.h"
47
#include "storage/pmsignal.h"
48
#include "storage/proc.h"
49
#include "storage/procarray.h"
50
#include "storage/procsignal.h"
51
#include "storage/spin.h"
52
#include "storage/standby.h"
53
#include "utils/timeout.h"
54
#include "utils/timestamp.h"
55
56
/* GUC variables */
57
int     DeadlockTimeout = 1000;
58
int     StatementTimeout = 0;
59
int     LockTimeout = 0;
60
int     IdleInTransactionSessionTimeout = 0;
61
int     TransactionTimeout = 0;
62
int     IdleSessionTimeout = 0;
63
bool    log_lock_waits = true;
64
65
/* Pointer to this process's PGPROC struct, if any */
66
PGPROC     *MyProc = NULL;
67
68
/*
69
 * This spinlock protects the freelist of recycled PGPROC structures.
70
 * We cannot use an LWLock because the LWLock manager depends on already
71
 * having a PGPROC and a wait semaphore!  But these structures are touched
72
 * relatively infrequently (only at backend startup or shutdown) and not for
73
 * very long, so a spinlock is okay.
74
 */
75
NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
76
77
/* Pointers to shared-memory structures */
78
PROC_HDR   *ProcGlobal = NULL;
79
NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL;
80
PGPROC     *PreparedXactProcs = NULL;
81
82
static DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
83
84
/* Is a deadlock check pending? */
85
static volatile sig_atomic_t got_deadlock_timeout;
86
87
static void RemoveProcFromArray(int code, Datum arg);
88
static void ProcKill(int code, Datum arg);
89
static void AuxiliaryProcKill(int code, Datum arg);
90
static void CheckDeadLock(void);
91
92
93
/*
94
 * Report shared-memory space needed by PGPROC.
95
 */
96
static Size
97
PGProcShmemSize(void)
98
0
{
99
0
  Size    size = 0;
100
0
  Size    TotalProcs =
101
0
    add_size(MaxBackends, add_size(NUM_AUXILIARY_PROCS, max_prepared_xacts));
102
103
0
  size = add_size(size, mul_size(TotalProcs, sizeof(PGPROC)));
104
0
  size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->xids)));
105
0
  size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->subxidStates)));
106
0
  size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->statusFlags)));
107
108
0
  return size;
109
0
}
110
111
/*
112
 * Report shared-memory space needed by Fast-Path locks.
113
 */
114
static Size
115
FastPathLockShmemSize(void)
116
0
{
117
0
  Size    size = 0;
118
0
  Size    TotalProcs =
119
0
    add_size(MaxBackends, add_size(NUM_AUXILIARY_PROCS, max_prepared_xacts));
120
0
  Size    fpLockBitsSize,
121
0
        fpRelIdSize;
122
123
  /*
124
   * Memory needed for PGPROC fast-path lock arrays. Make sure the sizes are
125
   * nicely aligned in each backend.
126
   */
127
0
  fpLockBitsSize = MAXALIGN(FastPathLockGroupsPerBackend * sizeof(uint64));
128
0
  fpRelIdSize = MAXALIGN(FastPathLockSlotsPerBackend() * sizeof(Oid));
129
130
0
  size = add_size(size, mul_size(TotalProcs, (fpLockBitsSize + fpRelIdSize)));
131
132
0
  return size;
133
0
}
134
135
/*
136
 * Report shared-memory space needed by InitProcGlobal.
137
 */
138
Size
139
ProcGlobalShmemSize(void)
140
0
{
141
0
  Size    size = 0;
142
143
  /* ProcGlobal */
144
0
  size = add_size(size, sizeof(PROC_HDR));
145
0
  size = add_size(size, sizeof(slock_t));
146
147
0
  size = add_size(size, PGProcShmemSize());
148
0
  size = add_size(size, FastPathLockShmemSize());
149
150
0
  return size;
151
0
}
152
153
/*
154
 * Report number of semaphores needed by InitProcGlobal.
155
 */
156
int
157
ProcGlobalSemas(void)
158
0
{
159
  /*
160
   * We need a sema per backend (including autovacuum), plus one for each
161
   * auxiliary process.
162
   */
163
0
  return MaxBackends + NUM_AUXILIARY_PROCS;
164
0
}
165
166
/*
167
 * InitProcGlobal -
168
 *    Initialize the global process table during postmaster or standalone
169
 *    backend startup.
170
 *
171
 *    We also create all the per-process semaphores we will need to support
172
 *    the requested number of backends.  We used to allocate semaphores
173
 *    only when backends were actually started up, but that is bad because
174
 *    it lets Postgres fail under load --- a lot of Unix systems are
175
 *    (mis)configured with small limits on the number of semaphores, and
176
 *    running out when trying to start another backend is a common failure.
177
 *    So, now we grab enough semaphores to support the desired max number
178
 *    of backends immediately at initialization --- if the sysadmin has set
179
 *    MaxConnections, max_worker_processes, max_wal_senders, or
180
 *    autovacuum_worker_slots higher than his kernel will support, he'll
181
 *    find out sooner rather than later.
182
 *
183
 *    Another reason for creating semaphores here is that the semaphore
184
 *    implementation typically requires us to create semaphores in the
185
 *    postmaster, not in backends.
186
 *
187
 * Note: this is NOT called by individual backends under a postmaster,
188
 * not even in the EXEC_BACKEND case.  The ProcGlobal and AuxiliaryProcs
189
 * pointers must be propagated specially for EXEC_BACKEND operation.
190
 */
191
void
192
InitProcGlobal(void)
193
0
{
194
0
  PGPROC     *procs;
195
0
  int     i,
196
0
        j;
197
0
  bool    found;
198
0
  uint32    TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts;
199
200
  /* Used for setup of per-backend fast-path slots. */
201
0
  char     *fpPtr,
202
0
         *fpEndPtr PG_USED_FOR_ASSERTS_ONLY;
203
0
  Size    fpLockBitsSize,
204
0
        fpRelIdSize;
205
0
  Size    requestSize;
206
0
  char     *ptr;
207
208
  /* Create the ProcGlobal shared structure */
209
0
  ProcGlobal = (PROC_HDR *)
210
0
    ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
211
0
  Assert(!found);
212
213
  /*
214
   * Initialize the data structures.
215
   */
216
0
  ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
217
0
  dlist_init(&ProcGlobal->freeProcs);
218
0
  dlist_init(&ProcGlobal->autovacFreeProcs);
219
0
  dlist_init(&ProcGlobal->bgworkerFreeProcs);
220
0
  dlist_init(&ProcGlobal->walsenderFreeProcs);
221
0
  ProcGlobal->startupBufferPinWaitBufId = -1;
222
0
  ProcGlobal->walwriterProc = INVALID_PROC_NUMBER;
223
0
  ProcGlobal->checkpointerProc = INVALID_PROC_NUMBER;
224
0
  pg_atomic_init_u32(&ProcGlobal->procArrayGroupFirst, INVALID_PROC_NUMBER);
225
0
  pg_atomic_init_u32(&ProcGlobal->clogGroupFirst, INVALID_PROC_NUMBER);
226
227
  /*
228
   * Create and initialize all the PGPROC structures we'll need.  There are
229
   * six separate consumers: (1) normal backends, (2) autovacuum workers and
230
   * special workers, (3) background workers, (4) walsenders, (5) auxiliary
231
   * processes, and (6) prepared transactions.  (For largely-historical
232
   * reasons, we combine autovacuum and special workers into one category
233
   * with a single freelist.)  Each PGPROC structure is dedicated to exactly
234
   * one of these purposes, and they do not move between groups.
235
   */
236
0
  requestSize = PGProcShmemSize();
237
238
0
  ptr = ShmemInitStruct("PGPROC structures",
239
0
              requestSize,
240
0
              &found);
241
242
0
  MemSet(ptr, 0, requestSize);
243
244
0
  procs = (PGPROC *) ptr;
245
0
  ptr = (char *) ptr + TotalProcs * sizeof(PGPROC);
246
247
0
  ProcGlobal->allProcs = procs;
248
  /* XXX allProcCount isn't really all of them; it excludes prepared xacts */
249
0
  ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS;
250
251
  /*
252
   * Allocate arrays mirroring PGPROC fields in a dense manner. See
253
   * PROC_HDR.
254
   *
255
   * XXX: It might make sense to increase padding for these arrays, given
256
   * how hotly they are accessed.
257
   */
258
0
  ProcGlobal->xids = (TransactionId *) ptr;
259
0
  ptr = (char *) ptr + (TotalProcs * sizeof(*ProcGlobal->xids));
260
261
0
  ProcGlobal->subxidStates = (XidCacheStatus *) ptr;
262
0
  ptr = (char *) ptr + (TotalProcs * sizeof(*ProcGlobal->subxidStates));
263
264
0
  ProcGlobal->statusFlags = (uint8 *) ptr;
265
0
  ptr = (char *) ptr + (TotalProcs * sizeof(*ProcGlobal->statusFlags));
266
267
  /* make sure we didn't overflow */
268
0
  Assert((ptr > (char *) procs) && (ptr <= (char *) procs + requestSize));
269
270
  /*
271
   * Allocate arrays for fast-path locks. Those are variable-length, so
272
   * can't be included in PGPROC directly. We allocate a separate piece of
273
   * shared memory and then divide that between backends.
274
   */
275
0
  fpLockBitsSize = MAXALIGN(FastPathLockGroupsPerBackend * sizeof(uint64));
276
0
  fpRelIdSize = MAXALIGN(FastPathLockSlotsPerBackend() * sizeof(Oid));
277
278
0
  requestSize = FastPathLockShmemSize();
279
280
0
  fpPtr = ShmemInitStruct("Fast-Path Lock Array",
281
0
              requestSize,
282
0
              &found);
283
284
0
  MemSet(fpPtr, 0, requestSize);
285
286
  /* For asserts checking we did not overflow. */
287
0
  fpEndPtr = fpPtr + requestSize;
288
289
0
  for (i = 0; i < TotalProcs; i++)
290
0
  {
291
0
    PGPROC     *proc = &procs[i];
292
293
    /* Common initialization for all PGPROCs, regardless of type. */
294
295
    /*
296
     * Set the fast-path lock arrays, and move the pointer. We interleave
297
     * the two arrays, to (hopefully) get some locality for each backend.
298
     */
299
0
    proc->fpLockBits = (uint64 *) fpPtr;
300
0
    fpPtr += fpLockBitsSize;
301
302
0
    proc->fpRelId = (Oid *) fpPtr;
303
0
    fpPtr += fpRelIdSize;
304
305
0
    Assert(fpPtr <= fpEndPtr);
306
307
    /*
308
     * Set up per-PGPROC semaphore, latch, and fpInfoLock.  Prepared xact
309
     * dummy PGPROCs don't need these though - they're never associated
310
     * with a real process
311
     */
312
0
    if (i < MaxBackends + NUM_AUXILIARY_PROCS)
313
0
    {
314
0
      proc->sem = PGSemaphoreCreate();
315
0
      InitSharedLatch(&(proc->procLatch));
316
0
      LWLockInitialize(&(proc->fpInfoLock), LWTRANCHE_LOCK_FASTPATH);
317
0
    }
318
319
    /*
320
     * Newly created PGPROCs for normal backends, autovacuum workers,
321
     * special workers, bgworkers, and walsenders must be queued up on the
322
     * appropriate free list.  Because there can only ever be a small,
323
     * fixed number of auxiliary processes, no free list is used in that
324
     * case; InitAuxiliaryProcess() instead uses a linear search.  PGPROCs
325
     * for prepared transactions are added to a free list by
326
     * TwoPhaseShmemInit().
327
     */
328
0
    if (i < MaxConnections)
329
0
    {
330
      /* PGPROC for normal backend, add to freeProcs list */
331
0
      dlist_push_tail(&ProcGlobal->freeProcs, &proc->links);
332
0
      proc->procgloballist = &ProcGlobal->freeProcs;
333
0
    }
334
0
    else if (i < MaxConnections + autovacuum_worker_slots + NUM_SPECIAL_WORKER_PROCS)
335
0
    {
336
      /* PGPROC for AV or special worker, add to autovacFreeProcs list */
337
0
      dlist_push_tail(&ProcGlobal->autovacFreeProcs, &proc->links);
338
0
      proc->procgloballist = &ProcGlobal->autovacFreeProcs;
339
0
    }
340
0
    else if (i < MaxConnections + autovacuum_worker_slots + NUM_SPECIAL_WORKER_PROCS + max_worker_processes)
341
0
    {
342
      /* PGPROC for bgworker, add to bgworkerFreeProcs list */
343
0
      dlist_push_tail(&ProcGlobal->bgworkerFreeProcs, &proc->links);
344
0
      proc->procgloballist = &ProcGlobal->bgworkerFreeProcs;
345
0
    }
346
0
    else if (i < MaxBackends)
347
0
    {
348
      /* PGPROC for walsender, add to walsenderFreeProcs list */
349
0
      dlist_push_tail(&ProcGlobal->walsenderFreeProcs, &proc->links);
350
0
      proc->procgloballist = &ProcGlobal->walsenderFreeProcs;
351
0
    }
352
353
    /* Initialize myProcLocks[] shared memory queues. */
354
0
    for (j = 0; j < NUM_LOCK_PARTITIONS; j++)
355
0
      dlist_init(&(proc->myProcLocks[j]));
356
357
    /* Initialize lockGroupMembers list. */
358
0
    dlist_init(&proc->lockGroupMembers);
359
360
    /*
361
     * Initialize the atomic variables, otherwise, it won't be safe to
362
     * access them for backends that aren't currently in use.
363
     */
364
0
    pg_atomic_init_u32(&(proc->procArrayGroupNext), INVALID_PROC_NUMBER);
365
0
    pg_atomic_init_u32(&(proc->clogGroupNext), INVALID_PROC_NUMBER);
366
0
    pg_atomic_init_u64(&(proc->waitStart), 0);
367
0
  }
368
369
  /* Should have consumed exactly the expected amount of fast-path memory. */
370
0
  Assert(fpPtr == fpEndPtr);
371
372
  /*
373
   * Save pointers to the blocks of PGPROC structures reserved for auxiliary
374
   * processes and prepared transactions.
375
   */
376
0
  AuxiliaryProcs = &procs[MaxBackends];
377
0
  PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS];
378
379
  /* Create ProcStructLock spinlock, too */
380
0
  ProcStructLock = (slock_t *) ShmemInitStruct("ProcStructLock spinlock",
381
0
                         sizeof(slock_t),
382
0
                         &found);
383
0
  SpinLockInit(ProcStructLock);
384
0
}
385
386
/*
387
 * InitProcess -- initialize a per-process PGPROC entry for this backend
388
 */
389
void
390
InitProcess(void)
391
0
{
392
0
  dlist_head *procgloballist;
393
394
  /*
395
   * ProcGlobal should be set up already (if we are a backend, we inherit
396
   * this by fork() or EXEC_BACKEND mechanism from the postmaster).
397
   */
398
0
  if (ProcGlobal == NULL)
399
0
    elog(PANIC, "proc header uninitialized");
400
401
0
  if (MyProc != NULL)
402
0
    elog(ERROR, "you already exist");
403
404
  /*
405
   * Before we start accessing the shared memory in a serious way, mark
406
   * ourselves as an active postmaster child; this is so that the postmaster
407
   * can detect it if we exit without cleaning up.
408
   */
409
0
  if (IsUnderPostmaster)
410
0
    RegisterPostmasterChildActive();
411
412
  /*
413
   * Decide which list should supply our PGPROC.  This logic must match the
414
   * way the freelists were constructed in InitProcGlobal().
415
   */
416
0
  if (AmAutoVacuumWorkerProcess() || AmSpecialWorkerProcess())
417
0
    procgloballist = &ProcGlobal->autovacFreeProcs;
418
0
  else if (AmBackgroundWorkerProcess())
419
0
    procgloballist = &ProcGlobal->bgworkerFreeProcs;
420
0
  else if (AmWalSenderProcess())
421
0
    procgloballist = &ProcGlobal->walsenderFreeProcs;
422
0
  else
423
0
    procgloballist = &ProcGlobal->freeProcs;
424
425
  /*
426
   * Try to get a proc struct from the appropriate free list.  If this
427
   * fails, we must be out of PGPROC structures (not to mention semaphores).
428
   *
429
   * While we are holding the ProcStructLock, also copy the current shared
430
   * estimate of spins_per_delay to local storage.
431
   */
432
0
  SpinLockAcquire(ProcStructLock);
433
434
0
  set_spins_per_delay(ProcGlobal->spins_per_delay);
435
436
0
  if (!dlist_is_empty(procgloballist))
437
0
  {
438
0
    MyProc = dlist_container(PGPROC, links, dlist_pop_head_node(procgloballist));
439
0
    SpinLockRelease(ProcStructLock);
440
0
  }
441
0
  else
442
0
  {
443
    /*
444
     * If we reach here, all the PGPROCs are in use.  This is one of the
445
     * possible places to detect "too many backends", so give the standard
446
     * error message.  XXX do we need to give a different failure message
447
     * in the autovacuum case?
448
     */
449
0
    SpinLockRelease(ProcStructLock);
450
0
    if (AmWalSenderProcess())
451
0
      ereport(FATAL,
452
0
          (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
453
0
           errmsg("number of requested standby connections exceeds \"max_wal_senders\" (currently %d)",
454
0
              max_wal_senders)));
455
0
    ereport(FATAL,
456
0
        (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
457
0
         errmsg("sorry, too many clients already")));
458
0
  }
459
0
  MyProcNumber = GetNumberFromPGProc(MyProc);
460
461
  /*
462
   * Cross-check that the PGPROC is of the type we expect; if this were not
463
   * the case, it would get returned to the wrong list.
464
   */
465
0
  Assert(MyProc->procgloballist == procgloballist);
466
467
  /*
468
   * Initialize all fields of MyProc, except for those previously
469
   * initialized by InitProcGlobal.
470
   */
471
0
  dlist_node_init(&MyProc->links);
472
0
  MyProc->waitStatus = PROC_WAIT_STATUS_OK;
473
0
  MyProc->fpVXIDLock = false;
474
0
  MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
475
0
  MyProc->xid = InvalidTransactionId;
476
0
  MyProc->xmin = InvalidTransactionId;
477
0
  MyProc->pid = MyProcPid;
478
0
  MyProc->vxid.procNumber = MyProcNumber;
479
0
  MyProc->vxid.lxid = InvalidLocalTransactionId;
480
  /* databaseId and roleId will be filled in later */
481
0
  MyProc->databaseId = InvalidOid;
482
0
  MyProc->roleId = InvalidOid;
483
0
  MyProc->tempNamespaceId = InvalidOid;
484
0
  MyProc->isRegularBackend = AmRegularBackendProcess();
485
0
  MyProc->delayChkptFlags = 0;
486
0
  MyProc->statusFlags = 0;
487
  /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
488
0
  if (AmAutoVacuumWorkerProcess())
489
0
    MyProc->statusFlags |= PROC_IS_AUTOVACUUM;
490
0
  MyProc->lwWaiting = LW_WS_NOT_WAITING;
491
0
  MyProc->lwWaitMode = 0;
492
0
  MyProc->waitLock = NULL;
493
0
  MyProc->waitProcLock = NULL;
494
0
  pg_atomic_write_u64(&MyProc->waitStart, 0);
495
#ifdef USE_ASSERT_CHECKING
496
  {
497
    int     i;
498
499
    /* Last process should have released all locks. */
500
    for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
501
      Assert(dlist_is_empty(&(MyProc->myProcLocks[i])));
502
  }
503
#endif
504
0
  MyProc->recoveryConflictPending = false;
505
506
  /* Initialize fields for sync rep */
507
0
  MyProc->waitLSN = 0;
508
0
  MyProc->syncRepState = SYNC_REP_NOT_WAITING;
509
0
  dlist_node_init(&MyProc->syncRepLinks);
510
511
  /* Initialize fields for group XID clearing. */
512
0
  MyProc->procArrayGroupMember = false;
513
0
  MyProc->procArrayGroupMemberXid = InvalidTransactionId;
514
0
  Assert(pg_atomic_read_u32(&MyProc->procArrayGroupNext) == INVALID_PROC_NUMBER);
515
516
  /* Check that group locking fields are in a proper initial state. */
517
0
  Assert(MyProc->lockGroupLeader == NULL);
518
0
  Assert(dlist_is_empty(&MyProc->lockGroupMembers));
519
520
  /* Initialize wait event information. */
521
0
  MyProc->wait_event_info = 0;
522
523
  /* Initialize fields for group transaction status update. */
524
0
  MyProc->clogGroupMember = false;
525
0
  MyProc->clogGroupMemberXid = InvalidTransactionId;
526
0
  MyProc->clogGroupMemberXidStatus = TRANSACTION_STATUS_IN_PROGRESS;
527
0
  MyProc->clogGroupMemberPage = -1;
528
0
  MyProc->clogGroupMemberLsn = InvalidXLogRecPtr;
529
0
  Assert(pg_atomic_read_u32(&MyProc->clogGroupNext) == INVALID_PROC_NUMBER);
530
531
  /*
532
   * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
533
   * on it.  That allows us to repoint the process latch, which so far
534
   * points to the process-local one, to the shared one.
535
   */
536
0
  OwnLatch(&MyProc->procLatch);
537
0
  SwitchToSharedLatch();
538
539
  /* now that we have a proc, report wait events to shared memory */
540
0
  pgstat_set_wait_event_storage(&MyProc->wait_event_info);
541
542
  /*
543
   * We might be reusing a semaphore that belonged to a failed process. So
544
   * be careful and reinitialize its value here.  (This is not strictly
545
   * necessary anymore, but seems like a good idea for cleanliness.)
546
   */
547
0
  PGSemaphoreReset(MyProc->sem);
548
549
  /*
550
   * Arrange to clean up at backend exit.
551
   */
552
0
  on_shmem_exit(ProcKill, 0);
553
554
  /*
555
   * Now that we have a PGPROC, we could try to acquire locks, so initialize
556
   * local state needed for LWLocks, and the deadlock checker.
557
   */
558
0
  InitLWLockAccess();
559
0
  InitDeadLockChecking();
560
561
#ifdef EXEC_BACKEND
562
563
  /*
564
   * Initialize backend-local pointers to all the shared data structures.
565
   * (We couldn't do this until now because it needs LWLocks.)
566
   */
567
  if (IsUnderPostmaster)
568
    AttachSharedMemoryStructs();
569
#endif
570
0
}
571
572
/*
573
 * InitProcessPhase2 -- make MyProc visible in the shared ProcArray.
574
 *
575
 * This is separate from InitProcess because we can't acquire LWLocks until
576
 * we've created a PGPROC, but in the EXEC_BACKEND case ProcArrayAdd won't
577
 * work until after we've done AttachSharedMemoryStructs.
578
 */
579
void
580
InitProcessPhase2(void)
581
0
{
582
0
  Assert(MyProc != NULL);
583
584
  /*
585
   * Add our PGPROC to the PGPROC array in shared memory.
586
   */
587
0
  ProcArrayAdd(MyProc);
588
589
  /*
590
   * Arrange to clean that up at backend exit.
591
   */
592
0
  on_shmem_exit(RemoveProcFromArray, 0);
593
0
}
594
595
/*
596
 * InitAuxiliaryProcess -- create a PGPROC entry for an auxiliary process
597
 *
598
 * This is called by bgwriter and similar processes so that they will have a
599
 * MyProc value that's real enough to let them wait for LWLocks.  The PGPROC
600
 * and sema that are assigned are one of the extra ones created during
601
 * InitProcGlobal.
602
 *
603
 * Auxiliary processes are presently not expected to wait for real (lockmgr)
604
 * locks, so we need not set up the deadlock checker.  They are never added
605
 * to the ProcArray or the sinval messaging mechanism, either.  They also
606
 * don't get a VXID assigned, since this is only useful when we actually
607
 * hold lockmgr locks.
608
 *
609
 * Startup process however uses locks but never waits for them in the
610
 * normal backend sense. Startup process also takes part in sinval messaging
611
 * as a sendOnly process, so never reads messages from sinval queue. So
612
 * Startup process does have a VXID and does show up in pg_locks.
613
 */
614
void
615
InitAuxiliaryProcess(void)
616
0
{
617
0
  PGPROC     *auxproc;
618
0
  int     proctype;
619
620
  /*
621
   * ProcGlobal should be set up already (if we are a backend, we inherit
622
   * this by fork() or EXEC_BACKEND mechanism from the postmaster).
623
   */
624
0
  if (ProcGlobal == NULL || AuxiliaryProcs == NULL)
625
0
    elog(PANIC, "proc header uninitialized");
626
627
0
  if (MyProc != NULL)
628
0
    elog(ERROR, "you already exist");
629
630
0
  if (IsUnderPostmaster)
631
0
    RegisterPostmasterChildActive();
632
633
  /*
634
   * We use the ProcStructLock to protect assignment and releasing of
635
   * AuxiliaryProcs entries.
636
   *
637
   * While we are holding the ProcStructLock, also copy the current shared
638
   * estimate of spins_per_delay to local storage.
639
   */
640
0
  SpinLockAcquire(ProcStructLock);
641
642
0
  set_spins_per_delay(ProcGlobal->spins_per_delay);
643
644
  /*
645
   * Find a free auxproc ... *big* trouble if there isn't one ...
646
   */
647
0
  for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++)
648
0
  {
649
0
    auxproc = &AuxiliaryProcs[proctype];
650
0
    if (auxproc->pid == 0)
651
0
      break;
652
0
  }
653
0
  if (proctype >= NUM_AUXILIARY_PROCS)
654
0
  {
655
0
    SpinLockRelease(ProcStructLock);
656
0
    elog(FATAL, "all AuxiliaryProcs are in use");
657
0
  }
658
659
  /* Mark auxiliary proc as in use by me */
660
  /* use volatile pointer to prevent code rearrangement */
661
0
  ((volatile PGPROC *) auxproc)->pid = MyProcPid;
662
663
0
  SpinLockRelease(ProcStructLock);
664
665
0
  MyProc = auxproc;
666
0
  MyProcNumber = GetNumberFromPGProc(MyProc);
667
668
  /*
669
   * Initialize all fields of MyProc, except for those previously
670
   * initialized by InitProcGlobal.
671
   */
672
0
  dlist_node_init(&MyProc->links);
673
0
  MyProc->waitStatus = PROC_WAIT_STATUS_OK;
674
0
  MyProc->fpVXIDLock = false;
675
0
  MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
676
0
  MyProc->xid = InvalidTransactionId;
677
0
  MyProc->xmin = InvalidTransactionId;
678
0
  MyProc->vxid.procNumber = INVALID_PROC_NUMBER;
679
0
  MyProc->vxid.lxid = InvalidLocalTransactionId;
680
0
  MyProc->databaseId = InvalidOid;
681
0
  MyProc->roleId = InvalidOid;
682
0
  MyProc->tempNamespaceId = InvalidOid;
683
0
  MyProc->isRegularBackend = false;
684
0
  MyProc->delayChkptFlags = 0;
685
0
  MyProc->statusFlags = 0;
686
0
  MyProc->lwWaiting = LW_WS_NOT_WAITING;
687
0
  MyProc->lwWaitMode = 0;
688
0
  MyProc->waitLock = NULL;
689
0
  MyProc->waitProcLock = NULL;
690
0
  pg_atomic_write_u64(&MyProc->waitStart, 0);
691
#ifdef USE_ASSERT_CHECKING
692
  {
693
    int     i;
694
695
    /* Last process should have released all locks. */
696
    for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
697
      Assert(dlist_is_empty(&(MyProc->myProcLocks[i])));
698
  }
699
#endif
700
701
  /*
702
   * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
703
   * on it.  That allows us to repoint the process latch, which so far
704
   * points to the process-local one, to the shared one.
705
   */
706
0
  OwnLatch(&MyProc->procLatch);
707
0
  SwitchToSharedLatch();
708
709
  /* now that we have a proc, report wait events to shared memory */
710
0
  pgstat_set_wait_event_storage(&MyProc->wait_event_info);
711
712
  /* Check that group locking fields are in a proper initial state. */
713
0
  Assert(MyProc->lockGroupLeader == NULL);
714
0
  Assert(dlist_is_empty(&MyProc->lockGroupMembers));
715
716
  /*
717
   * We might be reusing a semaphore that belonged to a failed process. So
718
   * be careful and reinitialize its value here.  (This is not strictly
719
   * necessary anymore, but seems like a good idea for cleanliness.)
720
   */
721
0
  PGSemaphoreReset(MyProc->sem);
722
723
  /*
724
   * Arrange to clean up at process exit.
725
   */
726
0
  on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype));
727
728
  /*
729
   * Now that we have a PGPROC, we could try to acquire lightweight locks.
730
   * Initialize local state needed for them.  (Heavyweight locks cannot be
731
   * acquired in aux processes.)
732
   */
733
0
  InitLWLockAccess();
734
735
#ifdef EXEC_BACKEND
736
737
  /*
738
   * Initialize backend-local pointers to all the shared data structures.
739
   * (We couldn't do this until now because it needs LWLocks.)
740
   */
741
  if (IsUnderPostmaster)
742
    AttachSharedMemoryStructs();
743
#endif
744
0
}
745
746
/*
747
 * Used from bufmgr to share the value of the buffer that Startup waits on,
748
 * or to reset the value to "not waiting" (-1). This allows processing
749
 * of recovery conflicts for buffer pins. Set is made before backends look
750
 * at this value, so locking is not required, especially since the set is
751
 * an atomic integer set operation.
752
 */
753
void
754
SetStartupBufferPinWaitBufId(int bufid)
755
0
{
756
  /* use volatile pointer to prevent code rearrangement */
757
0
  volatile PROC_HDR *procglobal = ProcGlobal;
758
759
0
  procglobal->startupBufferPinWaitBufId = bufid;
760
0
}
761
762
/*
763
 * Used by backends when they receive a request to check for buffer pin waits.
764
 */
765
int
766
GetStartupBufferPinWaitBufId(void)
767
0
{
768
  /* use volatile pointer to prevent code rearrangement */
769
0
  volatile PROC_HDR *procglobal = ProcGlobal;
770
771
0
  return procglobal->startupBufferPinWaitBufId;
772
0
}
773
774
/*
775
 * Check whether there are at least N free PGPROC objects.  If false is
776
 * returned, *nfree will be set to the number of free PGPROC objects.
777
 * Otherwise, *nfree will be set to n.
778
 *
779
 * Note: this is designed on the assumption that N will generally be small.
780
 */
781
bool
782
HaveNFreeProcs(int n, int *nfree)
783
0
{
784
0
  dlist_iter  iter;
785
786
0
  Assert(n > 0);
787
0
  Assert(nfree);
788
789
0
  SpinLockAcquire(ProcStructLock);
790
791
0
  *nfree = 0;
792
0
  dlist_foreach(iter, &ProcGlobal->freeProcs)
793
0
  {
794
0
    (*nfree)++;
795
0
    if (*nfree == n)
796
0
      break;
797
0
  }
798
799
0
  SpinLockRelease(ProcStructLock);
800
801
0
  return (*nfree == n);
802
0
}
803
804
/*
805
 * Cancel any pending wait for lock, when aborting a transaction, and revert
806
 * any strong lock count acquisition for a lock being acquired.
807
 *
808
 * (Normally, this would only happen if we accept a cancel/die
809
 * interrupt while waiting; but an ereport(ERROR) before or during the lock
810
 * wait is within the realm of possibility, too.)
811
 */
812
void
813
LockErrorCleanup(void)
814
0
{
815
0
  LOCALLOCK  *lockAwaited;
816
0
  LWLock     *partitionLock;
817
0
  DisableTimeoutParams timeouts[2];
818
819
0
  HOLD_INTERRUPTS();
820
821
0
  AbortStrongLockAcquire();
822
823
  /* Nothing to do if we weren't waiting for a lock */
824
0
  lockAwaited = GetAwaitedLock();
825
0
  if (lockAwaited == NULL)
826
0
  {
827
0
    RESUME_INTERRUPTS();
828
0
    return;
829
0
  }
830
831
  /*
832
   * Turn off the deadlock and lock timeout timers, if they are still
833
   * running (see ProcSleep).  Note we must preserve the LOCK_TIMEOUT
834
   * indicator flag, since this function is executed before
835
   * ProcessInterrupts when responding to SIGINT; else we'd lose the
836
   * knowledge that the SIGINT came from a lock timeout and not an external
837
   * source.
838
   */
839
0
  timeouts[0].id = DEADLOCK_TIMEOUT;
840
0
  timeouts[0].keep_indicator = false;
841
0
  timeouts[1].id = LOCK_TIMEOUT;
842
0
  timeouts[1].keep_indicator = true;
843
0
  disable_timeouts(timeouts, 2);
844
845
  /* Unlink myself from the wait queue, if on it (might not be anymore!) */
846
0
  partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
847
0
  LWLockAcquire(partitionLock, LW_EXCLUSIVE);
848
849
0
  if (!dlist_node_is_detached(&MyProc->links))
850
0
  {
851
    /* We could not have been granted the lock yet */
852
0
    RemoveFromWaitQueue(MyProc, lockAwaited->hashcode);
853
0
  }
854
0
  else
855
0
  {
856
    /*
857
     * Somebody kicked us off the lock queue already.  Perhaps they
858
     * granted us the lock, or perhaps they detected a deadlock. If they
859
     * did grant us the lock, we'd better remember it in our local lock
860
     * table.
861
     */
862
0
    if (MyProc->waitStatus == PROC_WAIT_STATUS_OK)
863
0
      GrantAwaitedLock();
864
0
  }
865
866
0
  ResetAwaitedLock();
867
868
0
  LWLockRelease(partitionLock);
869
870
0
  RESUME_INTERRUPTS();
871
0
}
872
873
874
/*
875
 * ProcReleaseLocks() -- release locks associated with current transaction
876
 *      at main transaction commit or abort
877
 *
878
 * At main transaction commit, we release standard locks except session locks.
879
 * At main transaction abort, we release all locks including session locks.
880
 *
881
 * Advisory locks are released only if they are transaction-level;
882
 * session-level holds remain, whether this is a commit or not.
883
 *
884
 * At subtransaction commit, we don't release any locks (so this func is not
885
 * needed at all); we will defer the releasing to the parent transaction.
886
 * At subtransaction abort, we release all locks held by the subtransaction;
887
 * this is implemented by retail releasing of the locks under control of
888
 * the ResourceOwner mechanism.
889
 */
890
void
891
ProcReleaseLocks(bool isCommit)
892
0
{
893
0
  if (!MyProc)
894
0
    return;
895
  /* If waiting, get off wait queue (should only be needed after error) */
896
0
  LockErrorCleanup();
897
  /* Release standard locks, including session-level if aborting */
898
0
  LockReleaseAll(DEFAULT_LOCKMETHOD, !isCommit);
899
  /* Release transaction-level advisory locks */
900
0
  LockReleaseAll(USER_LOCKMETHOD, false);
901
0
}
902
903
904
/*
905
 * RemoveProcFromArray() -- Remove this process from the shared ProcArray.
906
 */
907
static void
908
RemoveProcFromArray(int code, Datum arg)
909
0
{
910
0
  Assert(MyProc != NULL);
911
0
  ProcArrayRemove(MyProc, InvalidTransactionId);
912
0
}
913
914
/*
915
 * ProcKill() -- Destroy the per-proc data structure for
916
 *    this process. Release any of its held LW locks.
917
 */
918
static void
919
ProcKill(int code, Datum arg)
920
0
{
921
0
  PGPROC     *proc;
922
0
  dlist_head *procgloballist;
923
924
0
  Assert(MyProc != NULL);
925
926
  /* not safe if forked by system(), etc. */
927
0
  if (MyProc->pid != (int) getpid())
928
0
    elog(PANIC, "ProcKill() called in child process");
929
930
  /* Make sure we're out of the sync rep lists */
931
0
  SyncRepCleanupAtProcExit();
932
933
#ifdef USE_ASSERT_CHECKING
934
  {
935
    int     i;
936
937
    /* Last process should have released all locks. */
938
    for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
939
      Assert(dlist_is_empty(&(MyProc->myProcLocks[i])));
940
  }
941
#endif
942
943
  /*
944
   * Release any LW locks I am holding.  There really shouldn't be any, but
945
   * it's cheap to check again before we cut the knees off the LWLock
946
   * facility by releasing our PGPROC ...
947
   */
948
0
  LWLockReleaseAll();
949
950
  /* Cancel any pending condition variable sleep, too */
951
0
  ConditionVariableCancelSleep();
952
953
  /*
954
   * Detach from any lock group of which we are a member.  If the leader
955
   * exits before all other group members, its PGPROC will remain allocated
956
   * until the last group process exits; that process must return the
957
   * leader's PGPROC to the appropriate list.
958
   */
959
0
  if (MyProc->lockGroupLeader != NULL)
960
0
  {
961
0
    PGPROC     *leader = MyProc->lockGroupLeader;
962
0
    LWLock     *leader_lwlock = LockHashPartitionLockByProc(leader);
963
964
0
    LWLockAcquire(leader_lwlock, LW_EXCLUSIVE);
965
0
    Assert(!dlist_is_empty(&leader->lockGroupMembers));
966
0
    dlist_delete(&MyProc->lockGroupLink);
967
0
    if (dlist_is_empty(&leader->lockGroupMembers))
968
0
    {
969
0
      leader->lockGroupLeader = NULL;
970
0
      if (leader != MyProc)
971
0
      {
972
0
        procgloballist = leader->procgloballist;
973
974
        /* Leader exited first; return its PGPROC. */
975
0
        SpinLockAcquire(ProcStructLock);
976
0
        dlist_push_head(procgloballist, &leader->links);
977
0
        SpinLockRelease(ProcStructLock);
978
0
      }
979
0
    }
980
0
    else if (leader != MyProc)
981
0
      MyProc->lockGroupLeader = NULL;
982
0
    LWLockRelease(leader_lwlock);
983
0
  }
984
985
  /*
986
   * Reset MyLatch to the process local one.  This is so that signal
987
   * handlers et al can continue using the latch after the shared latch
988
   * isn't ours anymore.
989
   *
990
   * Similarly, stop reporting wait events to MyProc->wait_event_info.
991
   *
992
   * After that clear MyProc and disown the shared latch.
993
   */
994
0
  SwitchBackToLocalLatch();
995
0
  pgstat_reset_wait_event_storage();
996
997
0
  proc = MyProc;
998
0
  MyProc = NULL;
999
0
  MyProcNumber = INVALID_PROC_NUMBER;
1000
0
  DisownLatch(&proc->procLatch);
1001
1002
  /* Mark the proc no longer in use */
1003
0
  proc->pid = 0;
1004
0
  proc->vxid.procNumber = INVALID_PROC_NUMBER;
1005
0
  proc->vxid.lxid = InvalidTransactionId;
1006
1007
0
  procgloballist = proc->procgloballist;
1008
0
  SpinLockAcquire(ProcStructLock);
1009
1010
  /*
1011
   * If we're still a member of a locking group, that means we're a leader
1012
   * which has somehow exited before its children.  The last remaining child
1013
   * will release our PGPROC.  Otherwise, release it now.
1014
   */
1015
0
  if (proc->lockGroupLeader == NULL)
1016
0
  {
1017
    /* Since lockGroupLeader is NULL, lockGroupMembers should be empty. */
1018
0
    Assert(dlist_is_empty(&proc->lockGroupMembers));
1019
1020
    /* Return PGPROC structure (and semaphore) to appropriate freelist */
1021
0
    dlist_push_tail(procgloballist, &proc->links);
1022
0
  }
1023
1024
  /* Update shared estimate of spins_per_delay */
1025
0
  ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
1026
1027
0
  SpinLockRelease(ProcStructLock);
1028
1029
  /* wake autovac launcher if needed -- see comments in FreeWorkerInfo */
1030
0
  if (AutovacuumLauncherPid != 0)
1031
0
    kill(AutovacuumLauncherPid, SIGUSR2);
1032
0
}
1033
1034
/*
1035
 * AuxiliaryProcKill() -- Cut-down version of ProcKill for auxiliary
1036
 *    processes (bgwriter, etc).  The PGPROC and sema are not released, only
1037
 *    marked as not-in-use.
1038
 */
1039
static void
1040
AuxiliaryProcKill(int code, Datum arg)
1041
0
{
1042
0
  int     proctype = DatumGetInt32(arg);
1043
0
  PGPROC     *auxproc PG_USED_FOR_ASSERTS_ONLY;
1044
0
  PGPROC     *proc;
1045
1046
0
  Assert(proctype >= 0 && proctype < NUM_AUXILIARY_PROCS);
1047
1048
  /* not safe if forked by system(), etc. */
1049
0
  if (MyProc->pid != (int) getpid())
1050
0
    elog(PANIC, "AuxiliaryProcKill() called in child process");
1051
1052
0
  auxproc = &AuxiliaryProcs[proctype];
1053
1054
0
  Assert(MyProc == auxproc);
1055
1056
  /* Release any LW locks I am holding (see notes above) */
1057
0
  LWLockReleaseAll();
1058
1059
  /* Cancel any pending condition variable sleep, too */
1060
0
  ConditionVariableCancelSleep();
1061
1062
  /* look at the equivalent ProcKill() code for comments */
1063
0
  SwitchBackToLocalLatch();
1064
0
  pgstat_reset_wait_event_storage();
1065
1066
0
  proc = MyProc;
1067
0
  MyProc = NULL;
1068
0
  MyProcNumber = INVALID_PROC_NUMBER;
1069
0
  DisownLatch(&proc->procLatch);
1070
1071
0
  SpinLockAcquire(ProcStructLock);
1072
1073
  /* Mark auxiliary proc no longer in use */
1074
0
  proc->pid = 0;
1075
0
  proc->vxid.procNumber = INVALID_PROC_NUMBER;
1076
0
  proc->vxid.lxid = InvalidTransactionId;
1077
1078
  /* Update shared estimate of spins_per_delay */
1079
0
  ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
1080
1081
0
  SpinLockRelease(ProcStructLock);
1082
0
}
1083
1084
/*
1085
 * AuxiliaryPidGetProc -- get PGPROC for an auxiliary process
1086
 * given its PID
1087
 *
1088
 * Returns NULL if not found.
1089
 */
1090
PGPROC *
1091
AuxiliaryPidGetProc(int pid)
1092
0
{
1093
0
  PGPROC     *result = NULL;
1094
0
  int     index;
1095
1096
0
  if (pid == 0)       /* never match dummy PGPROCs */
1097
0
    return NULL;
1098
1099
0
  for (index = 0; index < NUM_AUXILIARY_PROCS; index++)
1100
0
  {
1101
0
    PGPROC     *proc = &AuxiliaryProcs[index];
1102
1103
0
    if (proc->pid == pid)
1104
0
    {
1105
0
      result = proc;
1106
0
      break;
1107
0
    }
1108
0
  }
1109
0
  return result;
1110
0
}
1111
1112
1113
/*
1114
 * JoinWaitQueue -- join the wait queue on the specified lock
1115
 *
1116
 * It's not actually guaranteed that we need to wait when this function is
1117
 * called, because it could be that when we try to find a position at which
1118
 * to insert ourself into the wait queue, we discover that we must be inserted
1119
 * ahead of everyone who wants a lock that conflict with ours. In that case,
1120
 * we get the lock immediately. Because of this, it's sensible for this function
1121
 * to have a dontWait argument, despite the name.
1122
 *
1123
 * On entry, the caller has already set up LOCK and PROCLOCK entries to
1124
 * reflect that we have "requested" the lock.  The caller is responsible for
1125
 * cleaning that up, if we end up not joining the queue after all.
1126
 *
1127
 * The lock table's partition lock must be held at entry, and is still held
1128
 * at exit.  The caller must release it before calling ProcSleep().
1129
 *
1130
 * Result is one of the following:
1131
 *
1132
 *  PROC_WAIT_STATUS_OK       - lock was immediately granted
1133
 *  PROC_WAIT_STATUS_WAITING  - joined the wait queue; call ProcSleep()
1134
 *  PROC_WAIT_STATUS_ERROR    - immediate deadlock was detected, or would
1135
 *                              need to wait and dontWait == true
1136
 *
1137
 * NOTES: The process queue is now a priority queue for locking.
1138
 */
1139
ProcWaitStatus
1140
JoinWaitQueue(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
1141
0
{
1142
0
  LOCKMODE  lockmode = locallock->tag.mode;
1143
0
  LOCK     *lock = locallock->lock;
1144
0
  PROCLOCK   *proclock = locallock->proclock;
1145
0
  uint32    hashcode = locallock->hashcode;
1146
0
  LWLock     *partitionLock PG_USED_FOR_ASSERTS_ONLY = LockHashPartitionLock(hashcode);
1147
0
  dclist_head *waitQueue = &lock->waitProcs;
1148
0
  PGPROC     *insert_before = NULL;
1149
0
  LOCKMASK  myProcHeldLocks;
1150
0
  LOCKMASK  myHeldLocks;
1151
0
  bool    early_deadlock = false;
1152
0
  PGPROC     *leader = MyProc->lockGroupLeader;
1153
1154
0
  Assert(LWLockHeldByMeInMode(partitionLock, LW_EXCLUSIVE));
1155
1156
  /*
1157
   * Set bitmask of locks this process already holds on this object.
1158
   */
1159
0
  myHeldLocks = MyProc->heldLocks = proclock->holdMask;
1160
1161
  /*
1162
   * Determine which locks we're already holding.
1163
   *
1164
   * If group locking is in use, locks held by members of my locking group
1165
   * need to be included in myHeldLocks.  This is not required for relation
1166
   * extension lock which conflict among group members. However, including
1167
   * them in myHeldLocks will give group members the priority to get those
1168
   * locks as compared to other backends which are also trying to acquire
1169
   * those locks.  OTOH, we can avoid giving priority to group members for
1170
   * that kind of locks, but there doesn't appear to be a clear advantage of
1171
   * the same.
1172
   */
1173
0
  myProcHeldLocks = proclock->holdMask;
1174
0
  myHeldLocks = myProcHeldLocks;
1175
0
  if (leader != NULL)
1176
0
  {
1177
0
    dlist_iter  iter;
1178
1179
0
    dlist_foreach(iter, &lock->procLocks)
1180
0
    {
1181
0
      PROCLOCK   *otherproclock;
1182
1183
0
      otherproclock = dlist_container(PROCLOCK, lockLink, iter.cur);
1184
1185
0
      if (otherproclock->groupLeader == leader)
1186
0
        myHeldLocks |= otherproclock->holdMask;
1187
0
    }
1188
0
  }
1189
1190
  /*
1191
   * Determine where to add myself in the wait queue.
1192
   *
1193
   * Normally I should go at the end of the queue.  However, if I already
1194
   * hold locks that conflict with the request of any previous waiter, put
1195
   * myself in the queue just in front of the first such waiter. This is not
1196
   * a necessary step, since deadlock detection would move me to before that
1197
   * waiter anyway; but it's relatively cheap to detect such a conflict
1198
   * immediately, and avoid delaying till deadlock timeout.
1199
   *
1200
   * Special case: if I find I should go in front of some waiter, check to
1201
   * see if I conflict with already-held locks or the requests before that
1202
   * waiter.  If not, then just grant myself the requested lock immediately.
1203
   * This is the same as the test for immediate grant in LockAcquire, except
1204
   * we are only considering the part of the wait queue before my insertion
1205
   * point.
1206
   */
1207
0
  if (myHeldLocks != 0 && !dclist_is_empty(waitQueue))
1208
0
  {
1209
0
    LOCKMASK  aheadRequests = 0;
1210
0
    dlist_iter  iter;
1211
1212
0
    dclist_foreach(iter, waitQueue)
1213
0
    {
1214
0
      PGPROC     *proc = dlist_container(PGPROC, links, iter.cur);
1215
1216
      /*
1217
       * If we're part of the same locking group as this waiter, its
1218
       * locks neither conflict with ours nor contribute to
1219
       * aheadRequests.
1220
       */
1221
0
      if (leader != NULL && leader == proc->lockGroupLeader)
1222
0
        continue;
1223
1224
      /* Must he wait for me? */
1225
0
      if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
1226
0
      {
1227
        /* Must I wait for him ? */
1228
0
        if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
1229
0
        {
1230
          /*
1231
           * Yes, so we have a deadlock.  Easiest way to clean up
1232
           * correctly is to call RemoveFromWaitQueue(), but we
1233
           * can't do that until we are *on* the wait queue. So, set
1234
           * a flag to check below, and break out of loop.  Also,
1235
           * record deadlock info for later message.
1236
           */
1237
0
          RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
1238
0
          early_deadlock = true;
1239
0
          break;
1240
0
        }
1241
        /* I must go before this waiter.  Check special case. */
1242
0
        if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
1243
0
          !LockCheckConflicts(lockMethodTable, lockmode, lock,
1244
0
                    proclock))
1245
0
        {
1246
          /* Skip the wait and just grant myself the lock. */
1247
0
          GrantLock(lock, proclock, lockmode);
1248
0
          return PROC_WAIT_STATUS_OK;
1249
0
        }
1250
1251
        /* Put myself into wait queue before conflicting process */
1252
0
        insert_before = proc;
1253
0
        break;
1254
0
      }
1255
      /* Nope, so advance to next waiter */
1256
0
      aheadRequests |= LOCKBIT_ON(proc->waitLockMode);
1257
0
    }
1258
0
  }
1259
1260
  /*
1261
   * If we detected deadlock, give up without waiting.  This must agree with
1262
   * CheckDeadLock's recovery code.
1263
   */
1264
0
  if (early_deadlock)
1265
0
    return PROC_WAIT_STATUS_ERROR;
1266
1267
  /*
1268
   * At this point we know that we'd really need to sleep. If we've been
1269
   * commanded not to do that, bail out.
1270
   */
1271
0
  if (dontWait)
1272
0
    return PROC_WAIT_STATUS_ERROR;
1273
1274
  /*
1275
   * Insert self into queue, at the position determined above.
1276
   */
1277
0
  if (insert_before)
1278
0
    dclist_insert_before(waitQueue, &insert_before->links, &MyProc->links);
1279
0
  else
1280
0
    dclist_push_tail(waitQueue, &MyProc->links);
1281
1282
0
  lock->waitMask |= LOCKBIT_ON(lockmode);
1283
1284
  /* Set up wait information in PGPROC object, too */
1285
0
  MyProc->heldLocks = myProcHeldLocks;
1286
0
  MyProc->waitLock = lock;
1287
0
  MyProc->waitProcLock = proclock;
1288
0
  MyProc->waitLockMode = lockmode;
1289
1290
0
  MyProc->waitStatus = PROC_WAIT_STATUS_WAITING;
1291
1292
0
  return PROC_WAIT_STATUS_WAITING;
1293
0
}
1294
1295
/*
1296
 * ProcSleep -- put process to sleep waiting on lock
1297
 *
1298
 * This must be called when JoinWaitQueue() returns PROC_WAIT_STATUS_WAITING.
1299
 * Returns after the lock has been granted, or if a deadlock is detected.  Can
1300
 * also bail out with ereport(ERROR), if some other error condition, or a
1301
 * timeout or cancellation is triggered.
1302
 *
1303
 * Result is one of the following:
1304
 *
1305
 *  PROC_WAIT_STATUS_OK      - lock was granted
1306
 *  PROC_WAIT_STATUS_ERROR   - a deadlock was detected
1307
 */
1308
ProcWaitStatus
1309
ProcSleep(LOCALLOCK *locallock)
1310
0
{
1311
0
  LOCKMODE  lockmode = locallock->tag.mode;
1312
0
  LOCK     *lock = locallock->lock;
1313
0
  uint32    hashcode = locallock->hashcode;
1314
0
  LWLock     *partitionLock = LockHashPartitionLock(hashcode);
1315
0
  TimestampTz standbyWaitStart = 0;
1316
0
  bool    allow_autovacuum_cancel = true;
1317
0
  bool    logged_recovery_conflict = false;
1318
0
  ProcWaitStatus myWaitStatus;
1319
1320
  /* The caller must've armed the on-error cleanup mechanism */
1321
0
  Assert(GetAwaitedLock() == locallock);
1322
0
  Assert(!LWLockHeldByMe(partitionLock));
1323
1324
  /*
1325
   * Now that we will successfully clean up after an ereport, it's safe to
1326
   * check to see if there's a buffer pin deadlock against the Startup
1327
   * process.  Of course, that's only necessary if we're doing Hot Standby
1328
   * and are not the Startup process ourselves.
1329
   */
1330
0
  if (RecoveryInProgress() && !InRecovery)
1331
0
    CheckRecoveryConflictDeadlock();
1332
1333
  /* Reset deadlock_state before enabling the timeout handler */
1334
0
  deadlock_state = DS_NOT_YET_CHECKED;
1335
0
  got_deadlock_timeout = false;
1336
1337
  /*
1338
   * Set timer so we can wake up after awhile and check for a deadlock. If a
1339
   * deadlock is detected, the handler sets MyProc->waitStatus =
1340
   * PROC_WAIT_STATUS_ERROR, allowing us to know that we must report failure
1341
   * rather than success.
1342
   *
1343
   * By delaying the check until we've waited for a bit, we can avoid
1344
   * running the rather expensive deadlock-check code in most cases.
1345
   *
1346
   * If LockTimeout is set, also enable the timeout for that.  We can save a
1347
   * few cycles by enabling both timeout sources in one call.
1348
   *
1349
   * If InHotStandby we set lock waits slightly later for clarity with other
1350
   * code.
1351
   */
1352
0
  if (!InHotStandby)
1353
0
  {
1354
0
    if (LockTimeout > 0)
1355
0
    {
1356
0
      EnableTimeoutParams timeouts[2];
1357
1358
0
      timeouts[0].id = DEADLOCK_TIMEOUT;
1359
0
      timeouts[0].type = TMPARAM_AFTER;
1360
0
      timeouts[0].delay_ms = DeadlockTimeout;
1361
0
      timeouts[1].id = LOCK_TIMEOUT;
1362
0
      timeouts[1].type = TMPARAM_AFTER;
1363
0
      timeouts[1].delay_ms = LockTimeout;
1364
0
      enable_timeouts(timeouts, 2);
1365
0
    }
1366
0
    else
1367
0
      enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
1368
1369
    /*
1370
     * Use the current time obtained for the deadlock timeout timer as
1371
     * waitStart (i.e., the time when this process started waiting for the
1372
     * lock). Since getting the current time newly can cause overhead, we
1373
     * reuse the already-obtained time to avoid that overhead.
1374
     *
1375
     * Note that waitStart is updated without holding the lock table's
1376
     * partition lock, to avoid the overhead by additional lock
1377
     * acquisition. This can cause "waitstart" in pg_locks to become NULL
1378
     * for a very short period of time after the wait started even though
1379
     * "granted" is false. This is OK in practice because we can assume
1380
     * that users are likely to look at "waitstart" when waiting for the
1381
     * lock for a long time.
1382
     */
1383
0
    pg_atomic_write_u64(&MyProc->waitStart,
1384
0
              get_timeout_start_time(DEADLOCK_TIMEOUT));
1385
0
  }
1386
0
  else if (log_recovery_conflict_waits)
1387
0
  {
1388
    /*
1389
     * Set the wait start timestamp if logging is enabled and in hot
1390
     * standby.
1391
     */
1392
0
    standbyWaitStart = GetCurrentTimestamp();
1393
0
  }
1394
1395
  /*
1396
   * If somebody wakes us between LWLockRelease and WaitLatch, the latch
1397
   * will not wait. But a set latch does not necessarily mean that the lock
1398
   * is free now, as there are many other sources for latch sets than
1399
   * somebody releasing the lock.
1400
   *
1401
   * We process interrupts whenever the latch has been set, so cancel/die
1402
   * interrupts are processed quickly. This means we must not mind losing
1403
   * control to a cancel/die interrupt here.  We don't, because we have no
1404
   * shared-state-change work to do after being granted the lock (the
1405
   * grantor did it all).  We do have to worry about canceling the deadlock
1406
   * timeout and updating the locallock table, but if we lose control to an
1407
   * error, LockErrorCleanup will fix that up.
1408
   */
1409
0
  do
1410
0
  {
1411
0
    if (InHotStandby)
1412
0
    {
1413
0
      bool    maybe_log_conflict =
1414
0
        (standbyWaitStart != 0 && !logged_recovery_conflict);
1415
1416
      /* Set a timer and wait for that or for the lock to be granted */
1417
0
      ResolveRecoveryConflictWithLock(locallock->tag.lock,
1418
0
                      maybe_log_conflict);
1419
1420
      /*
1421
       * Emit the log message if the startup process is waiting longer
1422
       * than deadlock_timeout for recovery conflict on lock.
1423
       */
1424
0
      if (maybe_log_conflict)
1425
0
      {
1426
0
        TimestampTz now = GetCurrentTimestamp();
1427
1428
0
        if (TimestampDifferenceExceeds(standbyWaitStart, now,
1429
0
                         DeadlockTimeout))
1430
0
        {
1431
0
          VirtualTransactionId *vxids;
1432
0
          int     cnt;
1433
1434
0
          vxids = GetLockConflicts(&locallock->tag.lock,
1435
0
                       AccessExclusiveLock, &cnt);
1436
1437
          /*
1438
           * Log the recovery conflict and the list of PIDs of
1439
           * backends holding the conflicting lock. Note that we do
1440
           * logging even if there are no such backends right now
1441
           * because the startup process here has already waited
1442
           * longer than deadlock_timeout.
1443
           */
1444
0
          LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
1445
0
                    standbyWaitStart, now,
1446
0
                    cnt > 0 ? vxids : NULL, true);
1447
0
          logged_recovery_conflict = true;
1448
0
        }
1449
0
      }
1450
0
    }
1451
0
    else
1452
0
    {
1453
0
      (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,
1454
0
               PG_WAIT_LOCK | locallock->tag.lock.locktag_type);
1455
0
      ResetLatch(MyLatch);
1456
      /* check for deadlocks first, as that's probably log-worthy */
1457
0
      if (got_deadlock_timeout)
1458
0
      {
1459
0
        CheckDeadLock();
1460
0
        got_deadlock_timeout = false;
1461
0
      }
1462
0
      CHECK_FOR_INTERRUPTS();
1463
0
    }
1464
1465
    /*
1466
     * waitStatus could change from PROC_WAIT_STATUS_WAITING to something
1467
     * else asynchronously.  Read it just once per loop to prevent
1468
     * surprising behavior (such as missing log messages).
1469
     */
1470
0
    myWaitStatus = *((volatile ProcWaitStatus *) &MyProc->waitStatus);
1471
1472
    /*
1473
     * If we are not deadlocked, but are waiting on an autovacuum-induced
1474
     * task, send a signal to interrupt it.
1475
     */
1476
0
    if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel)
1477
0
    {
1478
0
      PGPROC     *autovac = GetBlockingAutoVacuumPgproc();
1479
0
      uint8   statusFlags;
1480
0
      uint8   lockmethod_copy;
1481
0
      LOCKTAG   locktag_copy;
1482
1483
      /*
1484
       * Grab info we need, then release lock immediately.  Note this
1485
       * coding means that there is a tiny chance that the process
1486
       * terminates its current transaction and starts a different one
1487
       * before we have a change to send the signal; the worst possible
1488
       * consequence is that a for-wraparound vacuum is canceled.  But
1489
       * that could happen in any case unless we were to do kill() with
1490
       * the lock held, which is much more undesirable.
1491
       */
1492
0
      LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1493
0
      statusFlags = ProcGlobal->statusFlags[autovac->pgxactoff];
1494
0
      lockmethod_copy = lock->tag.locktag_lockmethodid;
1495
0
      locktag_copy = lock->tag;
1496
0
      LWLockRelease(ProcArrayLock);
1497
1498
      /*
1499
       * Only do it if the worker is not working to protect against Xid
1500
       * wraparound.
1501
       */
1502
0
      if ((statusFlags & PROC_IS_AUTOVACUUM) &&
1503
0
        !(statusFlags & PROC_VACUUM_FOR_WRAPAROUND))
1504
0
      {
1505
0
        int     pid = autovac->pid;
1506
1507
        /* report the case, if configured to do so */
1508
0
        if (message_level_is_interesting(DEBUG1))
1509
0
        {
1510
0
          StringInfoData locktagbuf;
1511
0
          StringInfoData logbuf;  /* errdetail for server log */
1512
1513
0
          initStringInfo(&locktagbuf);
1514
0
          initStringInfo(&logbuf);
1515
0
          DescribeLockTag(&locktagbuf, &locktag_copy);
1516
0
          appendStringInfo(&logbuf,
1517
0
                   "Process %d waits for %s on %s.",
1518
0
                   MyProcPid,
1519
0
                   GetLockmodeName(lockmethod_copy, lockmode),
1520
0
                   locktagbuf.data);
1521
1522
0
          ereport(DEBUG1,
1523
0
              (errmsg_internal("sending cancel to blocking autovacuum PID %d",
1524
0
                       pid),
1525
0
               errdetail_log("%s", logbuf.data)));
1526
1527
0
          pfree(locktagbuf.data);
1528
0
          pfree(logbuf.data);
1529
0
        }
1530
1531
        /* send the autovacuum worker Back to Old Kent Road */
1532
0
        if (kill(pid, SIGINT) < 0)
1533
0
        {
1534
          /*
1535
           * There's a race condition here: once we release the
1536
           * ProcArrayLock, it's possible for the autovac worker to
1537
           * close up shop and exit before we can do the kill().
1538
           * Therefore, we do not whinge about no-such-process.
1539
           * Other errors such as EPERM could conceivably happen if
1540
           * the kernel recycles the PID fast enough, but such cases
1541
           * seem improbable enough that it's probably best to issue
1542
           * a warning if we see some other errno.
1543
           */
1544
0
          if (errno != ESRCH)
1545
0
            ereport(WARNING,
1546
0
                (errmsg("could not send signal to process %d: %m",
1547
0
                    pid)));
1548
0
        }
1549
0
      }
1550
1551
      /* prevent signal from being sent again more than once */
1552
0
      allow_autovacuum_cancel = false;
1553
0
    }
1554
1555
    /*
1556
     * If awoken after the deadlock check interrupt has run, and
1557
     * log_lock_waits is on, then report about the wait.
1558
     */
1559
0
    if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED)
1560
0
    {
1561
0
      StringInfoData buf,
1562
0
            lock_waiters_sbuf,
1563
0
            lock_holders_sbuf;
1564
0
      const char *modename;
1565
0
      long    secs;
1566
0
      int     usecs;
1567
0
      long    msecs;
1568
0
      int     lockHoldersNum = 0;
1569
1570
0
      initStringInfo(&buf);
1571
0
      initStringInfo(&lock_waiters_sbuf);
1572
0
      initStringInfo(&lock_holders_sbuf);
1573
1574
0
      DescribeLockTag(&buf, &locallock->tag.lock);
1575
0
      modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
1576
0
                     lockmode);
1577
0
      TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT),
1578
0
                GetCurrentTimestamp(),
1579
0
                &secs, &usecs);
1580
0
      msecs = secs * 1000 + usecs / 1000;
1581
0
      usecs = usecs % 1000;
1582
1583
      /* Gather a list of all lock holders and waiters */
1584
0
      LWLockAcquire(partitionLock, LW_SHARED);
1585
0
      GetLockHoldersAndWaiters(locallock, &lock_holders_sbuf,
1586
0
                   &lock_waiters_sbuf, &lockHoldersNum);
1587
0
      LWLockRelease(partitionLock);
1588
1589
0
      if (deadlock_state == DS_SOFT_DEADLOCK)
1590
0
        ereport(LOG,
1591
0
            (errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms",
1592
0
                MyProcPid, modename, buf.data, msecs, usecs),
1593
0
             (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
1594
0
                         "Processes holding the lock: %s. Wait queue: %s.",
1595
0
                         lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
1596
0
      else if (deadlock_state == DS_HARD_DEADLOCK)
1597
0
      {
1598
        /*
1599
         * This message is a bit redundant with the error that will be
1600
         * reported subsequently, but in some cases the error report
1601
         * might not make it to the log (eg, if it's caught by an
1602
         * exception handler), and we want to ensure all long-wait
1603
         * events get logged.
1604
         */
1605
0
        ereport(LOG,
1606
0
            (errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
1607
0
                MyProcPid, modename, buf.data, msecs, usecs),
1608
0
             (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
1609
0
                         "Processes holding the lock: %s. Wait queue: %s.",
1610
0
                         lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
1611
0
      }
1612
1613
0
      if (myWaitStatus == PROC_WAIT_STATUS_WAITING)
1614
0
        ereport(LOG,
1615
0
            (errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
1616
0
                MyProcPid, modename, buf.data, msecs, usecs),
1617
0
             (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
1618
0
                         "Processes holding the lock: %s. Wait queue: %s.",
1619
0
                         lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
1620
0
      else if (myWaitStatus == PROC_WAIT_STATUS_OK)
1621
0
        ereport(LOG,
1622
0
            (errmsg("process %d acquired %s on %s after %ld.%03d ms",
1623
0
                MyProcPid, modename, buf.data, msecs, usecs)));
1624
0
      else
1625
0
      {
1626
0
        Assert(myWaitStatus == PROC_WAIT_STATUS_ERROR);
1627
1628
        /*
1629
         * Currently, the deadlock checker always kicks its own
1630
         * process, which means that we'll only see
1631
         * PROC_WAIT_STATUS_ERROR when deadlock_state ==
1632
         * DS_HARD_DEADLOCK, and there's no need to print redundant
1633
         * messages.  But for completeness and future-proofing, print
1634
         * a message if it looks like someone else kicked us off the
1635
         * lock.
1636
         */
1637
0
        if (deadlock_state != DS_HARD_DEADLOCK)
1638
0
          ereport(LOG,
1639
0
              (errmsg("process %d failed to acquire %s on %s after %ld.%03d ms",
1640
0
                  MyProcPid, modename, buf.data, msecs, usecs),
1641
0
               (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
1642
0
                           "Processes holding the lock: %s. Wait queue: %s.",
1643
0
                           lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
1644
0
      }
1645
1646
      /*
1647
       * At this point we might still need to wait for the lock. Reset
1648
       * state so we don't print the above messages again.
1649
       */
1650
0
      deadlock_state = DS_NO_DEADLOCK;
1651
1652
0
      pfree(buf.data);
1653
0
      pfree(lock_holders_sbuf.data);
1654
0
      pfree(lock_waiters_sbuf.data);
1655
0
    }
1656
0
  } while (myWaitStatus == PROC_WAIT_STATUS_WAITING);
1657
1658
  /*
1659
   * Disable the timers, if they are still running.  As in LockErrorCleanup,
1660
   * we must preserve the LOCK_TIMEOUT indicator flag: if a lock timeout has
1661
   * already caused QueryCancelPending to become set, we want the cancel to
1662
   * be reported as a lock timeout, not a user cancel.
1663
   */
1664
0
  if (!InHotStandby)
1665
0
  {
1666
0
    if (LockTimeout > 0)
1667
0
    {
1668
0
      DisableTimeoutParams timeouts[2];
1669
1670
0
      timeouts[0].id = DEADLOCK_TIMEOUT;
1671
0
      timeouts[0].keep_indicator = false;
1672
0
      timeouts[1].id = LOCK_TIMEOUT;
1673
0
      timeouts[1].keep_indicator = true;
1674
0
      disable_timeouts(timeouts, 2);
1675
0
    }
1676
0
    else
1677
0
      disable_timeout(DEADLOCK_TIMEOUT, false);
1678
0
  }
1679
1680
  /*
1681
   * Emit the log message if recovery conflict on lock was resolved but the
1682
   * startup process waited longer than deadlock_timeout for it.
1683
   */
1684
0
  if (InHotStandby && logged_recovery_conflict)
1685
0
    LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
1686
0
              standbyWaitStart, GetCurrentTimestamp(),
1687
0
              NULL, false);
1688
1689
  /*
1690
   * We don't have to do anything else, because the awaker did all the
1691
   * necessary updates of the lock table and MyProc. (The caller is
1692
   * responsible for updating the local lock table.)
1693
   */
1694
0
  return myWaitStatus;
1695
0
}
1696
1697
1698
/*
1699
 * ProcWakeup -- wake up a process by setting its latch.
1700
 *
1701
 *   Also remove the process from the wait queue and set its links invalid.
1702
 *
1703
 * The appropriate lock partition lock must be held by caller.
1704
 *
1705
 * XXX: presently, this code is only used for the "success" case, and only
1706
 * works correctly for that case.  To clean up in failure case, would need
1707
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
1708
 * Hence, in practice the waitStatus parameter must be PROC_WAIT_STATUS_OK.
1709
 */
1710
void
1711
ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus)
1712
0
{
1713
0
  if (dlist_node_is_detached(&proc->links))
1714
0
    return;
1715
1716
0
  Assert(proc->waitStatus == PROC_WAIT_STATUS_WAITING);
1717
1718
  /* Remove process from wait queue */
1719
0
  dclist_delete_from_thoroughly(&proc->waitLock->waitProcs, &proc->links);
1720
1721
  /* Clean up process' state and pass it the ok/fail signal */
1722
0
  proc->waitLock = NULL;
1723
0
  proc->waitProcLock = NULL;
1724
0
  proc->waitStatus = waitStatus;
1725
0
  pg_atomic_write_u64(&MyProc->waitStart, 0);
1726
1727
  /* And awaken it */
1728
0
  SetLatch(&proc->procLatch);
1729
0
}
1730
1731
/*
1732
 * ProcLockWakeup -- routine for waking up processes when a lock is
1733
 *    released (or a prior waiter is aborted).  Scan all waiters
1734
 *    for lock, waken any that are no longer blocked.
1735
 *
1736
 * The appropriate lock partition lock must be held by caller.
1737
 */
1738
void
1739
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
1740
0
{
1741
0
  dclist_head *waitQueue = &lock->waitProcs;
1742
0
  LOCKMASK  aheadRequests = 0;
1743
0
  dlist_mutable_iter miter;
1744
1745
0
  if (dclist_is_empty(waitQueue))
1746
0
    return;
1747
1748
0
  dclist_foreach_modify(miter, waitQueue)
1749
0
  {
1750
0
    PGPROC     *proc = dlist_container(PGPROC, links, miter.cur);
1751
0
    LOCKMODE  lockmode = proc->waitLockMode;
1752
1753
    /*
1754
     * Waken if (a) doesn't conflict with requests of earlier waiters, and
1755
     * (b) doesn't conflict with already-held locks.
1756
     */
1757
0
    if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
1758
0
      !LockCheckConflicts(lockMethodTable, lockmode, lock,
1759
0
                proc->waitProcLock))
1760
0
    {
1761
      /* OK to waken */
1762
0
      GrantLock(lock, proc->waitProcLock, lockmode);
1763
      /* removes proc from the lock's waiting process queue */
1764
0
      ProcWakeup(proc, PROC_WAIT_STATUS_OK);
1765
0
    }
1766
0
    else
1767
0
    {
1768
      /*
1769
       * Lock conflicts: Don't wake, but remember requested mode for
1770
       * later checks.
1771
       */
1772
0
      aheadRequests |= LOCKBIT_ON(lockmode);
1773
0
    }
1774
0
  }
1775
0
}
1776
1777
/*
1778
 * CheckDeadLock
1779
 *
1780
 * We only get to this routine, if DEADLOCK_TIMEOUT fired while waiting for a
1781
 * lock to be released by some other process.  Check if there's a deadlock; if
1782
 * not, just return.  (But signal ProcSleep to log a message, if
1783
 * log_lock_waits is true.)  If we have a real deadlock, remove ourselves from
1784
 * the lock's wait queue and signal an error to ProcSleep.
1785
 */
1786
static void
1787
CheckDeadLock(void)
1788
0
{
1789
0
  int     i;
1790
1791
  /*
1792
   * Acquire exclusive lock on the entire shared lock data structures. Must
1793
   * grab LWLocks in partition-number order to avoid LWLock deadlock.
1794
   *
1795
   * Note that the deadlock check interrupt had better not be enabled
1796
   * anywhere that this process itself holds lock partition locks, else this
1797
   * will wait forever.  Also note that LWLockAcquire creates a critical
1798
   * section, so that this routine cannot be interrupted by cancel/die
1799
   * interrupts.
1800
   */
1801
0
  for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
1802
0
    LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
1803
1804
  /*
1805
   * Check to see if we've been awoken by anyone in the interim.
1806
   *
1807
   * If we have, we can return and resume our transaction -- happy day.
1808
   * Before we are awoken the process releasing the lock grants it to us so
1809
   * we know that we don't have to wait anymore.
1810
   *
1811
   * We check by looking to see if we've been unlinked from the wait queue.
1812
   * This is safe because we hold the lock partition lock.
1813
   */
1814
0
  if (MyProc->links.prev == NULL ||
1815
0
    MyProc->links.next == NULL)
1816
0
    goto check_done;
1817
1818
#ifdef LOCK_DEBUG
1819
  if (Debug_deadlocks)
1820
    DumpAllLocks();
1821
#endif
1822
1823
  /* Run the deadlock check, and set deadlock_state for use by ProcSleep */
1824
0
  deadlock_state = DeadLockCheck(MyProc);
1825
1826
0
  if (deadlock_state == DS_HARD_DEADLOCK)
1827
0
  {
1828
    /*
1829
     * Oops.  We have a deadlock.
1830
     *
1831
     * Get this process out of wait state. (Note: we could do this more
1832
     * efficiently by relying on lockAwaited, but use this coding to
1833
     * preserve the flexibility to kill some other transaction than the
1834
     * one detecting the deadlock.)
1835
     *
1836
     * RemoveFromWaitQueue sets MyProc->waitStatus to
1837
     * PROC_WAIT_STATUS_ERROR, so ProcSleep will report an error after we
1838
     * return from the signal handler.
1839
     */
1840
0
    Assert(MyProc->waitLock != NULL);
1841
0
    RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
1842
1843
    /*
1844
     * We're done here.  Transaction abort caused by the error that
1845
     * ProcSleep will raise will cause any other locks we hold to be
1846
     * released, thus allowing other processes to wake up; we don't need
1847
     * to do that here.  NOTE: an exception is that releasing locks we
1848
     * hold doesn't consider the possibility of waiters that were blocked
1849
     * behind us on the lock we just failed to get, and might now be
1850
     * wakable because we're not in front of them anymore.  However,
1851
     * RemoveFromWaitQueue took care of waking up any such processes.
1852
     */
1853
0
  }
1854
1855
  /*
1856
   * And release locks.  We do this in reverse order for two reasons: (1)
1857
   * Anyone else who needs more than one of the locks will be trying to lock
1858
   * them in increasing order; we don't want to release the other process
1859
   * until it can get all the locks it needs. (2) This avoids O(N^2)
1860
   * behavior inside LWLockRelease.
1861
   */
1862
0
check_done:
1863
0
  for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
1864
0
    LWLockRelease(LockHashPartitionLockByIndex(i));
1865
0
}
1866
1867
/*
1868
 * CheckDeadLockAlert - Handle the expiry of deadlock_timeout.
1869
 *
1870
 * NB: Runs inside a signal handler, be careful.
1871
 */
1872
void
1873
CheckDeadLockAlert(void)
1874
0
{
1875
0
  int     save_errno = errno;
1876
1877
0
  got_deadlock_timeout = true;
1878
1879
  /*
1880
   * Have to set the latch again, even if handle_sig_alarm already did. Back
1881
   * then got_deadlock_timeout wasn't yet set... It's unlikely that this
1882
   * ever would be a problem, but setting a set latch again is cheap.
1883
   *
1884
   * Note that, when this function runs inside procsignal_sigusr1_handler(),
1885
   * the handler function sets the latch again after the latch is set here.
1886
   */
1887
0
  SetLatch(MyLatch);
1888
0
  errno = save_errno;
1889
0
}
1890
1891
/*
1892
 * GetLockHoldersAndWaiters - get lock holders and waiters for a lock
1893
 *
1894
 * Fill lock_holders_sbuf and lock_waiters_sbuf with the PIDs of processes holding
1895
 * and waiting for the lock, and set lockHoldersNum to the number of lock holders.
1896
 *
1897
 * The lock table's partition lock must be held on entry and remains held on exit.
1898
 */
1899
void
1900
GetLockHoldersAndWaiters(LOCALLOCK *locallock, StringInfo lock_holders_sbuf,
1901
             StringInfo lock_waiters_sbuf, int *lockHoldersNum)
1902
0
{
1903
0
  dlist_iter  proc_iter;
1904
0
  PROCLOCK   *curproclock;
1905
0
  LOCK     *lock = locallock->lock;
1906
0
  bool    first_holder = true,
1907
0
        first_waiter = true;
1908
1909
#ifdef USE_ASSERT_CHECKING
1910
  {
1911
    uint32    hashcode = locallock->hashcode;
1912
    LWLock     *partitionLock = LockHashPartitionLock(hashcode);
1913
1914
    Assert(LWLockHeldByMe(partitionLock));
1915
  }
1916
#endif
1917
1918
0
  *lockHoldersNum = 0;
1919
1920
  /*
1921
   * Loop over the lock's procLocks to gather a list of all holders and
1922
   * waiters. Thus we will be able to provide more detailed information for
1923
   * lock debugging purposes.
1924
   *
1925
   * lock->procLocks contains all processes which hold or wait for this
1926
   * lock.
1927
   */
1928
0
  dlist_foreach(proc_iter, &lock->procLocks)
1929
0
  {
1930
0
    curproclock =
1931
0
      dlist_container(PROCLOCK, lockLink, proc_iter.cur);
1932
1933
    /*
1934
     * We are a waiter if myProc->waitProcLock == curproclock; we are a
1935
     * holder if it is NULL or something different.
1936
     */
1937
0
    if (curproclock->tag.myProc->waitProcLock == curproclock)
1938
0
    {
1939
0
      if (first_waiter)
1940
0
      {
1941
0
        appendStringInfo(lock_waiters_sbuf, "%d",
1942
0
                 curproclock->tag.myProc->pid);
1943
0
        first_waiter = false;
1944
0
      }
1945
0
      else
1946
0
        appendStringInfo(lock_waiters_sbuf, ", %d",
1947
0
                 curproclock->tag.myProc->pid);
1948
0
    }
1949
0
    else
1950
0
    {
1951
0
      if (first_holder)
1952
0
      {
1953
0
        appendStringInfo(lock_holders_sbuf, "%d",
1954
0
                 curproclock->tag.myProc->pid);
1955
0
        first_holder = false;
1956
0
      }
1957
0
      else
1958
0
        appendStringInfo(lock_holders_sbuf, ", %d",
1959
0
                 curproclock->tag.myProc->pid);
1960
1961
0
      (*lockHoldersNum)++;
1962
0
    }
1963
0
  }
1964
0
}
1965
1966
/*
1967
 * ProcWaitForSignal - wait for a signal from another backend.
1968
 *
1969
 * As this uses the generic process latch the caller has to be robust against
1970
 * unrelated wakeups: Always check that the desired state has occurred, and
1971
 * wait again if not.
1972
 */
1973
void
1974
ProcWaitForSignal(uint32 wait_event_info)
1975
0
{
1976
0
  (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,
1977
0
           wait_event_info);
1978
0
  ResetLatch(MyLatch);
1979
0
  CHECK_FOR_INTERRUPTS();
1980
0
}
1981
1982
/*
1983
 * ProcSendSignal - set the latch of a backend identified by ProcNumber
1984
 */
1985
void
1986
ProcSendSignal(ProcNumber procNumber)
1987
0
{
1988
0
  if (procNumber < 0 || procNumber >= ProcGlobal->allProcCount)
1989
0
    elog(ERROR, "procNumber out of range");
1990
1991
0
  SetLatch(&ProcGlobal->allProcs[procNumber].procLatch);
1992
0
}
1993
1994
/*
1995
 * BecomeLockGroupLeader - designate process as lock group leader
1996
 *
1997
 * Once this function has returned, other processes can join the lock group
1998
 * by calling BecomeLockGroupMember.
1999
 */
2000
void
2001
BecomeLockGroupLeader(void)
2002
0
{
2003
0
  LWLock     *leader_lwlock;
2004
2005
  /* If we already did it, we don't need to do it again. */
2006
0
  if (MyProc->lockGroupLeader == MyProc)
2007
0
    return;
2008
2009
  /* We had better not be a follower. */
2010
0
  Assert(MyProc->lockGroupLeader == NULL);
2011
2012
  /* Create single-member group, containing only ourselves. */
2013
0
  leader_lwlock = LockHashPartitionLockByProc(MyProc);
2014
0
  LWLockAcquire(leader_lwlock, LW_EXCLUSIVE);
2015
0
  MyProc->lockGroupLeader = MyProc;
2016
0
  dlist_push_head(&MyProc->lockGroupMembers, &MyProc->lockGroupLink);
2017
0
  LWLockRelease(leader_lwlock);
2018
0
}
2019
2020
/*
2021
 * BecomeLockGroupMember - designate process as lock group member
2022
 *
2023
 * This is pretty straightforward except for the possibility that the leader
2024
 * whose group we're trying to join might exit before we manage to do so;
2025
 * and the PGPROC might get recycled for an unrelated process.  To avoid
2026
 * that, we require the caller to pass the PID of the intended PGPROC as
2027
 * an interlock.  Returns true if we successfully join the intended lock
2028
 * group, and false if not.
2029
 */
2030
bool
2031
BecomeLockGroupMember(PGPROC *leader, int pid)
2032
0
{
2033
0
  LWLock     *leader_lwlock;
2034
0
  bool    ok = false;
2035
2036
  /* Group leader can't become member of group */
2037
0
  Assert(MyProc != leader);
2038
2039
  /* Can't already be a member of a group */
2040
0
  Assert(MyProc->lockGroupLeader == NULL);
2041
2042
  /* PID must be valid. */
2043
0
  Assert(pid != 0);
2044
2045
  /*
2046
   * Get lock protecting the group fields.  Note LockHashPartitionLockByProc
2047
   * calculates the proc number based on the PGPROC slot without looking at
2048
   * its contents, so we will acquire the correct lock even if the leader
2049
   * PGPROC is in process of being recycled.
2050
   */
2051
0
  leader_lwlock = LockHashPartitionLockByProc(leader);
2052
0
  LWLockAcquire(leader_lwlock, LW_EXCLUSIVE);
2053
2054
  /* Is this the leader we're looking for? */
2055
0
  if (leader->pid == pid && leader->lockGroupLeader == leader)
2056
0
  {
2057
    /* OK, join the group */
2058
0
    ok = true;
2059
0
    MyProc->lockGroupLeader = leader;
2060
0
    dlist_push_tail(&leader->lockGroupMembers, &MyProc->lockGroupLink);
2061
0
  }
2062
0
  LWLockRelease(leader_lwlock);
2063
2064
0
  return ok;
2065
0
}