Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/backend/statistics/attribute_stats.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 * attribute_stats.c
3
 *
4
 *    PostgreSQL relation attribute statistics manipulation.
5
 *
6
 * Code supporting the direct import of relation attribute statistics, similar
7
 * to what is done by the ANALYZE command.
8
 *
9
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
10
 * Portions Copyright (c) 1994, Regents of the University of California
11
 *
12
 * IDENTIFICATION
13
 *       src/backend/statistics/attribute_stats.c
14
 *
15
 *-------------------------------------------------------------------------
16
 */
17
18
#include "postgres.h"
19
20
#include "access/heapam.h"
21
#include "catalog/indexing.h"
22
#include "catalog/pg_collation.h"
23
#include "catalog/pg_operator.h"
24
#include "nodes/nodeFuncs.h"
25
#include "statistics/statistics.h"
26
#include "statistics/stat_utils.h"
27
#include "utils/array.h"
28
#include "utils/builtins.h"
29
#include "utils/fmgroids.h"
30
#include "utils/lsyscache.h"
31
#include "utils/syscache.h"
32
33
0
/*
 * Values stored by init_empty_stats_tuple() in the stanullfrac, stawidth and
 * stadistinct columns of a brand-new pg_statistic row.
 */
#define DEFAULT_NULL_FRAC      Float4GetDatum(0.0)
#define DEFAULT_AVG_WIDTH      Int32GetDatum(0) /* unknown */
#define DEFAULT_N_DISTINCT     Float4GetDatum(0.0)  /* unknown */
36
37
/*
 * Argument positions used by attribute_statistics_update(). Each value is
 * also the index of the matching entry in attarginfo[].
 */
enum attribute_stats_argnum
{
  /* which attribute to update */
  ATTRELSCHEMA_ARG = 0,
  ATTRELNAME_ARG,
  ATTNAME_ARG,
  ATTNUM_ARG,
  INHERITED_ARG,

  /* scalar statistics */
  NULL_FRAC_ARG,
  AVG_WIDTH_ARG,
  N_DISTINCT_ARG,

  /* per-stakind statistics */
  MOST_COMMON_VALS_ARG,
  MOST_COMMON_FREQS_ARG,
  HISTOGRAM_BOUNDS_ARG,
  CORRELATION_ARG,
  MOST_COMMON_ELEMS_ARG,
  MOST_COMMON_ELEM_FREQS_ARG,
  ELEM_COUNT_HISTOGRAM_ARG,
  RANGE_LENGTH_HISTOGRAM_ARG,
  RANGE_EMPTY_FRAC_ARG,
  RANGE_BOUNDS_HISTOGRAM_ARG,

  /* number of arguments; also the index of the attarginfo[] terminator */
  NUM_ATTRIBUTE_STATS_ARGS
};
59
60
static struct StatsArgInfo attarginfo[] =
61
{
62
  [ATTRELSCHEMA_ARG] = {"schemaname", TEXTOID},
63
  [ATTRELNAME_ARG] = {"relname", TEXTOID},
64
  [ATTNAME_ARG] = {"attname", TEXTOID},
65
  [ATTNUM_ARG] = {"attnum", INT2OID},
66
  [INHERITED_ARG] = {"inherited", BOOLOID},
67
  [NULL_FRAC_ARG] = {"null_frac", FLOAT4OID},
68
  [AVG_WIDTH_ARG] = {"avg_width", INT4OID},
69
  [N_DISTINCT_ARG] = {"n_distinct", FLOAT4OID},
70
  [MOST_COMMON_VALS_ARG] = {"most_common_vals", TEXTOID},
71
  [MOST_COMMON_FREQS_ARG] = {"most_common_freqs", FLOAT4ARRAYOID},
72
  [HISTOGRAM_BOUNDS_ARG] = {"histogram_bounds", TEXTOID},
73
  [CORRELATION_ARG] = {"correlation", FLOAT4OID},
74
  [MOST_COMMON_ELEMS_ARG] = {"most_common_elems", TEXTOID},
75
  [MOST_COMMON_ELEM_FREQS_ARG] = {"most_common_elem_freqs", FLOAT4ARRAYOID},
76
  [ELEM_COUNT_HISTOGRAM_ARG] = {"elem_count_histogram", FLOAT4ARRAYOID},
77
  [RANGE_LENGTH_HISTOGRAM_ARG] = {"range_length_histogram", TEXTOID},
78
  [RANGE_EMPTY_FRAC_ARG] = {"range_empty_frac", FLOAT4OID},
79
  [RANGE_BOUNDS_HISTOGRAM_ARG] = {"range_bounds_histogram", TEXTOID},
80
  [NUM_ATTRIBUTE_STATS_ARGS] = {0}
81
};
82
83
/*
 * Argument positions of pg_clear_attribute_stats(). Each value is also the
 * index of the matching entry in cleararginfo[].
 */
enum clear_attribute_stats_argnum
{
  C_ATTRELSCHEMA_ARG = 0,
  C_ATTRELNAME_ARG,
  C_ATTNAME_ARG,
  C_INHERITED_ARG,

  /* number of arguments; also the index of the cleararginfo[] terminator */
  C_NUM_ATTRIBUTE_STATS_ARGS
};
91
92
static struct StatsArgInfo cleararginfo[] =
93
{
94
  [C_ATTRELSCHEMA_ARG] = {"relation", TEXTOID},
95
  [C_ATTRELNAME_ARG] = {"relation", TEXTOID},
96
  [C_ATTNAME_ARG] = {"attname", TEXTOID},
97
  [C_INHERITED_ARG] = {"inherited", BOOLOID},
98
  [C_NUM_ATTRIBUTE_STATS_ARGS] = {0}
99
};
100
101
static bool attribute_statistics_update(FunctionCallInfo fcinfo);
102
static Node *get_attr_expr(Relation rel, int attnum);
103
static void get_attr_stat_type(Oid reloid, AttrNumber attnum,
104
                 Oid *atttypid, int32 *atttypmod,
105
                 char *atttyptype, Oid *atttypcoll,
106
                 Oid *eq_opr, Oid *lt_opr);
107
static bool get_elem_stat_type(Oid atttypid, char atttyptype,
108
                 Oid *elemtypid, Oid *elem_eq_opr);
109
static Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d,
110
                 Oid typid, int32 typmod, bool *ok);
111
static void set_stats_slot(Datum *values, bool *nulls, bool *replaces,
112
               int16 stakind, Oid staop, Oid stacoll,
113
               Datum stanumbers, bool stanumbers_isnull,
114
               Datum stavalues, bool stavalues_isnull);
115
static void upsert_pg_statistic(Relation starel, HeapTuple oldtup,
116
                Datum *values, bool *nulls, bool *replaces);
117
static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit);
118
static void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited,
119
                   Datum *values, bool *nulls, bool *replaces);
120
121
/*
122
 * Insert or Update Attribute Statistics
123
 *
124
 * See pg_statistic.h for an explanation of how each statistic kind is
125
 * stored. Custom statistics kinds are not supported.
126
 *
127
 * Depending on the statistics kind, we need to derive information from the
128
 * attribute for which we're storing the stats. For instance, the MCVs are
129
 * stored as an anyarray, and the representation of the array needs to store
130
 * the correct element type, which must be derived from the attribute.
131
 *
132
 * Major errors, such as the table not existing, the attribute not existing,
133
 * or a permissions failure are always reported at ERROR. Other errors, such
134
 * as a conversion failure on one statistic kind, are reported as a WARNING
135
 * and other statistic kinds may still be updated.
136
 */
137
static bool
138
attribute_statistics_update(FunctionCallInfo fcinfo)
139
0
{
140
0
  char     *nspname;
141
0
  char     *relname;
142
0
  Oid     reloid;
143
0
  char     *attname;
144
0
  AttrNumber  attnum;
145
0
  bool    inherited;
146
147
0
  Relation  starel;
148
0
  HeapTuple statup;
149
150
0
  Oid     atttypid = InvalidOid;
151
0
  int32   atttypmod;
152
0
  char    atttyptype;
153
0
  Oid     atttypcoll = InvalidOid;
154
0
  Oid     eq_opr = InvalidOid;
155
0
  Oid     lt_opr = InvalidOid;
156
157
0
  Oid     elemtypid = InvalidOid;
158
0
  Oid     elem_eq_opr = InvalidOid;
159
160
0
  FmgrInfo  array_in_fn;
161
162
0
  bool    do_mcv = !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) &&
163
0
    !PG_ARGISNULL(MOST_COMMON_VALS_ARG);
164
0
  bool    do_histogram = !PG_ARGISNULL(HISTOGRAM_BOUNDS_ARG);
165
0
  bool    do_correlation = !PG_ARGISNULL(CORRELATION_ARG);
166
0
  bool    do_mcelem = !PG_ARGISNULL(MOST_COMMON_ELEMS_ARG) &&
167
0
    !PG_ARGISNULL(MOST_COMMON_ELEM_FREQS_ARG);
168
0
  bool    do_dechist = !PG_ARGISNULL(ELEM_COUNT_HISTOGRAM_ARG);
169
0
  bool    do_bounds_histogram = !PG_ARGISNULL(RANGE_BOUNDS_HISTOGRAM_ARG);
170
0
  bool    do_range_length_histogram = !PG_ARGISNULL(RANGE_LENGTH_HISTOGRAM_ARG) &&
171
0
    !PG_ARGISNULL(RANGE_EMPTY_FRAC_ARG);
172
173
0
  Datum   values[Natts_pg_statistic] = {0};
174
0
  bool    nulls[Natts_pg_statistic] = {0};
175
0
  bool    replaces[Natts_pg_statistic] = {0};
176
177
0
  bool    result = true;
178
179
0
  stats_check_required_arg(fcinfo, attarginfo, ATTRELSCHEMA_ARG);
180
0
  stats_check_required_arg(fcinfo, attarginfo, ATTRELNAME_ARG);
181
182
0
  nspname = TextDatumGetCString(PG_GETARG_DATUM(ATTRELSCHEMA_ARG));
183
0
  relname = TextDatumGetCString(PG_GETARG_DATUM(ATTRELNAME_ARG));
184
185
0
  reloid = stats_lookup_relid(nspname, relname);
186
187
0
  if (RecoveryInProgress())
188
0
    ereport(ERROR,
189
0
        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
190
0
         errmsg("recovery is in progress"),
191
0
         errhint("Statistics cannot be modified during recovery.")));
192
193
  /* lock before looking up attribute */
194
0
  stats_lock_check_privileges(reloid);
195
196
  /* user can specify either attname or attnum, but not both */
197
0
  if (!PG_ARGISNULL(ATTNAME_ARG))
198
0
  {
199
0
    if (!PG_ARGISNULL(ATTNUM_ARG))
200
0
      ereport(ERROR,
201
0
          (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
202
0
           errmsg("cannot specify both attname and attnum")));
203
0
    attname = TextDatumGetCString(PG_GETARG_DATUM(ATTNAME_ARG));
204
0
    attnum = get_attnum(reloid, attname);
205
    /* note that this test covers attisdropped cases too: */
206
0
    if (attnum == InvalidAttrNumber)
207
0
      ereport(ERROR,
208
0
          (errcode(ERRCODE_UNDEFINED_COLUMN),
209
0
           errmsg("column \"%s\" of relation \"%s\" does not exist",
210
0
              attname, relname)));
211
0
  }
212
0
  else if (!PG_ARGISNULL(ATTNUM_ARG))
213
0
  {
214
0
    attnum = PG_GETARG_INT16(ATTNUM_ARG);
215
0
    attname = get_attname(reloid, attnum, true);
216
    /* annoyingly, get_attname doesn't check attisdropped */
217
0
    if (attname == NULL ||
218
0
      !SearchSysCacheExistsAttName(reloid, attname))
219
0
      ereport(ERROR,
220
0
          (errcode(ERRCODE_UNDEFINED_COLUMN),
221
0
           errmsg("column %d of relation \"%s\" does not exist",
222
0
              attnum, relname)));
223
0
  }
224
0
  else
225
0
  {
226
0
    ereport(ERROR,
227
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
228
0
         errmsg("must specify either attname or attnum")));
229
0
    attname = NULL;     /* keep compiler quiet */
230
0
    attnum = 0;
231
0
  }
232
233
0
  if (attnum < 0)
234
0
    ereport(ERROR,
235
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
236
0
         errmsg("cannot modify statistics on system column \"%s\"",
237
0
            attname)));
238
239
0
  stats_check_required_arg(fcinfo, attarginfo, INHERITED_ARG);
240
0
  inherited = PG_GETARG_BOOL(INHERITED_ARG);
241
242
  /*
243
   * Check argument sanity. If some arguments are unusable, emit a WARNING
244
   * and set the corresponding argument to NULL in fcinfo.
245
   */
246
247
0
  if (!stats_check_arg_array(fcinfo, attarginfo, MOST_COMMON_FREQS_ARG))
248
0
  {
249
0
    do_mcv = false;
250
0
    result = false;
251
0
  }
252
253
0
  if (!stats_check_arg_array(fcinfo, attarginfo, MOST_COMMON_ELEM_FREQS_ARG))
254
0
  {
255
0
    do_mcelem = false;
256
0
    result = false;
257
0
  }
258
0
  if (!stats_check_arg_array(fcinfo, attarginfo, ELEM_COUNT_HISTOGRAM_ARG))
259
0
  {
260
0
    do_dechist = false;
261
0
    result = false;
262
0
  }
263
264
0
  if (!stats_check_arg_pair(fcinfo, attarginfo,
265
0
                MOST_COMMON_VALS_ARG, MOST_COMMON_FREQS_ARG))
266
0
  {
267
0
    do_mcv = false;
268
0
    result = false;
269
0
  }
270
271
0
  if (!stats_check_arg_pair(fcinfo, attarginfo,
272
0
                MOST_COMMON_ELEMS_ARG,
273
0
                MOST_COMMON_ELEM_FREQS_ARG))
274
0
  {
275
0
    do_mcelem = false;
276
0
    result = false;
277
0
  }
278
279
0
  if (!stats_check_arg_pair(fcinfo, attarginfo,
280
0
                RANGE_LENGTH_HISTOGRAM_ARG,
281
0
                RANGE_EMPTY_FRAC_ARG))
282
0
  {
283
0
    do_range_length_histogram = false;
284
0
    result = false;
285
0
  }
286
287
  /* derive information from attribute */
288
0
  get_attr_stat_type(reloid, attnum,
289
0
             &atttypid, &atttypmod,
290
0
             &atttyptype, &atttypcoll,
291
0
             &eq_opr, &lt_opr);
292
293
  /* if needed, derive element type */
294
0
  if (do_mcelem || do_dechist)
295
0
  {
296
0
    if (!get_elem_stat_type(atttypid, atttyptype,
297
0
                &elemtypid, &elem_eq_opr))
298
0
    {
299
0
      ereport(WARNING,
300
0
          (errmsg("unable to determine element type of attribute \"%s\"", attname),
301
0
           errdetail("Cannot set STATISTIC_KIND_MCELEM or STATISTIC_KIND_DECHIST.")));
302
0
      elemtypid = InvalidOid;
303
0
      elem_eq_opr = InvalidOid;
304
305
0
      do_mcelem = false;
306
0
      do_dechist = false;
307
0
      result = false;
308
0
    }
309
0
  }
310
311
  /* histogram and correlation require less-than operator */
312
0
  if ((do_histogram || do_correlation) && !OidIsValid(lt_opr))
313
0
  {
314
0
    ereport(WARNING,
315
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
316
0
         errmsg("could not determine less-than operator for attribute \"%s\"", attname),
317
0
         errdetail("Cannot set STATISTIC_KIND_HISTOGRAM or STATISTIC_KIND_CORRELATION.")));
318
319
0
    do_histogram = false;
320
0
    do_correlation = false;
321
0
    result = false;
322
0
  }
323
324
  /* only range types can have range stats */
325
0
  if ((do_range_length_histogram || do_bounds_histogram) &&
326
0
    !(atttyptype == TYPTYPE_RANGE || atttyptype == TYPTYPE_MULTIRANGE))
327
0
  {
328
0
    ereport(WARNING,
329
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
330
0
         errmsg("attribute \"%s\" is not a range type", attname),
331
0
         errdetail("Cannot set STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM or STATISTIC_KIND_BOUNDS_HISTOGRAM.")));
332
333
0
    do_bounds_histogram = false;
334
0
    do_range_length_histogram = false;
335
0
    result = false;
336
0
  }
337
338
0
  fmgr_info(F_ARRAY_IN, &array_in_fn);
339
340
0
  starel = table_open(StatisticRelationId, RowExclusiveLock);
341
342
0
  statup = SearchSysCache3(STATRELATTINH, reloid, attnum, inherited);
343
344
  /* initialize from existing tuple if exists */
345
0
  if (HeapTupleIsValid(statup))
346
0
    heap_deform_tuple(statup, RelationGetDescr(starel), values, nulls);
347
0
  else
348
0
    init_empty_stats_tuple(reloid, attnum, inherited, values, nulls,
349
0
                 replaces);
350
351
  /* if specified, set to argument values */
352
0
  if (!PG_ARGISNULL(NULL_FRAC_ARG))
353
0
  {
354
0
    values[Anum_pg_statistic_stanullfrac - 1] = PG_GETARG_DATUM(NULL_FRAC_ARG);
355
0
    replaces[Anum_pg_statistic_stanullfrac - 1] = true;
356
0
  }
357
0
  if (!PG_ARGISNULL(AVG_WIDTH_ARG))
358
0
  {
359
0
    values[Anum_pg_statistic_stawidth - 1] = PG_GETARG_DATUM(AVG_WIDTH_ARG);
360
0
    replaces[Anum_pg_statistic_stawidth - 1] = true;
361
0
  }
362
0
  if (!PG_ARGISNULL(N_DISTINCT_ARG))
363
0
  {
364
0
    values[Anum_pg_statistic_stadistinct - 1] = PG_GETARG_DATUM(N_DISTINCT_ARG);
365
0
    replaces[Anum_pg_statistic_stadistinct - 1] = true;
366
0
  }
367
368
  /* STATISTIC_KIND_MCV */
369
0
  if (do_mcv)
370
0
  {
371
0
    bool    converted;
372
0
    Datum   stanumbers = PG_GETARG_DATUM(MOST_COMMON_FREQS_ARG);
373
0
    Datum   stavalues = text_to_stavalues("most_common_vals",
374
0
                          &array_in_fn,
375
0
                          PG_GETARG_DATUM(MOST_COMMON_VALS_ARG),
376
0
                          atttypid, atttypmod,
377
0
                          &converted);
378
379
0
    if (converted)
380
0
    {
381
0
      set_stats_slot(values, nulls, replaces,
382
0
               STATISTIC_KIND_MCV,
383
0
               eq_opr, atttypcoll,
384
0
               stanumbers, false, stavalues, false);
385
0
    }
386
0
    else
387
0
      result = false;
388
0
  }
389
390
  /* STATISTIC_KIND_HISTOGRAM */
391
0
  if (do_histogram)
392
0
  {
393
0
    Datum   stavalues;
394
0
    bool    converted = false;
395
396
0
    stavalues = text_to_stavalues("histogram_bounds",
397
0
                    &array_in_fn,
398
0
                    PG_GETARG_DATUM(HISTOGRAM_BOUNDS_ARG),
399
0
                    atttypid, atttypmod,
400
0
                    &converted);
401
402
0
    if (converted)
403
0
    {
404
0
      set_stats_slot(values, nulls, replaces,
405
0
               STATISTIC_KIND_HISTOGRAM,
406
0
               lt_opr, atttypcoll,
407
0
               0, true, stavalues, false);
408
0
    }
409
0
    else
410
0
      result = false;
411
0
  }
412
413
  /* STATISTIC_KIND_CORRELATION */
414
0
  if (do_correlation)
415
0
  {
416
0
    Datum   elems[] = {PG_GETARG_DATUM(CORRELATION_ARG)};
417
0
    ArrayType  *arry = construct_array_builtin(elems, 1, FLOAT4OID);
418
0
    Datum   stanumbers = PointerGetDatum(arry);
419
420
0
    set_stats_slot(values, nulls, replaces,
421
0
             STATISTIC_KIND_CORRELATION,
422
0
             lt_opr, atttypcoll,
423
0
             stanumbers, false, 0, true);
424
0
  }
425
426
  /* STATISTIC_KIND_MCELEM */
427
0
  if (do_mcelem)
428
0
  {
429
0
    Datum   stanumbers = PG_GETARG_DATUM(MOST_COMMON_ELEM_FREQS_ARG);
430
0
    bool    converted = false;
431
0
    Datum   stavalues;
432
433
0
    stavalues = text_to_stavalues("most_common_elems",
434
0
                    &array_in_fn,
435
0
                    PG_GETARG_DATUM(MOST_COMMON_ELEMS_ARG),
436
0
                    elemtypid, atttypmod,
437
0
                    &converted);
438
439
0
    if (converted)
440
0
    {
441
0
      set_stats_slot(values, nulls, replaces,
442
0
               STATISTIC_KIND_MCELEM,
443
0
               elem_eq_opr, atttypcoll,
444
0
               stanumbers, false, stavalues, false);
445
0
    }
446
0
    else
447
0
      result = false;
448
0
  }
449
450
  /* STATISTIC_KIND_DECHIST */
451
0
  if (do_dechist)
452
0
  {
453
0
    Datum   stanumbers = PG_GETARG_DATUM(ELEM_COUNT_HISTOGRAM_ARG);
454
455
0
    set_stats_slot(values, nulls, replaces,
456
0
             STATISTIC_KIND_DECHIST,
457
0
             elem_eq_opr, atttypcoll,
458
0
             stanumbers, false, 0, true);
459
0
  }
460
461
  /*
462
   * STATISTIC_KIND_BOUNDS_HISTOGRAM
463
   *
464
   * This stakind appears before STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM even
465
   * though it is numerically greater, and all other stakinds appear in
466
   * numerical order. We duplicate this quirk for consistency.
467
   */
468
0
  if (do_bounds_histogram)
469
0
  {
470
0
    bool    converted = false;
471
0
    Datum   stavalues;
472
473
0
    stavalues = text_to_stavalues("range_bounds_histogram",
474
0
                    &array_in_fn,
475
0
                    PG_GETARG_DATUM(RANGE_BOUNDS_HISTOGRAM_ARG),
476
0
                    atttypid, atttypmod,
477
0
                    &converted);
478
479
0
    if (converted)
480
0
    {
481
0
      set_stats_slot(values, nulls, replaces,
482
0
               STATISTIC_KIND_BOUNDS_HISTOGRAM,
483
0
               InvalidOid, InvalidOid,
484
0
               0, true, stavalues, false);
485
0
    }
486
0
    else
487
0
      result = false;
488
0
  }
489
490
  /* STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM */
491
0
  if (do_range_length_histogram)
492
0
  {
493
    /* The anyarray is always a float8[] for this stakind */
494
0
    Datum   elems[] = {PG_GETARG_DATUM(RANGE_EMPTY_FRAC_ARG)};
495
0
    ArrayType  *arry = construct_array_builtin(elems, 1, FLOAT4OID);
496
0
    Datum   stanumbers = PointerGetDatum(arry);
497
498
0
    bool    converted = false;
499
0
    Datum   stavalues;
500
501
0
    stavalues = text_to_stavalues("range_length_histogram",
502
0
                    &array_in_fn,
503
0
                    PG_GETARG_DATUM(RANGE_LENGTH_HISTOGRAM_ARG),
504
0
                    FLOAT8OID, 0, &converted);
505
506
0
    if (converted)
507
0
    {
508
0
      set_stats_slot(values, nulls, replaces,
509
0
               STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
510
0
               Float8LessOperator, InvalidOid,
511
0
               stanumbers, false, stavalues, false);
512
0
    }
513
0
    else
514
0
      result = false;
515
0
  }
516
517
0
  upsert_pg_statistic(starel, statup, values, nulls, replaces);
518
519
0
  if (HeapTupleIsValid(statup))
520
0
    ReleaseSysCache(statup);
521
0
  table_close(starel, RowExclusiveLock);
522
523
0
  return result;
524
0
}
525
526
/*
527
 * If this relation is an index and that index has expressions in it, and
528
 * the attnum specified is known to be an expression, then we must walk
529
 * the list attributes up to the specified attnum to get the right
530
 * expression.
531
 */
532
static Node *
533
get_attr_expr(Relation rel, int attnum)
534
0
{
535
0
  List     *index_exprs;
536
0
  ListCell   *indexpr_item;
537
538
  /* relation is not an index */
539
0
  if (rel->rd_rel->relkind != RELKIND_INDEX &&
540
0
    rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
541
0
    return NULL;
542
543
0
  index_exprs = RelationGetIndexExpressions(rel);
544
545
  /* index has no expressions to give */
546
0
  if (index_exprs == NIL)
547
0
    return NULL;
548
549
  /*
550
   * The index attnum points directly to a relation attnum, then it's not an
551
   * expression attribute.
552
   */
553
0
  if (rel->rd_index->indkey.values[attnum - 1] != 0)
554
0
    return NULL;
555
556
0
  indexpr_item = list_head(rel->rd_indexprs);
557
558
0
  for (int i = 0; i < attnum - 1; i++)
559
0
    if (rel->rd_index->indkey.values[i] == 0)
560
0
      indexpr_item = lnext(rel->rd_indexprs, indexpr_item);
561
562
0
  if (indexpr_item == NULL) /* shouldn't happen */
563
0
    elog(ERROR, "too few entries in indexprs list");
564
565
0
  return (Node *) lfirst(indexpr_item);
566
0
}
567
568
/*
569
 * Derive type information from the attribute.
570
 */
571
static void
572
get_attr_stat_type(Oid reloid, AttrNumber attnum,
573
           Oid *atttypid, int32 *atttypmod,
574
           char *atttyptype, Oid *atttypcoll,
575
           Oid *eq_opr, Oid *lt_opr)
576
0
{
577
0
  Relation  rel = relation_open(reloid, AccessShareLock);
578
0
  Form_pg_attribute attr;
579
0
  HeapTuple atup;
580
0
  Node     *expr;
581
0
  TypeCacheEntry *typcache;
582
583
0
  atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(reloid),
584
0
               Int16GetDatum(attnum));
585
586
  /* Attribute not found */
587
0
  if (!HeapTupleIsValid(atup))
588
0
    ereport(ERROR,
589
0
        (errcode(ERRCODE_UNDEFINED_COLUMN),
590
0
         errmsg("attribute %d of relation \"%s\" does not exist",
591
0
            attnum, RelationGetRelationName(rel))));
592
593
0
  attr = (Form_pg_attribute) GETSTRUCT(atup);
594
595
0
  if (attr->attisdropped)
596
0
    ereport(ERROR,
597
0
        (errcode(ERRCODE_UNDEFINED_COLUMN),
598
0
         errmsg("attribute %d of relation \"%s\" does not exist",
599
0
            attnum, RelationGetRelationName(rel))));
600
601
0
  expr = get_attr_expr(rel, attr->attnum);
602
603
  /*
604
   * When analyzing an expression index, believe the expression tree's type
605
   * not the column datatype --- the latter might be the opckeytype storage
606
   * type of the opclass, which is not interesting for our purposes. This
607
   * mimics the behavior of examine_attribute().
608
   */
609
0
  if (expr == NULL)
610
0
  {
611
0
    *atttypid = attr->atttypid;
612
0
    *atttypmod = attr->atttypmod;
613
0
    *atttypcoll = attr->attcollation;
614
0
  }
615
0
  else
616
0
  {
617
0
    *atttypid = exprType(expr);
618
0
    *atttypmod = exprTypmod(expr);
619
620
0
    if (OidIsValid(attr->attcollation))
621
0
      *atttypcoll = attr->attcollation;
622
0
    else
623
0
      *atttypcoll = exprCollation(expr);
624
0
  }
625
0
  ReleaseSysCache(atup);
626
627
  /*
628
   * If it's a multirange, step down to the range type, as is done by
629
   * multirange_typanalyze().
630
   */
631
0
  if (type_is_multirange(*atttypid))
632
0
    *atttypid = get_multirange_range(*atttypid);
633
634
  /* finds the right operators even if atttypid is a domain */
635
0
  typcache = lookup_type_cache(*atttypid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR);
636
0
  *atttyptype = typcache->typtype;
637
0
  *eq_opr = typcache->eq_opr;
638
0
  *lt_opr = typcache->lt_opr;
639
640
  /*
641
   * Special case: collation for tsvector is DEFAULT_COLLATION_OID. See
642
   * compute_tsvector_stats().
643
   */
644
0
  if (*atttypid == TSVECTOROID)
645
0
    *atttypcoll = DEFAULT_COLLATION_OID;
646
647
0
  relation_close(rel, NoLock);
648
0
}
649
650
/*
651
 * Derive element type information from the attribute type.
652
 */
653
static bool
654
get_elem_stat_type(Oid atttypid, char atttyptype,
655
           Oid *elemtypid, Oid *elem_eq_opr)
656
0
{
657
0
  TypeCacheEntry *elemtypcache;
658
659
0
  if (atttypid == TSVECTOROID)
660
0
  {
661
    /*
662
     * Special case: element type for tsvector is text. See
663
     * compute_tsvector_stats().
664
     */
665
0
    *elemtypid = TEXTOID;
666
0
  }
667
0
  else
668
0
  {
669
    /* find underlying element type through any domain */
670
0
    *elemtypid = get_base_element_type(atttypid);
671
0
  }
672
673
0
  if (!OidIsValid(*elemtypid))
674
0
    return false;
675
676
  /* finds the right operator even if elemtypid is a domain */
677
0
  elemtypcache = lookup_type_cache(*elemtypid, TYPECACHE_EQ_OPR);
678
0
  if (!OidIsValid(elemtypcache->eq_opr))
679
0
    return false;
680
681
0
  *elem_eq_opr = elemtypcache->eq_opr;
682
683
0
  return true;
684
0
}
685
686
/*
687
 * Cast a text datum into an array with element type elemtypid.
688
 *
689
 * If an error is encountered, capture it and re-throw a WARNING, and set ok
690
 * to false. If the resulting array contains NULLs, raise a WARNING and set ok
691
 * to false. Otherwise, set ok to true.
692
 */
693
static Datum
694
text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid,
695
          int32 typmod, bool *ok)
696
0
{
697
0
  LOCAL_FCINFO(fcinfo, 8);
698
0
  char     *s;
699
0
  Datum   result;
700
0
  ErrorSaveContext escontext = {T_ErrorSaveContext};
701
702
0
  escontext.details_wanted = true;
703
704
0
  s = TextDatumGetCString(d);
705
706
0
  InitFunctionCallInfoData(*fcinfo, array_in, 3, InvalidOid,
707
0
               (Node *) &escontext, NULL);
708
709
0
  fcinfo->args[0].value = CStringGetDatum(s);
710
0
  fcinfo->args[0].isnull = false;
711
0
  fcinfo->args[1].value = ObjectIdGetDatum(typid);
712
0
  fcinfo->args[1].isnull = false;
713
0
  fcinfo->args[2].value = Int32GetDatum(typmod);
714
0
  fcinfo->args[2].isnull = false;
715
716
0
  result = FunctionCallInvoke(fcinfo);
717
718
0
  pfree(s);
719
720
0
  if (escontext.error_occurred)
721
0
  {
722
0
    escontext.error_data->elevel = WARNING;
723
0
    ThrowErrorData(escontext.error_data);
724
0
    *ok = false;
725
0
    return (Datum) 0;
726
0
  }
727
728
0
  if (array_contains_nulls(DatumGetArrayTypeP(result)))
729
0
  {
730
0
    ereport(WARNING,
731
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
732
0
         errmsg("\"%s\" array cannot contain NULL values", staname)));
733
0
    *ok = false;
734
0
    return (Datum) 0;
735
0
  }
736
737
0
  *ok = true;
738
739
0
  return result;
740
0
}
741
742
/*
743
 * Find and update the slot with the given stakind, or use the first empty
744
 * slot.
745
 */
746
static void
747
set_stats_slot(Datum *values, bool *nulls, bool *replaces,
748
         int16 stakind, Oid staop, Oid stacoll,
749
         Datum stanumbers, bool stanumbers_isnull,
750
         Datum stavalues, bool stavalues_isnull)
751
0
{
752
0
  int     slotidx;
753
0
  int     first_empty = -1;
754
0
  AttrNumber  stakind_attnum;
755
0
  AttrNumber  staop_attnum;
756
0
  AttrNumber  stacoll_attnum;
757
758
  /* find existing slot with given stakind */
759
0
  for (slotidx = 0; slotidx < STATISTIC_NUM_SLOTS; slotidx++)
760
0
  {
761
0
    stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
762
763
0
    if (first_empty < 0 &&
764
0
      DatumGetInt16(values[stakind_attnum]) == 0)
765
0
      first_empty = slotidx;
766
0
    if (DatumGetInt16(values[stakind_attnum]) == stakind)
767
0
      break;
768
0
  }
769
770
0
  if (slotidx >= STATISTIC_NUM_SLOTS && first_empty >= 0)
771
0
    slotidx = first_empty;
772
773
0
  if (slotidx >= STATISTIC_NUM_SLOTS)
774
0
    ereport(ERROR,
775
0
        (errmsg("maximum number of statistics slots exceeded: %d",
776
0
            slotidx + 1)));
777
778
0
  stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
779
0
  staop_attnum = Anum_pg_statistic_staop1 - 1 + slotidx;
780
0
  stacoll_attnum = Anum_pg_statistic_stacoll1 - 1 + slotidx;
781
782
0
  if (DatumGetInt16(values[stakind_attnum]) != stakind)
783
0
  {
784
0
    values[stakind_attnum] = Int16GetDatum(stakind);
785
0
    replaces[stakind_attnum] = true;
786
0
  }
787
0
  if (DatumGetObjectId(values[staop_attnum]) != staop)
788
0
  {
789
0
    values[staop_attnum] = ObjectIdGetDatum(staop);
790
0
    replaces[staop_attnum] = true;
791
0
  }
792
0
  if (DatumGetObjectId(values[stacoll_attnum]) != stacoll)
793
0
  {
794
0
    values[stacoll_attnum] = ObjectIdGetDatum(stacoll);
795
0
    replaces[stacoll_attnum] = true;
796
0
  }
797
0
  if (!stanumbers_isnull)
798
0
  {
799
0
    values[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = stanumbers;
800
0
    nulls[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = false;
801
0
    replaces[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = true;
802
0
  }
803
0
  if (!stavalues_isnull)
804
0
  {
805
0
    values[Anum_pg_statistic_stavalues1 - 1 + slotidx] = stavalues;
806
0
    nulls[Anum_pg_statistic_stavalues1 - 1 + slotidx] = false;
807
0
    replaces[Anum_pg_statistic_stavalues1 - 1 + slotidx] = true;
808
0
  }
809
0
}
810
811
/*
812
 * Upsert the pg_statistic record.
813
 */
814
static void
815
upsert_pg_statistic(Relation starel, HeapTuple oldtup,
816
          Datum *values, bool *nulls, bool *replaces)
817
0
{
818
0
  HeapTuple newtup;
819
820
0
  if (HeapTupleIsValid(oldtup))
821
0
  {
822
0
    newtup = heap_modify_tuple(oldtup, RelationGetDescr(starel),
823
0
                   values, nulls, replaces);
824
0
    CatalogTupleUpdate(starel, &newtup->t_self, newtup);
825
0
  }
826
0
  else
827
0
  {
828
0
    newtup = heap_form_tuple(RelationGetDescr(starel), values, nulls);
829
0
    CatalogTupleInsert(starel, newtup);
830
0
  }
831
832
0
  heap_freetuple(newtup);
833
834
0
  CommandCounterIncrement();
835
0
}
836
837
/*
838
 * Delete pg_statistic record.
839
 */
840
static bool
841
delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit)
842
0
{
843
0
  Relation  sd = table_open(StatisticRelationId, RowExclusiveLock);
844
0
  HeapTuple oldtup;
845
0
  bool    result = false;
846
847
  /* Is there already a pg_statistic tuple for this attribute? */
848
0
  oldtup = SearchSysCache3(STATRELATTINH,
849
0
               ObjectIdGetDatum(reloid),
850
0
               Int16GetDatum(attnum),
851
0
               BoolGetDatum(stainherit));
852
853
0
  if (HeapTupleIsValid(oldtup))
854
0
  {
855
0
    CatalogTupleDelete(sd, &oldtup->t_self);
856
0
    ReleaseSysCache(oldtup);
857
0
    result = true;
858
0
  }
859
860
0
  table_close(sd, RowExclusiveLock);
861
862
0
  CommandCounterIncrement();
863
864
0
  return result;
865
0
}
866
867
/*
868
 * Initialize values and nulls for a new stats tuple.
869
 */
870
static void
871
init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited,
872
             Datum *values, bool *nulls, bool *replaces)
873
0
{
874
0
  memset(nulls, true, sizeof(bool) * Natts_pg_statistic);
875
0
  memset(replaces, true, sizeof(bool) * Natts_pg_statistic);
876
877
  /* must initialize non-NULL attributes */
878
879
0
  values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(reloid);
880
0
  nulls[Anum_pg_statistic_starelid - 1] = false;
881
0
  values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(attnum);
882
0
  nulls[Anum_pg_statistic_staattnum - 1] = false;
883
0
  values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inherited);
884
0
  nulls[Anum_pg_statistic_stainherit - 1] = false;
885
886
0
  values[Anum_pg_statistic_stanullfrac - 1] = DEFAULT_NULL_FRAC;
887
0
  nulls[Anum_pg_statistic_stanullfrac - 1] = false;
888
0
  values[Anum_pg_statistic_stawidth - 1] = DEFAULT_AVG_WIDTH;
889
0
  nulls[Anum_pg_statistic_stawidth - 1] = false;
890
0
  values[Anum_pg_statistic_stadistinct - 1] = DEFAULT_N_DISTINCT;
891
0
  nulls[Anum_pg_statistic_stadistinct - 1] = false;
892
893
  /* initialize stakind, staop, and stacoll slots */
894
0
  for (int slotnum = 0; slotnum < STATISTIC_NUM_SLOTS; slotnum++)
895
0
  {
896
0
    values[Anum_pg_statistic_stakind1 + slotnum - 1] = (Datum) 0;
897
0
    nulls[Anum_pg_statistic_stakind1 + slotnum - 1] = false;
898
0
    values[Anum_pg_statistic_staop1 + slotnum - 1] = InvalidOid;
899
0
    nulls[Anum_pg_statistic_staop1 + slotnum - 1] = false;
900
0
    values[Anum_pg_statistic_stacoll1 + slotnum - 1] = InvalidOid;
901
0
    nulls[Anum_pg_statistic_stacoll1 + slotnum - 1] = false;
902
0
  }
903
0
}
904
905
/*
906
 * Delete statistics for the given attribute.
907
 */
908
Datum
909
pg_clear_attribute_stats(PG_FUNCTION_ARGS)
910
0
{
911
0
  char     *nspname;
912
0
  char     *relname;
913
0
  Oid     reloid;
914
0
  char     *attname;
915
0
  AttrNumber  attnum;
916
0
  bool    inherited;
917
918
0
  stats_check_required_arg(fcinfo, cleararginfo, C_ATTRELSCHEMA_ARG);
919
0
  stats_check_required_arg(fcinfo, cleararginfo, C_ATTRELNAME_ARG);
920
0
  stats_check_required_arg(fcinfo, cleararginfo, C_ATTNAME_ARG);
921
0
  stats_check_required_arg(fcinfo, cleararginfo, C_INHERITED_ARG);
922
923
0
  nspname = TextDatumGetCString(PG_GETARG_DATUM(C_ATTRELSCHEMA_ARG));
924
0
  relname = TextDatumGetCString(PG_GETARG_DATUM(C_ATTRELNAME_ARG));
925
926
0
  reloid = stats_lookup_relid(nspname, relname);
927
928
0
  if (RecoveryInProgress())
929
0
    ereport(ERROR,
930
0
        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
931
0
         errmsg("recovery is in progress"),
932
0
         errhint("Statistics cannot be modified during recovery.")));
933
934
0
  stats_lock_check_privileges(reloid);
935
936
0
  attname = TextDatumGetCString(PG_GETARG_DATUM(C_ATTNAME_ARG));
937
0
  attnum = get_attnum(reloid, attname);
938
939
0
  if (attnum < 0)
940
0
    ereport(ERROR,
941
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
942
0
         errmsg("cannot clear statistics on system column \"%s\"",
943
0
            attname)));
944
945
0
  if (attnum == InvalidAttrNumber)
946
0
    ereport(ERROR,
947
0
        (errcode(ERRCODE_UNDEFINED_COLUMN),
948
0
         errmsg("column \"%s\" of relation \"%s\" does not exist",
949
0
            attname, get_rel_name(reloid))));
950
951
0
  inherited = PG_GETARG_BOOL(C_INHERITED_ARG);
952
953
0
  delete_pg_statistic(reloid, attnum, inherited);
954
0
  PG_RETURN_VOID();
955
0
}
956
957
/*
958
 * Import statistics for a given relation attribute.
959
 *
960
 * Inserts or replaces a row in pg_statistic for the given relation and
961
 * attribute name or number. It takes input parameters that correspond to
962
 * columns in the view pg_stats.
963
 *
964
 * Parameters are given in a pseudo named-attribute style: they must be
965
 * pairs of parameter names (as text) and values (of appropriate types).
966
 * We do that, rather than using regular named-parameter notation, so
967
 * that we can add or change parameters without fear of breaking
968
 * carelessly-written calls.
969
 *
970
 * Parameters null_frac, avg_width, and n_distinct all correspond to NOT NULL
971
 * columns in pg_statistic. The remaining parameters all belong to a specific
972
 * stakind. Some stakinds require multiple parameters, which must be specified
973
 * together (or neither specified).
974
 *
975
 * Parameters are only superficially validated. Omitting a parameter or
976
 * passing NULL leaves the statistic unchanged.
977
 *
978
 * Parameters corresponding to ANYARRAY columns are instead passed in as text
979
 * values, which is a valid input string for an array of the type or element
980
 * type of the attribute. Any error generated by the array_in() function will
981
 * in turn fail the function.
982
 */
983
Datum
984
pg_restore_attribute_stats(PG_FUNCTION_ARGS)
985
0
{
986
0
  LOCAL_FCINFO(positional_fcinfo, NUM_ATTRIBUTE_STATS_ARGS);
987
0
  bool    result = true;
988
989
0
  InitFunctionCallInfoData(*positional_fcinfo, NULL, NUM_ATTRIBUTE_STATS_ARGS,
990
0
               InvalidOid, NULL, NULL);
991
992
0
  if (!stats_fill_fcinfo_from_arg_pairs(fcinfo, positional_fcinfo,
993
0
                      attarginfo))
994
0
    result = false;
995
996
0
  if (!attribute_statistics_update(positional_fcinfo))
997
0
    result = false;
998
999
0
  PG_RETURN_BOOL(result);
1000
0
}