/src/postgres/src/backend/utils/adt/like_match.c

Source (jump to first uncovered line)
/*-------------------------------------------------------------------------
 *
 * like_match.c
 *    LIKE pattern matching internal code.
 *
 * This file is included by like.c four times, to provide matching code for
 * (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings,
 * and (4) case insensitive matches in single-byte encodings.
 * (UTF8 is a special case because we can use a much more efficient version
 * of NextChar than can be used for general multi-byte encodings.)
 *
 * Before the inclusion, we need to define the following macros:
 *
 * NextChar
 * MatchText - to name of function wanted
 * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
 * MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars
 *
 * Copyright (c) 1996-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *  src/backend/utils/adt/like_match.c
 *
 *-------------------------------------------------------------------------
 */

/*
 *  Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
 *  Rich $alz is now <rsalz@bbn.com>.
 *  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the
 *  LIKE_ABORT code.
 *
 *  This code was shamelessly stolen from the "pql" code by myself and
 *  slightly modified :)
 *
 *  All references to the word "star" were replaced by "percent"
 *  All references to the word "wild" were replaced by "like"
 *
 *  All the nice shell RE matching stuff was replaced by just "_" and "%"
 *
 *  As I don't have a copy of the SQL standard handy I wasn't sure whether
 *  to leave in the '\' escape character handling.
 *
 *  Keith Parks. <keith@mtcc.demon.co.uk>
 *
 *  SQL lets you specify the escape character by saying
 *  LIKE <pattern> ESCAPE <escape character>. We are a small operation
 *  so we force you to use '\'. - ay 7/95
 *
 *  Now we have the like_escape() function that converts patterns with
 *  any specified escape character (or none at all) to the internal
 *  default escape character, which is still '\'. - tgl 9/2000
 *
 * The code is rewritten to avoid requiring null-terminated strings,
 * which in turn allows us to leave out some memcpy() operations.
 * This code should be faster and take less memory, but no promises...
 * - thomas 2000-08-06
 */


/*--------------------
 *  Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
 *
 *  LIKE_TRUE: they match
 *  LIKE_FALSE: they don't match
 *  LIKE_ABORT: not only don't they match, but the text is too short.
 *
 * If LIKE_ABORT is returned, then no suffix of the text can match the
 * pattern either, so an upper-level % scan can stop scanning now.
 *--------------------
 */

/*
 * GETCHAR(t, locale) yields the value used when a text or pattern byte is
 * compared: MATCH_LOWER(t, locale) if the including file defined MATCH_LOWER
 * (the case-insensitive variant), otherwise the byte itself, unchanged.
 */
#ifdef MATCH_LOWER
#define GETCHAR(t, locale) MATCH_LOWER(t, locale)
#else
#define GETCHAR(t, locale) (t)
#endif

/*
 * MatchText --- match text t (tlen bytes) against LIKE pattern p (plen bytes).
 *
 * Neither string needs to be null-terminated; only the given byte counts are
 * consulted.  locale is passed to GETCHAR for every byte comparison and, when
 * it is non-NULL and nondeterministic, makes literal pattern segments be
 * compared with pg_strncoll() instead of byte by byte.
 *
 * Returns LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.  Reports an error if the scan
 * reaches a backslash that is the last byte of the pattern.
 */
static int
MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
{
  /* Fast path for match-everything pattern */
  if (plen == 1 && *p == '%')
    return LIKE_TRUE;

  /* Since this function recurses, it could be driven to stack overflow */
  check_stack_depth();

  /*
   * In this loop, we advance by char when matching wildcards (and thus on
   * recursive entry to this function we are properly char-synced). On other
   * occasions it is safe to advance by byte, as the text and pattern will
   * be in lockstep. This allows us to perform all comparisons between the
   * text and pattern on a byte by byte basis, even for multi-byte
   * encodings.
   */
  while (tlen > 0 && plen > 0)
  {
    if (*p == '\\')
    {
      /*
       * NOTE(review): this branch is taken before the nondeterministic
       * check below, so an escaped literal at the start of a segment is
       * compared bytewise even with a nondeterministic locale --- confirm
       * that this is intended.
       */
      /* Next pattern byte must match literally, whatever it is */
      NextByte(p, plen);
      /* ... and there had better be one, per SQL standard */
      if (plen <= 0)
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
             errmsg("LIKE pattern must not end with escape character")));
      if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
        return LIKE_FALSE;
    }
    else if (*p == '%')
    {
      char    firstpat;

      /*
       * % processing is essentially a search for a text position at
       * which the remainder of the text matches the remainder of the
       * pattern, using a recursive call to check each potential match.
       *
       * If there are wildcards immediately following the %, we can skip
       * over them first, using the idea that any sequence of N _'s and
       * one or more %'s is equivalent to N _'s and one % (ie, it will
       * match any sequence of at least N text characters).  In this way
       * we will always run the recursive search loop using a pattern
       * fragment that begins with a literal character-to-match, thereby
       * not recursing more than we have to.
       */
      NextByte(p, plen);

      while (plen > 0)
      {
        if (*p == '%')
          NextByte(p, plen);
        else if (*p == '_')
        {
          /* If not enough text left to match the pattern, ABORT */
          if (tlen <= 0)
            return LIKE_ABORT;
          NextChar(t, tlen);
          NextByte(p, plen);
        }
        else
          break;   /* Reached a non-wildcard pattern char */
      }

      /*
       * If we're at end of pattern, match: we have a trailing % which
       * matches any remaining text string.
       */
      if (plen <= 0)
        return LIKE_TRUE;

      /*
       * Otherwise, scan for a text position at which we can match the
       * rest of the pattern.  The first remaining pattern char is known
       * to be a regular or escaped literal character, so we can compare
       * the first pattern byte to each text byte to avoid recursing
       * more than we have to.  This fact also guarantees that we don't
       * have to consider a match to the zero-length substring at the
       * end of the text.  With a nondeterministic collation, we can't
       * rely on the first bytes being equal, so we have to recurse in
       * any case.
       */
      if (*p == '\\')
      {
        if (plen < 2)
          ereport(ERROR,
              (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
               errmsg("LIKE pattern must not end with escape character")));
        firstpat = GETCHAR(p[1], locale);
      }
      else
        firstpat = GETCHAR(*p, locale);

      while (tlen > 0)
      {
        if (GETCHAR(*t, locale) == firstpat || (locale && !locale->deterministic))
        {
          int     matched = MatchText(t, tlen, p, plen, locale);

          if (matched != LIKE_FALSE)
            return matched; /* TRUE or ABORT */
        }

        NextChar(t, tlen);
      }

      /*
       * End of text with no match, so no point in trying later places
       * to start matching this pattern.
       */
      return LIKE_ABORT;
    }
    else if (*p == '_')
    {
      /* _ matches any single character, and we know there is one */
      NextChar(t, tlen);
      NextByte(p, plen);
      continue;
    }
    else if (locale && !locale->deterministic)
    {
      /*
       * For nondeterministic locales, we find the next substring of the
       * pattern that does not contain wildcards and try to find a
       * matching substring in the text.  Crucially, we cannot do this
       * character by character, as in the normal case, but must do it
       * substring by substring, partitioned by the wildcard characters.
       * (This is per SQL standard.)
       */
      const char *p1;
      size_t    p1len;
      const char *t1;
      size_t    t1len;
      bool    found_escape;
      const char *subpat;
      size_t    subpatlen;
      char     *buf = NULL;

      /*
       * Determine next substring of pattern without wildcards.  p is
       * the start of the subpattern, p1 is one past the last byte. Also
       * track if we found an escape character.
       */
      p1 = p;
      p1len = plen;
      found_escape = false;
      while (p1len > 0)
      {
        if (*p1 == '\\')
        {
          found_escape = true;
          /* step onto the escaped byte; the NextByte below skips it */
          NextByte(p1, p1len);
          if (p1len == 0)
            ereport(ERROR,
                (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                 errmsg("LIKE pattern must not end with escape character")));
        }
        else if (*p1 == '_' || *p1 == '%')
          break;
        NextByte(p1, p1len);
      }

      /*
       * If we found an escape character, then make an unescaped copy of
       * the subpattern.
       */
      if (found_escape)
      {
        char     *b;

        /* the copy can only be shorter than the escaped original */
        b = buf = palloc(p1 - p);
        for (const char *c = p; c < p1; c++)
        {
          /*
           * A backslash escapes the byte after it: drop the backslash
           * and copy the following byte verbatim, even if that byte is
           * itself a backslash.  (Dropping every backslash would lose
           * an escaped backslash altogether.)  The scan above has
           * already verified that the escaped byte lies before p1.
           */
          if (*c == '\\')
            c++;
          *(b++) = *c;
        }

        subpat = buf;
        subpatlen = b - buf;
      }
      else
      {
        subpat = p;
        subpatlen = p1 - p;
      }

      /*
       * Shortcut: If this is the end of the pattern, then the rest of
       * the text has to match the rest of the pattern.
       */
      if (p1len == 0)
      {
        int     cmp;

        cmp = pg_strncoll(subpat, subpatlen, t, tlen, locale);

        if (buf)
          pfree(buf);
        if (cmp == 0)
          return LIKE_TRUE;
        else
          return LIKE_FALSE;
      }

      /*
       * Now build a substring of the text and try to match it against
       * the subpattern.  t is the start of the text, t1 is one past the
       * last byte.  We start with a zero-length string.
       */
      t1 = t;
      t1len = tlen;
      for (;;)
      {
        int     cmp;

        CHECK_FOR_INTERRUPTS();

        cmp = pg_strncoll(subpat, subpatlen, t, (t1 - t), locale);

        /*
         * If we found a match, we have to test if the rest of pattern
         * can match against the rest of the string.  Otherwise we
         * have to continue here try matching with a longer substring.
         * (This is similar to the recursion for the '%' wildcard
         * above.)
         *
         * Note that we can't just wind forward p and t and continue
         * with the main loop.  This would fail for example with
         *
         * U&'\0061\0308bc' LIKE U&'\00E4_c' COLLATE ignore_accents
         *
         * You'd find that t=\0061 matches p=\00E4, but then the rest
         * won't match; but t=\0061\0308 also matches p=\00E4, and
         * then the rest will match.
         */
        if (cmp == 0)
        {
          int     matched = MatchText(t1, t1len, p1, p1len, locale);

          if (matched == LIKE_TRUE)
          {
            if (buf)
              pfree(buf);
            return matched;
          }
        }

        /*
         * Didn't match.  If we used up the whole text, then the match
         * fails.  Otherwise, try again with a longer substring.
         */
        if (t1len == 0)
        {
          if (buf)
            pfree(buf);
          return LIKE_FALSE;
        }
        else
          NextChar(t1, t1len);
      }
    }
    else if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
    {
      /* non-wildcard pattern char fails to match text char */
      return LIKE_FALSE;
    }

    /*
     * Pattern and text match, so advance.
     *
     * It is safe to use NextByte instead of NextChar here, even for
     * multi-byte character sets, because we are not following immediately
     * after a wildcard character. If we are in the middle of a multibyte
     * character, we must already have matched at least one byte of the
     * character from both text and pattern; so we cannot get out-of-sync
     * on character boundaries.  And we know that no backend-legal
     * encoding allows ASCII characters such as '%' to appear as non-first
     * bytes of characters, so we won't mistakenly detect a new wildcard.
     */
    NextByte(t, tlen);
    NextByte(p, plen);
  }

  if (tlen > 0)
    return LIKE_FALSE;   /* end of pattern, but not of text */

  /*
   * End of text, but perhaps not of pattern.  Match iff the remaining
   * pattern can match a zero-length string, ie, it's zero or more %'s.
   */
  while (plen > 0 && *p == '%')
    NextByte(p, plen);
  if (plen <= 0)
    return LIKE_TRUE;

  /*
   * End of text with no match, so no point in trying later places to start
   * matching this pattern.
   */
  return LIKE_ABORT;
}                /* MatchText() */

/*
 * like_escape() --- given a pattern and an ESCAPE string,
 * convert the pattern to use Postgres' standard backslash escape convention.
 */
#ifdef do_like_escape

static text *
do_like_escape(text *pat, text *esc)
{
  char     *in = VARDATA_ANY(pat);
  int     inlen = VARSIZE_ANY_EXHDR(pat);
  char     *escchar = VARDATA_ANY(esc);
  int     esclen = VARSIZE_ANY_EXHDR(esc);
  text     *result;
  char     *out;

  /*
   * Doubling a backslash is the largest expansion any input byte can
   * undergo, so twice the input length always suffices; a tighter bound is
   * not worth computing.
   */
  result = (text *) palloc(inlen * 2 + VARHDRSZ);
  out = VARDATA(result);

  if (esclen != 0)
  {
    bool    prev_was_escape = false;

    /*
     * An ESCAPE string must be exactly one character: stepping over one
     * character has to consume all of it.
     */
    NextChar(escchar, esclen);
    if (esclen != 0)
      ereport(ERROR,
          (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
           errmsg("invalid escape string"),
           errhint("Escape string must be empty or one character.")));

    /* Rewind to the start of the escape character. */
    escchar = VARDATA_ANY(esc);

    /*
     * Backslash is already the internal escape character, so the
     * pattern can be returned as a verbatim copy.
     */
    if (*escchar == '\\')
    {
      memcpy(result, pat, VARSIZE_ANY(pat));
      return result;
    }

    /*
     * Translate: an unescaped escape character becomes '\'; a literal
     * '\' is doubled unless it directly follows an escape character, in
     * which case the '\' just emitted for the escape already protects it.
     */
    while (inlen > 0)
    {
      if (CHAREQ(in, escchar) && !prev_was_escape)
      {
        *out++ = '\\';
        NextChar(in, inlen);
        prev_was_escape = true;
        continue;
      }

      if (*in == '\\')
      {
        *out++ = '\\';
        if (!prev_was_escape)
          *out++ = '\\';
        NextChar(in, inlen);
      }
      else
      {
        CopyAdvChar(out, in, inlen);
      }
      prev_was_escape = false;
    }
  }
  else
  {
    /*
     * Empty ESCAPE string: no character escapes anything, so every
     * backslash in the pattern is doubled to make it an ordinary
     * character.
     */
    while (inlen > 0)
    {
      if (*in == '\\')
        *out++ = '\\';
      CopyAdvChar(out, in, inlen);
    }
  }

  /* Total size is the header plus the bytes actually written. */
  SET_VARSIZE(result, out - ((char *) result));

  return result;
}
#endif              /* do_like_escape */

/*
 * Undefine the macros supplied by (or defined for) this inclusion, so that
 * the including file can define fresh variants before including this file
 * again.  CHAREQ, do_like_escape and MATCH_LOWER are optional, hence the
 * #ifdef guards around them.
 */
#ifdef CHAREQ
#undef CHAREQ
#endif

#undef NextChar
#undef CopyAdvChar
#undef MatchText

#ifdef do_like_escape
#undef do_like_escape
#endif

#undef GETCHAR

#ifdef MATCH_LOWER
#undef MATCH_LOWER

#endif

Coverage Report

Created: 2025-08-12 06:43

Line	Count	Source (jump to first uncovered line)
1		/*-------------------------------------------------------------------------
2		*
3		* like_match.c
4		* LIKE pattern matching internal code.
5		*
6		* This file is included by like.c four times, to provide matching code for
7		* (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings,
8		* and (4) case insensitive matches in single-byte encodings.
9		* (UTF8 is a special case because we can use a much more efficient version
10		* of NextChar than can be used for general multi-byte encodings.)
11		*
12		* Before the inclusion, we need to define the following macros:
13		*
14		* NextChar
15		* MatchText - to name of function wanted
16		* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
17		* MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars
18		*
19		* Copyright (c) 1996-2025, PostgreSQL Global Development Group
20		*
21		* IDENTIFICATION
22		* src/backend/utils/adt/like_match.c
23		*
24		*-------------------------------------------------------------------------
25		*/
26
27		/*
28		* Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
29		* Rich $alz is now <rsalz@bbn.com>.
30		* Special thanks to Lars Mathiesen <thorinn@diku.dk> for the
31		* LIKE_ABORT code.
32		*
33		* This code was shamelessly stolen from the "pql" code by myself and
34		* slightly modified :)
35		*
36		* All references to the word "star" were replaced by "percent"
37		* All references to the word "wild" were replaced by "like"
38		*
39		* All the nice shell RE matching stuff was replaced by just "_" and "%"
40		*
41		* As I don't have a copy of the SQL standard handy I wasn't sure whether
42		* to leave in the '\' escape character handling.
43		*
44		* Keith Parks. <keith@mtcc.demon.co.uk>
45		*
46		* SQL lets you specify the escape character by saying
47		* LIKE <pattern> ESCAPE <escape character>. We are a small operation
48		* so we force you to use '\'. - ay 7/95
49		*
50		* Now we have the like_escape() function that converts patterns with
51		* any specified escape character (or none at all) to the internal
52		* default escape character, which is still '\'. - tgl 9/2000
53		*
54		* The code is rewritten to avoid requiring null-terminated strings,
55		* which in turn allows us to leave out some memcpy() operations.
56		* This code should be faster and take less memory, but no promises...
57		* - thomas 2000-08-06
58		*/
59
60
61		/*--------------------
62		* Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
63		*
64		* LIKE_TRUE: they match
65		* LIKE_FALSE: they don't match
66		* LIKE_ABORT: not only don't they match, but the text is too short.
67		*
68		* If LIKE_ABORT is returned, then no suffix of the text can match the
69		* pattern either, so an upper-level % scan can stop scanning now.
70		*--------------------
71		*/
72
73		#ifdef MATCH_LOWER
74	0	#define GETCHAR(t, locale) MATCH_LOWER(t, locale)
75		#else
76	0	#define GETCHAR(t, locale) (t)
77		#endif
78
79		static int
80		MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
81	0	{
82		/* Fast path for match-everything pattern */
83	0	if (plen == 1 && *p == '%')
84	0	return LIKE_TRUE;
85
86		/* Since this function recurses, it could be driven to stack overflow */
87	0	check_stack_depth();
88
89		/*
90		* In this loop, we advance by char when matching wildcards (and thus on
91		* recursive entry to this function we are properly char-synced). On other
92		* occasions it is safe to advance by byte, as the text and pattern will
93		* be in lockstep. This allows us to perform all comparisons between the
94		* text and pattern on a byte by byte basis, even for multi-byte
95		* encodings.
96		*/
97	0	while (tlen > 0 && plen > 0)
98	0	{
99	0	if (*p == '\\')
100	0	{
101		/* Next pattern byte must match literally, whatever it is */
102	0	NextByte(p, plen);
103		/* ... and there had better be one, per SQL standard */
104	0	if (plen <= 0)
105	0	ereport(ERROR,
106	0	(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
107	0	errmsg("LIKE pattern must not end with escape character")));
108	0	if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
109	0	return LIKE_FALSE;
110	0	}
111	0	else if (*p == '%')
112	0	{
113	0	char firstpat;
114
115		/*
116		* % processing is essentially a search for a text position at
117		* which the remainder of the text matches the remainder of the
118		* pattern, using a recursive call to check each potential match.
119		*
120		* If there are wildcards immediately following the %, we can skip
121		* over them first, using the idea that any sequence of N _'s and
122		* one or more %'s is equivalent to N _'s and one % (ie, it will
123		* match any sequence of at least N text characters). In this way
124		* we will always run the recursive search loop using a pattern
125		* fragment that begins with a literal character-to-match, thereby
126		* not recursing more than we have to.
127		*/
128	0	NextByte(p, plen);
129
130	0	while (plen > 0)
131	0	{
132	0	if (*p == '%')
133	0	NextByte(p, plen);
134	0	else if (*p == '_')
135	0	{
136		/* If not enough text left to match the pattern, ABORT */
137	0	if (tlen <= 0)
138	0	return LIKE_ABORT;
139	0	NextChar(t, tlen);
140	0	NextByte(p, plen);
141	0	}
142	0	else
143	0	break; /* Reached a non-wildcard pattern char */
144	0	}
145
146		/*
147		* If we're at end of pattern, match: we have a trailing % which
148		* matches any remaining text string.
149		*/
150	0	if (plen <= 0)
151	0	return LIKE_TRUE;
152
153		/*
154		* Otherwise, scan for a text position at which we can match the
155		* rest of the pattern. The first remaining pattern char is known
156		* to be a regular or escaped literal character, so we can compare
157		* the first pattern byte to each text byte to avoid recursing
158		* more than we have to. This fact also guarantees that we don't
159		* have to consider a match to the zero-length substring at the
160		* end of the text. With a nondeterministic collation, we can't
161		* rely on the first bytes being equal, so we have to recurse in
162		* any case.
163		*/
164	0	if (*p == '\\')
165	0	{
166	0	if (plen < 2)
167	0	ereport(ERROR,
168	0	(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
169	0	errmsg("LIKE pattern must not end with escape character")));
170	0	firstpat = GETCHAR(p[1], locale);
171	0	}
172	0	else
173	0	firstpat = GETCHAR(*p, locale);
174
175	0	while (tlen > 0)
176	0	{
177	0	if (GETCHAR(*t, locale) == firstpat || (locale && !locale->deterministic))
178	0	{
179	0	int matched = MatchText(t, tlen, p, plen, locale);
180
181	0	if (matched != LIKE_FALSE)
182	0	return matched; /* TRUE or ABORT */
183	0	}
184
185	0	NextChar(t, tlen);
186	0	}
187
188		/*
189		* End of text with no match, so no point in trying later places
190		* to start matching this pattern.
191		*/
192	0	return LIKE_ABORT;
193	0	}
194	0	else if (*p == '_')
195	0	{
196		/* _ matches any single character, and we know there is one */
197	0	NextChar(t, tlen);
198	0	NextByte(p, plen);
199	0	continue;
200	0	}
201	0	else if (locale && !locale->deterministic)
202	0	{
203		/*
204		* For nondeterministic locales, we find the next substring of the
205		* pattern that does not contain wildcards and try to find a
206		* matching substring in the text. Crucially, we cannot do this
207		* character by character, as in the normal case, but must do it
208		* substring by substring, partitioned by the wildcard characters.
209		* (This is per SQL standard.)
210		*/
211	0	const char *p1;
212	0	size_t p1len;
213	0	const char *t1;
214	0	size_t t1len;
215	0	bool found_escape;
216	0	const char *subpat;
217	0	size_t subpatlen;
218	0	char *buf = NULL;
219
220		/*
221		* Determine next substring of pattern without wildcards. p is
222		* the start of the subpattern, p1 is one past the last byte. Also
223		* track if we found an escape character.
224		*/
225	0	p1 = p;
226	0	p1len = plen;
227	0	found_escape = false;
228	0	while (p1len > 0)
229	0	{
230	0	if (*p1 == '\\')
231	0	{
232	0	found_escape = true;
233	0	NextByte(p1, p1len);
234	0	if (p1len == 0)
235	0	ereport(ERROR,
236	0	(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
237	0	errmsg("LIKE pattern must not end with escape character")));
238	0	}
239	0	else if (*p1 == '_' || *p1 == '%')
240	0	break;
241	0	NextByte(p1, p1len);
242	0	}
243
244		/*
245		* If we found an escape character, then make an unescaped copy of
246		* the subpattern.
247		*/
248	0	if (found_escape)
249	0	{
250	0	char *b;
251
252	0	b = buf = palloc(p1 - p);
253	0	for (const char *c = p; c < p1; c++)
254	0	{
255	0	if (*c == '\\')
256	0	;
257	0	else
258	0	*(b++) = *c;
259	0	}
260
261	0	subpat = buf;
262	0	subpatlen = b - buf;
263	0	}
264	0	else
265	0	{
266	0	subpat = p;
267	0	subpatlen = p1 - p;
268	0	}
269
270		/*
271		* Shortcut: If this is the end of the pattern, then the rest of
272		* the text has to match the rest of the pattern.
273		*/
274	0	if (p1len == 0)
275	0	{
276	0	int cmp;
277
278	0	cmp = pg_strncoll(subpat, subpatlen, t, tlen, locale);
279
280	0	if (buf)
281	0	pfree(buf);
282	0	if (cmp == 0)
283	0	return LIKE_TRUE;
284	0	else
285	0	return LIKE_FALSE;
286	0	}
287
288		/*
289		* Now build a substring of the text and try to match it against
290		* the subpattern. t is the start of the text, t1 is one past the
291		* last byte. We start with a zero-length string.
292		*/
293	0	t1 = t;
294	0	t1len = tlen;
295	0	for (;;)
296	0	{
297	0	int cmp;
298
299	0	CHECK_FOR_INTERRUPTS();
300
301	0	cmp = pg_strncoll(subpat, subpatlen, t, (t1 - t), locale);
302
303		/*
304		* If we found a match, we have to test if the rest of pattern
305		* can match against the rest of the string. Otherwise we
306		* have to continue here try matching with a longer substring.
307		* (This is similar to the recursion for the '%' wildcard
308		* above.)
309		*
310		* Note that we can't just wind forward p and t and continue
311		* with the main loop. This would fail for example with
312		*
313		* U&'\0061\0308bc' LIKE U&'\00E4_c' COLLATE ignore_accents
314		*
315		* You'd find that t=\0061 matches p=\00E4, but then the rest
316		* won't match; but t=\0061\0308 also matches p=\00E4, and
317		* then the rest will match.
318		*/
319	0	if (cmp == 0)
320	0	{
321	0	int matched = MatchText(t1, t1len, p1, p1len, locale);
322
323	0	if (matched == LIKE_TRUE)
324	0	{
325	0	if (buf)
326	0	pfree(buf);
327	0	return matched;
328	0	}
329	0	}
330
331		/*
332		* Didn't match. If we used up the whole text, then the match
333		* fails. Otherwise, try again with a longer substring.
334		*/
335	0	if (t1len == 0)
336	0	{
337	0	if (buf)
338	0	pfree(buf);
339	0	return LIKE_FALSE;
340	0	}
341	0	else
342	0	NextChar(t1, t1len);
343	0	}
344	0	}
345	0	else if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
346	0	{
347		/* non-wildcard pattern char fails to match text char */
348	0	return LIKE_FALSE;
349	0	}
350
351		/*
352		* Pattern and text match, so advance.
353		*
354		* It is safe to use NextByte instead of NextChar here, even for
355		* multi-byte character sets, because we are not following immediately
356		* after a wildcard character. If we are in the middle of a multibyte
357		* character, we must already have matched at least one byte of the
358		* character from both text and pattern; so we cannot get out-of-sync
359		* on character boundaries. And we know that no backend-legal
360		* encoding allows ASCII characters such as '%' to appear as non-first
361		* bytes of characters, so we won't mistakenly detect a new wildcard.
362		*/
363	0	NextByte(t, tlen);
364	0	NextByte(p, plen);
365	0	}
366
367	0	if (tlen > 0)
368	0	return LIKE_FALSE; /* end of pattern, but not of text */
369
370		/*
371		* End of text, but perhaps not of pattern. Match iff the remaining
372		* pattern can match a zero-length string, ie, it's zero or more %'s.
373		*/
374	0	while (plen > 0 && *p == '%')
375	0	NextByte(p, plen);
376	0	if (plen <= 0)
377	0	return LIKE_TRUE;
378
379		/*
380		* End of text with no match, so no point in trying later places to start
381		* matching this pattern.
382		*/
383	0	return LIKE_ABORT;
384	0	} /* MatchText() */ Unexecuted instantiation: like.c:UTF8_MatchText Unexecuted instantiation: like.c:MB_MatchText Unexecuted instantiation: like.c:SB_MatchText Unexecuted instantiation: like.c:SB_IMatchText
385
386		/*
387		* like_escape() --- given a pattern and an ESCAPE string,
388		* convert the pattern to use Postgres' standard backslash escape convention.
389		*/
390		#ifdef do_like_escape
391
392		static text *
393		do_like_escape(text *pat, text *esc)
394	0	{
395	0	text *result;
396	0	char *p,
397	0	*e,
398	0	*r;
399	0	int plen,
400	0	elen;
401	0	bool afterescape;
402
403	0	p = VARDATA_ANY(pat);
404	0	plen = VARSIZE_ANY_EXHDR(pat);
405	0	e = VARDATA_ANY(esc);
406	0	elen = VARSIZE_ANY_EXHDR(esc);
407
408		/*
409		* Worst-case pattern growth is 2x --- unlikely, but it's hardly worth
410		* trying to calculate the size more accurately than that.
411		*/
412	0	result = (text *) palloc(plen * 2 + VARHDRSZ);
413	0	r = VARDATA(result);
414
415	0	if (elen == 0)
416	0	{
417		/*
418		* No escape character is wanted. Double any backslashes in the
419		* pattern to make them act like ordinary characters.
420		*/
421	0	while (plen > 0)
422	0	{
423	0	if (*p == '\\')
424	0	*r++ = '\\';
425	0	CopyAdvChar(r, p, plen);
426	0	}
427	0	}
428	0	else
429	0	{
430		/*
431		* The specified escape must be only a single character.
432		*/
433	0	NextChar(e, elen);
434	0	if (elen != 0)
435	0	ereport(ERROR,
436	0	(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
437	0	errmsg("invalid escape string"),
438	0	errhint("Escape string must be empty or one character.")));
439
440	0	e = VARDATA_ANY(esc);
441
442		/*
443		* If specified escape is '\', just copy the pattern as-is.
444		*/
445	0	if (*e == '\\')
446	0	{
447	0	memcpy(result, pat, VARSIZE_ANY(pat));
448	0	return result;
449	0	}
450
451		/*
452		* Otherwise, convert occurrences of the specified escape character to
453		* '\', and double occurrences of '\' --- unless they immediately
454		* follow an escape character!
455		*/
456	0	afterescape = false;
457	0	while (plen > 0)
458	0	{
459	0	if (CHAREQ(p, e) && !afterescape)
460	0	{
461	0	*r++ = '\\';
462	0	NextChar(p, plen);
463	0	afterescape = true;
464	0	}
465	0	else if (*p == '\\')
466	0	{
467	0	*r++ = '\\';
468	0	if (!afterescape)
469	0	*r++ = '\\';
470	0	NextChar(p, plen);
471	0	afterescape = false;
472	0	}
473	0	else
474	0	{
475	0	CopyAdvChar(r, p, plen);
476	0	afterescape = false;
477	0	}
478	0	}
479	0	}
480
481	0	SET_VARSIZE(result, r - ((char *) result));
482
483	0	return result;
484	0	} Unexecuted instantiation: like.c:SB_do_like_escape Unexecuted instantiation: like.c:MB_do_like_escape
485		#endif /* do_like_escape */
486
487		#ifdef CHAREQ
488		#undef CHAREQ
489		#endif
490
491		#undef NextChar
492		#undef CopyAdvChar
493		#undef MatchText
494
495		#ifdef do_like_escape
496		#undef do_like_escape
497		#endif
498
499		#undef GETCHAR
500
501		#ifdef MATCH_LOWER
502		#undef MATCH_LOWER
503
504		#endif