/src/neomutt/mutt/mbyte.c

Source (jump to first uncovered line)
/**
 * @file
 * Multi-byte String manipulation functions
 *
 * @authors
 * Copyright (C) 2017-2023 Richard Russon <rich@flatcap.org>
 * Copyright (C) 2019 Pietro Cerutti <gahr@gahr.ch>
 *
 * @copyright
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * @page mutt_mbyte Multi-byte String manipulation functions
 *
 * Some commonly-used multi-byte string manipulation routines.
 */

#include "config.h"
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "mbyte.h"
#include "buffer.h"
#include "charset.h"
#include "memory.h"
#include "pool.h"
#include "string2.h"

bool OptLocales; ///< (pseudo) set if user has valid locale definition

/**
 * mutt_mb_charlen - Measure the first (multibyte) character of a string
 * @param[in]  s     String to be examined
 * @param[out] width If non-NULL, receives wcwidth() of the decoded character
 * @retval -1 Conversion error (invalid or incomplete sequence)
 * @retval  0 End of input (NULL or empty string)
 * @retval >0 Length of the first character, in bytes
 *
 * @note When the conversion fails, `*width` is still written; it then holds
 *       wcwidth() of whatever the wide character was left as.
 */
int mutt_mb_charlen(const char *s, int *width)
{
  if (!s || (*s == '\0'))
    return 0;

  mbstate_t state = { 0 };
  wchar_t wc = 0;

  /* Let mbrtowc() look at the whole remaining string */
  const size_t rc = mbrtowc(&wc, s, mutt_str_len(s), &state);

  if (width)
    *width = wcwidth(wc);

  if ((rc == ICONV_ILLEGAL_SEQ) || (rc == ICONV_BUF_TOO_SMALL))
    return -1;

  return rc;
}

/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer
 * @retval true  Success, buf holds the NUL-terminated initials
 * @retval false Failure: bad arguments, an invalid multibyte sequence, or
 *               the buffer is too small
 *
 * Take a name, e.g. "John F. Kennedy" and reduce it to initials "JFK".
 * The function saves the first character from each word.  Words are delimited
 * by whitespace, or hyphens (so "Jean-Pierre" becomes "JP").
 *
 * @note On failure, buf may contain a partial result that is NOT
 *       NUL-terminated; callers must not use it.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer can't even hold the terminator */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * The <ctype.h> functions require a value representable as unsigned char,
     * so never hand them a plain (possibly negative) char. */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Always keep one byte in reserve for the terminator */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= clen;
    while (clen--)
      *buf++ = *name++;

    /* Skip to end-of-word (clen is refreshed before it's used to advance) */
    for (; *name; name += clen)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}

/**
 * mutt_mb_width - Measure a string's display width (in screen columns)
 * @param str     String to measure
 * @param col     Display column (used for expanding tabs)
 * @param indent  If true, a space following a newline is measured like a tab
 * @retval num String's width in screen columns
 *
 * This is like wcwidth(), but works on a multibyte `const char *` string.
 *
 * Invalid or incomplete sequences are measured as #ReplacementChar, and
 * characters for which wcwidth() reports a negative width count as 1 column.
 *
 * @note `col` is not advanced while the string is scanned, so every tab is
 *       expanded relative to the same starting column.
 */
int mutt_mb_width(const char *str, int col, bool indent)
{
  if (!str || (*str == '\0'))
    return 0;

  mbstate_t state = { 0 };
  size_t remaining = mutt_str_len(str);
  int columns = 0;
  bool after_newline = false;

  while ((*str != '\0') && (remaining > 0))
  {
    wchar_t wc = L'\0';
    size_t used = mbrtowc(&wc, str, remaining, &state);
    if (used == 0)
      break;

    if (used == ICONV_ILLEGAL_SEQ)
    {
      /* Bad byte: restart the decoder and step over a single byte */
      memset(&state, 0, sizeof(state));
      wc = ReplacementChar;
      used = 1;
    }
    else if (used == ICONV_BUF_TOO_SMALL)
    {
      /* Truncated character: it swallows the rest of the string */
      wc = ReplacementChar;
      used = remaining;
    }

    int w = wcwidth(wc);
    if (w < 0)
      w = 1;

    const bool is_tab = (wc == L'\t');
    const bool is_folded_space = after_newline && (wc == L' ');

    if (is_tab || is_folded_space)
    {
      /* correctly calc tab stop, even for sending as the line should look
       * pretty on the receiving end */
      after_newline = false;
      w = 8 - (col % 8);
    }
    else if (indent && (wc == '\n'))
    {
      /* track newlines for display-case: if we have a space after a newline,
       * assume 8 spaces as for display we always tab-fold */
      after_newline = true;
    }

    columns += w;
    str += used;
    remaining -= used;
  }

  return columns;
}

/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A printable character with a positive wcwidth() is reported as-is.
 * Anything else gets a fixed width chosen by the size of its value:
 * 2 for 7-bit values, 6 for values that fit in 16 bits, 10 otherwise.
 *
 * NOTE(review): the fixed widths presumably match an escaped on-screen
 * representation of the character -- confirm against the code that renders it.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);

  if (IsWPrint(wc) && (cols > 0))
    return cols;

  if ((wc & ~0x7f) == 0)
    return 2;

  return ((wc & ~0xffff) == 0) ? 6 : 10;
}

/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Length of string in characters
 * @retval num Width in screen columns (0 if s is NULL)
 *
 * Sums mutt_mb_wcwidth() over exactly n wide characters; the string is not
 * expected to be NUL-terminated.
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  if (!s)
    return 0;

  int total = 0;
  for (size_t i = 0; i < n; i++)
    total += mutt_mb_wcwidth(s[i]);

  return total;
}

/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Length of string in characters
 * @param w1 Width limit
 * @retval num Chars to skip
 *
 * Walk the string from the start, adding up mutt_mb_wcwidth() of each
 * character, and stop at the first character that pushes the running total
 * above w1.  The return value is the number of characters before that point
 * (n if the limit is never exceeded, 0 if s is NULL).
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int used = 0;

  while (count < n)
  {
    used += mutt_mb_wcwidth(s[count]);
    if (used > w1)
      break;
    count++;
  }

  return count;
}

/**
 * buf_mb_wcstombs - Convert a string from wide to multibyte characters
 * @param dest Buffer for the result
 * @param wstr Source wide string to convert
 * @param wlen Length of the wide string
 *
 * Conversion stops at the first of: wlen characters converted, a NUL wide
 * character, a character that wcrtomb() cannot encode, or the buffer running
 * out of room.  The result is written from the start of dest's data and is
 * always NUL-terminated.
 */
void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
{
  if (!dest || !wstr)
    return;

  // Give ourselves 4 utf-8 bytes per wide character.
  // The loop below insists on more than MB_LEN_MAX free bytes before each
  // conversion, and MB_LEN_MAX is implementation-defined (it can be larger
  // than 4), so add that much slack, plus one byte for the terminator.
  // Otherwise the tail of a string of 4-byte characters could be dropped.
  // NOTE(review): assumes buf_alloc() leaves dest->dsize at least as large
  // as the size requested -- confirm.
  buf_alloc(dest, (4 * wlen) + MB_LEN_MAX + 1);

  mbstate_t mbstate = { 0 };
  size_t k = 0;

  char *buf = dest->data;
  size_t buflen = dest->dsize;

  // wcrtomb() may write up to MB_LEN_MAX bytes; demanding strictly more than
  // that guarantees a byte is left over for the final '\0'.
  for (; (wlen > 0) && (buflen > MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
  {
    k = wcrtomb(buf, *wstr, &mbstate);
    if (k == ICONV_ILLEGAL_SEQ)
      break; // unencodable character; buf has not been advanced
    if (*wstr == L'\0')
      break; // embedded terminator ends the string early
  }

  *buf = '\0';
  buf_fix_dptr(dest);
}

/**
 * mutt_mb_mbstowcs - Convert a string from multibyte to wide characters
 * @param[in,out] pwbuf    Buffer for the result (may be reallocated)
 * @param[in,out] pwbuflen Capacity of the result buffer, in wide characters
 * @param[in]     i        Starting index into the result buffer
 * @param[in]     buf      String to convert
 * @retval num Index one past the last wide character written
 * @retval 0   Any of pwbuf, pwbuflen, buf is NULL
 *
 * Converted characters are stored from index i onwards.  Whenever the buffer
 * is full it is grown to hold 20 characters beyond the current index.  Each
 * byte that cannot be decoded is stored as a single #ReplacementChar.
 *
 * @note No terminating NUL is appended to the result.
 */
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
{
  if (!pwbuf || !pwbuflen || !buf)
    return 0;

  wchar_t *out = *pwbuf;
  size_t capacity = *pwbuflen;
  mbstate_t state = { 0 };

  while (*buf != '\0')
  {
    /* Every run of valid characters starts from the initial shift state */
    memset(&state, 0, sizeof(state));

    wchar_t wc = 0;
    size_t used = 0;

    /* Convert for as long as the input decodes cleanly */
    while (true)
    {
      used = mbrtowc(&wc, buf, MB_LEN_MAX, &state);
      if ((used == 0) || (used == ICONV_ILLEGAL_SEQ) || (used == ICONV_BUF_TOO_SMALL))
        break;

      if (i >= capacity)
      {
        capacity = i + 20;
        MUTT_MEM_REALLOC(&out, capacity, wchar_t);
      }
      out[i++] = wc;
      buf += used;
    }

    const bool failed = (used == ICONV_ILLEGAL_SEQ) || (used == ICONV_BUF_TOO_SMALL);
    if (failed && (*buf != '\0'))
    {
      /* Stand in for the offending byte, then step over it */
      if (i >= capacity)
      {
        capacity = i + 20;
        MUTT_MEM_REALLOC(&out, capacity, wchar_t);
      }
      out[i++] = ReplacementChar;
      buf++;
    }
  }

  *pwbuf = out;
  *pwbuflen = capacity;
  return i;
}

/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is not typically part of a pathname
 * @retval false Character is typically part of a pathname
 *
 * The characters matched are `<>&()$?*;{}|` and space.
 *
 * @note The name is very confusing.
 * @note Because the lookup uses wcschr(), the NUL wide character matches the
 *       list's own terminator and is therefore reported as true.
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  /* '!' is deliberately left out: it can be part of a pathname in NeoMutt */
  static const wchar_t special[] = L"<>&()$?*;{}| ";

  const wchar_t *match = wcschr(special, ch);
  return match != NULL;
}

/**
 * mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
 * @param s String to check
 * @retval true  String contains no uppercase characters
 * @retval false s is NULL, the string can't be decoded, or it contains an
 *               uppercase letter
 *
 * Non-alphabetic characters are considered lowercase, so an empty string
 * yields true.
 */
bool mutt_mb_is_lower(const char *s)
{
  if (!s)
    return false;

  mbstate_t state;
  memset(&state, 0, sizeof(state));

  size_t remaining = mutt_str_len(s);

  while ((remaining > 0) && (*s != '\0'))
  {
    wchar_t wc = 0;
    const size_t used = mbrtowc(&wc, s, remaining, &state);
    if (used == 0)
      break;

    if ((used == ICONV_BUF_TOO_SMALL) || (used == ICONV_ILLEGAL_SEQ))
      return false; // error; assume upper-case

    if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
      return false; // upper-case

    s += used;
    remaining -= used;
  }

  return true; // lower-case
}

/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character would corrupt the display
 * @retval false Character is safe to display
 *
 * The character is checked against a fixed table of code points: mostly
 * bidirectional formatting controls, plus the soft hyphen and U+FEFF.
 *
 * @note This list isn't complete.
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  /* Inclusive code point ranges; a lone character has first == last */
  static const struct
  {
    wchar_t first;
    wchar_t last;
  } unsafe[] = {
    { 0x00ad, 0x00ad }, /* soft hyphen */
    { 0x061c, 0x061c }, /* arabic letter mark */
    { 0x200e, 0x200f }, /* left-to-right mark, right-to-left mark */
    { 0x202a, 0x202e }, /* left-to-right embedding, right-to-left embedding,
                         * pop directional formatting, left-to-right override,
                         * right-to-left override */
    { 0x2066, 0x2069 }, /* left-to-right isolate, right-to-left isolate,
                         * first strong isolate, pop directional isolate */
    { 0xfeff, 0xfeff }, /* zero width no-break space */
  };

  for (size_t i = 0; i < (sizeof(unsafe) / sizeof(unsafe[0])); i++)
  {
    if ((wc >= unsafe[i].first) && (wc <= unsafe[i].last))
      return true;
  }

  return false;
}

/**
 * mutt_mb_filter_unprintable - Replace unprintable characters
 * @param[in,out] s String to modify
 * @retval  0 Success
 * @retval -1 Error (s or *s is NULL)
 *
 * The string is decoded one character at a time and rebuilt:
 * - Bytes that can't be decoded are treated as #ReplacementChar
 * - Characters that aren't printable are replaced with '?'
 * - If the charset is UTF-8, byte-order marks and characters that would
 *   corrupt the display are dropped
 *
 * @note The source string will be freed and a newly allocated string will be
 * returned in its place.  The caller should free the returned string.
 */
int mutt_mb_filter_unprintable(char **s)
{
  if (!s || !*s)
    return -1;

  wchar_t wc = 0;
  size_t k;
  char scratch[MB_LEN_MAX + 1];
  char *p = *s;
  mbstate_t mbstate1 = { 0 }; // decoding state (input)
  mbstate_t mbstate2 = { 0 }; // encoding state (output)

  struct Buffer *buf = buf_pool_get();
  for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
  {
    if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
    {
      // Undecodable input: restart the decoder and consume a single byte
      k = 1;
      memset(&mbstate1, 0, sizeof(mbstate1));
      wc = ReplacementChar;
    }
    if (CharsetIsUtf8 && IsBOM(wc))
    {
      continue;
    }
    if (!IsWPrint(wc))
      wc = '?';
    else if (CharsetIsUtf8 && mutt_mb_is_display_corrupting_utf8(wc))
      continue;

    size_t k2 = wcrtomb(scratch, wc, &mbstate2);
    if (k2 == ICONV_ILLEGAL_SEQ)
    {
      // wc can't be encoded.  Using the (size_t) -1 result as an index would
      // write far outside scratch[], so fall back to a plain '?'.
      memset(&mbstate2, 0, sizeof(mbstate2));
      scratch[0] = '?';
      k2 = 1;
    }
    scratch[k2] = '\0';
    buf_addstr(buf, scratch);
  }
  FREE(s);

  if (buf_is_empty(buf))
    *s = MUTT_MEM_CALLOC(1, char); // Fake empty string
  else
    *s = buf_strdup(buf);

  buf_pool_release(&buf);
  return 0;
}

Coverage Report

Created: 2025-03-11 06:49

Line	Count	Source (jump to first uncovered line)
1		/**
2		* @file
3		* Multi-byte String manipulation functions
4		*
5		* @authors
6		* Copyright (C) 2017-2023 Richard Russon <rich@flatcap.org>
7		* Copyright (C) 2019 Pietro Cerutti <gahr@gahr.ch>
8		*
9		* @copyright
10		* This program is free software: you can redistribute it and/or modify it under
11		* the terms of the GNU General Public License as published by the Free Software
12		* Foundation, either version 2 of the License, or (at your option) any later
13		* version.
14		*
15		* This program is distributed in the hope that it will be useful, but WITHOUT
16		* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17		* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18		* details.
19		*
20		* You should have received a copy of the GNU General Public License along with
21		* this program. If not, see <http://www.gnu.org/licenses/>.
22		*/
23
24		/**
25		* @page mutt_mbyte Multi-byte String manipulation functions
26		*
27		* Some commonly-used multi-byte string manipulation routines.
28		*/
29
30		#include "config.h"
31		#include <ctype.h>
32		#include <limits.h>
33		#include <stdbool.h>
34		#include <string.h>
35		#include <wchar.h>
36		#include <wctype.h>
37		#include "mbyte.h"
38		#include "buffer.h"
39		#include "charset.h"
40		#include "memory.h"
41		#include "pool.h"
42		#include "string2.h"
43
44		bool OptLocales; ///< (pseudo) set if user has valid locale definition
45
46		/**
47		* mutt_mb_charlen - Count the bytes in a (multibyte) character
48		* @param[in] s String to be examined
49		* @param[out] width Number of screen columns the character would use
50		* @retval num Bytes in the first (multibyte) character of input consumes
51		* @retval <0 Conversion error
52		* @retval =0 End of input
53		* @retval >0 Length (bytes)
54		*/
55		int mutt_mb_charlen(const char s, int width)
56	67.3k	{
57	67.3k	if (!s \|\| (*s == '\0'))
58	0	return 0;
59
60	67.3k	wchar_t wc = 0;
61	67.3k	mbstate_t mbstate = { 0 };
62
63	67.3k	size_t n = mutt_str_len(s);
64	67.3k	size_t k = mbrtowc(&wc, s, n, &mbstate);
65	67.3k	if (width)
66	0	*width = wcwidth(wc);
67	67.3k	return ((k == ICONV_ILLEGAL_SEQ) \|\| (k == ICONV_BUF_TOO_SMALL)) ? -1 : k;
68	67.3k	}
69
70		/**
71		* mutt_mb_get_initials - Turn a name into initials
72		* @param name String to be converted
73		* @param buf Buffer for the result
74		* @param buflen Size of the buffer
75		* @retval 1 Success
76		* @retval 0 Failure
77		*
78		* Take a name, e.g. "John F. Kennedy" and reduce it to initials "JFK".
79		* The function saves the first character from each word. Words are delimited
80		* by whitespace, or hyphens (so "Jean-Pierre" becomes "JP").
81		*/
82		bool mutt_mb_get_initials(const char name, char buf, size_t buflen)
83	0	{
84	0	if (!name \|\| !buf)
85	0	return false;
86
87	0	while (*name)
88	0	{
89		/* Char's length in bytes */
90	0	int clen = mutt_mb_charlen(name, NULL);
91	0	if (clen < 1)
92	0	return false;
93
94		/* Ignore punctuation at the beginning of a word */
95	0	if ((clen == 1) && ispunct(*name))
96	0	{
97	0	name++;
98	0	continue;
99	0	}
100
101	0	if (clen >= buflen)
102	0	return false;
103
104		/* Copy one multibyte character */
105	0	buflen -= clen;
106	0	while (clen--)
107	0	buf++ = name++;
108
109		/* Skip to end-of-word */
110	0	for (; *name; name += clen)
111	0	{
112	0	clen = mutt_mb_charlen(name, NULL);
113	0	if (clen < 1)
114	0	return false;
115	0	if ((clen == 1) && (isspace(name) \|\| (name == '-')))
116	0	break;
117	0	}
118
119		/* Skip any whitespace, or hyphens */
120	0	while (name && (isspace(name) \|\| (*name == '-')))
121	0	name++;
122	0	}
123
124	0	*buf = '\0';
125	0	return true;
126	0	}
127
128		/**
129		* mutt_mb_width - Measure a string's display width (in screen columns)
130		* @param str String to measure
131		* @param col Display column (used for expanding tabs)
132		* @param indent If true, newline-space will be indented 8 chars
133		* @retval num String's width in screen columns
134		*
135		* This is like wcwidth(), but gets const char* not wchar_t*.
136		*/
137		int mutt_mb_width(const char *str, int col, bool indent)
138	0	{
139	0	if (!str \|\| !*str)
140	0	return 0;
141
142	0	bool nl = false;
143	0	int total_width = 0;
144	0	mbstate_t mbstate = { 0 };
145
146	0	size_t str_len = mutt_str_len(str);
147
148	0	while (*str && (str_len > 0))
149	0	{
150	0	wchar_t wc = L'\0';
151	0	size_t consumed = mbrtowc(&wc, str, str_len, &mbstate);
152	0	if (consumed == 0)
153	0	break;
154
155	0	if (consumed == ICONV_ILLEGAL_SEQ)
156	0	{
157	0	memset(&mbstate, 0, sizeof(mbstate));
158	0	wc = ReplacementChar;
159	0	consumed = 1;
160	0	}
161	0	else if (consumed == ICONV_BUF_TOO_SMALL)
162	0	{
163	0	wc = ReplacementChar;
164	0	consumed = str_len;
165	0	}
166
167	0	int wchar_width = wcwidth(wc);
168	0	if (wchar_width < 0)
169	0	wchar_width = 1;
170
171	0	if ((wc == L'\t') \|\| (nl && (wc == L' ')))
172	0	{
173		/* correctly calc tab stop, even for sending as the line should look
174		* pretty on the receiving end */
175	0	nl = false;
176	0	wchar_width = 8 - (col % 8);
177	0	}
178	0	else if (indent && (wc == '\n'))
179	0	{
180		/* track newlines for display-case: if we have a space after a newline,
181		* assume 8 spaces as for display we always tab-fold */
182	0	nl = true;
183	0	}
184
185	0	total_width += wchar_width;
186	0	str += consumed;
187	0	str_len -= consumed;
188	0	}
189
190	0	return total_width;
191	0	}
192
193		/**
194		* mutt_mb_wcwidth - Measure the screen width of a character
195		* @param wc Character to examine
196		* @retval num Width in screen columns
197		*/
198		int mutt_mb_wcwidth(wchar_t wc)
199	0	{
200	0	int n = wcwidth(wc);
201	0	if (IsWPrint(wc) && (n > 0))
202	0	return n;
203	0	if (!(wc & ~0x7f))
204	0	return 2;
205	0	if (!(wc & ~0xffff))
206	0	return 6;
207	0	return 10;
208	0	}
209
210		/**
211		* mutt_mb_wcswidth - Measure the screen width of a string
212		* @param s String to measure
213		* @param n Length of string in characters
214		* @retval num Width in screen columns
215		*/
216		int mutt_mb_wcswidth(const wchar_t *s, size_t n)
217	0	{
218	0	if (!s)
219	0	return 0;
220
221	0	int w = 0;
222	0	while (n--)
223	0	w += mutt_mb_wcwidth(*s++);
224	0	return w;
225	0	}
226
227		/**
228		* mutt_mb_width_ceiling - Keep the end of the string on-screen
229		* @param s String being displayed
230		* @param n Length of string in characters
231		* @param w1 Width limit
232		* @retval num Chars to skip
233		*
234		* Given a string and a width, determine how many characters from the
235		* beginning of the string should be skipped so that the string fits.
236		*/
237		size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
238	0	{
239	0	if (!s)
240	0	return 0;
241
242	0	const wchar_t *s0 = s;
243	0	int w = 0;
244	0	for (; n; s++, n--)
245	0	if ((w += mutt_mb_wcwidth(*s)) > w1)
246	0	break;
247	0	return s - s0;
248	0	}
249
250		/**
251		* buf_mb_wcstombs - Convert a string from wide to multibyte characters
252		* @param dest Buffer for the result
253		* @param wstr Source wide string to convert
254		* @param wlen Length of the wide string
255		*/
256		void buf_mb_wcstombs(struct Buffer dest, const wchar_t wstr, size_t wlen)
257	0	{
258	0	if (!dest \|\| !wstr)
259	0	return;
260
261		// Give ourselves 4 utf-8 bytes per wide character
262	0	buf_alloc(dest, 4 * wlen);
263
264	0	mbstate_t mbstate = { 0 };
265	0	size_t k = 0;
266
267	0	char *buf = dest->data;
268	0	size_t buflen = dest->dsize;
269
270	0	for (; (wlen > 0) && (buflen >= MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
271	0	{
272	0	k = wcrtomb(buf, *wstr, &mbstate);
273	0	if (k == ICONV_ILLEGAL_SEQ)
274	0	break;
275	0	if (*wstr == L'\0')
276	0	break;
277	0	}
278
279	0	*buf = '\0';
280	0	buf_fix_dptr(dest);
281	0	}
282
283		/**
284		* mutt_mb_mbstowcs - Convert a string from multibyte to wide characters
285		* @param[out] pwbuf Buffer for the result
286		* @param[out] pwbuflen Length of the result buffer
287		* @param[in] i Starting index into the result buffer
288		* @param[in] buf String to convert
289		* @retval num First character after the result
290		*/
291		size_t mutt_mb_mbstowcs(wchar_t *pwbuf, size_t pwbuflen, size_t i, const char *buf)
292	0	{
293	0	if (!pwbuf \|\| !pwbuflen \|\| !buf)
294	0	return 0;
295
296	0	wchar_t wc = 0;
297	0	mbstate_t mbstate = { 0 };
298	0	size_t k;
299	0	wchar_t wbuf = pwbuf;
300	0	size_t wbuflen = *pwbuflen;
301
302	0	while (*buf != '\0')
303	0	{
304	0	memset(&mbstate, 0, sizeof(mbstate));
305	0	for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
306	0	(k != ICONV_ILLEGAL_SEQ) && (k != ICONV_BUF_TOO_SMALL);
307	0	buf += k)
308	0	{
309	0	if (i >= wbuflen)
310	0	{
311	0	wbuflen = i + 20;
312	0	MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
313	0	}
314	0	wbuf[i++] = wc;
315	0	}
316	0	if ((*buf != '\0') && ((k == ICONV_ILLEGAL_SEQ) \|\| (k == ICONV_BUF_TOO_SMALL)))
317	0	{
318	0	if (i >= wbuflen)
319	0	{
320	0	wbuflen = i + 20;
321	0	MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
322	0	}
323	0	wbuf[i++] = ReplacementChar;
324	0	buf++;
325	0	}
326	0	}
327	0	*pwbuf = wbuf;
328	0	*pwbuflen = wbuflen;
329	0	return i;
330	0	}
331
332		/**
333		* mutt_mb_is_shell_char - Is character not typically part of a pathname
334		* @param ch Character to examine
335		* @retval true Character is not typically part of a pathname
336		* @retval false Character is typically part of a pathname
337		*
338		* @note The name is very confusing.
339		*/
340		bool mutt_mb_is_shell_char(wchar_t ch)
341	0	{
342	0	static const wchar_t shell_chars[] = L"<>&()$?;{}\| "; / ! not included because it can be part of a pathname in NeoMutt */
343	0	return wcschr(shell_chars, ch);
344	0	}
345
346		/**
347		* mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
348		* @param s String to check
349		* @retval true String contains no uppercase characters
350		* @retval false Error, or contains some uppercase characters
351		*
352		* Non-alphabetic characters are considered lowercase.
353		*/
354		bool mutt_mb_is_lower(const char *s)
355	48.1k	{
356	48.1k	if (!s)
357	0	return false;
358
359	48.1k	wchar_t wc = 0;
360	48.1k	mbstate_t mbstate = { 0 };
361	48.1k	size_t l;
362
363	48.1k	memset(&mbstate, 0, sizeof(mbstate));
364	48.1k	size_t n = mutt_str_len(s);
365
366	865k	for (; (n > 0) && (*s != '\0') && (l = mbrtowc(&wc, s, n, &mbstate)) != 0; s += l, n -= l)
367	827k	{
368	827k	if ((l == ICONV_BUF_TOO_SMALL) \|\| (l == ICONV_ILLEGAL_SEQ))
369	0	return false; // error; assume upper-case
370	827k	if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
371	9.62k	return false; // upper-case
372	827k	}
373
374	38.4k	return true; // lower-case
375	48.1k	}
376
377		/**
378		* mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
379		* @param wc Character to examine
380		* @retval true Character would corrupt the display
381		* @retval false Character is safe to display
382		*
383		* @note This list isn't complete.
384		*/
385		bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
386	0	{
387	0	if ((wc == (wchar_t) 0x00ad) \|\| /* soft hyphen */
388	0	(wc == (wchar_t) 0x200e) \|\| /* left-to-right mark */
389	0	(wc == (wchar_t) 0x200f) \|\| /* right-to-left mark */
390	0	(wc == (wchar_t) 0xfeff)) /* zero width no-break space */
391	0	{
392	0	return true;
393	0	}
394
395		/* left-to-right isolate, right-to-left isolate, first strong isolate,
396		* pop directional isolate */
397	0	if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
398	0	return true;
399
400		/* left-to-right embedding, right-to-left embedding, pop directional formatting,
401		* left-to-right override, right-to-left override */
402	0	if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
403	0	return true;
404
405		/* arabic letter mark */
406	0	if (wc == (wchar_t) 0x061c)
407	0	return true;
408
409	0	return false;
410	0	}
411
412		/**
413		* mutt_mb_filter_unprintable - Replace unprintable characters
414		* @param[in,out] s String to modify
415		* @retval 0 Success
416		* @retval -1 Error
417		*
418		* Unprintable characters will be replaced with #ReplacementChar.
419		*
420		* @note The source string will be freed and a newly allocated string will be
421		* returned in its place. The caller should free the returned string.
422		*/
423		int mutt_mb_filter_unprintable(char **s)
424	6.43k	{
425	6.43k	if (!s \|\| !*s)
426	0	return -1;
427
428	6.43k	wchar_t wc = 0;
429	6.43k	size_t k, k2;
430	6.43k	char scratch[MB_LEN_MAX + 1];
431	6.43k	char p = s;
432	6.43k	mbstate_t mbstate1 = { 0 };
433	6.43k	mbstate_t mbstate2 = { 0 };
434
435	6.43k	struct Buffer *buf = buf_pool_get();
436	747k	for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
437	741k	{
438	741k	if ((k == ICONV_ILLEGAL_SEQ) \|\| (k == ICONV_BUF_TOO_SMALL))
439	284k	{
440	284k	k = 1;
441	284k	memset(&mbstate1, 0, sizeof(mbstate1));
442	284k	wc = ReplacementChar;
443	284k	}
444	741k	if (CharsetIsUtf8 && IsBOM(wc))
445	0	{
446	0	continue;
447	0	}
448	741k	if (!IsWPrint(wc))
449	10.7k	wc = '?';
450	730k	else if (CharsetIsUtf8 && mutt_mb_is_display_corrupting_utf8(wc))
451	0	continue;
452	741k	k2 = wcrtomb(scratch, wc, &mbstate2);
453	741k	scratch[k2] = '\0';
454	741k	buf_addstr(buf, scratch);
455	741k	}
456	6.43k	FREE(s);
457
458	6.43k	if (buf_is_empty(buf))
459	2.43k	*s = MUTT_MEM_CALLOC(1, char); // Fake empty string
460	4.00k	else
461	4.00k	*s = buf_strdup(buf);
462
463	6.43k	buf_pool_release(&buf);
464	6.43k	return 0;
465	6.43k	}