Coverage Report

Created: 2026-03-12 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gettext/gettext-tools/libgettextpo/unilbrk/u8-width-linebreaks.c
Line
Count
Source
1
/* Line breaking of UTF-8 strings.
2
   Copyright (C) 2001-2003, 2006-2026 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5
   This file is free software.
6
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7
   You can redistribute it and/or modify it under either
8
     - the terms of the GNU Lesser General Public License as published
9
       by the Free Software Foundation, either version 3, or (at your
10
       option) any later version, or
11
     - the terms of the GNU General Public License as published by the
12
       Free Software Foundation; either version 2, or (at your option)
13
       any later version, or
14
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16
   This file is distributed in the hope that it will be useful,
17
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
   Lesser General Public License and the GNU General Public License
20
   for more details.
21
22
   You should have received a copy of the GNU Lesser General Public
23
   License and of the GNU General Public License along with this
24
   program.  If not, see <https://www.gnu.org/licenses/>.  */
25
26
#include <config.h>
27
28
/* Specification.  */
29
#include "unilbrk.h"
30
#include "unilbrk/internal.h"
31
32
#include "unilbrk/lbrktables.h"
33
#include "unistr.h"
34
#include "uniwidth.h"
35
36
int
37
u8_width_linebreaks_internal (const uint8_t *s, size_t n,
38
                              int width, int start_column, int at_end_columns,
39
                              const char *o, const char *encoding, int cr,
40
                              char *p)
41
0
{
42
0
  u8_possible_linebreaks_loop (s, n, encoding, cr, p);
43
44
0
  const uint8_t *s_end = s + n;
45
0
  char *last_p = NULL;
46
0
  int last_column = start_column;
47
0
  int piece_width = 0;
48
0
  while (s < s_end)
49
0
    {
50
0
      ucs4_t uc;
51
0
      int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
52
53
      /* Respect the override.  */
54
0
      if (o != NULL && *o != UC_BREAK_UNDEFINED)
55
0
        *p = *o;
56
57
0
      if (*p == UC_BREAK_POSSIBLE
58
0
          || *p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF)
59
0
        {
60
          /* An atomic piece of text ends here.  */
61
0
          if (last_p != NULL && last_column + piece_width > width)
62
0
            {
63
              /* Insert a line break.  */
64
0
              *last_p = UC_BREAK_POSSIBLE;
65
0
              last_column = 0;
66
0
            }
67
0
        }
68
69
0
      if (*p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF)
70
0
        {
71
          /* uc is a line break character.  */
72
          /* Start a new piece at column 0.  */
73
0
          last_p = NULL;
74
0
          last_column = 0;
75
0
          piece_width = 0;
76
0
        }
77
0
      else
78
0
        {
79
          /* uc is not a line break character.  */
80
0
          if (*p == UC_BREAK_POSSIBLE)
81
0
            {
82
              /* Start a new piece.  */
83
0
              last_p = p;
84
0
              last_column += piece_width;
85
0
              piece_width = 0;
86
              /* No line break for the moment, may be turned into
87
                 UC_BREAK_POSSIBLE later, via last_p. */
88
0
            }
89
90
0
          *p = UC_BREAK_PROHIBITED;
91
92
0
          int w = uc_width (uc, encoding);
93
0
          if (w >= 0) /* ignore control characters in the string */
94
0
            piece_width += w;
95
0
        }
96
97
0
      s += count;
98
0
      p += count;
99
0
      if (o != NULL)
100
0
        o += count;
101
0
    }
102
103
  /* The last atomic piece of text ends here.  */
104
0
  if (last_p != NULL && last_column + piece_width + at_end_columns > width)
105
0
    {
106
      /* Insert a line break.  */
107
0
      *last_p = UC_BREAK_POSSIBLE;
108
0
      last_column = 0;
109
0
    }
110
111
0
  return last_column + piece_width;
112
0
}
113
114
#if defined IN_LIBUNISTRING
115
/* For backward compatibility with older versions of libunistring.  */
116
117
# undef u8_width_linebreaks
118
119
/* Same interface as u8_width_linebreaks_v2, except that -1 instead of
   LBP_CR is handed to u8_width_linebreaks_internal as its CR argument.
   NOTE(review): -1 presumably selects the pre-v2 treatment of CR; confirm
   against u8_possible_linebreaks_loop.
   Returns whatever u8_width_linebreaks_internal returns.  */
int
u8_width_linebreaks (const uint8_t *s, size_t n,
                     int width, int start_column, int at_end_columns,
                     const char *o, const char *encoding,
                     char *p)
{
  const int cr = -1;
  int end_column =
    u8_width_linebreaks_internal (s, n,
                                  width, start_column, at_end_columns,
                                  o, encoding, cr, p);

  return end_column;
}
129
130
#endif
131
132
int
133
u8_width_linebreaks_v2 (const uint8_t *s, size_t n,
134
                        int width, int start_column, int at_end_columns,
135
                        const char *o, const char *encoding,
136
                        char *p)
137
0
{
138
0
  return u8_width_linebreaks_internal (s, n,
139
0
                                       width, start_column, at_end_columns,
140
0
                                       o, encoding, LBP_CR, p);
141
0
}
142
143
144
#ifdef TEST
145
146
#include <stdio.h>
147
#include <stdlib.h>
148
#include <string.h>
149
150
/* Read the contents of an input stream, and return it, terminated with a NUL
   byte.
   The result is allocated with realloc; the caller releases it with free.
   If memory runs out or reading fails, a message is printed to stderr and
   the program exits with status 1.
   All sizes are kept in size_t, and the growth computation is checked, so
   that very large inputs cannot overflow the bookkeeping.  */
char *
read_file (FILE *stream)
{
  /* Number of bytes requested from fread per call.  */
  enum { CHUNK_SIZE = 4096 };
  const size_t size_max = (size_t) -1;
  char *buf = NULL;
  size_t alloc = 0;   /* bytes allocated at buf */
  size_t size = 0;    /* bytes of buf filled so far; always <= alloc */

  while (! feof (stream))
    {
      /* Ensure room for one more chunk after the data read so far.  */
      if (alloc - size < CHUNK_SIZE)
        {
          if (size > size_max - CHUNK_SIZE)
            {
              /* size + CHUNK_SIZE is not representable.  */
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          size_t needed = size + CHUNK_SIZE;
          /* Grow by a factor of 1.5, saturating instead of wrapping.  */
          size_t grown =
            (alloc / 2 <= size_max - alloc ? alloc + alloc / 2 : size_max);
          alloc = (grown < needed ? needed : grown);
          buf = realloc (buf, alloc);
          if (buf == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
        }

      size_t count = fread (buf + size, 1, CHUNK_SIZE, stream);
      if (count == 0)
        {
          /* Nothing was read: either end of file (the loop condition
             handles that) or a read error.  */
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
        }
      else
        size += count;
    }

  /* Resize to the final length plus the terminating NUL.  size + 1 cannot
     wrap: either nothing was read, or alloc >= size + CHUNK_SIZE held
     before the last fread, which added at most CHUNK_SIZE bytes.  */
  buf = realloc (buf, size + 1);
  if (buf == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  buf[size] = '\0';
  return buf;
}
196
197
int
198
main (int argc, char * argv[])
199
{
200
  if (argc == 2)
201
    {
202
      /* Insert line breaks for a given width.  */
203
      int width = atoi (argv[1]);
204
      char *input = read_file (stdin);
205
      int length = strlen (input);
206
      char *breaks = malloc (length);
207
208
      u8_width_linebreaks_v2 ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
209
210
      for (int i = 0; i < length; i++)
211
        {
212
          switch (breaks[i])
213
            {
214
            case UC_BREAK_POSSIBLE:
215
              putc ('\n', stdout);
216
              break;
217
            case UC_BREAK_MANDATORY:
218
              break;
219
            case UC_BREAK_CR_BEFORE_LF:
220
              break;
221
            case UC_BREAK_PROHIBITED:
222
              break;
223
            default:
224
              abort ();
225
            }
226
          putc (input[i], stdout);
227
        }
228
229
      free (breaks);
230
231
      return 0;
232
    }
233
  else
234
    return 1;
235
}
236
237
#endif /* TEST */