/src/gettext-0.26/gettext-tools/libgettextpo/unilbrk/u8-width-linebreaks.c
Line | Count | Source |
1 | | /* Line breaking of UTF-8 strings. |
2 | | Copyright (C) 2001-2003, 2006-2025 Free Software Foundation, Inc. |
3 | | Written by Bruno Haible <bruno@clisp.org>, 2001. |
4 | | |
5 | | This file is free software. |
6 | | It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". |
7 | | You can redistribute it and/or modify it under either |
8 | | - the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation, either version 3, or (at your |
10 | | option) any later version, or |
11 | | - the terms of the GNU General Public License as published by the |
12 | | Free Software Foundation; either version 2, or (at your option) |
13 | | any later version, or |
14 | | - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". |
15 | | |
16 | | This file is distributed in the hope that it will be useful, |
17 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | | Lesser General Public License and the GNU General Public License |
20 | | for more details. |
21 | | |
22 | | You should have received a copy of the GNU Lesser General Public |
23 | | License and of the GNU General Public License along with this |
24 | | program. If not, see <https://www.gnu.org/licenses/>. */ |
25 | | |
26 | | #include <config.h> |
27 | | |
28 | | /* Specification. */ |
29 | | #include "unilbrk.h" |
30 | | #include "unilbrk/internal.h" |
31 | | |
32 | | #include "unilbrk/lbrktables.h" |
33 | | #include "unistr.h" |
34 | | #include "uniwidth.h" |
35 | | |
36 | | int |
37 | | u8_width_linebreaks_internal (const uint8_t *s, size_t n, |
38 | | int width, int start_column, int at_end_columns, |
39 | | const char *o, const char *encoding, int cr, |
40 | | char *p) |
41 | 0 | { |
42 | 0 | const uint8_t *s_end; |
43 | 0 | char *last_p; |
44 | 0 | int last_column; |
45 | 0 | int piece_width; |
46 | |
|
47 | 0 | u8_possible_linebreaks_loop (s, n, encoding, cr, p); |
48 | |
|
49 | 0 | s_end = s + n; |
50 | 0 | last_p = NULL; |
51 | 0 | last_column = start_column; |
52 | 0 | piece_width = 0; |
53 | 0 | while (s < s_end) |
54 | 0 | { |
55 | 0 | ucs4_t uc; |
56 | 0 | int count = u8_mbtouc_unsafe (&uc, s, s_end - s); |
57 | | |
58 | | /* Respect the override. */ |
59 | 0 | if (o != NULL && *o != UC_BREAK_UNDEFINED) |
60 | 0 | *p = *o; |
61 | |
|
62 | 0 | if (*p == UC_BREAK_POSSIBLE |
63 | 0 | || *p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF) |
64 | 0 | { |
65 | | /* An atomic piece of text ends here. */ |
66 | 0 | if (last_p != NULL && last_column + piece_width > width) |
67 | 0 | { |
68 | | /* Insert a line break. */ |
69 | 0 | *last_p = UC_BREAK_POSSIBLE; |
70 | 0 | last_column = 0; |
71 | 0 | } |
72 | 0 | } |
73 | |
|
74 | 0 | if (*p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF) |
75 | 0 | { |
76 | | /* uc is a line break character. */ |
77 | | /* Start a new piece at column 0. */ |
78 | 0 | last_p = NULL; |
79 | 0 | last_column = 0; |
80 | 0 | piece_width = 0; |
81 | 0 | } |
82 | 0 | else |
83 | 0 | { |
84 | | /* uc is not a line break character. */ |
85 | 0 | int w; |
86 | |
|
87 | 0 | if (*p == UC_BREAK_POSSIBLE) |
88 | 0 | { |
89 | | /* Start a new piece. */ |
90 | 0 | last_p = p; |
91 | 0 | last_column += piece_width; |
92 | 0 | piece_width = 0; |
93 | | /* No line break for the moment, may be turned into |
94 | | UC_BREAK_POSSIBLE later, via last_p. */ |
95 | 0 | } |
96 | |
|
97 | 0 | *p = UC_BREAK_PROHIBITED; |
98 | |
|
99 | 0 | w = uc_width (uc, encoding); |
100 | 0 | if (w >= 0) /* ignore control characters in the string */ |
101 | 0 | piece_width += w; |
102 | 0 | } |
103 | |
|
104 | 0 | s += count; |
105 | 0 | p += count; |
106 | 0 | if (o != NULL) |
107 | 0 | o += count; |
108 | 0 | } |
109 | | |
110 | | /* The last atomic piece of text ends here. */ |
111 | 0 | if (last_p != NULL && last_column + piece_width + at_end_columns > width) |
112 | 0 | { |
113 | | /* Insert a line break. */ |
114 | 0 | *last_p = UC_BREAK_POSSIBLE; |
115 | 0 | last_column = 0; |
116 | 0 | } |
117 | |
|
118 | 0 | return last_column + piece_width; |
119 | 0 | } |
120 | | |
121 | | #if defined IN_LIBUNISTRING |
122 | | /* For backward compatibility with older versions of libunistring. */ |
123 | | |
124 | | # undef u8_width_linebreaks |
125 | | |
/* Compatibility entry point: same parameters as u8_width_linebreaks_v2,
   but forwards -1 as the CR argument of u8_width_linebreaks_internal.
   NOTE(review): -1 presumably selects the pre-v2 treatment of CR; confirm
   against u8_possible_linebreaks_loop.  */
int
u8_width_linebreaks (const uint8_t *s, size_t n,
                     int width, int start_column, int at_end_columns,
                     const char *o, const char *encoding,
                     char *p)
{
  int end_column;

  end_column = u8_width_linebreaks_internal (s, n,
                                             width, start_column,
                                             at_end_columns,
                                             o, encoding, -1, p);
  return end_column;
}
136 | | |
137 | | #endif |
138 | | |
139 | | int |
140 | | u8_width_linebreaks_v2 (const uint8_t *s, size_t n, |
141 | | int width, int start_column, int at_end_columns, |
142 | | const char *o, const char *encoding, |
143 | | char *p) |
144 | 0 | { |
145 | 0 | return u8_width_linebreaks_internal (s, n, |
146 | 0 | width, start_column, at_end_columns, |
147 | 0 | o, encoding, LBP_CR, p); |
148 | 0 | } |
149 | | |
150 | | |
151 | | #ifdef TEST |
152 | | |
153 | | #include <stdio.h> |
154 | | #include <stdlib.h> |
155 | | #include <string.h> |
156 | | |
/* Read STREAM up to end-of-file and return its contents in a freshly
   allocated buffer, terminated with a NUL byte.  The caller owns the
   buffer and releases it with free().
   On a read error or when memory cannot be obtained, a message is written
   to stderr and the process exits with status 1.  */
char *
read_file (FILE *stream)
{
#define BUFSIZE 4096
  char *buf = NULL;
  /* Sizes are kept in size_t: with int, input of 2 GiB or more would
     overflow (undefined behaviour) and corrupt the allocation size.  */
  size_t alloc = 0;
  size_t size = 0;

  while (! feof (stream))
    {
      size_t count;

      /* Ensure room for one more full chunk.  alloc >= size always holds,
         so the subtraction cannot wrap.  */
      if (alloc - size < BUFSIZE)
        {
          size_t needed = size + BUFSIZE;
          size_t grown = alloc + alloc / 2;

          if (needed < size)
            {
              /* The required size does not fit into size_t.  */
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          /* Grow by 50%, but at least to what is needed.  This also covers
             a wrapped GROWN, which is then smaller than ALLOC < NEEDED.  */
          if (grown < needed)
            grown = needed;
          alloc = grown;
          buf = realloc (buf, alloc);
          if (buf == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
        }
      count = fread (buf + size, 1, BUFSIZE, stream);
      if (count == 0)
        {
          /* Nothing read: either end-of-file (the loop condition ends the
             loop) or a read error.  */
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
        }
      else
        size += count;
    }
  /* Trim to the exact size plus the terminator; this also allocates the
     one byte needed when the stream was empty.  */
  buf = realloc (buf, size + 1);
  if (buf == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  buf[size] = '\0';
  return buf;
#undef BUFSIZE
}
204 | | |
205 | | int |
206 | | main (int argc, char * argv[]) |
207 | | { |
208 | | if (argc == 2) |
209 | | { |
210 | | /* Insert line breaks for a given width. */ |
211 | | int width = atoi (argv[1]); |
212 | | char *input = read_file (stdin); |
213 | | int length = strlen (input); |
214 | | char *breaks = malloc (length); |
215 | | int i; |
216 | | |
217 | | u8_width_linebreaks_v2 ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks); |
218 | | |
219 | | for (i = 0; i < length; i++) |
220 | | { |
221 | | switch (breaks[i]) |
222 | | { |
223 | | case UC_BREAK_POSSIBLE: |
224 | | putc ('\n', stdout); |
225 | | break; |
226 | | case UC_BREAK_MANDATORY: |
227 | | break; |
228 | | case UC_BREAK_CR_BEFORE_LF: |
229 | | break; |
230 | | case UC_BREAK_PROHIBITED: |
231 | | break; |
232 | | default: |
233 | | abort (); |
234 | | } |
235 | | putc (input[i], stdout); |
236 | | } |
237 | | |
238 | | free (breaks); |
239 | | |
240 | | return 0; |
241 | | } |
242 | | else |
243 | | return 1; |
244 | | } |
245 | | |
246 | | #endif /* TEST */ |