/src/gettext/gettext-tools/libgettextpo/unilbrk/u8-width-linebreaks.c
Line | Count | Source |
1 | | /* Line breaking of UTF-8 strings. |
2 | | Copyright (C) 2001-2003, 2006-2026 Free Software Foundation, Inc. |
3 | | Written by Bruno Haible <bruno@clisp.org>, 2001. |
4 | | |
5 | | This file is free software. |
6 | | It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". |
7 | | You can redistribute it and/or modify it under either |
8 | | - the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation, either version 3, or (at your |
10 | | option) any later version, or |
11 | | - the terms of the GNU General Public License as published by the |
12 | | Free Software Foundation; either version 2, or (at your option) |
13 | | any later version, or |
14 | | - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". |
15 | | |
16 | | This file is distributed in the hope that it will be useful, |
17 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | | Lesser General Public License and the GNU General Public License |
20 | | for more details. |
21 | | |
22 | | You should have received a copy of the GNU Lesser General Public |
23 | | License and of the GNU General Public License along with this |
24 | | program. If not, see <https://www.gnu.org/licenses/>. */ |
25 | | |
26 | | #include <config.h> |
27 | | |
28 | | /* Specification. */ |
29 | | #include "unilbrk.h" |
30 | | #include "unilbrk/internal.h" |
31 | | |
32 | | #include "unilbrk/lbrktables.h" |
33 | | #include "unistr.h" |
34 | | #include "uniwidth.h" |
35 | | |
36 | | int |
37 | | u8_width_linebreaks_internal (const uint8_t *s, size_t n, |
38 | | int width, int start_column, int at_end_columns, |
39 | | const char *o, const char *encoding, int cr, |
40 | | char *p) |
41 | 0 | { |
42 | 0 | u8_possible_linebreaks_loop (s, n, encoding, cr, p); |
43 | |
|
44 | 0 | const uint8_t *s_end = s + n; |
45 | 0 | char *last_p = NULL; |
46 | 0 | int last_column = start_column; |
47 | 0 | int piece_width = 0; |
48 | 0 | while (s < s_end) |
49 | 0 | { |
50 | 0 | ucs4_t uc; |
51 | 0 | int count = u8_mbtouc_unsafe (&uc, s, s_end - s); |
52 | | |
53 | | /* Respect the override. */ |
54 | 0 | if (o != NULL && *o != UC_BREAK_UNDEFINED) |
55 | 0 | *p = *o; |
56 | |
|
57 | 0 | if (*p == UC_BREAK_POSSIBLE |
58 | 0 | || *p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF) |
59 | 0 | { |
60 | | /* An atomic piece of text ends here. */ |
61 | 0 | if (last_p != NULL && last_column + piece_width > width) |
62 | 0 | { |
63 | | /* Insert a line break. */ |
64 | 0 | *last_p = UC_BREAK_POSSIBLE; |
65 | 0 | last_column = 0; |
66 | 0 | } |
67 | 0 | } |
68 | |
|
69 | 0 | if (*p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF) |
70 | 0 | { |
71 | | /* uc is a line break character. */ |
72 | | /* Start a new piece at column 0. */ |
73 | 0 | last_p = NULL; |
74 | 0 | last_column = 0; |
75 | 0 | piece_width = 0; |
76 | 0 | } |
77 | 0 | else |
78 | 0 | { |
79 | | /* uc is not a line break character. */ |
80 | 0 | if (*p == UC_BREAK_POSSIBLE) |
81 | 0 | { |
82 | | /* Start a new piece. */ |
83 | 0 | last_p = p; |
84 | 0 | last_column += piece_width; |
85 | 0 | piece_width = 0; |
86 | | /* No line break for the moment, may be turned into |
87 | | UC_BREAK_POSSIBLE later, via last_p. */ |
88 | 0 | } |
89 | |
|
90 | 0 | *p = UC_BREAK_PROHIBITED; |
91 | |
|
92 | 0 | int w = uc_width (uc, encoding); |
93 | 0 | if (w >= 0) /* ignore control characters in the string */ |
94 | 0 | piece_width += w; |
95 | 0 | } |
96 | |
|
97 | 0 | s += count; |
98 | 0 | p += count; |
99 | 0 | if (o != NULL) |
100 | 0 | o += count; |
101 | 0 | } |
102 | | |
103 | | /* The last atomic piece of text ends here. */ |
104 | 0 | if (last_p != NULL && last_column + piece_width + at_end_columns > width) |
105 | 0 | { |
106 | | /* Insert a line break. */ |
107 | 0 | *last_p = UC_BREAK_POSSIBLE; |
108 | 0 | last_column = 0; |
109 | 0 | } |
110 | |
|
111 | 0 | return last_column + piece_width; |
112 | 0 | } |
113 | | |
114 | | #if defined IN_LIBUNISTRING |
115 | | /* For backward compatibility with older versions of libunistring. */ |
116 | | |
117 | | # undef u8_width_linebreaks |
118 | | |
/* Older entry point: forwards every argument unchanged to
   u8_width_linebreaks_internal, supplying -1 for its CR parameter, and
   returns that function's result.  */
int
u8_width_linebreaks (const uint8_t *s, size_t n,
                     int width, int start_column, int at_end_columns,
                     const char *o, const char *encoding,
                     char *p)
{
  int result =
    u8_width_linebreaks_internal (s, n, width, start_column, at_end_columns,
                                  o, encoding, -1, p);
  return result;
}
129 | | |
130 | | #endif |
131 | | |
132 | | int |
133 | | u8_width_linebreaks_v2 (const uint8_t *s, size_t n, |
134 | | int width, int start_column, int at_end_columns, |
135 | | const char *o, const char *encoding, |
136 | | char *p) |
137 | 0 | { |
138 | 0 | return u8_width_linebreaks_internal (s, n, |
139 | 0 | width, start_column, at_end_columns, |
140 | 0 | o, encoding, LBP_CR, p); |
141 | 0 | } |
142 | | |
143 | | |
144 | | #ifdef TEST |
145 | | |
146 | | #include <stdio.h> |
147 | | #include <stdlib.h> |
148 | | #include <string.h> |
149 | | |
/* Read everything that remains on STREAM and return it in a freshly
   allocated buffer, terminated with a NUL byte.  The caller owns the
   buffer and releases it with free().

   On a read error or when memory runs out, a diagnostic is written to
   stderr and the process exits with status 1, so the function never
   returns NULL.  */
char *
read_file (FILE *stream)
{
  /* Number of bytes requested from fread per iteration.  */
  const size_t chunk = 4096;
  char *buf = NULL;
  size_t alloc = 0;   /* bytes allocated for BUF */
  size_t size = 0;    /* bytes of BUF filled so far; always <= ALLOC */

  for (;;)
    {
      /* Make sure a full chunk fits, growing by at least 50%.  */
      if (alloc - size < chunk)
        {
          size_t new_alloc = alloc + alloc / 2;
          if (new_alloc < size + chunk)
            new_alloc = size + chunk;
          /* Assign through a temporary so BUF stays valid until the
             result has been checked.  */
          char *new_buf = realloc (buf, new_alloc);
          if (new_buf == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = new_buf;
          alloc = new_alloc;
        }

      size_t count = fread (buf + size, 1, chunk, stream);
      size += count;

      /* fread returns fewer items than requested only on end-of-file or
         on a read error.  */
      if (count < chunk)
        {
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
          break;
        }
    }

  /* Trim the buffer to the exact size plus the terminating NUL.  */
  char *result = realloc (buf, size + 1);
  if (result == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  result[size] = '\0';
  return result;
}
196 | | |
197 | | int |
198 | | main (int argc, char * argv[]) |
199 | | { |
200 | | if (argc == 2) |
201 | | { |
202 | | /* Insert line breaks for a given width. */ |
203 | | int width = atoi (argv[1]); |
204 | | char *input = read_file (stdin); |
205 | | int length = strlen (input); |
206 | | char *breaks = malloc (length); |
207 | | |
208 | | u8_width_linebreaks_v2 ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks); |
209 | | |
210 | | for (int i = 0; i < length; i++) |
211 | | { |
212 | | switch (breaks[i]) |
213 | | { |
214 | | case UC_BREAK_POSSIBLE: |
215 | | putc ('\n', stdout); |
216 | | break; |
217 | | case UC_BREAK_MANDATORY: |
218 | | break; |
219 | | case UC_BREAK_CR_BEFORE_LF: |
220 | | break; |
221 | | case UC_BREAK_PROHIBITED: |
222 | | break; |
223 | | default: |
224 | | abort (); |
225 | | } |
226 | | putc (input[i], stdout); |
227 | | } |
228 | | |
229 | | free (breaks); |
230 | | |
231 | | return 0; |
232 | | } |
233 | | else |
234 | | return 1; |
235 | | } |
236 | | |
237 | | #endif /* TEST */ |