/rust/registry/src/index.crates.io-6f17d22bba15001f/encoding_c-0.9.8/src/lib.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright Mozilla Foundation. See the COPYRIGHT |
2 | | // file at the top-level directory of this distribution. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
7 | | // option. This file may not be copied, modified, or distributed |
8 | | // except according to those terms. |
9 | | |
10 | | #![doc(html_root_url = "https://docs.rs/encoding_c/0.9.7")] |
11 | | |
12 | | //! The C API for encoding_rs. |
13 | | //! |
14 | | //! # Mapping from Rust |
15 | | //! |
16 | | //! ## Naming convention |
17 | | //! |
18 | | //! The wrapper function for each method has a name that starts with the name |
19 | | //! of the struct lower-cased, followed by an underscore and ends with the |
20 | | //! name of the method. |
21 | | //! |
22 | | //! For example, `Encoding::for_label()` is wrapped as `encoding_for_label()`. |
23 | | //! |
24 | | //! ## Arguments |
25 | | //! |
26 | | //! Functions that wrap non-static methods take the `self` object as their |
27 | | //! first argument. |
28 | | //! |
29 | | //! Slice argument `foo` is decomposed into a pointer `foo` and a length |
30 | | //! `foo_len`. |
31 | | //! |
32 | | //! ## Return values |
33 | | //! |
34 | | //! Multiple return values become out-params. When an out-param is |
35 | | //! length-related, `foo_len` for a slice becomes a pointer in order to become |
36 | | //! an in/out-param. |
37 | | //! |
38 | | //! `DecoderResult`, `EncoderResult` and `CoderResult` become `uint32_t`. |
39 | | //! `InputEmpty` becomes `INPUT_EMPTY`. `OutputFull` becomes `OUTPUT_FULL`. |
40 | | //! `Unmappable` becomes the scalar value of the unmappable character. |
41 | | //! `Malformed` becomes a number whose lowest 8 bits, which can have the decimal |
42 | | //! value 1, 2, 3 or 4, indicate the length of the malformed byte sequence and |
43 | | //! whose next-lowest 8 bits, when shifted right by 8, indicate the number of |
44 | | //! bytes that were consumed after the malformed sequence (possible decimal |
45 | | //! values 0, 1, 2 or 3). The maximum possible sum of the two is 6. |
46 | | |
47 | | extern crate encoding_rs; |
48 | | |
49 | | use encoding_rs::*; |
50 | | |
51 | | /// Return value for `*_decode_*` and `*_encode_*` functions that indicates that |
52 | | /// the input has been exhausted. |
53 | | /// |
54 | | /// (This is zero as a micro optimization. U+0000 is never unmappable and |
55 | | /// malformed sequences always have a positive length.) |
56 | | pub const INPUT_EMPTY: u32 = 0; |
57 | | |
58 | | /// Return value for `*_decode_*` and `*_encode_*` functions that indicates that |
59 | | /// the output space has been exhausted. |
60 | | pub const OUTPUT_FULL: u32 = 0xFFFFFFFF; |
61 | | |
62 | | /// Newtype for `*const Encoding` in order to be able to implement `Sync` for |
63 | | /// it. |
64 | | pub struct ConstEncoding(*const Encoding); |
65 | | |
66 | | /// Required for `static` fields. |
67 | | unsafe impl Sync for ConstEncoding {} |
68 | | |
69 | | // BEGIN GENERATED CODE. PLEASE DO NOT EDIT. |
70 | | // Instead, please regenerate using generate-encoding-data.py |
71 | | |
72 | | /// The minimum length of buffers that may be passed to `encoding_name()`. |
73 | | pub const ENCODING_NAME_MAX_LENGTH: usize = 14; // x-mac-cyrillic |
74 | | |
75 | | /// The Big5 encoding. |
76 | | #[no_mangle] |
77 | | pub static BIG5_ENCODING: ConstEncoding = ConstEncoding(&BIG5_INIT); |
78 | | |
79 | | /// The EUC-JP encoding. |
80 | | #[no_mangle] |
81 | | pub static EUC_JP_ENCODING: ConstEncoding = ConstEncoding(&EUC_JP_INIT); |
82 | | |
83 | | /// The EUC-KR encoding. |
84 | | #[no_mangle] |
85 | | pub static EUC_KR_ENCODING: ConstEncoding = ConstEncoding(&EUC_KR_INIT); |
86 | | |
87 | | /// The GBK encoding. |
88 | | #[no_mangle] |
89 | | pub static GBK_ENCODING: ConstEncoding = ConstEncoding(&GBK_INIT); |
90 | | |
91 | | /// The IBM866 encoding. |
92 | | #[no_mangle] |
93 | | pub static IBM866_ENCODING: ConstEncoding = ConstEncoding(&IBM866_INIT); |
94 | | |
95 | | /// The ISO-2022-JP encoding. |
96 | | #[no_mangle] |
97 | | pub static ISO_2022_JP_ENCODING: ConstEncoding = ConstEncoding(&ISO_2022_JP_INIT); |
98 | | |
99 | | /// The ISO-8859-10 encoding. |
100 | | #[no_mangle] |
101 | | pub static ISO_8859_10_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_10_INIT); |
102 | | |
103 | | /// The ISO-8859-13 encoding. |
104 | | #[no_mangle] |
105 | | pub static ISO_8859_13_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_13_INIT); |
106 | | |
107 | | /// The ISO-8859-14 encoding. |
108 | | #[no_mangle] |
109 | | pub static ISO_8859_14_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_14_INIT); |
110 | | |
111 | | /// The ISO-8859-15 encoding. |
112 | | #[no_mangle] |
113 | | pub static ISO_8859_15_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_15_INIT); |
114 | | |
115 | | /// The ISO-8859-16 encoding. |
116 | | #[no_mangle] |
117 | | pub static ISO_8859_16_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_16_INIT); |
118 | | |
119 | | /// The ISO-8859-2 encoding. |
120 | | #[no_mangle] |
121 | | pub static ISO_8859_2_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_2_INIT); |
122 | | |
123 | | /// The ISO-8859-3 encoding. |
124 | | #[no_mangle] |
125 | | pub static ISO_8859_3_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_3_INIT); |
126 | | |
127 | | /// The ISO-8859-4 encoding. |
128 | | #[no_mangle] |
129 | | pub static ISO_8859_4_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_4_INIT); |
130 | | |
131 | | /// The ISO-8859-5 encoding. |
132 | | #[no_mangle] |
133 | | pub static ISO_8859_5_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_5_INIT); |
134 | | |
135 | | /// The ISO-8859-6 encoding. |
136 | | #[no_mangle] |
137 | | pub static ISO_8859_6_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_6_INIT); |
138 | | |
139 | | /// The ISO-8859-7 encoding. |
140 | | #[no_mangle] |
141 | | pub static ISO_8859_7_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_7_INIT); |
142 | | |
143 | | /// The ISO-8859-8 encoding. |
144 | | #[no_mangle] |
145 | | pub static ISO_8859_8_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_8_INIT); |
146 | | |
147 | | /// The ISO-8859-8-I encoding. |
148 | | #[no_mangle] |
149 | | pub static ISO_8859_8_I_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_8_I_INIT); |
150 | | |
151 | | /// The KOI8-R encoding. |
152 | | #[no_mangle] |
153 | | pub static KOI8_R_ENCODING: ConstEncoding = ConstEncoding(&KOI8_R_INIT); |
154 | | |
155 | | /// The KOI8-U encoding. |
156 | | #[no_mangle] |
157 | | pub static KOI8_U_ENCODING: ConstEncoding = ConstEncoding(&KOI8_U_INIT); |
158 | | |
159 | | /// The Shift_JIS encoding. |
160 | | #[no_mangle] |
161 | | pub static SHIFT_JIS_ENCODING: ConstEncoding = ConstEncoding(&SHIFT_JIS_INIT); |
162 | | |
163 | | /// The UTF-16BE encoding. |
164 | | #[no_mangle] |
165 | | pub static UTF_16BE_ENCODING: ConstEncoding = ConstEncoding(&UTF_16BE_INIT); |
166 | | |
167 | | /// The UTF-16LE encoding. |
168 | | #[no_mangle] |
169 | | pub static UTF_16LE_ENCODING: ConstEncoding = ConstEncoding(&UTF_16LE_INIT); |
170 | | |
171 | | /// The UTF-8 encoding. |
172 | | #[no_mangle] |
173 | | pub static UTF_8_ENCODING: ConstEncoding = ConstEncoding(&UTF_8_INIT); |
174 | | |
175 | | /// The gb18030 encoding. |
176 | | #[no_mangle] |
177 | | pub static GB18030_ENCODING: ConstEncoding = ConstEncoding(&GB18030_INIT); |
178 | | |
179 | | /// The macintosh encoding. |
180 | | #[no_mangle] |
181 | | pub static MACINTOSH_ENCODING: ConstEncoding = ConstEncoding(&MACINTOSH_INIT); |
182 | | |
183 | | /// The replacement encoding. |
184 | | #[no_mangle] |
185 | | pub static REPLACEMENT_ENCODING: ConstEncoding = ConstEncoding(&REPLACEMENT_INIT); |
186 | | |
187 | | /// The windows-1250 encoding. |
188 | | #[no_mangle] |
189 | | pub static WINDOWS_1250_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1250_INIT); |
190 | | |
191 | | /// The windows-1251 encoding. |
192 | | #[no_mangle] |
193 | | pub static WINDOWS_1251_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1251_INIT); |
194 | | |
195 | | /// The windows-1252 encoding. |
196 | | #[no_mangle] |
197 | | pub static WINDOWS_1252_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1252_INIT); |
198 | | |
199 | | /// The windows-1253 encoding. |
200 | | #[no_mangle] |
201 | | pub static WINDOWS_1253_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1253_INIT); |
202 | | |
203 | | /// The windows-1254 encoding. |
204 | | #[no_mangle] |
205 | | pub static WINDOWS_1254_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1254_INIT); |
206 | | |
207 | | /// The windows-1255 encoding. |
208 | | #[no_mangle] |
209 | | pub static WINDOWS_1255_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1255_INIT); |
210 | | |
211 | | /// The windows-1256 encoding. |
212 | | #[no_mangle] |
213 | | pub static WINDOWS_1256_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1256_INIT); |
214 | | |
215 | | /// The windows-1257 encoding. |
216 | | #[no_mangle] |
217 | | pub static WINDOWS_1257_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1257_INIT); |
218 | | |
219 | | /// The windows-1258 encoding. |
220 | | #[no_mangle] |
221 | | pub static WINDOWS_1258_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1258_INIT); |
222 | | |
223 | | /// The windows-874 encoding. |
224 | | #[no_mangle] |
225 | | pub static WINDOWS_874_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_874_INIT); |
226 | | |
227 | | /// The x-mac-cyrillic encoding. |
228 | | #[no_mangle] |
229 | | pub static X_MAC_CYRILLIC_ENCODING: ConstEncoding = ConstEncoding(&X_MAC_CYRILLIC_INIT); |
230 | | |
231 | | /// The x-user-defined encoding. |
232 | | #[no_mangle] |
233 | | pub static X_USER_DEFINED_ENCODING: ConstEncoding = ConstEncoding(&X_USER_DEFINED_INIT); |
234 | | |
235 | | // END GENERATED CODE |
236 | | |
237 | | #[inline(always)] |
238 | 0 | fn coder_result_to_u32(result: CoderResult) -> u32 { |
239 | 0 | match result { |
240 | 0 | CoderResult::InputEmpty => INPUT_EMPTY, |
241 | 0 | CoderResult::OutputFull => OUTPUT_FULL, |
242 | | } |
243 | 0 | } |
244 | | |
245 | | #[inline(always)] |
246 | 0 | fn decoder_result_to_u32(result: DecoderResult) -> u32 { |
247 | 0 | match result { |
248 | 0 | DecoderResult::InputEmpty => INPUT_EMPTY, |
249 | 0 | DecoderResult::OutputFull => OUTPUT_FULL, |
250 | 0 | DecoderResult::Malformed(bad, good) => ((good as u32) << 8) | (bad as u32), |
251 | | } |
252 | 0 | } |
253 | | |
254 | | #[inline(always)] |
255 | 0 | fn encoder_result_to_u32(result: EncoderResult) -> u32 { |
256 | 0 | match result { |
257 | 0 | EncoderResult::InputEmpty => INPUT_EMPTY, |
258 | 0 | EncoderResult::OutputFull => OUTPUT_FULL, |
259 | 0 | EncoderResult::Unmappable(c) => c as u32, |
260 | | } |
261 | 0 | } |
262 | | |
263 | | #[inline(always)] |
264 | 0 | fn option_to_ptr(opt: Option<&'static Encoding>) -> *const Encoding { |
265 | 0 | match opt { |
266 | 0 | None => ::std::ptr::null(), |
267 | 0 | Some(e) => e, |
268 | | } |
269 | 0 | } |
270 | | |
271 | | /// Implements the |
272 | | /// [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get) |
273 | | /// algorithm. |
274 | | /// |
275 | | /// If, after ASCII-lowercasing and removing leading and trailing |
276 | | /// whitespace, the argument matches a label defined in the Encoding |
277 | | /// Standard, `const Encoding*` representing the corresponding |
278 | | /// encoding is returned. If there is no match, `NULL` is returned. |
279 | | /// |
280 | | /// This is the right function to use if the action upon the method returning |
281 | | /// `NULL` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`) instead. |
282 | | /// When the action upon the method returning `NULL` is not to proceed with |
283 | | /// a fallback but to refuse processing, `encoding_for_label_no_replacement()` is |
284 | | /// more appropriate. |
285 | | /// |
286 | | /// The argument buffer can be in any ASCII-compatible encoding. It is not |
287 | | /// required to be UTF-8. |
288 | | /// |
289 | | /// `label` must be non-`NULL` even if `label_len` is zero. When `label_len` |
290 | | /// is zero, it is OK for `label` to be something non-dereferencable, |
291 | | /// such as `0x1`. This is required due to Rust's optimization for slices |
292 | | /// within `Option`. |
293 | | /// |
294 | | /// # Undefined behavior |
295 | | /// |
296 | | /// UB ensues if `label` and `label_len` don't designate a valid memory block |
297 | | /// or if `label` is `NULL`. |
298 | | #[no_mangle] |
299 | 0 | pub unsafe extern "C" fn encoding_for_label(label: *const u8, label_len: usize) -> *const Encoding { |
300 | 0 | let label_slice = ::std::slice::from_raw_parts(label, label_len); |
301 | 0 | option_to_ptr(Encoding::for_label(label_slice)) |
302 | 0 | } |
303 | | |
304 | | /// This function behaves the same as `encoding_for_label()`, except when |
305 | | /// `encoding_for_label()` would return `REPLACEMENT_ENCODING`, this method |
306 | | /// returns `NULL` instead. |
307 | | /// |
308 | | /// This method is useful in scenarios where a fatal error is required |
309 | | /// upon invalid label, because in those cases the caller typically wishes |
310 | | /// to treat the labels that map to the replacement encoding as fatal |
311 | | /// errors, too. |
312 | | /// |
313 | | /// It is not OK to use this function when the action upon the method returning |
314 | | /// `NULL` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`). In |
315 | | /// such a case, the `encoding_for_label()` function should be used instead |
316 | | /// in order to avoid unsafe fallback for labels that `encoding_for_label()` |
317 | | /// maps to `REPLACEMENT_ENCODING`. |
318 | | /// |
319 | | /// The argument buffer can be in any ASCII-compatible encoding. It is not |
320 | | /// required to be UTF-8. |
321 | | /// |
322 | | /// `label` must be non-`NULL` even if `label_len` is zero. When `label_len` |
323 | | /// is zero, it is OK for `label` to be something non-dereferencable, |
324 | | /// such as `0x1`. This is required due to Rust's optimization for slices |
325 | | /// within `Option`. |
326 | | /// |
327 | | /// # Undefined behavior |
328 | | /// |
329 | | /// UB ensues if `label` and `label_len` don't designate a valid memory block |
330 | | /// or if `label` is `NULL`. |
331 | | #[no_mangle] |
332 | 0 | pub unsafe extern "C" fn encoding_for_label_no_replacement( |
333 | 0 | label: *const u8, |
334 | 0 | label_len: usize, |
335 | 0 | ) -> *const Encoding { |
336 | 0 | let label_slice = ::std::slice::from_raw_parts(label, label_len); |
337 | 0 | option_to_ptr(Encoding::for_label_no_replacement(label_slice)) |
338 | 0 | } |
339 | | |
340 | | /// Performs non-incremental BOM sniffing. |
341 | | /// |
342 | | /// The argument must either be a buffer representing the entire input |
343 | | /// stream (non-streaming case) or a buffer representing at least the first |
344 | | /// three bytes of the input stream (streaming case). |
345 | | /// |
346 | | /// Returns `UTF_8_ENCODING`, `UTF_16LE_ENCODING` or `UTF_16BE_ENCODING` if the |
347 | | /// argument starts with the UTF-8, UTF-16LE or UTF-16BE BOM or `NULL` |
348 | | /// otherwise. Upon return, `*buffer_len` is the length of the BOM (zero if |
349 | | /// there is no BOM). |
350 | | /// |
351 | | /// `buffer` must be non-`NULL` even if `*buffer_len` is zero. When |
352 | | /// `*buffer_len` is zero, it is OK for `buffer` to be something |
353 | | /// non-dereferencable, such as `0x1`. This is required due to Rust's |
354 | | /// optimization for slices within `Option`. |
355 | | /// |
356 | | /// # Undefined behavior |
357 | | /// |
358 | | /// UB ensues if `buffer` and `*buffer_len` don't designate a valid memory |
359 | | /// block or if `buffer` is `NULL`. |
360 | | #[no_mangle] |
361 | 0 | pub unsafe extern "C" fn encoding_for_bom( |
362 | 0 | buffer: *const u8, |
363 | 0 | buffer_len: *mut usize, |
364 | 0 | ) -> *const Encoding { |
365 | 0 | let buffer_slice = ::std::slice::from_raw_parts(buffer, *buffer_len); |
366 | 0 | let (encoding, bom_length) = match Encoding::for_bom(buffer_slice) { |
367 | 0 | Some((encoding, bom_length)) => (encoding as *const Encoding, bom_length), |
368 | 0 | None => (::std::ptr::null(), 0), |
369 | | }; |
370 | 0 | *buffer_len = bom_length; |
371 | 0 | encoding |
372 | 0 | } |
373 | | |
374 | | /// Writes the name of the given `Encoding` to a caller-supplied buffer as |
375 | | /// ASCII and returns the number of bytes / ASCII characters written. |
376 | | /// |
377 | | /// The output is not null-terminated. |
378 | | /// |
379 | | /// The caller _MUST_ ensure that `name_out` points to a buffer whose length |
380 | | /// is at least `ENCODING_NAME_MAX_LENGTH` bytes. |
381 | | /// |
382 | | /// # Undefined behavior |
383 | | /// |
384 | | /// UB ensues if either argument is `NULL` or if `name_out` doesn't point to |
385 | | /// a valid block of memory whose length is at least |
386 | | /// `ENCODING_NAME_MAX_LENGTH` bytes. |
387 | | #[no_mangle] |
388 | 0 | pub unsafe extern "C" fn encoding_name(encoding: *const Encoding, name_out: *mut u8) -> usize { |
389 | 0 | let bytes = (*encoding).name().as_bytes(); |
390 | 0 | ::std::ptr::copy_nonoverlapping(bytes.as_ptr(), name_out, bytes.len()); |
391 | 0 | bytes.len() |
392 | 0 | } |
393 | | |
394 | | /// Checks whether the _output encoding_ of this encoding can encode every |
395 | | /// Unicode scalar. (Only true if the output encoding is UTF-8.) |
396 | | /// |
397 | | /// # Undefined behavior |
398 | | /// |
399 | | /// UB ensues if the argument is `NULL`. |
400 | | #[no_mangle] |
401 | 0 | pub unsafe extern "C" fn encoding_can_encode_everything(encoding: *const Encoding) -> bool { |
402 | 0 | (*encoding).can_encode_everything() |
403 | 0 | } |
404 | | |
405 | | /// Checks whether the bytes 0x00...0x7F map exclusively to the characters |
406 | | /// U+0000...U+007F and vice versa. |
407 | | /// |
408 | | /// # Undefined behavior |
409 | | /// |
410 | | /// UB ensues if the argument is `NULL`. |
411 | | #[no_mangle] |
412 | 0 | pub unsafe extern "C" fn encoding_is_ascii_compatible(encoding: *const Encoding) -> bool { |
413 | 0 | (*encoding).is_ascii_compatible() |
414 | 0 | } |
415 | | |
416 | | /// Checks whether this encoding maps one byte to one Basic Multilingual |
417 | | /// Plane code point (i.e. byte length equals decoded UTF-16 length) and |
418 | | /// vice versa (for mappable characters). |
419 | | /// |
420 | | /// `true` iff this encoding is on the list of [Legacy single-byte |
421 | | /// encodings](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings) |
422 | | /// in the spec or x-user-defined. |
423 | | /// |
424 | | /// # Undefined behavior |
425 | | /// |
426 | | /// UB ensues if the argument is `NULL`. |
427 | | #[no_mangle] |
428 | 0 | pub unsafe extern "C" fn encoding_is_single_byte(encoding: *const Encoding) -> bool { |
429 | 0 | (*encoding).is_single_byte() |
430 | 0 | } |
431 | | |
432 | | /// Returns the _output encoding_ of this encoding. This is UTF-8 for |
433 | | /// UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise. |
434 | | /// |
435 | | /// # Undefined behavior |
436 | | /// |
437 | | /// UB ensues if the argument is `NULL`. |
438 | | #[no_mangle] |
439 | 0 | pub unsafe extern "C" fn encoding_output_encoding(encoding: *const Encoding) -> *const Encoding { |
440 | 0 | (*encoding).output_encoding() |
441 | 0 | } |
442 | | |
443 | | /// Allocates a new `Decoder` for the given `Encoding` on the heap with BOM |
444 | | /// sniffing enabled and returns a pointer to the newly-allocated `Decoder`. |
445 | | /// |
446 | | /// BOM sniffing may cause the returned decoder to morph into a decoder |
447 | | /// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding. |
448 | | /// |
449 | | /// Once the allocated `Decoder` is no longer needed, the caller _MUST_ |
450 | | /// deallocate it by passing the pointer returned by this function to |
451 | | /// `decoder_free()`. |
452 | | /// |
453 | | /// # Undefined behavior |
454 | | /// |
455 | | /// UB ensues if the argument is `NULL`. |
456 | | #[no_mangle] |
457 | 0 | pub unsafe extern "C" fn encoding_new_decoder(encoding: *const Encoding) -> *mut Decoder { |
458 | 0 | Box::into_raw(Box::new((*encoding).new_decoder())) |
459 | 0 | } |
460 | | |
461 | | /// Allocates a new `Decoder` for the given `Encoding` on the heap with BOM |
462 | | /// removal and returns a pointer to the newly-allocated `Decoder`. |
463 | | /// |
464 | | /// If the input starts with bytes that are the BOM for this encoding, |
465 | | /// those bytes are removed. However, the decoder never morphs into a |
466 | | /// decoder for another encoding: A BOM for another encoding is treated as |
467 | | /// (potentially malformed) input to the decoding algorithm for this |
468 | | /// encoding. |
469 | | /// |
470 | | /// Once the allocated `Decoder` is no longer needed, the caller _MUST_ |
471 | | /// deallocate it by passing the pointer returned by this function to |
472 | | /// `decoder_free()`. |
473 | | /// |
474 | | /// # Undefined behavior |
475 | | /// |
476 | | /// UB ensues if the argument is `NULL`. |
477 | | #[no_mangle] |
478 | 0 | pub unsafe extern "C" fn encoding_new_decoder_with_bom_removal( |
479 | 0 | encoding: *const Encoding, |
480 | 0 | ) -> *mut Decoder { |
481 | 0 | Box::into_raw(Box::new((*encoding).new_decoder_with_bom_removal())) |
482 | 0 | } |
483 | | |
484 | | /// Allocates a new `Decoder` for the given `Encoding` on the heap with BOM |
485 | | /// handling disabled and returns a pointer to the newly-allocated `Decoder`. |
486 | | /// |
487 | | /// If the input starts with bytes that look like a BOM, those bytes are |
488 | | /// not treated as a BOM. (Hence, the decoder never morphs into a decoder |
489 | | /// for another encoding.) |
490 | | /// |
491 | | /// _Note:_ If the caller has performed BOM sniffing on its own but has not |
492 | | /// removed the BOM, the caller should use |
493 | | /// `encoding_new_decoder_with_bom_removal()` instead of this function to cause |
494 | | /// the BOM to be removed. |
495 | | /// |
496 | | /// Once the allocated `Decoder` is no longer needed, the caller _MUST_ |
497 | | /// deallocate it by passing the pointer returned by this function to |
498 | | /// `decoder_free()`. |
499 | | /// |
500 | | /// # Undefined behavior |
501 | | /// |
502 | | /// UB ensues if the argument is `NULL`. |
503 | | #[no_mangle] |
504 | 0 | pub unsafe extern "C" fn encoding_new_decoder_without_bom_handling( |
505 | 0 | encoding: *const Encoding, |
506 | 0 | ) -> *mut Decoder { |
507 | 0 | Box::into_raw(Box::new((*encoding).new_decoder_without_bom_handling())) |
508 | 0 | } |
509 | | |
510 | | /// Allocates a new `Decoder` for the given `Encoding` into memory provided by |
511 | | /// the caller with BOM sniffing enabled. (In practice, the target should |
512 | | /// likely be a pointer previously returned by `encoding_new_decoder()`.) |
513 | | /// |
514 | | /// Note: If the caller has already performed BOM sniffing but has |
515 | | /// not removed the BOM, the caller should still use this function in |
516 | | /// order to cause the BOM to be ignored. |
517 | | /// |
518 | | /// # Undefined behavior |
519 | | /// |
520 | | /// UB ensues if either argument is `NULL`. |
521 | | #[no_mangle] |
522 | 0 | pub unsafe extern "C" fn encoding_new_decoder_into( |
523 | 0 | encoding: *const Encoding, |
524 | 0 | decoder: *mut Decoder, |
525 | 0 | ) { |
526 | 0 | *decoder = (*encoding).new_decoder(); |
527 | 0 | } |
528 | | |
529 | | /// Allocates a new `Decoder` for the given `Encoding` into memory provided by |
530 | | /// the caller with BOM removal. |
531 | | /// |
532 | | /// If the input starts with bytes that are the BOM for this encoding, |
533 | | /// those bytes are removed. However, the decoder never morphs into a |
534 | | /// decoder for another encoding: A BOM for another encoding is treated as |
535 | | /// (potentially malformed) input to the decoding algorithm for this |
536 | | /// encoding. |
537 | | /// |
538 | | /// Once the allocated `Decoder` is no longer needed, the caller _MUST_ |
539 | | /// deallocate it by passing the pointer returned by this function to |
540 | | /// `decoder_free()`. |
541 | | /// |
542 | | /// # Undefined behavior |
543 | | /// |
544 | | /// UB ensues if either argument is `NULL`. |
545 | | #[no_mangle] |
546 | 0 | pub unsafe extern "C" fn encoding_new_decoder_with_bom_removal_into( |
547 | 0 | encoding: *const Encoding, |
548 | 0 | decoder: *mut Decoder, |
549 | 0 | ) { |
550 | 0 | *decoder = (*encoding).new_decoder_with_bom_removal(); |
551 | 0 | } |
552 | | |
553 | | /// Allocates a new `Decoder` for the given `Encoding` into memory provided by |
554 | | /// the caller with BOM handling disabled. |
555 | | /// |
556 | | /// If the input starts with bytes that look like a BOM, those bytes are |
557 | | /// not treated as a BOM. (Hence, the decoder never morphs into a decoder |
558 | | /// for another encoding.) |
559 | | /// |
560 | | /// _Note:_ If the caller has performed BOM sniffing on its own but has not |
561 | | /// removed the BOM, the caller should use |
562 | | /// `encoding_new_decoder_with_bom_removal_into()` instead of this function to |
563 | | /// cause the BOM to be removed. |
564 | | /// |
565 | | /// # Undefined behavior |
566 | | /// |
567 | | /// UB ensues if either argument is `NULL`. |
568 | | #[no_mangle] |
569 | 0 | pub unsafe extern "C" fn encoding_new_decoder_without_bom_handling_into( |
570 | 0 | encoding: *const Encoding, |
571 | 0 | decoder: *mut Decoder, |
572 | 0 | ) { |
573 | 0 | *decoder = (*encoding).new_decoder_without_bom_handling(); |
574 | 0 | } |
575 | | |
576 | | /// Allocates a new `Encoder` for the given `Encoding` on the heap and returns a |
577 | | /// pointer to the newly-allocated `Encoder`. (Exception: if the `Encoding` is |
578 | | /// `replacement`, a new `Encoder` for UTF-8 is instantiated, and that |
579 | | /// `Encoder` reports `UTF_8` as its `Encoding`.) |
580 | | /// |
581 | | /// Once the allocated `Encoder` is no longer needed, the caller _MUST_ |
582 | | /// deallocate it by passing the pointer returned by this function to |
583 | | /// `encoder_free()`. |
584 | | /// |
585 | | /// # Undefined behavior |
586 | | /// |
587 | | /// UB ensues if the argument is `NULL`. |
588 | | #[no_mangle] |
589 | 0 | pub unsafe extern "C" fn encoding_new_encoder(encoding: *const Encoding) -> *mut Encoder { |
590 | 0 | Box::into_raw(Box::new((*encoding).new_encoder())) |
591 | 0 | } |
592 | | |
593 | | /// Allocates a new `Encoder` for the given `Encoding` into memory provided by |
594 | | /// the caller. (In practice, the target should likely be a pointer previously |
595 | | /// returned by `encoding_new_encoder()`.) |
596 | | /// |
597 | | /// # Undefined behavior |
598 | | /// |
599 | | /// UB ensues if either argument is `NULL`. |
600 | | #[no_mangle] |
601 | 0 | pub unsafe extern "C" fn encoding_new_encoder_into( |
602 | 0 | encoding: *const Encoding, |
603 | 0 | encoder: *mut Encoder, |
604 | 0 | ) { |
605 | 0 | *encoder = (*encoding).new_encoder(); |
606 | 0 | } |
607 | | |
608 | | /// Validates UTF-8. |
609 | | /// |
610 | | /// Returns the index of the first byte that makes the input malformed as |
611 | | /// UTF-8 or `buffer_len` if `buffer` is entirely valid. |
612 | | /// |
613 | | /// `buffer` must be non-`NULL` even if `buffer_len` is zero. When |
614 | | /// `buffer_len` is zero, it is OK for `buffer` to be something |
615 | | /// non-dereferencable, such as `0x1`. This is required due to Rust's |
616 | | /// optimization for slices within `Option`. |
617 | | /// |
618 | | /// # Undefined behavior |
619 | | /// |
620 | | /// UB ensues if `buffer` and `buffer_len` don't designate a valid memory |
621 | | /// block or if `buffer` is `NULL`. |
622 | | #[no_mangle] |
623 | 0 | pub unsafe extern "C" fn encoding_utf8_valid_up_to(buffer: *const u8, buffer_len: usize) -> usize { |
624 | 0 | let buffer_slice = ::std::slice::from_raw_parts(buffer, buffer_len); |
625 | 0 | Encoding::utf8_valid_up_to(buffer_slice) |
626 | 0 | } |
627 | | |
628 | | /// Validates ASCII. |
629 | | /// |
630 | | /// Returns the index of the first byte that makes the input malformed as |
631 | | /// ASCII or `buffer_len` if `buffer` is entirely valid. |
632 | | /// |
633 | | /// `buffer` must be non-`NULL` even if `buffer_len` is zero. When |
634 | | /// `buffer_len` is zero, it is OK for `buffer` to be something |
635 | | /// non-dereferencable, such as `0x1`. This is required due to Rust's |
636 | | /// optimization for slices within `Option`. |
637 | | /// |
638 | | /// # Undefined behavior |
639 | | /// |
640 | | /// UB ensues if `buffer` and `buffer_len` don't designate a valid memory |
641 | | /// block or if `buffer` is `NULL`. |
642 | | #[no_mangle] |
643 | 258M | pub unsafe extern "C" fn encoding_ascii_valid_up_to(buffer: *const u8, buffer_len: usize) -> usize { |
644 | 258M | let buffer_slice = ::std::slice::from_raw_parts(buffer, buffer_len); |
645 | 258M | Encoding::ascii_valid_up_to(buffer_slice) |
646 | 258M | } |
647 | | |
648 | | /// Validates ISO-2022-JP ASCII-state data. |
649 | | /// |
650 | | /// Returns the index of the first byte that makes the input not representable |
651 | | /// in the ASCII state of ISO-2022-JP or `buffer_len` if `buffer` is entirely |
652 | | /// representable in the ASCII state of ISO-2022-JP. |
653 | | /// |
654 | | /// `buffer` must be non-`NULL` even if `buffer_len` is zero. When |
655 | | /// `buffer_len` is zero, it is OK for `buffer` to be something |
656 | | /// non-dereferencable, such as `0x1`. This is required due to Rust's |
657 | | /// optimization for slices within `Option`. |
658 | | /// |
659 | | /// # Undefined behavior |
660 | | /// |
661 | | /// UB ensues if `buffer` and `buffer_len` don't designate a valid memory |
662 | | /// block or if `buffer` is `NULL`. |
663 | | #[no_mangle] |
664 | 0 | pub unsafe extern "C" fn encoding_iso_2022_jp_ascii_valid_up_to( |
665 | 0 | buffer: *const u8, |
666 | 0 | buffer_len: usize, |
667 | 0 | ) -> usize { |
668 | 0 | let buffer_slice = ::std::slice::from_raw_parts(buffer, buffer_len); |
669 | 0 | Encoding::iso_2022_jp_ascii_valid_up_to(buffer_slice) |
670 | 0 | } |
671 | | |
672 | | /// Deallocates a `Decoder` previously allocated by `encoding_new_decoder()`. |
673 | | /// |
674 | | /// # Undefined behavior |
675 | | /// |
676 | | /// UB ensues if the argument is `NULL`. |
677 | | #[no_mangle] |
678 | 0 | pub unsafe extern "C" fn decoder_free(decoder: *mut Decoder) { |
679 | 0 | let _ = Box::from_raw(decoder); |
680 | 0 | } |
681 | | |
682 | | /// The `Encoding` this `Decoder` is for. |
683 | | /// |
684 | | /// BOM sniffing can change the return value of this method during the life |
685 | | /// of the decoder. |
686 | | /// |
687 | | /// # Undefined behavior |
688 | | /// |
689 | | /// UB ensues if the argument is `NULL`. |
690 | | #[no_mangle] |
691 | 0 | pub unsafe extern "C" fn decoder_encoding(decoder: *const Decoder) -> *const Encoding { |
692 | 0 | (*decoder).encoding() |
693 | 0 | } |
694 | | |
695 | | /// Query the worst-case UTF-8 output size _with replacement_. |
696 | | /// |
697 | | /// Returns the size of the output buffer in UTF-8 code units (`uint8_t`) |
698 | | /// that will not overflow given the current state of the decoder and |
699 | | /// `byte_length` number of additional input bytes when decoding with |
700 | | /// errors handled by outputting a REPLACEMENT CHARACTER for each malformed |
701 | | /// sequence or `SIZE_MAX` if `size_t` would overflow. |
702 | | /// |
703 | | /// # Undefined behavior |
704 | | /// |
705 | | /// UB ensues if `decoder` is `NULL`. |
706 | | #[no_mangle] |
707 | 0 | pub unsafe extern "C" fn decoder_max_utf8_buffer_length( |
708 | 0 | decoder: *const Decoder, |
709 | 0 | byte_length: usize, |
710 | 0 | ) -> usize { |
711 | 0 | (*decoder) |
712 | 0 | .max_utf8_buffer_length(byte_length) |
713 | 0 | .unwrap_or(::std::usize::MAX) |
714 | 0 | } |
715 | | |
716 | | /// Query the worst-case UTF-8 output size _without replacement_. |
717 | | /// |
718 | | /// Returns the size of the output buffer in UTF-8 code units (`uint8_t`) |
719 | | /// that will not overflow given the current state of the decoder and |
720 | | /// `byte_length` number of additional input bytes when decoding without |
721 | | /// replacement error handling or `SIZE_MAX` if `size_t` would overflow. |
722 | | /// |
723 | | /// Note that this value may be too small for the `_with_replacement` case. |
724 | | /// Use `decoder_max_utf8_buffer_length()` for that case. |
725 | | /// |
726 | | /// # Undefined behavior |
727 | | /// |
728 | | /// UB ensues if `decoder` is `NULL`. |
729 | | #[no_mangle] |
730 | 0 | pub unsafe extern "C" fn decoder_max_utf8_buffer_length_without_replacement( |
731 | 0 | decoder: *const Decoder, |
732 | 0 | byte_length: usize, |
733 | 0 | ) -> usize { |
734 | 0 | (*decoder) |
735 | 0 | .max_utf8_buffer_length_without_replacement(byte_length) |
736 | 0 | .unwrap_or(::std::usize::MAX) |
737 | 0 | } |
738 | | |
739 | | /// Incrementally decode a byte stream into UTF-8 with malformed sequences |
740 | | /// replaced with the REPLACEMENT CHARACTER. |
741 | | /// |
742 | | /// See the top-level FFI documentation for documentation for how the |
743 | | /// `decoder_decode_*` functions are mapped from Rust and the documentation |
744 | | /// for the [`Decoder`][1] struct for the semantics. |
745 | | /// |
746 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
747 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
748 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
749 | | /// optimization for slices within `Option`. |
750 | | /// |
751 | | /// # Undefined behavior |
752 | | /// |
753 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
754 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
755 | | /// designate a valid block of memory. |
756 | | /// |
757 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html |
758 | | #[no_mangle] |
759 | 0 | pub unsafe extern "C" fn decoder_decode_to_utf8( |
760 | 0 | decoder: *mut Decoder, |
761 | 0 | src: *const u8, |
762 | 0 | src_len: *mut usize, |
763 | 0 | dst: *mut u8, |
764 | 0 | dst_len: *mut usize, |
765 | 0 | last: bool, |
766 | 0 | had_replacements: *mut bool, |
767 | 0 | ) -> u32 { |
768 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
769 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
770 | 0 | let (result, read, written, replaced) = (*decoder).decode_to_utf8(src_slice, dst_slice, last); |
771 | 0 | *src_len = read; |
772 | 0 | *dst_len = written; |
773 | 0 | *had_replacements = replaced; |
774 | 0 | coder_result_to_u32(result) |
775 | 0 | } |
776 | | |
777 | | /// Incrementally decode a byte stream into UTF-8 _without replacement_. |
778 | | /// |
779 | | /// See the top-level FFI documentation for documentation for how the |
780 | | /// `decoder_decode_*` functions are mapped from Rust and the documentation |
781 | | /// for the [`Decoder`][1] struct for the semantics. |
782 | | /// |
783 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
784 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
785 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
786 | | /// optimization for slices within `Option`. |
787 | | /// |
788 | | /// # Undefined behavior |
789 | | /// |
790 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
791 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
792 | | /// designate a valid block of memory. |
793 | | /// |
794 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html |
795 | | #[no_mangle] |
796 | 0 | pub unsafe extern "C" fn decoder_decode_to_utf8_without_replacement( |
797 | 0 | decoder: *mut Decoder, |
798 | 0 | src: *const u8, |
799 | 0 | src_len: *mut usize, |
800 | 0 | dst: *mut u8, |
801 | 0 | dst_len: *mut usize, |
802 | 0 | last: bool, |
803 | 0 | ) -> u32 { |
804 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
805 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
806 | 0 | let (result, read, written) = |
807 | 0 | (*decoder).decode_to_utf8_without_replacement(src_slice, dst_slice, last); |
808 | 0 | *src_len = read; |
809 | 0 | *dst_len = written; |
810 | 0 | decoder_result_to_u32(result) |
811 | 0 | } |
812 | | |
813 | | /// Query the worst-case UTF-16 output size (with or without replacement). |
814 | | /// |
815 | | /// Returns the size of the output buffer in UTF-16 code units (`char16_t`) |
816 | | /// that will not overflow given the current state of the decoder and |
817 | | /// `u16_length` number of additional input bytes (the parameter counts bytes |
818 | | /// despite its name) or `SIZE_MAX` if `size_t` would overflow. |
819 | | /// |
820 | | /// Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the |
821 | | /// return value of this method applies also in the |
822 | | /// `_without_replacement` case. |
823 | | /// |
824 | | /// # Undefined behavior |
825 | | /// |
826 | | /// UB ensues if `decoder` is `NULL`. |
827 | | #[no_mangle] |
828 | 0 | pub unsafe extern "C" fn decoder_max_utf16_buffer_length( |
829 | 0 | decoder: *const Decoder, |
830 | 0 | u16_length: usize, |
831 | 0 | ) -> usize { |
832 | 0 | (*decoder) |
833 | 0 | .max_utf16_buffer_length(u16_length) |
834 | 0 | .unwrap_or(::std::usize::MAX) |
835 | 0 | } |
836 | | |
837 | | /// Incrementally decode a byte stream into UTF-16 with malformed sequences |
838 | | /// replaced with the REPLACEMENT CHARACTER. |
839 | | /// |
840 | | /// See the top-level FFI documentation for documentation for how the |
841 | | /// `decoder_decode_*` functions are mapped from Rust and the documentation |
842 | | /// for the [`Decoder`][1] struct for the semantics. |
843 | | /// |
844 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
845 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
846 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
847 | | /// optimization for slices within `Option`. |
848 | | /// |
849 | | /// # Undefined behavior |
850 | | /// |
851 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
852 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
853 | | /// designate a valid block of memory. |
854 | | /// |
855 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html |
856 | | #[no_mangle] |
857 | 0 | pub unsafe extern "C" fn decoder_decode_to_utf16( |
858 | 0 | decoder: *mut Decoder, |
859 | 0 | src: *const u8, |
860 | 0 | src_len: *mut usize, |
861 | 0 | dst: *mut u16, |
862 | 0 | dst_len: *mut usize, |
863 | 0 | last: bool, |
864 | 0 | had_replacements: *mut bool, |
865 | 0 | ) -> u32 { |
866 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
867 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
868 | 0 | let (result, read, written, replaced) = (*decoder).decode_to_utf16(src_slice, dst_slice, last); |
869 | 0 | *src_len = read; |
870 | 0 | *dst_len = written; |
871 | 0 | *had_replacements = replaced; |
872 | 0 | coder_result_to_u32(result) |
873 | 0 | } |
874 | | |
875 | | /// Incrementally decode a byte stream into UTF-16 _without replacement_. |
876 | | /// |
877 | | /// See the top-level FFI documentation for documentation for how the |
878 | | /// `decoder_decode_*` functions are mapped from Rust and the documentation |
879 | | /// for the [`Decoder`][1] struct for the semantics. |
880 | | /// |
881 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
882 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
883 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
884 | | /// optimization for slices within `Option`. |
885 | | /// |
886 | | /// # Undefined behavior |
887 | | /// |
888 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
889 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
890 | | /// designate a valid block of memory. |
891 | | /// |
892 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html |
893 | | #[no_mangle] |
894 | 0 | pub unsafe extern "C" fn decoder_decode_to_utf16_without_replacement( |
895 | 0 | decoder: *mut Decoder, |
896 | 0 | src: *const u8, |
897 | 0 | src_len: *mut usize, |
898 | 0 | dst: *mut u16, |
899 | 0 | dst_len: *mut usize, |
900 | 0 | last: bool, |
901 | 0 | ) -> u32 { |
902 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
903 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
904 | 0 | let (result, read, written) = |
905 | 0 | (*decoder).decode_to_utf16_without_replacement(src_slice, dst_slice, last); |
906 | 0 | *src_len = read; |
907 | 0 | *dst_len = written; |
908 | 0 | decoder_result_to_u32(result) |
909 | 0 | } |
910 | | |
911 | | /// Checks for compatibility with storing Unicode scalar values as unsigned |
912 | | /// bytes taking into account the state of the decoder. |
913 | | /// |
914 | | /// Returns `SIZE_MAX` if the decoder is not in a neutral state, including waiting |
915 | | /// for the BOM, or if the encoding is never Latin1-byte-compatible. |
916 | | /// |
917 | | /// Otherwise returns the index of the first byte whose unsigned value doesn't |
918 | | /// directly correspond to the decoded Unicode scalar value, or the length |
919 | | /// of the input if all bytes in the input decode directly to scalar values |
920 | | /// corresponding to the unsigned byte values. |
921 | | /// |
922 | | /// Does not change the state of the decoder. |
923 | | /// |
924 | | /// Do not use this unless you are supporting SpiderMonkey/V8-style string |
925 | | /// storage optimizations. |
926 | | /// |
927 | | /// # Undefined behavior |
928 | | /// |
929 | | /// UB ensues if `decoder` or `buffer` is `NULL` or if `buffer` and |
930 | | /// `buffer_len` don't designate a valid memory block. |
931 | | #[no_mangle] |
932 | 0 | pub unsafe extern "C" fn decoder_latin1_byte_compatible_up_to( |
933 | 0 | decoder: *const Decoder, |
934 | 0 | buffer: *const u8, |
935 | 0 | buffer_len: usize, |
936 | 0 | ) -> usize { |
937 | 0 | (*decoder) |
938 | 0 | .latin1_byte_compatible_up_to(::std::slice::from_raw_parts(buffer, buffer_len)) |
939 | 0 | .unwrap_or(::std::usize::MAX) |
940 | 0 | } |
941 | | |
942 | | /// Deallocates an `Encoder` previously allocated by `encoding_new_encoder()`. |
943 | | /// |
944 | | /// # Undefined behavior |
945 | | /// |
946 | | /// UB ensues if the argument is `NULL`. |
947 | | #[no_mangle] |
948 | 0 | pub unsafe extern "C" fn encoder_free(encoder: *mut Encoder) { |
949 | 0 | let _ = Box::from_raw(encoder); |
950 | 0 | } |
951 | | |
952 | | /// The `Encoding` this `Encoder` is for. |
953 | | /// |
954 | | /// # Undefined behavior |
955 | | /// |
956 | | /// UB ensues if the argument is `NULL`. |
957 | | #[no_mangle] |
958 | 0 | pub unsafe extern "C" fn encoder_encoding(encoder: *const Encoder) -> *const Encoding { |
959 | 0 | (*encoder).encoding() |
960 | 0 | } |
961 | | |
962 | | /// Returns `true` if this is an ISO-2022-JP encoder that's not in the |
963 | | /// ASCII state and `false` otherwise. |
964 | | /// |
965 | | /// # Undefined behavior |
966 | | /// |
967 | | /// UB ensues if the argument is `NULL`. |
968 | | #[no_mangle] |
969 | 0 | pub unsafe extern "C" fn encoder_has_pending_state(encoder: *const Encoder) -> bool { |
970 | 0 | (*encoder).has_pending_state() |
971 | 0 | } |
972 | | |
973 | | /// Query the worst-case output size when encoding from UTF-8 with |
974 | | /// replacement. |
975 | | /// |
976 | | /// Returns the size of the output buffer in bytes that will not overflow |
977 | | /// given the current state of the encoder and `byte_length` number of |
978 | | /// additional input code units if there are no unmappable characters in |
979 | | /// the input or `SIZE_MAX` if `size_t` would overflow. |
980 | | #[no_mangle] |
981 | 0 | pub unsafe extern "C" fn encoder_max_buffer_length_from_utf8_if_no_unmappables( |
982 | 0 | encoder: *const Encoder, |
983 | 0 | byte_length: usize, |
984 | 0 | ) -> usize { |
985 | 0 | (*encoder) |
986 | 0 | .max_buffer_length_from_utf8_if_no_unmappables(byte_length) |
987 | 0 | .unwrap_or(::std::usize::MAX) |
988 | 0 | } |
989 | | |
990 | | /// Query the worst-case output size when encoding from UTF-8 without |
991 | | /// replacement. |
992 | | /// |
993 | | /// Returns the size of the output buffer in bytes that will not overflow |
994 | | /// given the current state of the encoder and `byte_length` number of |
995 | | /// additional input code units or `SIZE_MAX` if `size_t` would overflow. |
996 | | #[no_mangle] |
997 | 0 | pub unsafe extern "C" fn encoder_max_buffer_length_from_utf8_without_replacement( |
998 | 0 | encoder: *const Encoder, |
999 | 0 | byte_length: usize, |
1000 | 0 | ) -> usize { |
1001 | 0 | (*encoder) |
1002 | 0 | .max_buffer_length_from_utf8_without_replacement(byte_length) |
1003 | 0 | .unwrap_or(::std::usize::MAX) |
1004 | 0 | } |
1005 | | |
1006 | | /// Incrementally encode into byte stream from UTF-8 with unmappable |
1007 | | /// characters replaced with HTML (decimal) numeric character references. |
1008 | | /// |
1009 | | /// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe! |
1010 | | /// If in doubt, check the validity of input before using! |
1011 | | /// |
1012 | | /// See the top-level FFI documentation for documentation for how the |
1013 | | /// `encoder_encode_*` functions are mapped from Rust and the documentation |
1014 | | /// for the [`Encoder`][1] struct for the semantics. |
1015 | | /// |
1016 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
1017 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
1018 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
1019 | | /// optimization for slices within `Option`. |
1020 | | /// |
1021 | | /// # Undefined behavior |
1022 | | /// |
1023 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
1024 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
1025 | | /// designate a valid block of memory. |
1026 | | /// |
1027 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html |
1028 | | #[no_mangle] |
1029 | 0 | pub unsafe extern "C" fn encoder_encode_from_utf8( |
1030 | 0 | encoder: *mut Encoder, |
1031 | 0 | src: *const u8, |
1032 | 0 | src_len: *mut usize, |
1033 | 0 | dst: *mut u8, |
1034 | 0 | dst_len: *mut usize, |
1035 | 0 | last: bool, |
1036 | 0 | had_replacements: *mut bool, |
1037 | 0 | ) -> u32 { |
1038 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
1039 | 0 | let string = ::std::str::from_utf8_unchecked(src_slice); |
1040 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
1041 | 0 | let (result, read, written, replaced) = (*encoder).encode_from_utf8(string, dst_slice, last); |
1042 | 0 | *src_len = read; |
1043 | 0 | *dst_len = written; |
1044 | 0 | *had_replacements = replaced; |
1045 | 0 | coder_result_to_u32(result) |
1046 | 0 | } |
1047 | | |
1048 | | /// Incrementally encode into byte stream from UTF-8 _without replacement_. |
1049 | | /// |
1050 | | /// See the top-level FFI documentation for documentation for how the |
1051 | | /// `encoder_encode_*` functions are mapped from Rust and the documentation |
1052 | | /// for the [`Encoder`][1] struct for the semantics. |
1053 | | /// |
1054 | | /// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe! |
1055 | | /// If in doubt, check the validity of input before using! |
1056 | | /// |
1057 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
1058 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
1059 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
1060 | | /// optimization for slices within `Option`. |
1061 | | /// |
1062 | | /// # Undefined behavior |
1063 | | /// |
1064 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
1065 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
1066 | | /// designate a valid block of memory. |
1067 | | /// |
1068 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html |
1069 | | #[no_mangle] |
1070 | 0 | pub unsafe extern "C" fn encoder_encode_from_utf8_without_replacement( |
1071 | 0 | encoder: *mut Encoder, |
1072 | 0 | src: *const u8, |
1073 | 0 | src_len: *mut usize, |
1074 | 0 | dst: *mut u8, |
1075 | 0 | dst_len: *mut usize, |
1076 | 0 | last: bool, |
1077 | 0 | ) -> u32 { |
1078 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
1079 | 0 | let string = ::std::str::from_utf8_unchecked(src_slice); |
1080 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
1081 | 0 | let (result, read, written) = |
1082 | 0 | (*encoder).encode_from_utf8_without_replacement(string, dst_slice, last); |
1083 | 0 | *src_len = read; |
1084 | 0 | *dst_len = written; |
1085 | 0 | encoder_result_to_u32(result) |
1086 | 0 | } |
1087 | | |
1088 | | /// Query the worst-case output size when encoding from UTF-16 with |
1089 | | /// replacement. |
1090 | | /// |
1091 | | /// Returns the size of the output buffer in bytes that will not overflow |
1092 | | /// given the current state of the encoder and `u16_length` number of |
1093 | | /// additional input code units if there are no unmappable characters in |
1094 | | /// the input or `SIZE_MAX` if `size_t` would overflow. |
1095 | | #[no_mangle] |
1096 | 0 | pub unsafe extern "C" fn encoder_max_buffer_length_from_utf16_if_no_unmappables( |
1097 | 0 | encoder: *const Encoder, |
1098 | 0 | u16_length: usize, |
1099 | 0 | ) -> usize { |
1100 | 0 | (*encoder) |
1101 | 0 | .max_buffer_length_from_utf16_if_no_unmappables(u16_length) |
1102 | 0 | .unwrap_or(::std::usize::MAX) |
1103 | 0 | } |
1104 | | |
1105 | | /// Query the worst-case output size when encoding from UTF-16 without |
1106 | | /// replacement. |
1107 | | /// |
1108 | | /// Returns the size of the output buffer in bytes that will not overflow |
1109 | | /// given the current state of the encoder and `u16_length` number of |
1110 | | /// additional input code units or `SIZE_MAX` if `size_t` would overflow. |
1111 | | #[no_mangle] |
1112 | 0 | pub unsafe extern "C" fn encoder_max_buffer_length_from_utf16_without_replacement( |
1113 | 0 | encoder: *const Encoder, |
1114 | 0 | u16_length: usize, |
1115 | 0 | ) -> usize { |
1116 | 0 | (*encoder) |
1117 | 0 | .max_buffer_length_from_utf16_without_replacement(u16_length) |
1118 | 0 | .unwrap_or(::std::usize::MAX) |
1119 | 0 | } |
1120 | | |
1121 | | /// Incrementally encode into byte stream from UTF-16 with unmappable |
1122 | | /// characters replaced with HTML (decimal) numeric character references. |
1123 | | /// |
1124 | | /// See the top-level FFI documentation for documentation for how the |
1125 | | /// `encoder_encode_*` functions are mapped from Rust and the documentation |
1126 | | /// for the [`Encoder`][1] struct for the semantics. |
1127 | | /// |
1128 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
1129 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
1130 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
1131 | | /// optimization for slices within `Option`. |
1132 | | /// |
1133 | | /// # Undefined behavior |
1134 | | /// |
1135 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
1136 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
1137 | | /// designate a valid block of memory. |
1138 | | /// |
1139 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html |
1140 | | #[no_mangle] |
1141 | 0 | pub unsafe extern "C" fn encoder_encode_from_utf16( |
1142 | 0 | encoder: *mut Encoder, |
1143 | 0 | src: *const u16, |
1144 | 0 | src_len: *mut usize, |
1145 | 0 | dst: *mut u8, |
1146 | 0 | dst_len: *mut usize, |
1147 | 0 | last: bool, |
1148 | 0 | had_replacements: *mut bool, |
1149 | 0 | ) -> u32 { |
1150 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
1151 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
1152 | 0 | let (result, read, written, replaced) = |
1153 | 0 | (*encoder).encode_from_utf16(src_slice, dst_slice, last); |
1154 | 0 | *src_len = read; |
1155 | 0 | *dst_len = written; |
1156 | 0 | *had_replacements = replaced; |
1157 | 0 | coder_result_to_u32(result) |
1158 | 0 | } |
1159 | | |
1160 | | /// Incrementally encode into byte stream from UTF-16 _without replacement_. |
1161 | | /// |
1162 | | /// See the top-level FFI documentation for documentation for how the |
1163 | | /// `encoder_encode_*` functions are mapped from Rust and the documentation |
1164 | | /// for the [`Encoder`][1] struct for the semantics. |
1165 | | /// |
1166 | | /// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero, |
1167 | | /// it is OK for `src` to be something non-dereferenceable, such as `0x1`. |
1168 | | /// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's |
1169 | | /// optimization for slices within `Option`. |
1170 | | /// |
1171 | | /// # Undefined behavior |
1172 | | /// |
1173 | | /// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len` |
1174 | | /// don't designate a valid block of memory or `dst` and `dst_len` don't |
1175 | | /// designate a valid block of memory. |
1176 | | /// |
1177 | | /// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html |
1178 | | #[no_mangle] |
1179 | 0 | pub unsafe extern "C" fn encoder_encode_from_utf16_without_replacement( |
1180 | 0 | encoder: *mut Encoder, |
1181 | 0 | src: *const u16, |
1182 | 0 | src_len: *mut usize, |
1183 | 0 | dst: *mut u8, |
1184 | 0 | dst_len: *mut usize, |
1185 | 0 | last: bool, |
1186 | 0 | ) -> u32 { |
1187 | 0 | let src_slice = ::std::slice::from_raw_parts(src, *src_len); |
1188 | 0 | let dst_slice = ::std::slice::from_raw_parts_mut(dst, *dst_len); |
1189 | 0 | let (result, read, written) = |
1190 | 0 | (*encoder).encode_from_utf16_without_replacement(src_slice, dst_slice, last); |
1191 | 0 | *src_len = read; |
1192 | 0 | *dst_len = written; |
1193 | 0 | encoder_result_to_u32(result) |
1194 | 0 | } |