/src/leptonica/src/utils2.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file utils2.c |
29 | | * <pre> |
30 | | * |
31 | | * ------------------------------------------ |
32 | | * This file has these utilities: |
33 | | * - safe string operations |
34 | | * - find/replace operations on strings |
35 | | * - read/write between file and memory |
36 | | * - multi-platform file and directory operations |
37 | | * - file name operations |
38 | | * ------------------------------------------ |
39 | | * |
40 | | * Safe string procs |
41 | | * char *stringNew() |
42 | | * l_int32 stringCopy() |
43 | | * l_int32 stringCopySegment() |
44 | | * l_int32 stringReplace() |
45 | | * l_int32 stringLength() |
46 | | * l_int32 stringCat() |
47 | | * char *stringConcatNew() |
48 | | * char *stringJoin() |
49 | | * l_int32 stringJoinIP() |
50 | | * char *stringReverse() |
51 | | * char *strtokSafe() |
52 | | * l_int32 stringSplitOnToken() |
53 | | * |
54 | | * Find and replace string and array procs |
55 | | * l_int32 stringCheckForChars() |
56 | | * char *stringRemoveChars() |
57 | | * char *stringReplaceEachSubstr() |
58 | | * char *stringReplaceSubstr() |
59 | | * L_DNA *stringFindEachSubstr() |
60 | | * l_int32 stringFindSubstr() |
61 | | * l_uint8 *arrayReplaceEachSequence() |
62 | | * L_DNA *arrayFindEachSequence() |
63 | | * l_int32 arrayFindSequence() |
64 | | * |
65 | | * Safe realloc |
66 | | * void *reallocNew() |
67 | | * |
68 | | * Read and write between file and memory |
69 | | * l_uint8 *l_binaryRead() |
70 | | * l_uint8 *l_binaryReadStream() |
71 | | * l_uint8 *l_binaryReadSelect() |
72 | | * l_uint8 *l_binaryReadSelectStream() |
73 | | * l_int32 l_binaryWrite() |
74 | | * l_int32 nbytesInFile() |
75 | | * l_int32 fnbytesInFile() |
76 | | * |
77 | | * Copy and compare in memory |
78 | | * l_uint8 *l_binaryCopy() |
79 | | * l_uint8 *l_binaryCompare() |
80 | | * |
81 | | * File copy operations |
82 | | * l_int32 fileCopy() |
83 | | * l_int32 fileConcatenate() |
84 | | * l_int32 fileAppendString() |
85 | | * |
86 | | * File split operations |
87 | | * l_int32 fileSplitLinesUniform() |
88 | | * |
89 | | * Multi-platform functions for opening file streams |
90 | | * FILE *fopenReadStream() |
91 | | * FILE *fopenWriteStream() |
92 | | * FILE *fopenReadFromMemory() |
93 | | * |
94 | | * Opening a Windows tmpfile for writing |
95 | | * FILE *fopenWriteWinTempfile() |
96 | | * |
97 | | * Multi-platform functions that avoid C-runtime boundary crossing |
98 | | * with Windows DLLs (use in programs only) |
99 | | * FILE *lept_fopen() |
100 | | * l_int32 lept_fclose() |
101 | | * void *lept_calloc() |
102 | | * void lept_free() |
103 | | * |
104 | | * Multi-platform file system operations in temp directories |
105 | | * l_int32 lept_mkdir() |
106 | | * l_int32 lept_rmdir() |
107 | | * l_int32 lept_direxists() |
108 | | * l_int32 lept_mv() |
109 | | * l_int32 lept_rm_match() |
110 | | * l_int32 lept_rm() |
111 | | * l_int32 lept_rmfile() |
112 | | * l_int32 lept_cp() |
113 | | * |
114 | | * Special debug/test function for calling 'system' |
115 | | * l_int32 callSystemDebug() |
116 | | * |
117 | | * General file name operations |
118 | | * l_int32 splitPathAtDirectory() |
119 | | * l_int32 splitPathAtExtension() |
120 | | * char *pathJoin() |
121 | | * char *appendSubdirs() |
122 | | * |
123 | | * Special file name operations |
124 | | * l_int32 convertSepCharsInPath() |
125 | | * char *genPathname() |
126 | | * l_int32 makeTempDirname() |
127 | | * l_int32 modifyTrailingSlash() |
128 | | * char *l_makeTempFilename() |
129 | | * l_int32 extractNumberFromFilename() |
130 | | * |
131 | | * |
132 | | * Notes on multi-platform development |
133 | | * ----------------------------------- |
134 | | * This is important: |
135 | | * (1) With the exception of splitPathAtDirectory(), splitPathAtExtension() |
136 | | * and genPathname(), all input pathnames must have unix separators. |
137 | | * (2) On macOS, iOS and Windows, for read or write to "/tmp/..." |
138 | | * the filename is rewritten to use the OS specific temp directory: |
139 | | * /tmp ==> [Temp]/... |
140 | | * (3) This filename rewrite, along with the conversion from unix |
141 | | * to OS specific pathnames, happens in genPathname(). |
142 | | * (4) Use fopenReadStream() and fopenWriteStream() to open files, |
143 | | * because these use genPathname() to find the platform-dependent |
144 | | * filenames. Likewise for l_binaryRead() and l_binaryWrite(). |
145 | | * (5) For moving, copying and removing files and directories that are in |
146 | | * subdirectories of /tmp, use the lept_*() file system shell wrappers: |
147 | | * lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp(). |
148 | | * (6) For programs use the lept_fopen(), lept_fclose(), lept_calloc() |
149 | | * and lept_free() C library wrappers. These work properly on Windows, |
150 | | * where the same DLL must perform complementary operations on |
151 | | * file streams (open/close) and heap memory (malloc/free). |
152 | | * (7) Why read and write files to temp directories? |
153 | | * The library needs the ability to read and write ephemeral |
154 | | * files to default places, both for generating debugging output |
155 | | * and for supporting regression tests. Applications also need |
156 | | * this ability for debugging. |
157 | | * (8) Why do the pathname rewrite on macOS, iOS and Windows? |
158 | | * The goal is to have the library, and programs using the library, |
159 | | * run on multiple platforms without changes. The location of |
160 | | * temporary files depends on the platform as well as the user's |
161 | | * configuration. Temp files on some operating systems are in some |
162 | | * directory not known a priori. To make everything work seamlessly on |
163 | | * any OS, every time you open a file for reading or writing, |
164 | | * use a special function such as fopenReadStream() or |
165 | | * fopenWriteStream(); these call genPathname() to ensure that |
166 | | * if it is a temp file, the correct path is used. To indicate |
167 | | * that this is a temp file, the application is written with the |
168 | | * root directory of the path in a canonical form: "/tmp". |
169 | | * (9) Why is it that multi-platform directory functions like lept_mkdir() |
170 | | * and lept_rmdir(), as well as associated file functions like |
171 | | * lept_rm(), lept_mv() and lept_cp(), only work in the temp dir? |
172 | | * These functions were designed to provide easy manipulation of |
173 | | * temp files. The restriction to temp files is for safety -- to |
174 | | * prevent an accidental deletion of important files. For example, |
175 | | * lept_rmdir() first deletes all files in a specified subdirectory |
176 | | * of temp, and then removes the directory. |
177 | | * |
178 | | * </pre> |
179 | | */ |
180 | | |
181 | | #ifdef HAVE_CONFIG_H |
182 | | #include <config_auto.h> |
183 | | #endif /* HAVE_CONFIG_H */ |
184 | | |
185 | | #ifdef _MSC_VER |
186 | | #include <process.h> |
187 | | #include <direct.h> |
188 | | #define getcwd _getcwd /* fix MSVC warning */ |
189 | | #else |
190 | | #include <unistd.h> |
191 | | #endif /* _MSC_VER */ |
192 | | |
193 | | #ifdef _WIN32 |
194 | | #include <windows.h> |
195 | | #include <fcntl.h> /* _O_CREAT, ... */ |
196 | | #include <io.h> /* _open */ |
197 | | #include <sys/stat.h> /* _S_IREAD, _S_IWRITE */ |
198 | | #else |
199 | | #include <sys/stat.h> /* for stat, mkdir(2) */ |
200 | | #include <sys/types.h> |
201 | | #endif |
202 | | |
203 | | #ifdef __APPLE__ |
204 | | #include <unistd.h> |
205 | | #include <errno.h> |
206 | | #endif |
207 | | |
208 | | #include <string.h> |
209 | | #include <stddef.h> |
210 | | #include "allheaders.h" |
211 | | |
212 | | #if defined(__APPLE__) || defined(_WIN32) |
213 | | /* Rewrite paths starting with /tmp for macOS, iOS and Windows. */ |
214 | | #define REWRITE_TMP |
215 | | #endif |
216 | | |
217 | | /*--------------------------------------------------------------------* |
218 | | * Safe string operations * |
219 | | *--------------------------------------------------------------------*/ |
220 | | /*! |
221 | | * \brief stringNew() |
222 | | * |
223 | | * \param[in] src |
224 | | * \return dest copy of %src string, or NULL on error |
225 | | */ |
226 | | char * |
227 | | stringNew(const char *src) |
228 | 20.3k | { |
229 | 20.3k | l_int32 len; |
230 | 20.3k | char *dest; |
231 | | |
232 | 20.3k | if (!src) { |
233 | 0 | L_WARNING("src not defined\n", __func__); |
234 | 0 | return NULL; |
235 | 0 | } |
236 | | |
237 | 20.3k | len = strlen(src); |
238 | 20.3k | if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) |
239 | 0 | return (char *)ERROR_PTR("dest not made", __func__, NULL); |
240 | | |
241 | 20.3k | stringCopy(dest, src, len); |
242 | 20.3k | return dest; |
243 | 20.3k | } |
244 | | |
245 | | |
246 | | /*! |
247 | | * \brief stringCopy() |
248 | | * |
249 | | * \param[in] dest existing byte buffer |
250 | | * \param[in] src string [optional] can be null |
251 | | * \param[in] n max number of characters to copy |
252 | | * \return 0 if OK, 1 on error |
253 | | * |
254 | | * <pre> |
255 | | * Notes: |
256 | | * (1) Relatively safe wrapper for strncpy, that checks the input, |
257 | | * and does not complain if %src is null or %n < 1. |
258 | | * If %n < 1, this is a no-op. |
259 | | * (2) %dest needs to be at least %n bytes in size. |
260 | | * (3) We don't call strncpy() because valgrind complains about |
261 | | * use of uninitialized values. |
262 | | * </pre> |
263 | | */ |
264 | | l_ok |
265 | | stringCopy(char *dest, |
266 | | const char *src, |
267 | | l_int32 n) |
268 | 28.5k | { |
269 | 28.5k | l_int32 i; |
270 | | |
271 | 28.5k | if (!dest) |
272 | 0 | return ERROR_INT("dest not defined", __func__, 1); |
273 | 28.5k | if (!src || n < 1) |
274 | 0 | return 0; |
275 | | |
276 | | /* Implementation of strncpy that valgrind doesn't complain about */ |
277 | 339k | for (i = 0; i < n && src[i] != '\0'; i++) |
278 | 310k | dest[i] = src[i]; |
279 | 28.5k | for (; i < n; i++) |
280 | 0 | dest[i] = '\0'; |
281 | 28.5k | return 0; |
282 | 28.5k | } |
283 | | |
284 | | |
285 | | /*! |
286 | | * \brief stringCopySegment() |
287 | | * |
288 | | * |
289 | | * \param[in] src string |
290 | | * \param[in] start byte position at start of segment |
291 | | * \param[in] nbytes number of bytes in the segment; use 0 to go to end |
292 | | * \return copy of segment, or NULL on error |
293 | | * |
294 | | * <pre> |
295 | | * Notes: |
296 | | * (1) This is a variant of stringNew() that makes a new string |
297 | | * from a segment of the input string. The segment is specified |
298 | | * by the starting position and the number of bytes. |
299 | | * (2) The start location %start must be within the string %src. |
300 | | * (3) The copy is truncated to the end of the source string. |
301 | | * Use %nbytes = 0 to copy to the end of %src. |
302 | | * </pre> |
303 | | */ |
304 | | char * |
305 | | stringCopySegment(const char *src, |
306 | | l_int32 start, |
307 | | l_int32 nbytes) |
308 | 0 | { |
309 | 0 | char *dest; |
310 | 0 | l_int32 len; |
311 | |
|
312 | 0 | if (!src) |
313 | 0 | return (char *)ERROR_PTR("src not defined", __func__, NULL); |
314 | 0 | len = strlen(src); |
315 | 0 | if (start < 0 || start > len - 1) |
316 | 0 | return (char *)ERROR_PTR("invalid start", __func__, NULL); |
317 | 0 | if (nbytes <= 0) /* copy to the end */ |
318 | 0 | nbytes = len - start; |
319 | 0 | if (start + nbytes > len) /* truncate to the end */ |
320 | 0 | nbytes = len - start; |
321 | 0 | if ((dest = (char *)LEPT_CALLOC(nbytes + 1, sizeof(char))) == NULL) |
322 | 0 | return (char *)ERROR_PTR("dest not made", __func__, NULL); |
323 | 0 | stringCopy(dest, src + start, nbytes); |
324 | 0 | return dest; |
325 | 0 | } |
326 | | |
327 | | |
328 | | /*! |
329 | | * \brief stringReplace() |
330 | | * |
331 | | * \param[out] pdest string copy |
332 | | * \param[in] src [optional] string; can be null |
333 | | * \return 0 if OK; 1 on error |
334 | | * |
335 | | * <pre> |
336 | | * Notes: |
337 | | * (1) Frees any existing dest string |
338 | | * (2) Puts a copy of src string in the dest |
339 | | * (3) If either or both strings are null, does something reasonable. |
340 | | * </pre> |
341 | | */ |
342 | | l_ok |
343 | | stringReplace(char **pdest, |
344 | | const char *src) |
345 | 15.5k | { |
346 | 15.5k | if (!pdest) |
347 | 0 | return ERROR_INT("pdest not defined", __func__, 1); |
348 | | |
349 | 15.5k | if (*pdest) |
350 | 0 | LEPT_FREE(*pdest); |
351 | | |
352 | 15.5k | if (src) |
353 | 0 | *pdest = stringNew(src); |
354 | 15.5k | else |
355 | 15.5k | *pdest = NULL; |
356 | 15.5k | return 0; |
357 | 15.5k | } |
358 | | |
359 | | |
360 | | /*! |
361 | | * \brief stringLength() |
362 | | * |
363 | | * \param[in] src string can be null or NULL-terminated string |
364 | | * \param[in] size number of bytes to check; e.g., size of src buffer |
365 | | * \return length of src in bytes; 0 if no bytes are found; |
366 | | * %size on error when NUL byte is not found. |
367 | | * |
368 | | * <pre> |
369 | | * Notes: |
370 | | * (1) Safe implementation of strlen that only checks %size bytes |
371 | | * for trailing NUL. |
372 | | * (2) Valid returned string lengths are between 0 and size - 1. |
373 | | * If %size bytes are checked without finding a NUL byte, then |
374 | | * an error is indicated by returning %size. |
375 | | * </pre> |
376 | | */ |
377 | | l_int32 |
378 | | stringLength(const char *src, |
379 | | size_t size) |
380 | 0 | { |
381 | 0 | l_int32 i; |
382 | |
|
383 | 0 | if (!src) |
384 | 0 | return 0; |
385 | 0 | if (size < 1) |
386 | 0 | return ERROR_INT("size < 1; too small", __func__, 0); |
387 | | |
388 | 0 | for (i = 0; i < size; i++) { |
389 | 0 | if (src[i] == '\0') |
390 | 0 | return i; |
391 | 0 | } |
392 | | |
393 | | /* Didn't find a NUL byte */ |
394 | 0 | L_ERROR("NUL byte not found in %zu bytes\n", __func__, size); |
395 | 0 | return size; |
396 | 0 | } |
397 | | |
398 | | |
399 | | /*! |
400 | | * \brief stringCat() |
401 | | * |
402 | | * \param[in] dest null-terminated byte buffer |
403 | | * \param[in] size size of dest buffer |
404 | | * \param[in] src string can be null or NULL-terminated string |
405 | | * \return number of bytes added to dest; -1 on error |
406 | | * |
407 | | * <pre> |
408 | | * Notes: |
409 | | * (1) Alternative implementation of strncat, that checks the input, |
410 | | * is easier to use (since the size of the dest buffer is specified |
411 | | * rather than the number of bytes to copy), and does not complain |
412 | | * if %src is null. |
413 | | * (2) Never writes past end of dest. |
414 | | * (3) If there is not enough room to append the src, which is an error, |
415 | | * it does nothing. |
416 | | * (4) N.B. The order of 2nd and 3rd args is reversed from that in |
417 | | * strncat, as in the Windows function strcat_s(). |
418 | | * </pre> |
419 | | */ |
420 | | l_int32 |
421 | | stringCat(char *dest, |
422 | | size_t size, |
423 | | const char *src) |
424 | 0 | { |
425 | 0 | l_int32 i, n; |
426 | 0 | l_int32 lendest, lensrc; |
427 | |
|
428 | 0 | if (!dest) |
429 | 0 | return ERROR_INT("dest not defined", __func__, -1); |
430 | 0 | if (size < 1) |
431 | 0 | return ERROR_INT("size < 1; too small", __func__, -1); |
432 | 0 | if (!src) |
433 | 0 | return 0; |
434 | | |
435 | 0 | lendest = stringLength(dest, size); |
436 | 0 | if (lendest == size) |
437 | 0 | return ERROR_INT("no terminating nul byte", __func__, -1); |
438 | 0 | lensrc = stringLength(src, size); |
439 | 0 | if (lensrc == 0) |
440 | 0 | return 0; /* nothing added to dest */ |
441 | 0 | n = (lendest + lensrc > size - 1) ? 0 : lensrc; |
442 | 0 | if (n == 0) |
443 | 0 | return ERROR_INT("dest too small for append", __func__, -1); |
444 | | |
445 | 0 | for (i = 0; i < n; i++) |
446 | 0 | dest[lendest + i] = src[i]; |
447 | 0 | dest[lendest + n] = '\0'; |
448 | 0 | return n; |
449 | 0 | } |
450 | | |
451 | | |
/*!
 * \brief   stringConcatNew()
 *
 * \param[in]    first   first string in list
 * \param[in]    ...     NULL-terminated list of strings
 * \return  result new string concatenating the input strings, or
 *                 NULL if first == NULL or on allocation failure
 *
 * <pre>
 * Notes:
 *      (1) The last arg in the list of strings must be NULL.
 *      (2) Caller must free the returned string.
 *      (3) The argument list is traversed twice: once to find the
 *          total length and once to copy the characters.
 * </pre>
 */
char *
stringConcatNew(const char  *first, ...)
{
size_t       len;
char        *result, *ptr;
const char  *arg;
va_list      args;

    if (!first) return NULL;

        /* Find the length of the output string */
    va_start(args, first);
    len = strlen(first);
    while ((arg = va_arg(args, const char *)) != NULL)
        len += strlen(arg);
    va_end(args);

        /* Zero-filled, so the result is NUL-terminated after copying */
    if ((result = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("result not made", __func__, NULL);

        /* Concatenate the args */
    va_start(args, first);
    ptr = result;
    arg = first;
    while (*arg)
        *ptr++ = *arg++;
    while ((arg = va_arg(args, const char *)) != NULL) {
        while (*arg)
            *ptr++ = *arg++;
    }
    va_end(args);
    return result;
}
497 | | |
498 | | |
499 | | /*! |
500 | | * \brief stringJoin() |
501 | | * |
502 | | * \param[in] src1 [optional] string; can be null |
503 | | * \param[in] src2 [optional] string; can be null |
504 | | * \return concatenated string, or NULL on error |
505 | | * |
506 | | * <pre> |
507 | | * Notes: |
508 | | * (1) This is a safe version of strcat; it makes a new string. |
509 | | * (2) It is not an error if either or both of the strings |
510 | | * are empty, or if either or both of the pointers are null. |
511 | | * </pre> |
512 | | */ |
513 | | char * |
514 | | stringJoin(const char *src1, |
515 | | const char *src2) |
516 | 0 | { |
517 | 0 | char *dest; |
518 | 0 | l_int32 srclen1, srclen2, destlen; |
519 | |
|
520 | 0 | srclen1 = (src1) ? strlen(src1) : 0; |
521 | 0 | srclen2 = (src2) ? strlen(src2) : 0; |
522 | 0 | destlen = srclen1 + srclen2 + 3; |
523 | |
|
524 | 0 | if ((dest = (char *)LEPT_CALLOC(destlen, sizeof(char))) == NULL) |
525 | 0 | return (char *)ERROR_PTR("calloc fail for dest", __func__, NULL); |
526 | | |
527 | 0 | if (src1) |
528 | 0 | stringCat(dest, destlen, src1); |
529 | 0 | if (src2) |
530 | 0 | stringCat(dest, destlen, src2); |
531 | 0 | return dest; |
532 | 0 | } |
533 | | |
534 | | |
535 | | /*! |
536 | | * \brief stringJoinIP() |
537 | | * |
538 | | * \param[in,out] psrc1 address of string src1; cannot be on the stack |
539 | | * \param[in] src2 [optional] string; can be null |
540 | | * \return 0 if OK, 1 on error |
541 | | * |
542 | | * <pre> |
543 | | * Notes: |
544 | | * (1) This is a safe in-place version of strcat. The contents of |
545 | | * src1 is replaced by the concatenation of src1 and src2. |
546 | | * (2) It is not an error if either or both of the strings |
547 | | * are empty (""), or if the pointers to the strings (*psrc1, src2) |
548 | | * are null. |
549 | | * (3) src1 should be initialized to null or an empty string |
550 | | * before the first call. Use one of these: |
551 | | * char *src1 = NULL; |
552 | | * char *src1 = stringNew(""); |
553 | | * Then call with: |
554 | | * stringJoinIP(&src1, src2); |
555 | | * (4) This can also be implemented as a macro: |
556 | | * \code |
557 | | * #define stringJoinIP(src1, src2) \ |
558 | | * {tmpstr = stringJoin((src1),(src2)); \ |
559 | | * LEPT_FREE(src1); \ |
560 | | * (src1) = tmpstr;} |
561 | | * \endcode |
562 | | * (5) Another function to consider for joining many strings is |
563 | | * stringConcatNew(). |
564 | | * </pre> |
565 | | */ |
566 | | l_ok |
567 | | stringJoinIP(char **psrc1, |
568 | | const char *src2) |
569 | 0 | { |
570 | 0 | char *tmpstr; |
571 | |
|
572 | 0 | if (!psrc1) |
573 | 0 | return ERROR_INT("&src1 not defined", __func__, 1); |
574 | | |
575 | 0 | tmpstr = stringJoin(*psrc1, src2); |
576 | 0 | LEPT_FREE(*psrc1); |
577 | 0 | *psrc1 = tmpstr; |
578 | 0 | return 0; |
579 | 0 | } |
580 | | |
581 | | |
582 | | /*! |
583 | | * \brief stringReverse() |
584 | | * |
585 | | * \param[in] src string |
586 | | * \return dest newly-allocated reversed string |
587 | | */ |
588 | | char * |
589 | | stringReverse(const char *src) |
590 | 0 | { |
591 | 0 | char *dest; |
592 | 0 | l_int32 i, len; |
593 | |
|
594 | 0 | if (!src) |
595 | 0 | return (char *)ERROR_PTR("src not defined", __func__, NULL); |
596 | 0 | len = strlen(src); |
597 | 0 | if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) |
598 | 0 | return (char *)ERROR_PTR("calloc fail for dest", __func__, NULL); |
599 | 0 | for (i = 0; i < len; i++) |
600 | 0 | dest[i] = src[len - 1 - i]; |
601 | |
|
602 | 0 | return dest; |
603 | 0 | } |
604 | | |
605 | | |
606 | | /*! |
607 | | * \brief strtokSafe() |
608 | | * |
609 | | * \param[in] cstr input string to be sequentially parsed; |
610 | | * use NULL after the first call |
611 | | * \param[in] seps a string of character separators |
612 | | * \param[out] psaveptr ptr to the next char after |
613 | | * the last encountered separator |
614 | | * \return substr a new string that is copied from the previous |
615 | | * saveptr up to but not including the next |
616 | | * separator character, or NULL if end of cstr. |
617 | | * |
618 | | * <pre> |
619 | | * Notes: |
620 | | * (1) This is a thread-safe implementation of strtok. |
621 | | * (2) It has the same interface as strtok_r. |
622 | | * (3) It differs from strtok_r in usage in two respects: |
623 | | * (a) the input string is not altered |
624 | | * (b) each returned substring is newly allocated and must |
625 | | * be freed after use. |
626 | | * (4) Let me repeat that. This is "safe" because the input |
627 | | * string is not altered and because each returned string |
628 | | * is newly allocated on the heap. |
629 | | * (5) It is here because, surprisingly, some C libraries don't |
630 | | * include strtok_r. |
631 | | * (6) Important usage points: |
632 | | * ~ Input the string to be parsed on the first invocation. |
633 | | * ~ Then input NULL after that; the value returned in saveptr |
634 | | * is used in all subsequent calls. |
635 | | * (7) This is only slightly slower than strtok_r. |
636 | | * </pre> |
637 | | */ |
638 | | char * |
639 | | strtokSafe(char *cstr, |
640 | | const char *seps, |
641 | | char **psaveptr) |
642 | 2.54k | { |
643 | 2.54k | char nextc; |
644 | 2.54k | char *start, *substr; |
645 | 2.54k | l_int32 istart, i, j, nchars; |
646 | | |
647 | 2.54k | if (!seps) |
648 | 0 | return (char *)ERROR_PTR("seps not defined", __func__, NULL); |
649 | 2.54k | if (!psaveptr) |
650 | 0 | return (char *)ERROR_PTR("&saveptr not defined", __func__, NULL); |
651 | | |
652 | 2.54k | if (!cstr) { |
653 | 1.69k | start = *psaveptr; |
654 | 1.69k | } else { |
655 | 847 | start = cstr; |
656 | 847 | *psaveptr = NULL; |
657 | 847 | } |
658 | 2.54k | if (!start) /* nothing to do */ |
659 | 847 | return NULL; |
660 | | |
661 | | /* First time, scan for the first non-sep character */ |
662 | 1.69k | istart = 0; |
663 | 1.69k | if (cstr) { |
664 | 847 | for (istart = 0;; istart++) { |
665 | 847 | if ((nextc = start[istart]) == '\0') { |
666 | 0 | *psaveptr = NULL; /* in case caller doesn't check ret value */ |
667 | 0 | return NULL; |
668 | 0 | } |
669 | 847 | if (!strchr(seps, nextc)) |
670 | 847 | break; |
671 | 847 | } |
672 | 847 | } |
673 | | |
674 | | /* Scan through, looking for a sep character; if none is |
675 | | * found, 'i' will be at the end of the string. */ |
676 | 10.1k | for (i = istart;; i++) { |
677 | 10.1k | if ((nextc = start[i]) == '\0') |
678 | 847 | break; |
679 | 9.31k | if (strchr(seps, nextc)) |
680 | 847 | break; |
681 | 9.31k | } |
682 | | |
683 | | /* Save the substring */ |
684 | 1.69k | nchars = i - istart; |
685 | 1.69k | substr = (char *)LEPT_CALLOC(nchars + 1, sizeof(char)); |
686 | 1.69k | stringCopy(substr, start + istart, nchars); |
687 | | |
688 | | /* Look for the next non-sep character. |
689 | | * If this is the last substring, return a null saveptr. */ |
690 | 2.54k | for (j = i;; j++) { |
691 | 2.54k | if ((nextc = start[j]) == '\0') { |
692 | 847 | *psaveptr = NULL; /* no more non-sep characters */ |
693 | 847 | break; |
694 | 847 | } |
695 | 1.69k | if (!strchr(seps, nextc)) { |
696 | 847 | *psaveptr = start + j; /* start here on next call */ |
697 | 847 | break; |
698 | 847 | } |
699 | 1.69k | } |
700 | | |
701 | 1.69k | return substr; |
702 | 1.69k | } |
703 | | |
704 | | |
705 | | /*! |
706 | | * \brief stringSplitOnToken() |
707 | | * |
708 | | * \param[in] cstr input string to be split; not altered |
709 | | * \param[in] seps a string of character separators |
710 | | * \param[out] phead ptr to copy of the input string, up to |
711 | | * the first separator token encountered |
712 | | * \param[out] ptail ptr to copy of the part of the input string |
713 | | * starting with the first non-separator character |
714 | | * that occurs after the first separator is found |
715 | | * \return 0 if OK, 1 on error |
716 | | * |
717 | | * <pre> |
718 | | * Notes: |
719 | | * (1) The input string is not altered; all split parts are new strings. |
720 | | * (2) The split occurs around the first consecutive sequence of |
721 | | * tokens encountered. |
722 | | * (3) The head goes from the beginning of the string up to |
723 | | * but not including the first token found. |
724 | | * (4) The tail contains the second part of the string, starting |
725 | | * with the first char in that part that is NOT a token. |
726 | | * (5) If no separator token is found, 'head' contains a copy |
727 | | * of the input string and 'tail' is null. |
728 | | * </pre> |
729 | | */ |
730 | | l_ok |
731 | | stringSplitOnToken(char *cstr, |
732 | | const char *seps, |
733 | | char **phead, |
734 | | char **ptail) |
735 | 0 | { |
736 | 0 | char *saveptr; |
737 | |
|
738 | 0 | if (!phead) |
739 | 0 | return ERROR_INT("&head not defined", __func__, 1); |
740 | 0 | if (!ptail) |
741 | 0 | return ERROR_INT("&tail not defined", __func__, 1); |
742 | 0 | *phead = *ptail = NULL; |
743 | 0 | if (!cstr) |
744 | 0 | return ERROR_INT("cstr not defined", __func__, 1); |
745 | 0 | if (!seps) |
746 | 0 | return ERROR_INT("seps not defined", __func__, 1); |
747 | | |
748 | 0 | *phead = strtokSafe(cstr, seps, &saveptr); |
749 | 0 | if (saveptr) |
750 | 0 | *ptail = stringNew(saveptr); |
751 | 0 | return 0; |
752 | 0 | } |
753 | | |
754 | | |
755 | | /*--------------------------------------------------------------------* |
756 | | * Find and replace procs * |
757 | | *--------------------------------------------------------------------*/ |
758 | | /*! |
759 | | * \brief stringCheckForChars() |
760 | | * |
761 | | * \param[in] src input string; can be of zero length |
762 | | * \param[in] chars string of chars to be searched for in %src |
763 | | * \param[out] pfound 1 if any characters are found; 0 otherwise |
764 | | * \return 0 if OK, 1 on error |
765 | | * |
766 | | * <pre> |
767 | | * Notes: |
768 | | * (1) This can be used to sanitize an operation by checking for |
769 | | * special characters that don't belong in a string. |
770 | | * </pre> |
771 | | */ |
772 | | l_ok |
773 | | stringCheckForChars(const char *src, |
774 | | const char *chars, |
775 | | l_int32 *pfound) |
776 | 0 | { |
777 | 0 | char ch; |
778 | 0 | l_int32 i, n; |
779 | |
|
780 | 0 | if (!pfound) |
781 | 0 | return ERROR_INT("&found not defined", __func__, 1); |
782 | 0 | *pfound = FALSE; |
783 | 0 | if (!src || !chars) |
784 | 0 | return ERROR_INT("src and chars not both defined", __func__, 1); |
785 | | |
786 | 0 | n = strlen(src); |
787 | 0 | for (i = 0; i < n; i++) { |
788 | 0 | ch = src[i]; |
789 | 0 | if (strchr(chars, ch)) { |
790 | 0 | *pfound = TRUE; |
791 | 0 | break; |
792 | 0 | } |
793 | 0 | } |
794 | 0 | return 0; |
795 | 0 | } |
796 | | |
797 | | |
/*!
 * \brief   stringRemoveChars()
 *
 * \param[in]    src        input string; can be of zero length
 * \param[in]    remchars   [optional] string of chars to be removed
 *                          from src; can be null
 * \return  dest string with specified chars removed, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) If %remchars is null, a plain copy of %src is returned.
 *      (2) The result is built in a zero-filled buffer the size of
 *          %src, so it is NUL-terminated however many characters
 *          are removed.
 * </pre>
 */
char *
stringRemoveChars(const char  *src,
                  const char  *remchars)
{
    char        *dest, *out;
    const char  *in;

    if (!src)
        return (char *)ERROR_PTR("src not defined", __func__, NULL);
    if (!remchars)
        return stringNew(src);

    dest = (char *)LEPT_CALLOC(strlen(src) + 1, sizeof(char));
    if (dest == NULL)
        return (char *)ERROR_PTR("dest not made", __func__, NULL);

        /* Keep only the characters that are not in remchars */
    out = dest;
    for (in = src; *in != '\0'; in++) {
        if (strchr(remchars, *in) == NULL)
            *out++ = *in;
    }

    return dest;
}
829 | | |
830 | | |
831 | | /*! |
832 | | * \brief stringReplaceEachSubstr() |
833 | | * |
834 | | * \param[in] src input string; can be of zero length |
835 | | * \param[in] sub1 substring to be replaced |
836 | | * \param[in] sub2 substring to put in; can be "" |
837 | | * \param[out] pcount [optional] the number of times that sub1 |
838 | | * is found in src; 0 if not found |
839 | | * \return dest string with substring replaced, or NULL if the |
840 | | * substring not found or on error. |
841 | | * |
842 | | * <pre> |
843 | | * Notes: |
844 | | * (1) This is a wrapper for simple string substitution that uses |
845 | | * the more general function arrayReplaceEachSequence(). |
846 | | * (2) This finds every non-overlapping occurrence of %sub1 in |
847 | | * %src, and replaces it with %sub2. By "non-overlapping" |
848 | | * we mean that after it finds each match, it removes the |
849 | | * matching characters, replaces with the substitution string |
850 | | * (if not empty), and continues. For example, if you replace |
851 | | * 'aa' by 'X' in 'baaabbb', you find one match at position 1 |
852 | | * and return 'bXabbb'. |
853 | | * (3) To only remove each instance of sub1, use "" for sub2 |
854 | | * (4) Returns a copy of %src if sub1 and sub2 are the same. |
855 | | * (5) If the input %src is binary data that can have null characters, |
856 | | * use arrayReplaceEachSequence() directly. |
857 | | * </pre> |
858 | | */ |
859 | | char * |
860 | | stringReplaceEachSubstr(const char *src, |
861 | | const char *sub1, |
862 | | const char *sub2, |
863 | | l_int32 *pcount) |
864 | 0 | { |
865 | 0 | size_t datalen; |
866 | |
|
867 | 0 | if (pcount) *pcount = 0; |
868 | 0 | if (!src || !sub1 || !sub2) |
869 | 0 | return (char *)ERROR_PTR("src, sub1, sub2 not all defined", |
870 | 0 | __func__, NULL); |
871 | | |
872 | 0 | if (strlen(sub2) > 0) { |
873 | 0 | return (char *)arrayReplaceEachSequence( |
874 | 0 | (const l_uint8 *)src, strlen(src), |
875 | 0 | (const l_uint8 *)sub1, strlen(sub1), |
876 | 0 | (const l_uint8 *)sub2, strlen(sub2), |
877 | 0 | &datalen, pcount); |
878 | 0 | } else { /* empty replacement string; removal only */ |
879 | 0 | return (char *)arrayReplaceEachSequence( |
880 | 0 | (const l_uint8 *)src, strlen(src), |
881 | 0 | (const l_uint8 *)sub1, strlen(sub1), |
882 | 0 | NULL, 0, &datalen, pcount); |
883 | 0 | } |
884 | 0 | } |
885 | | |
886 | | |
887 | | /*! |
888 | | * \brief stringReplaceSubstr() |
889 | | * |
890 | | * \param[in] src input string; can be of zero length |
891 | | * \param[in] sub1 substring to be replaced |
892 | | * \param[in] sub2 substring to put in; can be "" |
893 | | * \param[in,out] ploc [optional] input start location for search; |
894 | | * returns the loc after replacement |
895 | | * \param[out] pfound [optional] 1 if sub1 is found; 0 otherwise |
896 | | * \return dest string with substring replaced, or NULL on error. |
897 | | * |
898 | | * <pre> |
899 | | * Notes: |
900 | | * (1) Replaces the first instance. |
901 | | * (2) To remove sub1 without replacement, use "" for sub2. |
902 | | * (3) Returns a copy of %src if either no instance of %sub1 is found, |
903 | | * or if %sub1 and %sub2 are the same. |
904 | | * (4) If %ploc == NULL, the search will start at the beginning of %src. |
905 | | * If %ploc != NULL, *ploc must be initialized to the byte offset |
906 | | * within %src from which the search starts. To search the |
907 | | * string from the beginning, set %loc = 0 and input &loc. |
908 | | * After finding %sub1 and replacing it with %sub2, %loc will be |
909 | | * returned as the next position after %sub2 in the output string. |
910 | | * (5) Note that the output string also includes all the characters |
911 | | * from the input string that occur after the single substitution. |
912 | | * </pre> |
913 | | */ |
914 | | char * |
915 | | stringReplaceSubstr(const char *src, |
916 | | const char *sub1, |
917 | | const char *sub2, |
918 | | l_int32 *ploc, |
919 | | l_int32 *pfound) |
920 | 0 | { |
921 | 0 | const char *ptr; |
922 | 0 | char *dest; |
923 | 0 | l_int32 nsrc, nsub1, nsub2, len, npre, loc; |
924 | |
|
925 | 0 | if (pfound) *pfound = 0; |
926 | 0 | if (!src || !sub1 || !sub2) |
927 | 0 | return (char *)ERROR_PTR("src, sub1, sub2 not all defined", |
928 | 0 | __func__, NULL); |
929 | | |
930 | 0 | if (ploc) |
931 | 0 | loc = *ploc; |
932 | 0 | else |
933 | 0 | loc = 0; |
934 | 0 | if (!strcmp(sub1, sub2)) |
935 | 0 | return stringNew(src); |
936 | 0 | if ((ptr = strstr(src + loc, sub1)) == NULL) |
937 | 0 | return stringNew(src); |
938 | 0 | if (pfound) *pfound = 1; |
939 | |
|
940 | 0 | nsrc = strlen(src); |
941 | 0 | nsub1 = strlen(sub1); |
942 | 0 | nsub2 = strlen(sub2); |
943 | 0 | len = nsrc + nsub2 - nsub1; |
944 | 0 | if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) |
945 | 0 | return (char *)ERROR_PTR("dest not made", __func__, NULL); |
946 | 0 | npre = ptr - src; |
947 | 0 | memcpy(dest, src, npre); |
948 | 0 | strcpy(dest + npre, sub2); |
949 | 0 | strcpy(dest + npre + nsub2, ptr + nsub1); |
950 | 0 | if (ploc) *ploc = npre + nsub2; |
951 | 0 | return dest; |
952 | 0 | } |
953 | | |
954 | | |
955 | | /*! |
956 | | * \brief stringFindEachSubstr() |
957 | | * |
958 | | * \param[in] src input string; can be of zero length |
959 | | * \param[in] sub substring to be searched for |
960 | | * \return dna of offsets where the sequence is found, or NULL if |
961 | | * none are found or on error |
962 | | * |
963 | | * <pre> |
964 | | * Notes: |
965 | | * (1) This finds every non-overlapping occurrence in %src of %sub. |
966 | | * After it finds each match, it moves forward in %src by the length |
967 | | * of %sub before continuing the search. So for example, |
968 | | * if you search for the sequence 'aa' in the data 'baaabbb', |
969 | | * you find one match at position 1. |
970 | | |
971 | | * </pre> |
972 | | */ |
973 | | L_DNA * |
974 | | stringFindEachSubstr(const char *src, |
975 | | const char *sub) |
976 | 0 | { |
977 | 0 | if (!src || !sub) |
978 | 0 | return (L_DNA *)ERROR_PTR("src, sub not both defined", __func__, NULL); |
979 | | |
980 | 0 | return arrayFindEachSequence((const l_uint8 *)src, strlen(src), |
981 | 0 | (const l_uint8 *)sub, strlen(sub)); |
982 | 0 | } |
983 | | |
984 | | |
985 | | /*! |
986 | | * \brief stringFindSubstr() |
987 | | * |
988 | | * \param[in] src input string; can be of zero length |
989 | | * \param[in] sub substring to be searched for; must not be empty |
990 | | * \param[out] ploc [optional] location of substring in src |
991 | | * \return 1 if found; 0 if not found or on error |
992 | | * |
993 | | * <pre> |
994 | | * Notes: |
995 | | * (1) This is a wrapper around strstr(). It finds the first |
996 | | * instance of %sub in %src. If the substring is not found |
997 | | * and the location is returned, it has the value -1. |
998 | | * (2) Both %src and %sub must be defined, and %sub must have |
999 | | * length of at least 1. |
1000 | | * </pre> |
1001 | | */ |
1002 | | l_int32 |
1003 | | stringFindSubstr(const char *src, |
1004 | | const char *sub, |
1005 | | l_int32 *ploc) |
1006 | 0 | { |
1007 | 0 | const char *ptr; |
1008 | |
|
1009 | 0 | if (ploc) *ploc = -1; |
1010 | 0 | if (!src || !sub) |
1011 | 0 | return ERROR_INT("src and sub not both defined", __func__, 0); |
1012 | 0 | if (strlen(sub) == 0) |
1013 | 0 | return ERROR_INT("substring length 0", __func__, 0); |
1014 | 0 | if (strlen(src) == 0) |
1015 | 0 | return 0; |
1016 | | |
1017 | 0 | if ((ptr = strstr(src, sub)) == NULL) /* not found */ |
1018 | 0 | return 0; |
1019 | | |
1020 | 0 | if (ploc) |
1021 | 0 | *ploc = ptr - src; |
1022 | 0 | return 1; |
1023 | 0 | } |
1024 | | |
1025 | | |
1026 | | /*! |
1027 | | * \brief arrayReplaceEachSequence() |
1028 | | * |
1029 | | * \param[in] datas source byte array |
1030 | | * \param[in] dataslen length of source data, in bytes |
1031 | | * \param[in] seq subarray of bytes to find in source data |
1032 | | * \param[in] seqlen length of subarray, in bytes |
1033 | | * \param[in] newseq replacement subarray; can be null |
1034 | | * \param[in] newseqlen length of replacement subarray, in bytes |
1035 | | * \param[out] pdatadlen length of dest byte array, in bytes |
1036 | | * \param[out] pcount [optional] the number of times that sub1 |
1037 | | * is found in src; 0 if not found |
1038 | | * \return datad with all all subarrays replaced (or removed) |
1039 | | * |
1040 | | * <pre> |
1041 | | * Notes: |
1042 | | * (1) The byte arrays %datas, %seq and %newseq are not C strings, |
1043 | | * because they can contain null bytes. Therefore, for each |
1044 | | * we must give the length of the array. |
1045 | | * (2) If %newseq == NULL, this just removes all instances of %seq. |
1046 | | * Otherwise, it replaces every non-overlapping occurrence of |
1047 | | * %seq in %datas with %newseq. A new array %datad and its |
1048 | | * size are returned. See arrayFindEachSequence() for more |
1049 | | * details on finding non-overlapping occurrences. |
1050 | | * (3) If no instances of %seq are found, this returns a copy of %datas. |
1051 | | * (4) The returned %datad is null terminated. |
1052 | | * (5) Can use stringReplaceEachSubstr() if using C strings. |
1053 | | * </pre> |
1054 | | */ |
1055 | | l_uint8 * |
1056 | | arrayReplaceEachSequence(const l_uint8 *datas, |
1057 | | size_t dataslen, |
1058 | | const l_uint8 *seq, |
1059 | | size_t seqlen, |
1060 | | const l_uint8 *newseq, |
1061 | | size_t newseqlen, |
1062 | | size_t *pdatadlen, |
1063 | | l_int32 *pcount) |
1064 | 0 | { |
1065 | 0 | l_uint8 *datad; |
1066 | 0 | size_t newsize; |
1067 | 0 | l_int32 n, i, j, di, si, index, incr; |
1068 | 0 | L_DNA *da; |
1069 | |
|
1070 | 0 | if (pcount) *pcount = 0; |
1071 | 0 | if (!datas || !seq) |
1072 | 0 | return (l_uint8 *)ERROR_PTR("datas & seq not both defined", |
1073 | 0 | __func__, NULL); |
1074 | 0 | if (!pdatadlen) |
1075 | 0 | return (l_uint8 *)ERROR_PTR("&datadlen not defined", __func__, NULL); |
1076 | 0 | *pdatadlen = 0; |
1077 | | |
1078 | | /* Identify the locations of the sequence. If there are none, |
1079 | | * return a copy of %datas. */ |
1080 | 0 | if ((da = arrayFindEachSequence(datas, dataslen, seq, seqlen)) == NULL) { |
1081 | 0 | *pdatadlen = dataslen; |
1082 | 0 | return l_binaryCopy(datas, dataslen); |
1083 | 0 | } |
1084 | | |
1085 | | /* Allocate the output data; insure null termination */ |
1086 | 0 | n = l_dnaGetCount(da); |
1087 | 0 | if (pcount) *pcount = n; |
1088 | 0 | if (!newseq) newseqlen = 0; |
1089 | 0 | newsize = dataslen + n * (newseqlen - seqlen) + 4; |
1090 | 0 | if ((datad = (l_uint8 *)LEPT_CALLOC(newsize, sizeof(l_uint8))) == NULL) { |
1091 | 0 | l_dnaDestroy(&da); |
1092 | 0 | return (l_uint8 *)ERROR_PTR("datad not made", __func__, NULL); |
1093 | 0 | } |
1094 | | |
1095 | | /* Replace each sequence instance with a new sequence */ |
1096 | 0 | l_dnaGetIValue(da, 0, &si); |
1097 | 0 | for (i = 0, di = 0, index = 0; i < dataslen; i++) { |
1098 | 0 | if (i == si) { |
1099 | 0 | index++; |
1100 | 0 | if (index < n) { |
1101 | 0 | l_dnaGetIValue(da, index, &si); |
1102 | 0 | incr = L_MIN(seqlen, si - i); /* amount to remove from datas */ |
1103 | 0 | } else { |
1104 | 0 | incr = seqlen; |
1105 | 0 | } |
1106 | 0 | i += incr - 1; /* jump over the matched sequence in datas */ |
1107 | 0 | if (newseq) { /* add new sequence to datad */ |
1108 | 0 | for (j = 0; j < newseqlen; j++) |
1109 | 0 | datad[di++] = newseq[j]; |
1110 | 0 | } |
1111 | 0 | } else { |
1112 | 0 | datad[di++] = datas[i]; |
1113 | 0 | } |
1114 | 0 | } |
1115 | |
|
1116 | 0 | *pdatadlen = di; |
1117 | 0 | l_dnaDestroy(&da); |
1118 | 0 | return datad; |
1119 | 0 | } |
1120 | | |
1121 | | |
1122 | | /*! |
1123 | | * \brief arrayFindEachSequence() |
1124 | | * |
1125 | | * \param[in] data byte array |
1126 | | * \param[in] datalen length of data, in bytes |
1127 | | * \param[in] sequence subarray of bytes to find in data |
1128 | | * \param[in] seqlen length of sequence, in bytes |
1129 | | * \return dna of offsets where the sequence is found, or NULL if |
1130 | | * none are found or on error |
1131 | | * |
1132 | | * <pre> |
1133 | | * Notes: |
1134 | | * (1) The byte arrays %data and %sequence are not C strings, |
1135 | | * because they can contain null bytes. Therefore, for each |
1136 | | * we must give the length of the array. |
1137 | | * (2) This finds every non-overlapping occurrence in %data of %sequence. |
1138 | | * After it finds each match, it moves forward by the length |
1139 | | * of the sequence before continuing the search. So for example, |
1140 | | * if you search for the sequence 'aa' in the data 'baaabbb', |
1141 | | * you find one match at position 1. |
1142 | | * </pre> |
1143 | | */ |
1144 | | L_DNA * |
1145 | | arrayFindEachSequence(const l_uint8 *data, |
1146 | | size_t datalen, |
1147 | | const l_uint8 *sequence, |
1148 | | size_t seqlen) |
1149 | 0 | { |
1150 | 0 | l_int32 start, offset, realoffset, found; |
1151 | 0 | L_DNA *da; |
1152 | |
|
1153 | 0 | if (!data || !sequence) |
1154 | 0 | return (L_DNA *)ERROR_PTR("data & sequence not both defined", |
1155 | 0 | __func__, NULL); |
1156 | | |
1157 | 0 | da = l_dnaCreate(0); |
1158 | 0 | start = 0; |
1159 | 0 | while (1) { |
1160 | 0 | arrayFindSequence(data + start, datalen - start, sequence, seqlen, |
1161 | 0 | &offset, &found); |
1162 | 0 | if (found == FALSE) |
1163 | 0 | break; |
1164 | | |
1165 | 0 | realoffset = start + offset; |
1166 | 0 | l_dnaAddNumber(da, realoffset); |
1167 | 0 | start = realoffset + seqlen; |
1168 | 0 | if (start >= datalen) |
1169 | 0 | break; |
1170 | 0 | } |
1171 | |
|
1172 | 0 | if (l_dnaGetCount(da) == 0) |
1173 | 0 | l_dnaDestroy(&da); |
1174 | 0 | return da; |
1175 | 0 | } |
1176 | | |
1177 | | |
1178 | | /*! |
1179 | | * \brief arrayFindSequence() |
1180 | | * |
1181 | | * \param[in] data byte array |
1182 | | * \param[in] datalen length of data, in bytes |
1183 | | * \param[in] sequence subarray of bytes to find in data |
1184 | | * \param[in] seqlen length of sequence, in bytes |
1185 | | * \param[out] poffset offset from beginning of |
1186 | | * data where the sequence begins |
1187 | | * \param[out] pfound 1 if sequence is found; 0 otherwise |
1188 | | * \return 0 if OK, 1 on error |
1189 | | * |
1190 | | * <pre> |
1191 | | * Notes: |
1192 | | * (1) The byte arrays 'data' and 'sequence' are in general not C strings, |
1193 | | * because they can contain null bytes. Therefore, for each |
1194 | | * we must give the length of the array. |
1195 | | * (2) This searches for the first occurrence in %data of %sequence, |
1196 | | * which consists of %seqlen bytes. The parameter %seqlen |
1197 | | * must not exceed the actual length of the %sequence byte array. |
1198 | | * (3) If either byte array is a C string, cast the array to |
1199 | | * (const l_uint8 *) and use strlen() on the string for its length. |
1200 | | * (4) If the sequence is not found, the offset will be 0, so you |
1201 | | * must check %found. |
1202 | | * </pre> |
1203 | | */ |
1204 | | l_ok |
1205 | | arrayFindSequence(const l_uint8 *data, |
1206 | | size_t datalen, |
1207 | | const l_uint8 *sequence, |
1208 | | size_t seqlen, |
1209 | | l_int32 *poffset, |
1210 | | l_int32 *pfound) |
1211 | 0 | { |
1212 | 0 | l_int32 i, j, found, lastpos; |
1213 | |
|
1214 | 0 | if (poffset) *poffset = 0; |
1215 | 0 | if (pfound) *pfound = FALSE; |
1216 | 0 | if (!data || !sequence) |
1217 | 0 | return ERROR_INT("data & sequence not both defined", __func__, 1); |
1218 | 0 | if (!poffset || !pfound) |
1219 | 0 | return ERROR_INT("&offset and &found not defined", __func__, 1); |
1220 | | |
1221 | 0 | lastpos = datalen - seqlen + 1; |
1222 | 0 | found = FALSE; |
1223 | 0 | for (i = 0; i < lastpos; i++) { |
1224 | 0 | for (j = 0; j < seqlen; j++) { |
1225 | 0 | if (data[i + j] != sequence[j]) |
1226 | 0 | break; |
1227 | 0 | if (j == seqlen - 1) |
1228 | 0 | found = TRUE; |
1229 | 0 | } |
1230 | 0 | if (found == TRUE) |
1231 | 0 | break; |
1232 | 0 | } |
1233 | |
|
1234 | 0 | if (found == TRUE) { |
1235 | 0 | *poffset = i; |
1236 | 0 | *pfound = TRUE; |
1237 | 0 | } |
1238 | 0 | return 0; |
1239 | 0 | } |
1240 | | |
1241 | | |
1242 | | /*--------------------------------------------------------------------* |
1243 | | * Safe realloc * |
1244 | | *--------------------------------------------------------------------*/ |
/*!
 * \brief   reallocNew()
 *
 * \param[in,out]  pindata    nulls indata before reallocing
 * \param[in]      oldsize    size of input data to be copied, in bytes
 * \param[in]      newsize    size of buffer to be reallocated in bytes
 * \return  ptr to new data, or NULL on error
 *
 *  Action: !N.B. (3) and (4)!
 *      (1) Allocates memory, initialized to 0
 *      (2) Copies as much of the input data as possible
 *          to the new block, truncating the copy if necessary
 *      (3) Frees the input data
 *      (4) Zeroes the input data ptr
 *
 * <pre>
 * Notes:
 *      (1) If newsize == 0, frees input data and nulls ptr;
 *          NULL is returned
 *      (2) If input data is null, only callocs new memory
 *      (3) This differs from realloc in that it always allocates
 *          new memory (if newsize > 0) and initializes it to 0,
 *          it requires the amount of old data to be copied,
 *          and it takes the address of the input ptr and
 *          nulls the handle.
 *      (4) If the new allocation fails, the input data is neither
 *          freed nor is its handle nulled.
 * </pre>
 */
void *
reallocNew(void   **pindata,
           size_t   oldsize,
           size_t   newsize)
{
void    *olddata, *newdata;
size_t   ncopy;

    if (!pindata)
        return ERROR_PTR("input data not defined", __func__, NULL);
    olddata = *pindata;

        /* Nonstandard usage: a request for zero bytes just releases
         * the input */
    if (newsize == 0) {
        if (olddata) {
            LEPT_FREE(olddata);
            *pindata = NULL;
        }
        return NULL;
    }

        /* The new block is needed whether or not there is old data */
    newdata = (void *)LEPT_CALLOC(1, newsize);
    if (!newdata)
        return ERROR_PTR("newdata not made", __func__, NULL);

        /* Standard usage: carry over what fits, then release the old
         * block and null the caller's handle.  With no old data
         * (nonstandard usage) the zeroed block is returned as is. */
    if (olddata) {
        ncopy = (oldsize < newsize) ? oldsize : newsize;
        memcpy(newdata, olddata, ncopy);
        LEPT_FREE(olddata);
        *pindata = NULL;
    }
    return newdata;
}
1307 | | |
1308 | | |
1309 | | /*--------------------------------------------------------------------* |
1310 | | * Read and write between file and memory * |
1311 | | *--------------------------------------------------------------------*/ |
1312 | | /*! |
1313 | | * \brief l_binaryRead() |
1314 | | * |
1315 | | * \param[in] filename |
1316 | | * \param[out] pnbytes number of bytes read |
1317 | | * \return data, or NULL on error |
1318 | | */ |
1319 | | l_uint8 * |
1320 | | l_binaryRead(const char *filename, |
1321 | | size_t *pnbytes) |
1322 | 0 | { |
1323 | 0 | l_uint8 *data; |
1324 | 0 | FILE *fp; |
1325 | |
|
1326 | 0 | if (!pnbytes) |
1327 | 0 | return (l_uint8 *)ERROR_PTR("pnbytes not defined", __func__, NULL); |
1328 | 0 | *pnbytes = 0; |
1329 | 0 | if (!filename) |
1330 | 0 | return (l_uint8 *)ERROR_PTR("filename not defined", __func__, NULL); |
1331 | | |
1332 | 0 | if ((fp = fopenReadStream(filename)) == NULL) |
1333 | 0 | return (l_uint8 *)ERROR_PTR_1("file stream not opened", |
1334 | 0 | filename, __func__, NULL); |
1335 | 0 | data = l_binaryReadStream(fp, pnbytes); |
1336 | 0 | fclose(fp); |
1337 | 0 | return data; |
1338 | 0 | } |
1339 | | |
1340 | | |
1341 | | /*! |
1342 | | * \brief l_binaryReadStream() |
1343 | | * |
1344 | | * \param[in] fp file stream opened to read; can be stdin |
1345 | | * \param[out] pnbytes number of bytes read |
1346 | | * \return null-terminated array, or NULL on error; reading 0 bytes |
1347 | | * is not an error |
1348 | | * |
1349 | | * <pre> |
1350 | | * Notes: |
1351 | | * (1) The returned array is terminated with a null byte so that it can |
1352 | | * be used to read ascii data from a file into a proper C string. |
1353 | | * (2) This can be used to capture data that is piped in via stdin, |
1354 | | * because it does not require seeking within the file. |
1355 | | * (3) For example, you can read an image from stdin into memory |
1356 | | * using shell redirection, with one of these shell commands: |
1357 | | * \code |
1358 | | * cat <imagefile> | readprog |
1359 | | * readprog < <imagefile> |
1360 | | * \endcode |
1361 | | * where readprog is: |
1362 | | * \code |
1363 | | * l_uint8 *data = l_binaryReadStream(stdin, &nbytes); |
1364 | | * Pix *pix = pixReadMem(data, nbytes); |
1365 | | * \endcode |
1366 | | * </pre> |
1367 | | */ |
1368 | | l_uint8 * |
1369 | | l_binaryReadStream(FILE *fp, |
1370 | | size_t *pnbytes) |
1371 | 0 | { |
1372 | 0 | l_uint8 *data; |
1373 | 0 | l_int32 seekable, navail, nadd, nread; |
1374 | 0 | L_BBUFFER *bb; |
1375 | |
|
1376 | 0 | if (!pnbytes) |
1377 | 0 | return (l_uint8 *)ERROR_PTR("&nbytes not defined", __func__, NULL); |
1378 | 0 | *pnbytes = 0; |
1379 | 0 | if (!fp) |
1380 | 0 | return (l_uint8 *)ERROR_PTR("fp not defined", __func__, NULL); |
1381 | | |
1382 | | /* Test if the stream is seekable, by attempting to seek to |
1383 | | * the start of data. This is a no-op. If it is seekable, use |
1384 | | * l_binaryReadSelectStream() to determine the size of the |
1385 | | * data to be read in advance. */ |
1386 | 0 | seekable = (ftell(fp) == 0) ? 1 : 0; |
1387 | 0 | if (seekable) |
1388 | 0 | return l_binaryReadSelectStream(fp, 0, 0, pnbytes); |
1389 | | |
1390 | | /* If it is not seekable, use the bbuffer to realloc memory |
1391 | | * as needed during reading. */ |
1392 | 0 | bb = bbufferCreate(NULL, 4096); |
1393 | 0 | while (1) { |
1394 | 0 | navail = bb->nalloc - bb->n; |
1395 | 0 | if (navail < 4096) { |
1396 | 0 | nadd = L_MAX(bb->nalloc, 4096); |
1397 | 0 | bbufferExtendArray(bb, nadd); |
1398 | 0 | } |
1399 | 0 | nread = fread((void *)(bb->array + bb->n), 1, 4096, fp); |
1400 | 0 | bb->n += nread; |
1401 | 0 | if (nread != 4096) break; |
1402 | 0 | } |
1403 | | |
1404 | | /* Copy the data to a new array sized for the data, because |
1405 | | * the bbuffer array can be nearly twice the size we need. */ |
1406 | 0 | if ((data = (l_uint8 *)LEPT_CALLOC(bb->n + 1, sizeof(l_uint8))) != NULL) { |
1407 | 0 | memcpy(data, bb->array, bb->n); |
1408 | 0 | *pnbytes = bb->n; |
1409 | 0 | } else { |
1410 | 0 | L_ERROR("calloc fail for data\n", __func__); |
1411 | 0 | } |
1412 | |
|
1413 | 0 | bbufferDestroy(&bb); |
1414 | 0 | return data; |
1415 | 0 | } |
1416 | | |
1417 | | |
1418 | | /*! |
1419 | | * \brief l_binaryReadSelect() |
1420 | | * |
1421 | | * \param[in] filename |
1422 | | * \param[in] start first byte to read |
1423 | | * \param[in] nbytes number of bytes to read; use 0 to read to end of file |
1424 | | * \param[out] pnread number of bytes actually read |
1425 | | * \return data, or NULL on error |
1426 | | * |
1427 | | * <pre> |
1428 | | * Notes: |
1429 | | * (1) The returned array is terminated with a null byte so that it can |
1430 | | * be used to read ascii data from a file into a proper C string. |
1431 | | * </pre> |
1432 | | */ |
1433 | | l_uint8 * |
1434 | | l_binaryReadSelect(const char *filename, |
1435 | | size_t start, |
1436 | | size_t nbytes, |
1437 | | size_t *pnread) |
1438 | 0 | { |
1439 | 0 | l_uint8 *data; |
1440 | 0 | FILE *fp; |
1441 | |
|
1442 | 0 | if (!pnread) |
1443 | 0 | return (l_uint8 *)ERROR_PTR("pnread not defined", __func__, NULL); |
1444 | 0 | *pnread = 0; |
1445 | 0 | if (!filename) |
1446 | 0 | return (l_uint8 *)ERROR_PTR("filename not defined", __func__, NULL); |
1447 | | |
1448 | 0 | if ((fp = fopenReadStream(filename)) == NULL) |
1449 | 0 | return (l_uint8 *)ERROR_PTR_1("file stream not opened", |
1450 | 0 | filename, __func__, NULL); |
1451 | 0 | data = l_binaryReadSelectStream(fp, start, nbytes, pnread); |
1452 | 0 | fclose(fp); |
1453 | 0 | return data; |
1454 | 0 | } |
1455 | | |
1456 | | |
1457 | | /*! |
1458 | | * \brief l_binaryReadSelectStream() |
1459 | | * |
1460 | | * \param[in] fp file stream |
1461 | | * \param[in] start first byte to read |
1462 | | * \param[in] nbytes number of bytes to read; use 0 to read to end of file |
1463 | | * \param[out] pnread number of bytes actually read |
1464 | | * \return null-terminated array, or NULL on error; reading 0 bytes |
1465 | | * is not an error |
1466 | | * |
1467 | | * <pre> |
1468 | | * Notes: |
1469 | | * (1) The returned array is terminated with a null byte so that it can |
1470 | | * be used to read ascii data from a file into a proper C string. |
1471 | | * If the file to be read is empty and %start == 0, an array |
1472 | | * with a single null byte is returned. |
1473 | | * (2) Side effect: the stream pointer is re-positioned to the |
1474 | | * beginning of the file. |
1475 | | * </pre> |
1476 | | */ |
1477 | | l_uint8 * |
1478 | | l_binaryReadSelectStream(FILE *fp, |
1479 | | size_t start, |
1480 | | size_t nbytes, |
1481 | | size_t *pnread) |
1482 | 0 | { |
1483 | 0 | l_uint8 *data; |
1484 | 0 | size_t bytesleft, bytestoread, nread, filebytes; |
1485 | |
|
1486 | 0 | if (!pnread) |
1487 | 0 | return (l_uint8 *)ERROR_PTR("&nread not defined", __func__, NULL); |
1488 | 0 | *pnread = 0; |
1489 | 0 | if (!fp) |
1490 | 0 | return (l_uint8 *)ERROR_PTR("stream not defined", __func__, NULL); |
1491 | | |
1492 | | /* Verify and adjust the parameters if necessary */ |
1493 | 0 | fseek(fp, 0, SEEK_END); /* EOF */ |
1494 | 0 | filebytes = ftell(fp); |
1495 | 0 | fseek(fp, 0, SEEK_SET); |
1496 | 0 | if (start > filebytes) { |
1497 | 0 | L_ERROR("start = %zu but filebytes = %zu\n", __func__, |
1498 | 0 | start, filebytes); |
1499 | 0 | return NULL; |
1500 | 0 | } |
1501 | 0 | if (filebytes == 0) /* start == 0; nothing to read; return null byte */ |
1502 | 0 | return (l_uint8 *)LEPT_CALLOC(1, 1); |
1503 | 0 | bytesleft = filebytes - start; /* greater than 0 */ |
1504 | 0 | if (nbytes == 0) nbytes = bytesleft; |
1505 | 0 | bytestoread = (bytesleft >= nbytes) ? nbytes : bytesleft; |
1506 | | |
1507 | | /* Read the data */ |
1508 | 0 | if ((data = (l_uint8 *)LEPT_CALLOC(1, bytestoread + 1)) == NULL) |
1509 | 0 | return (l_uint8 *)ERROR_PTR("calloc fail for data", __func__, NULL); |
1510 | 0 | fseek(fp, start, SEEK_SET); |
1511 | 0 | nread = fread(data, 1, bytestoread, fp); |
1512 | 0 | if (nbytes != nread) |
1513 | 0 | L_INFO("%zu bytes requested; %zu bytes read\n", __func__, |
1514 | 0 | nbytes, nread); |
1515 | 0 | *pnread = nread; |
1516 | 0 | fseek(fp, 0, SEEK_SET); |
1517 | 0 | return data; |
1518 | 0 | } |
1519 | | |
1520 | | |
1521 | | /*! |
1522 | | * \brief l_binaryWrite() |
1523 | | * |
1524 | | * \param[in] filename output file |
1525 | | * \param[in] operation "w" for write; "a" for append |
1526 | | * \param[in] data binary data to be written |
1527 | | * \param[in] nbytes size of data array |
1528 | | * \return 0 if OK; 1 on error |
1529 | | */ |
1530 | | l_ok |
1531 | | l_binaryWrite(const char *filename, |
1532 | | const char *operation, |
1533 | | const void *data, |
1534 | | size_t nbytes) |
1535 | 0 | { |
1536 | 0 | char actualOperation[20]; |
1537 | 0 | FILE *fp; |
1538 | |
|
1539 | 0 | if (!filename) |
1540 | 0 | return ERROR_INT("filename not defined", __func__, 1); |
1541 | 0 | if (!operation) |
1542 | 0 | return ERROR_INT("operation not defined", __func__, 1); |
1543 | 0 | if (!data) |
1544 | 0 | return ERROR_INT("data not defined", __func__, 1); |
1545 | 0 | if (nbytes <= 0) |
1546 | 0 | return ERROR_INT("nbytes must be > 0", __func__, 1); |
1547 | | |
1548 | 0 | if (strcmp(operation, "w") && strcmp(operation, "a")) |
1549 | 0 | return ERROR_INT("operation not one of {'w','a'}", __func__, 1); |
1550 | | |
1551 | | /* The 'b' flag to fopen() is ignored for all POSIX |
1552 | | * conforming systems. However, Windows needs the 'b' flag. */ |
1553 | 0 | stringCopy(actualOperation, operation, 2); |
1554 | 0 | stringCat(actualOperation, 20, "b"); |
1555 | |
|
1556 | 0 | if ((fp = fopenWriteStream(filename, actualOperation)) == NULL) |
1557 | 0 | return ERROR_INT_1("stream not opened", filename, __func__, 1); |
1558 | 0 | fwrite(data, 1, nbytes, fp); |
1559 | 0 | fclose(fp); |
1560 | 0 | return 0; |
1561 | 0 | } |
1562 | | |
1563 | | |
/*!
 * \brief   nbytesInFile()
 *
 * \param[in]    filename
 * \return  nbytes in file; 0 on error
 *
 * <pre>
 * Notes:
 *      (1) This opens %filename for reading, gets the size from
 *          fnbytesInFile(), and closes the stream.
 * </pre>
 */
size_t
nbytesInFile(const char  *filename)
{
FILE    *fp;
size_t   filesize;

    if (!filename)
        return ERROR_INT("filename not defined", __func__, 0);

    fp = fopenReadStream(filename);
    if (fp == NULL)
        return ERROR_INT_1("stream not opened", filename, __func__, 0);

    filesize = fnbytesInFile(fp);
    fclose(fp);
    return filesize;
}
1584 | | |
1585 | | |
1586 | | /*! |
1587 | | * \brief fnbytesInFile() |
1588 | | * |
1589 | | * \param[in] fp file stream |
1590 | | * \return nbytes in file; 0 on error |
1591 | | */ |
1592 | | size_t |
1593 | | fnbytesInFile(FILE *fp) |
1594 | 0 | { |
1595 | 0 | l_int64 pos, nbytes; |
1596 | |
|
1597 | 0 | if (!fp) |
1598 | 0 | return ERROR_INT("stream not open", __func__, 0); |
1599 | | |
1600 | 0 | pos = ftell(fp); /* initial position */ |
1601 | 0 | if (pos < 0) |
1602 | 0 | return ERROR_INT("seek position must be > 0", __func__, 0); |
1603 | 0 | fseek(fp, 0, SEEK_END); /* EOF */ |
1604 | 0 | nbytes = ftell(fp); |
1605 | 0 | if (nbytes < 0) |
1606 | 0 | return ERROR_INT("nbytes is < 0", __func__, 0); |
1607 | 0 | fseek(fp, pos, SEEK_SET); /* back to initial position */ |
1608 | 0 | return nbytes; |
1609 | 0 | } |
1610 | | |
1611 | | |
1612 | | /*--------------------------------------------------------------------* |
1613 | | * Copy and compare in memory * |
1614 | | *--------------------------------------------------------------------*/ |
1615 | | /*! |
1616 | | * \brief l_binaryCopy() |
1617 | | * |
1618 | | * \param[in] datas |
1619 | | * \param[in] size of data array |
1620 | | * \return datad on heap, or NULL on error |
1621 | | * |
1622 | | * <pre> |
1623 | | * Notes: |
1624 | | * (1) We add 4 bytes to the zeroed output because in some cases |
1625 | | * (e.g., string handling) it is important to have the data |
1626 | | * be null terminated. This guarantees that after the memcpy, |
1627 | | * the result is automatically null terminated. |
1628 | | * </pre> |
1629 | | */ |
1630 | | l_uint8 * |
1631 | | l_binaryCopy(const l_uint8 *datas, |
1632 | | size_t size) |
1633 | 0 | { |
1634 | 0 | l_uint8 *datad; |
1635 | |
|
1636 | 0 | if (!datas) |
1637 | 0 | return (l_uint8 *)ERROR_PTR("datas not defined", __func__, NULL); |
1638 | | |
1639 | 0 | if ((datad = (l_uint8 *)LEPT_CALLOC(size + 4, sizeof(l_uint8))) == NULL) |
1640 | 0 | return (l_uint8 *)ERROR_PTR("datad not made", __func__, NULL); |
1641 | 0 | memcpy(datad, datas, size); |
1642 | 0 | return datad; |
1643 | 0 | } |
1644 | | |
1645 | | |
1646 | | /*! |
1647 | | * \brief l_binaryCompare() |
1648 | | * |
1649 | | * \param[in] data1 |
1650 | | * \param[in] size1 of data1 |
1651 | | * \param[in] data2 |
1652 | | * \param[in] size2 of data1 |
1653 | | * \param[out] psame (1 if the same, 0 if different) |
1654 | | * \return 0 if OK, 1 on error |
1655 | | * |
1656 | | * <pre> |
1657 | | * Notes: |
1658 | | * (1) This can also be used to compare C strings str1 and str2. |
1659 | | * If the string lengths are not known, use strlen(): |
1660 | | * l_binaryCompare((l_uint8 *)str1, strlen(str1), |
1661 | | (l_uint8 *)str2, strlen(str2)); |
1662 | | * </pre> |
1663 | | */ |
1664 | | l_ok |
1665 | | l_binaryCompare(const l_uint8 *data1, |
1666 | | size_t size1, |
1667 | | const l_uint8 *data2, |
1668 | | size_t size2, |
1669 | | l_int32 *psame) |
1670 | 0 | { |
1671 | 0 | l_int32 i; |
1672 | |
|
1673 | 0 | if (!psame) |
1674 | 0 | return ERROR_INT("&same not defined", __func__, 1); |
1675 | 0 | *psame = FALSE; |
1676 | 0 | if (!data1 || !data2) |
1677 | 0 | return ERROR_INT("data1 and data2 not both defined", __func__, 1); |
1678 | 0 | if (size1 != size2) return 0; |
1679 | 0 | for (i = 0; i < size1; i++) { |
1680 | 0 | if (data1[i] != data2[i]) |
1681 | 0 | return 0; |
1682 | 0 | } |
1683 | 0 | *psame = TRUE; |
1684 | 0 | return 0; |
1685 | 0 | } |
1686 | | |
1687 | | |
1688 | | /*--------------------------------------------------------------------* |
1689 | | * File copy operations * |
1690 | | *--------------------------------------------------------------------*/ |
1691 | | /*! |
1692 | | * \brief fileCopy() |
1693 | | * |
1694 | | * \param[in] srcfile copy from this file |
1695 | | * \param[in] newfile copy to this file |
1696 | | * \return 0 if OK, 1 on error |
1697 | | */ |
1698 | | l_ok |
1699 | | fileCopy(const char *srcfile, |
1700 | | const char *newfile) |
1701 | 0 | { |
1702 | 0 | l_int32 ret; |
1703 | 0 | size_t nbytes; |
1704 | 0 | l_uint8 *data; |
1705 | |
|
1706 | 0 | if (!srcfile) |
1707 | 0 | return ERROR_INT("srcfile not defined", __func__, 1); |
1708 | 0 | if (!newfile) |
1709 | 0 | return ERROR_INT("newfile not defined", __func__, 1); |
1710 | | |
1711 | 0 | if ((data = l_binaryRead(srcfile, &nbytes)) == NULL) |
1712 | 0 | return ERROR_INT("data not returned", __func__, 1); |
1713 | 0 | ret = l_binaryWrite(newfile, "w", data, nbytes); |
1714 | 0 | LEPT_FREE(data); |
1715 | 0 | return ret; |
1716 | 0 | } |
1717 | | |
1718 | | |
1719 | | /*! |
1720 | | * \brief fileConcatenate() |
1721 | | * |
1722 | | * \param[in] srcfile append data from this file |
1723 | | * \param[in] destfile add data to this file |
1724 | | * \return 0 if OK, 1 on error |
1725 | | */ |
1726 | | l_ok |
1727 | | fileConcatenate(const char *srcfile, |
1728 | | const char *destfile) |
1729 | 0 | { |
1730 | 0 | size_t nbytes; |
1731 | 0 | l_uint8 *data; |
1732 | |
|
1733 | 0 | if (!srcfile) |
1734 | 0 | return ERROR_INT("srcfile not defined", __func__, 1); |
1735 | 0 | if (!destfile) |
1736 | 0 | return ERROR_INT("destfile not defined", __func__, 1); |
1737 | | |
1738 | 0 | data = l_binaryRead(srcfile, &nbytes); |
1739 | 0 | l_binaryWrite(destfile, "a", data, nbytes); |
1740 | 0 | LEPT_FREE(data); |
1741 | 0 | return 0; |
1742 | 0 | } |
1743 | | |
1744 | | |
1745 | | /*! |
1746 | | * \brief fileAppendString() |
1747 | | * |
1748 | | * \param[in] filename |
1749 | | * \param[in] str string to append to file |
1750 | | * \return 0 if OK, 1 on error |
1751 | | */ |
1752 | | l_ok |
1753 | | fileAppendString(const char *filename, |
1754 | | const char *str) |
1755 | 0 | { |
1756 | 0 | FILE *fp; |
1757 | |
|
1758 | 0 | if (!filename) |
1759 | 0 | return ERROR_INT("filename not defined", __func__, 1); |
1760 | 0 | if (!str) |
1761 | 0 | return ERROR_INT("str not defined", __func__, 1); |
1762 | | |
1763 | 0 | if ((fp = fopenWriteStream(filename, "a")) == NULL) |
1764 | 0 | return ERROR_INT_1("stream not opened", filename, __func__, 1); |
1765 | 0 | fprintf(fp, "%s", str); |
1766 | 0 | fclose(fp); |
1767 | 0 | return 0; |
1768 | 0 | } |
1769 | | |
1770 | | |
1771 | | /*--------------------------------------------------------------------* |
1772 | | * File split operations * |
1773 | | *--------------------------------------------------------------------*/ |
1774 | | /*! |
1775 | | * \brief fileSplitLinesUniform() |
1776 | | * |
1777 | | * \param[in] filename input file |
1778 | | * \param[in] n number of output files (>= 1) |
1779 | | * \param[in] save_empty 1 to save empty lines; 0 to remove them |
1780 | | * \param[in] rootpath root pathname of output files |
1781 | | * \param[in] ext output extension, including the '.'; can be NULL |
1782 | | * \return 0 if OK, 1 on error |
1783 | | * |
1784 | | * <pre> |
1785 | | * Notes: |
1786 | | * (1) This splits an input text file into %n files with roughly |
1787 | | * equal numbers of text lines in each file. |
1788 | | * (2) if %save_empty == 1, empty lines are included, and concatention |
1789 | | * of the text in the split files will be identical to the original. |
1790 | | * (3) The output filenames are in the form: |
1791 | | * <rootpath>_N.<ext>, N = 1, ... n |
1792 | | * (4) This handles the temp directory pathname conversion where needed: |
1793 | | * /tmp ==> [OS specific temp directory] |
1794 | | * (5) Files can also be sharded into sets of lines by the program 'split': |
1795 | | * split -n l/<n> <filename> |
1796 | | * Using 'split', the resulting files have approximately equal |
1797 | | * numbers of bytes, rather than equal numbers of lines. |
1798 | | * </pre> |
1799 | | */ |
1800 | | l_ok |
1801 | | fileSplitLinesUniform(const char *filename, |
1802 | | l_int32 n, |
1803 | | l_int32 save_empty, |
1804 | | const char *rootpath, |
1805 | | const char *ext) |
1806 | 0 | { |
1807 | 0 | l_int32 i, totlines, nlines, index; |
1808 | 0 | size_t nbytes; |
1809 | 0 | l_uint8 *data; |
1810 | 0 | char *str; |
1811 | 0 | char outname[512]; |
1812 | 0 | NUMA *na; |
1813 | 0 | SARRAY *sa; |
1814 | |
|
1815 | 0 | if (!filename) |
1816 | 0 | return ERROR_INT("filename not defined", __func__, 1); |
1817 | 0 | if (!rootpath) |
1818 | 0 | return ERROR_INT("rootpath not defined", __func__, 1); |
1819 | 0 | if (n <= 0) |
1820 | 0 | return ERROR_INT("n must be > 0", __func__, 1); |
1821 | 0 | if (save_empty != 0 && save_empty != 1) |
1822 | 0 | return ERROR_INT("save_empty not 0 or 1", __func__, 1); |
1823 | | |
1824 | | /* Make sarray of lines; the newlines are stripped off */ |
1825 | 0 | if ((data = l_binaryRead(filename, &nbytes)) == NULL) |
1826 | 0 | return ERROR_INT("data not read", __func__, 1); |
1827 | 0 | sa = sarrayCreateLinesFromString((const char *)data, save_empty); |
1828 | 0 | LEPT_FREE(data); |
1829 | 0 | if (!sa) |
1830 | 0 | return ERROR_INT("sa not made", __func__, 1); |
1831 | 0 | totlines = sarrayGetCount(sa); |
1832 | 0 | if (n > totlines) { |
1833 | 0 | sarrayDestroy(&sa); |
1834 | 0 | L_ERROR("num files = %d > num lines = %d\n", __func__, n, totlines); |
1835 | 0 | return 1; |
1836 | 0 | } |
1837 | | |
1838 | | /* Write n sets of lines to n files, adding the newlines back */ |
1839 | 0 | na = numaGetUniformBinSizes(totlines, n); |
1840 | 0 | index = 0; |
1841 | 0 | for (i = 0; i < n; i++) { |
1842 | 0 | if (ext == NULL) |
1843 | 0 | snprintf(outname, sizeof(outname), "%s_%d", rootpath, i); |
1844 | 0 | else |
1845 | 0 | snprintf(outname, sizeof(outname), "%s_%d%s", rootpath, i, ext); |
1846 | 0 | numaGetIValue(na, i, &nlines); |
1847 | 0 | str = sarrayToStringRange(sa, index, nlines, 1); /* add newlines */ |
1848 | 0 | l_binaryWrite(outname, "w", str, strlen(str)); |
1849 | 0 | LEPT_FREE(str); |
1850 | 0 | index += nlines; |
1851 | 0 | } |
1852 | 0 | numaDestroy(&na); |
1853 | 0 | sarrayDestroy(&sa); |
1854 | 0 | return 0; |
1855 | 0 | } |
1856 | | |
1857 | | |
1858 | | /*--------------------------------------------------------------------* |
1859 | | * Multi-platform functions for opening file streams * |
1860 | | *--------------------------------------------------------------------*/ |
/*!
 * \brief   fopenReadStream()
 *
 * \param[in]    filename
 * \return  stream opened with mode "rb", or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This should be used whenever you want to run fopen() to
 *          read from a stream.  Never call fopen() directly.
 *      (2) This handles the temp directory pathname conversion where needed:
 *              /tmp  ==>  [OS specific temp directory]
 *      (3) If the file cannot be opened at the given path, the directory
 *          is stripped off and a second attempt is made using only the
 *          filename tail, relative to the current directory.
 * </pre>
 */
FILE *
fopenReadStream(const char  *filename)
{
    char  *fname;
    char  *tail = NULL;
    FILE  *fp;

    if (!filename)
        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);

        /* Try input filename; skip the attempt if no pathname was made */
    if ((fname = genPathname(filename, NULL)) != NULL) {
        fp = fopen(fname, "rb");
        LEPT_FREE(fname);
        if (fp) return fp;
    }

        /* Else, strip directory and try locally */
    splitPathAtDirectory(filename, NULL, &tail);
    if (!tail)
        return (FILE*)ERROR_PTR_1("tail not found", filename, __func__, NULL);
    fp = fopen(tail, "rb");
    if (!fp)
        L_ERROR("failed to open locally with tail %s for filename %s\n",
                __func__, tail, filename);
    LEPT_FREE(tail);
    return fp;
}
1901 | | |
1902 | | |
/*!
 * \brief   fopenWriteStream()
 *
 * \param[in]    filename
 * \param[in]    modestring   passed unchanged to fopen(); e.g., "w", "a"
 * \return  stream, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This should be used whenever you want to run fopen() to
 *          write or append to a stream.  Never call fopen() directly.
 *      (2) This handles the temp directory pathname conversion where needed:
 *              /tmp  ==>  [OS specific temp directory]
 * </pre>
 */
FILE *
fopenWriteStream(const char  *filename,
                 const char  *modestring)
{
    char  *fname;
    FILE  *fp;

    if (!filename)
        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);
    if (!modestring)
        return (FILE *)ERROR_PTR("modestring not defined", __func__, NULL);

    if ((fname = genPathname(filename, NULL)) == NULL)
        return (FILE *)ERROR_PTR_1("pathname not made", filename,
                                   __func__, NULL);
    fp = fopen(fname, modestring);
    if (!fp)
        fp = (FILE *)ERROR_PTR_1("stream not opened", fname, __func__, NULL);
    LEPT_FREE(fname);
    return fp;
}
1935 | | |
1936 | | |
1937 | | /*! |
1938 | | * \brief fopenReadFromMemory() |
1939 | | * |
1940 | | * \param[in] data, size |
1941 | | * \return file stream, or NULL on error |
1942 | | * |
1943 | | * <pre> |
1944 | | * Notes: |
1945 | | * (1) Work-around if fmemopen() not available. |
1946 | | * (2) Windows tmpfile() writes into the root C:\ directory, which |
1947 | | * requires admin privileges. This also works around that. |
1948 | | * </pre> |
1949 | | */ |
1950 | | FILE * |
1951 | | fopenReadFromMemory(const l_uint8 *data, |
1952 | | size_t size) |
1953 | 0 | { |
1954 | 0 | FILE *fp; |
1955 | |
|
1956 | 0 | if (!data) |
1957 | 0 | return (FILE *)ERROR_PTR("data not defined", __func__, NULL); |
1958 | | |
1959 | 0 | #if HAVE_FMEMOPEN |
1960 | 0 | if ((fp = fmemopen((void *)data, size, "rb")) == NULL) |
1961 | 0 | return (FILE *)ERROR_PTR("stream not opened", __func__, NULL); |
1962 | | #else /* write to tmp file */ |
1963 | | L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__); |
1964 | | #ifdef _WIN32 |
1965 | | if ((fp = fopenWriteWinTempfile()) == NULL) |
1966 | | return (FILE *)ERROR_PTR("tmpfile stream not opened", __func__, NULL); |
1967 | | #else |
1968 | | if ((fp = tmpfile()) == NULL) |
1969 | | return (FILE *)ERROR_PTR("tmpfile stream not opened", __func__, NULL); |
1970 | | #endif /* _WIN32 */ |
1971 | | fwrite(data, 1, size, fp); |
1972 | | rewind(fp); |
1973 | | #endif /* HAVE_FMEMOPEN */ |
1974 | | |
1975 | 0 | return fp; |
1976 | 0 | } |
1977 | | |
1978 | | |
1979 | | /*--------------------------------------------------------------------* |
1980 | | * Opening a Windows tmpfile for writing * |
1981 | | *--------------------------------------------------------------------*/ |
/*!
 * \brief   fopenWriteWinTempfile()
 *
 * \return  file stream opened "r+b", or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The Windows version of tmpfile() writes into the root
 *          C:\ directory, which requires admin privileges.  This
 *          function provides an alternative implementation.
 *      (2) The file is opened with _O_TEMPORARY, using a name from
 *          l_makeTempFilename().
 *      (3) On platforms other than Windows this always returns NULL.
 * </pre>
 */
FILE *
fopenWriteWinTempfile(void)
{
#ifdef _WIN32
    l_int32  handle;
    FILE    *fp;
    char    *filename;

    if ((filename = l_makeTempFilename()) == NULL) {
        L_ERROR("l_makeTempFilename failed, %s\n", __func__, strerror(errno));
        return NULL;
    }

    handle = _open(filename, _O_CREAT | _O_RDWR | _O_SHORT_LIVED |
                   _O_TEMPORARY | _O_BINARY, _S_IREAD | _S_IWRITE);
    if (handle == -1) {
            /* Report errno before any other library call can change it */
        L_ERROR("_open failed, %s\n", __func__, strerror(errno));
        lept_free(filename);
        return NULL;
    }
    lept_free(filename);

    if ((fp = _fdopen(handle, "r+b")) == NULL) {
        L_ERROR("_fdopen failed, %s\n", __func__, strerror(errno));
        _close(handle);  /* otherwise the descriptor is leaked */
        return NULL;
    }

    return fp;
#else
    return NULL;
#endif  /*  _WIN32 */
}
2025 | | |
2026 | | |
2027 | | /*--------------------------------------------------------------------* |
2028 | | * Multi-platform functions that avoid C-runtime boundary * |
2029 | | * crossing for applications with Windows DLLs * |
2030 | | *--------------------------------------------------------------------*/ |
2031 | | /* |
2032 | | * Problems arise when pointers to streams and data are passed |
2033 | | * between two Windows DLLs that have been generated with different |
2034 | | * C runtimes. To avoid this, leptonica provides wrappers for |
2035 | | * several C library calls. |
2036 | | */ |
/*!
 * \brief   lept_fopen()
 *
 * \param[in]    filename
 * \param[in]    mode       same as for fopen(); e.g., "rb"
 * \return  stream or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This must be used by any application that passes
 *          a file handle to a leptonica Windows DLL.
 *      (2) If %mode contains an "r", the file is opened with
 *          fopenReadStream(), and %mode itself is not passed on.
 *          Otherwise, the file is opened with fopenWriteStream(),
 *          using %mode.
 * </pre>
 */
FILE *
lept_fopen(const char  *filename,
           const char  *mode)
{
    if (filename == NULL)
        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);
    if (mode == NULL)
        return (FILE *)ERROR_PTR("mode not defined", __func__, NULL);

        /* Dispatch on the presence of "r" anywhere in the mode */
    return stringFindSubstr(mode, "r", NULL)
               ? fopenReadStream(filename)
               : fopenWriteStream(filename, mode);
}
2064 | | |
2065 | | |
2066 | | /*! |
2067 | | * \brief lept_fclose() |
2068 | | * |
2069 | | * \param[in] fp file stream |
2070 | | * \return 0 if OK, 1 on error |
2071 | | * |
2072 | | * <pre> |
2073 | | * Notes: |
2074 | | * (1) This should be used by any application that accepts |
2075 | | * a file handle generated by a leptonica Windows DLL. |
2076 | | * </pre> |
2077 | | */ |
2078 | | l_ok |
2079 | | lept_fclose(FILE *fp) |
2080 | 0 | { |
2081 | 0 | if (!fp) |
2082 | 0 | return ERROR_INT("stream not defined", __func__, 1); |
2083 | | |
2084 | 0 | return fclose(fp); |
2085 | 0 | } |
2086 | | |
2087 | | |
/*!
 * \brief   lept_calloc()
 *
 * \param[in]    nmemb     number of members
 * \param[in]    size      of each member
 * \return  void ptr, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) For safety with Windows DLLs, this can be used in conjunction
 *          with lept_free() to avoid C-runtime boundary problems.
 *          Just use these two functions throughout your application.
 *      (2) A request for zero members, or for members of zero size,
 *          returns NULL without calling the allocator.
 * </pre>
 */
void *
lept_calloc(size_t  nmemb,
            size_t  size)
{
        /* Both arguments are unsigned, so only zero needs rejecting */
    if (nmemb == 0 || size == 0)
        return NULL;
    return LEPT_CALLOC(nmemb, size);
}
2110 | | |
2111 | | |
/*!
 * \brief   lept_free()
 *
 * \param[in]    ptr    heap data to be released; NULL is ignored
 *
 * <pre>
 * Notes:
 *      (1) This should be used by any application that accepts
 *          heap data allocated by a leptonica Windows DLL.
 * </pre>
 */
void
lept_free(void *ptr)
{
    if (ptr != NULL) {
        LEPT_FREE(ptr);
    }
}
2129 | | |
2130 | | |
2131 | | /*--------------------------------------------------------------------* |
2132 | | * Multi-platform file system operations * |
2133 | | * [ These only write to /tmp or its subdirectories ] * |
2134 | | *--------------------------------------------------------------------*/ |
2135 | | /*! |
2136 | | * \brief lept_mkdir() |
2137 | | * |
2138 | | * \param[in] subdir of /tmp or its OS specific equivalent |
2139 | | * \return 0 on success, non-zero on failure |
2140 | | * |
2141 | | * <pre> |
2142 | | * Notes: |
2143 | | * (1) %subdir is a partial path that can consist of one or more |
2144 | | * directories. |
2145 | | * (2) This makes any subdirectories of /tmp that are required. |
2146 | | * (3) The root temp directory is: |
2147 | | * /tmp (unix) [default] |
2148 | | * [Temp] (Windows) |
2149 | | * </pre> |
2150 | | */ |
2151 | | l_int32 |
2152 | | lept_mkdir(const char *subdir) |
2153 | 0 | { |
2154 | 0 | char *dir, *tmpdir; |
2155 | 0 | l_int32 i, n; |
2156 | 0 | l_int32 ret = 0; |
2157 | 0 | SARRAY *sa; |
2158 | | #ifdef _WIN32 |
2159 | | l_uint32 attributes; |
2160 | | #endif /* _WIN32 */ |
2161 | |
|
2162 | 0 | if (!LeptDebugOK) { |
2163 | 0 | L_INFO("making named temp subdirectory %s is disabled\n", |
2164 | 0 | __func__, subdir); |
2165 | 0 | return 0; |
2166 | 0 | } |
2167 | | |
2168 | 0 | if (!subdir) |
2169 | 0 | return ERROR_INT("subdir not defined", __func__, 1); |
2170 | 0 | if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) |
2171 | 0 | return ERROR_INT("subdir not an actual subdirectory", __func__, 1); |
2172 | | |
2173 | 0 | sa = sarrayCreate(0); |
2174 | 0 | sarraySplitString(sa, subdir, "/"); |
2175 | 0 | n = sarrayGetCount(sa); |
2176 | 0 | dir = genPathname("/tmp", NULL); |
2177 | | /* Make sure the tmp directory exists */ |
2178 | 0 | #ifndef _WIN32 |
2179 | 0 | ret = mkdir(dir, 0777); |
2180 | | #else |
2181 | | attributes = GetFileAttributesA(dir); |
2182 | | if (attributes == INVALID_FILE_ATTRIBUTES) |
2183 | | ret = (CreateDirectoryA(dir, NULL) ? 0 : 1); |
2184 | | #endif |
2185 | | /* Make all the subdirectories */ |
2186 | 0 | for (i = 0; i < n; i++) { |
2187 | 0 | tmpdir = pathJoin(dir, sarrayGetString(sa, i, L_NOCOPY)); |
2188 | 0 | #ifndef _WIN32 |
2189 | 0 | ret += mkdir(tmpdir, 0777); |
2190 | | #else |
2191 | | if (CreateDirectoryA(tmpdir, NULL) == 0) |
2192 | | ret += (GetLastError() != ERROR_ALREADY_EXISTS); |
2193 | | #endif |
2194 | 0 | LEPT_FREE(dir); |
2195 | 0 | dir = tmpdir; |
2196 | 0 | } |
2197 | 0 | LEPT_FREE(dir); |
2198 | 0 | sarrayDestroy(&sa); |
2199 | 0 | if (ret > 0) |
2200 | 0 | L_ERROR("failure to create %d directories\n", __func__, ret); |
2201 | 0 | return ret; |
2202 | 0 | } |
2203 | | |
2204 | | |
2205 | | /*! |
2206 | | * \brief lept_rmdir() |
2207 | | * |
2208 | | * \param[in] subdir of /tmp or its OS specific equivalent |
2209 | | * \return 0 on success, non-zero on failure |
2210 | | * |
2211 | | * <pre> |
2212 | | * Notes: |
2213 | | * (1) %subdir is a partial path that can consist of one or more |
2214 | | * directories. |
2215 | | * (2) This removes all files from the specified subdirectory of |
2216 | | * the root temp directory: |
2217 | | * /tmp (unix) |
2218 | | * [Temp] (Windows) |
2219 | | * and then removes the subdirectory. |
2220 | | * (3) The combination |
2221 | | * lept_rmdir(subdir); |
2222 | | * lept_mkdir(subdir); |
2223 | | * is guaranteed to give you an empty subdirectory. |
2224 | | * </pre> |
2225 | | */ |
2226 | | l_int32 |
2227 | | lept_rmdir(const char *subdir) |
2228 | 0 | { |
2229 | 0 | char *dir, *fname, *fullname; |
2230 | 0 | l_int32 exists, ret, i, nfiles; |
2231 | 0 | SARRAY *sa; |
2232 | | #ifdef _WIN32 |
2233 | | char *newpath; |
2234 | | #else |
2235 | 0 | char *realdir; |
2236 | 0 | #endif /* _WIN32 */ |
2237 | |
|
2238 | 0 | if (!subdir) |
2239 | 0 | return ERROR_INT("subdir not defined", __func__, 1); |
2240 | 0 | if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) |
2241 | 0 | return ERROR_INT("subdir not an actual subdirectory", __func__, 1); |
2242 | | |
2243 | | /* Find the temp subdirectory */ |
2244 | 0 | dir = pathJoin("/tmp", subdir); |
2245 | 0 | if (!dir) |
2246 | 0 | return ERROR_INT("directory name not made", __func__, 1); |
2247 | 0 | lept_direxists(dir, &exists); |
2248 | 0 | if (!exists) { /* fail silently */ |
2249 | 0 | LEPT_FREE(dir); |
2250 | 0 | return 0; |
2251 | 0 | } |
2252 | | |
2253 | | /* List all the files in that directory */ |
2254 | 0 | if ((sa = getFilenamesInDirectory(dir)) == NULL) { |
2255 | 0 | L_ERROR("directory %s does not exist!\n", __func__, dir); |
2256 | 0 | LEPT_FREE(dir); |
2257 | 0 | return 1; |
2258 | 0 | } |
2259 | 0 | nfiles = sarrayGetCount(sa); |
2260 | |
|
2261 | 0 | for (i = 0; i < nfiles; i++) { |
2262 | 0 | fname = sarrayGetString(sa, i, L_NOCOPY); |
2263 | 0 | fullname = genPathname(dir, fname); |
2264 | 0 | remove(fullname); |
2265 | 0 | LEPT_FREE(fullname); |
2266 | 0 | } |
2267 | |
|
2268 | 0 | #ifndef _WIN32 |
2269 | 0 | realdir = genPathname("/tmp", subdir); |
2270 | 0 | ret = rmdir(realdir); |
2271 | 0 | LEPT_FREE(realdir); |
2272 | | #else |
2273 | | newpath = genPathname(dir, NULL); |
2274 | | ret = (RemoveDirectoryA(newpath) ? 0 : 1); |
2275 | | LEPT_FREE(newpath); |
2276 | | #endif /* !_WIN32 */ |
2277 | |
|
2278 | 0 | sarrayDestroy(&sa); |
2279 | 0 | LEPT_FREE(dir); |
2280 | 0 | return ret; |
2281 | 0 | } |
2282 | | |
2283 | | |
2284 | | /*! |
2285 | | * \brief lept_direxists() |
2286 | | * |
2287 | | * \param[in] dir |
2288 | | * \param[out] pexists 1 if it exists; 0 otherwise |
2289 | | * \return void |
2290 | | * |
2291 | | * <pre> |
2292 | | * Notes: |
2293 | | * (1) Always use unix pathname separators. |
2294 | | * (2) By calling genPathname(), if the pathname begins with "/tmp" |
2295 | | * this does an automatic directory translation for operating |
2296 | | * systems that use a different path for /tmp. |
2297 | | * </pre> |
2298 | | */ |
2299 | | void |
2300 | | lept_direxists(const char *dir, |
2301 | | l_int32 *pexists) |
2302 | 0 | { |
2303 | 0 | char *realdir; |
2304 | |
|
2305 | 0 | if (!pexists) return; |
2306 | 0 | *pexists = 0; |
2307 | 0 | if (!dir) return; |
2308 | 0 | if ((realdir = genPathname(dir, NULL)) == NULL) |
2309 | 0 | return; |
2310 | | |
2311 | 0 | #ifndef _WIN32 |
2312 | 0 | { |
2313 | 0 | struct stat s; |
2314 | 0 | l_int32 err = stat(realdir, &s); |
2315 | 0 | if (err != -1 && S_ISDIR(s.st_mode)) |
2316 | 0 | *pexists = 1; |
2317 | 0 | } |
2318 | | #else /* _WIN32 */ |
2319 | | { |
2320 | | l_uint32 attributes; |
2321 | | attributes = GetFileAttributesA(realdir); |
2322 | | if (attributes != INVALID_FILE_ATTRIBUTES && |
2323 | | (attributes & FILE_ATTRIBUTE_DIRECTORY)) |
2324 | | *pexists = 1; |
2325 | | } |
2326 | | #endif /* _WIN32 */ |
2327 | |
|
2328 | 0 | LEPT_FREE(realdir); |
2329 | 0 | } |
2330 | | |
2331 | | |
2332 | | /*! |
2333 | | * \brief lept_rm_match() |
2334 | | * |
2335 | | * \param[in] subdir [optional] if NULL, the removed files are in /tmp |
2336 | | * \param[in] substr [optional] pattern to match in filename |
2337 | | * \return 0 on success, non-zero on failure |
2338 | | * |
2339 | | * <pre> |
2340 | | * Notes: |
2341 | | * (1) This removes the matched files in /tmp or a subdirectory of /tmp. |
2342 | | * Use NULL for %subdir if the files are in /tmp. |
2343 | | * (2) If %substr == NULL, this removes all files in the directory. |
2344 | | * If %substr == "" (empty), this removes no files. |
2345 | | * If both %subdir == NULL and %substr == NULL, this removes |
2346 | | * all files in /tmp. |
2347 | | * (3) Use unix pathname separators. |
2348 | | * (4) By calling genPathname(), if the pathname begins with "/tmp" |
2349 | | * this does an automatic directory translation for operating |
2350 | | * systems that use a different path for /tmp. |
2351 | | * (5) Error conditions: |
2352 | | * * returns -1 if the directory is not found |
2353 | | * * returns the number of files (> 0) that it was unable to remove. |
2354 | | * </pre> |
2355 | | */ |
2356 | | l_int32 |
2357 | | lept_rm_match(const char *subdir, |
2358 | | const char *substr) |
2359 | 0 | { |
2360 | 0 | char *path, *fname; |
2361 | 0 | char tempdir[256]; |
2362 | 0 | l_int32 i, n, ret; |
2363 | 0 | SARRAY *sa; |
2364 | |
|
2365 | 0 | makeTempDirname(tempdir, sizeof(tempdir), subdir); |
2366 | 0 | if ((sa = getSortedPathnamesInDirectory(tempdir, substr, 0, 0)) == NULL) |
2367 | 0 | return ERROR_INT("sa not made", __func__, -1); |
2368 | 0 | n = sarrayGetCount(sa); |
2369 | 0 | if (n == 0) { |
2370 | 0 | L_WARNING("no matching files found\n", __func__); |
2371 | 0 | sarrayDestroy(&sa); |
2372 | 0 | return 0; |
2373 | 0 | } |
2374 | | |
2375 | 0 | ret = 0; |
2376 | 0 | for (i = 0; i < n; i++) { |
2377 | 0 | fname = sarrayGetString(sa, i, L_NOCOPY); |
2378 | 0 | path = genPathname(fname, NULL); |
2379 | 0 | if (lept_rmfile(path) != 0) { |
2380 | 0 | L_ERROR("failed to remove %s\n", __func__, path); |
2381 | 0 | ret++; |
2382 | 0 | } |
2383 | 0 | LEPT_FREE(path); |
2384 | 0 | } |
2385 | 0 | sarrayDestroy(&sa); |
2386 | 0 | return ret; |
2387 | 0 | } |
2388 | | |
2389 | | |
2390 | | /*! |
2391 | | * \brief lept_rm() |
2392 | | * |
2393 | | * \param[in] subdir [optional] subdir of '/tmp'; can be NULL |
2394 | | * \param[in] tail filename without the directory |
2395 | | * \return 0 on success, non-zero on failure |
2396 | | * |
2397 | | * <pre> |
2398 | | * Notes: |
2399 | | * (1) By calling genPathname(), this does an automatic directory |
2400 | | * translation on operating systems which use a different path. |
2401 | | * </pre> |
2402 | | */ |
2403 | | l_int32 |
2404 | | lept_rm(const char *subdir, |
2405 | | const char *tail) |
2406 | 0 | { |
2407 | 0 | char *path; |
2408 | 0 | char newtemp[256]; |
2409 | 0 | l_int32 ret; |
2410 | |
|
2411 | 0 | if (!tail || strlen(tail) == 0) |
2412 | 0 | return ERROR_INT("tail undefined or empty", __func__, 1); |
2413 | | |
2414 | 0 | if (makeTempDirname(newtemp, sizeof(newtemp), subdir)) |
2415 | 0 | return ERROR_INT("temp dirname not made", __func__, 1); |
2416 | 0 | path = genPathname(newtemp, tail); |
2417 | 0 | ret = lept_rmfile(path); |
2418 | 0 | LEPT_FREE(path); |
2419 | 0 | return ret; |
2420 | 0 | } |
2421 | | |
2422 | | |
2423 | | /*! |
2424 | | * \brief |
2425 | | * |
2426 | | * lept_rmfile() |
2427 | | * |
2428 | | * \param[in] filepath full path to file including the directory |
2429 | | * \return 0 on success, non-zero on failure |
2430 | | * |
2431 | | * <pre> |
2432 | | * Notes: |
2433 | | * (1) This removes the named file. |
2434 | | * (2) Use unix pathname separators. |
2435 | | * (3) There is no name translation. |
2436 | | * (4) Unlike the other lept_* functions in this section, this can remove |
2437 | | * any file -- it is not restricted to files that are in /tmp or a |
2438 | | * subdirectory of it. |
2439 | | * (5) For files in /tmp or a subdirectory of it, this does an automatic |
2440 | | * directory translation for operating systems that use a different |
2441 | | * path for /tmp. |
2442 | | * </pre> |
2443 | | */ |
2444 | | l_int32 |
2445 | | lept_rmfile(const char *filepath) |
2446 | 0 | { |
2447 | 0 | l_int32 ret; |
2448 | |
|
2449 | 0 | if (!filepath || strlen(filepath) == 0) |
2450 | 0 | return ERROR_INT("filepath undefined or empty", __func__, 1); |
2451 | | |
2452 | 0 | #ifndef _WIN32 |
2453 | 0 | ret = remove(filepath); |
2454 | | #else |
2455 | | /* Set attributes to allow deletion of read-only files */ |
2456 | | SetFileAttributesA(filepath, FILE_ATTRIBUTE_NORMAL); |
2457 | | ret = DeleteFileA(filepath) ? 0 : 1; |
2458 | | #endif /* !_WIN32 */ |
2459 | |
|
2460 | 0 | return ret; |
2461 | 0 | } |
2462 | | |
2463 | | |
2464 | | /*! |
2465 | | * \brief lept_mv() |
2466 | | * |
2467 | | * \param[in] srcfile |
2468 | | * \param[in] newdir [optional]; can be NULL |
2469 | | * \param[in] newtail [optional]; can be NULL |
2470 | | * \param[out] pnewpath [optional] of actual path; can be NULL |
2471 | | * \return 0 on success, non-zero on failure |
2472 | | * |
2473 | | * <pre> |
2474 | | * Notes: |
2475 | | * (1) This moves %srcfile to /tmp or to a subdirectory of /tmp. |
2476 | | * (2) %srcfile can either be a full path or relative to the |
2477 | | * current directory. |
2478 | | * (3) %newdir can either specify an existing subdirectory of /tmp |
2479 | | * or can be NULL. In the latter case, the file will be written |
2480 | | * into /tmp. |
2481 | | * (4) %newtail can either specify a filename tail or, if NULL, |
2482 | | * the filename is taken from src-tail, the tail of %srcfile. |
2483 | | * (5) For debugging, the computed newpath can be returned. It must |
2484 | | * be freed by the caller. |
2485 | | * (6) Reminders: |
2486 | | * (a) specify files using unix pathnames |
2487 | | * (b) this does an automatic directory translation on operating |
2488 | | * systems that use a different path for /tmp. |
2489 | | * (7) Examples: |
2490 | | * * newdir = NULL, newtail = NULL ==> /tmp/src-tail |
2491 | | * * newdir = NULL, newtail = abc ==> /tmp/abc |
2492 | | * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail |
2493 | | * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc |
2494 | | * </pre> |
2495 | | */ |
2496 | | l_int32 |
2497 | | lept_mv(const char *srcfile, |
2498 | | const char *newdir, |
2499 | | const char *newtail, |
2500 | | char **pnewpath) |
2501 | 0 | { |
2502 | 0 | char *srcpath, *newpath, *dir, *srctail; |
2503 | 0 | char newtemp[256]; |
2504 | 0 | l_int32 ret; |
2505 | |
|
2506 | 0 | if (!srcfile) |
2507 | 0 | return ERROR_INT("srcfile not defined", __func__, 1); |
2508 | | |
2509 | | /* Require output pathname to be in /tmp/ or a subdirectory */ |
2510 | 0 | if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) |
2511 | 0 | return ERROR_INT("newdir not NULL or a subdir of /tmp", __func__, 1); |
2512 | | |
2513 | | /* Get canonical src pathname */ |
2514 | 0 | splitPathAtDirectory(srcfile, &dir, &srctail); |
2515 | |
|
2516 | 0 | #ifndef _WIN32 |
2517 | 0 | srcpath = pathJoin(dir, srctail); |
2518 | 0 | LEPT_FREE(dir); |
2519 | | |
2520 | | /* Generate output pathname */ |
2521 | 0 | if (!newtail || newtail[0] == '\0') |
2522 | 0 | newpath = pathJoin(newtemp, srctail); |
2523 | 0 | else |
2524 | 0 | newpath = pathJoin(newtemp, newtail); |
2525 | 0 | LEPT_FREE(srctail); |
2526 | | |
2527 | | /* Overwrite any existing file at 'newpath' */ |
2528 | 0 | ret = fileCopy(srcpath, newpath); |
2529 | 0 | if (!ret) { /* and remove srcfile */ |
2530 | 0 | char *realpath = genPathname(srcpath, NULL); |
2531 | 0 | remove(realpath); |
2532 | 0 | LEPT_FREE(realpath); |
2533 | 0 | } |
2534 | | #else |
2535 | | srcpath = genPathname(dir, srctail); |
2536 | | LEPT_FREE(dir); |
2537 | | |
2538 | | /* Generate output pathname */ |
2539 | | if (!newtail || newtail[0] == '\0') |
2540 | | newpath = genPathname(newtemp, srctail); |
2541 | | else |
2542 | | newpath = genPathname(newtemp, newtail); |
2543 | | LEPT_FREE(srctail); |
2544 | | |
2545 | | /* Overwrite any existing file at 'newpath' */ |
2546 | | ret = MoveFileExA(srcpath, newpath, |
2547 | | MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING) ? 0 : 1; |
2548 | | #endif /* ! _WIN32 */ |
2549 | |
|
2550 | 0 | LEPT_FREE(srcpath); |
2551 | 0 | if (pnewpath) |
2552 | 0 | *pnewpath = newpath; |
2553 | 0 | else |
2554 | 0 | LEPT_FREE(newpath); |
2555 | 0 | return ret; |
2556 | 0 | } |
2557 | | |
2558 | | |
2559 | | /*! |
2560 | | * \brief lept_cp() |
2561 | | * |
2562 | | * \param[in] srcfile |
2563 | | * \param[in] newdir [optional]; can be NULL |
2564 | | * \param[in] newtail [optional]; can be NULL |
2565 | | * \param[out] pnewpath [optional] of actual path; can be NULL |
2566 | | * \return 0 on success, non-zero on failure |
2567 | | * |
2568 | | * <pre> |
2569 | | * Notes: |
2570 | | * (1) This copies %srcfile to /tmp or to a subdirectory of /tmp. |
2571 | | * (2) %srcfile can either be a full path or relative to the |
2572 | | * current directory. |
2573 | | * (3) %newdir can either specify an existing subdirectory of /tmp, |
2574 | | * or can be NULL. In the latter case, the file will be written |
2575 | | * into /tmp. |
2576 | | * (4) %newtail can either specify a filename tail or, if NULL, |
2577 | | * the filename is taken from src-tail, the tail of %srcfile. |
2578 | | * (5) For debugging, the computed newpath can be returned. It must |
2579 | | * be freed by the caller. |
2580 | | * (6) Reminders: |
2581 | | * (a) specify files using unix pathnames |
2582 | | * (b) this does an automatic directory translation for operating |
2583 | | * systems that use a different path for /tmp |
2584 | | * (7) Examples: |
2585 | | * * newdir = NULL, newtail = NULL ==> /tmp/src-tail |
2586 | | * * newdir = NULL, newtail = abc ==> /tmp/abc |
2587 | | * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail |
2588 | | * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc |
2589 | | * |
2590 | | * </pre> |
2591 | | */ |
2592 | | l_int32 |
2593 | | lept_cp(const char *srcfile, |
2594 | | const char *newdir, |
2595 | | const char *newtail, |
2596 | | char **pnewpath) |
2597 | 0 | { |
2598 | 0 | char *srcpath, *newpath, *dir, *srctail; |
2599 | 0 | char newtemp[256]; |
2600 | 0 | l_int32 ret; |
2601 | |
|
2602 | 0 | if (!srcfile) |
2603 | 0 | return ERROR_INT("srcfile not defined", __func__, 1); |
2604 | | |
2605 | | /* Require output pathname to be in /tmp or a subdirectory */ |
2606 | 0 | if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) |
2607 | 0 | return ERROR_INT("newdir not NULL or a subdir of /tmp", __func__, 1); |
2608 | | |
2609 | | /* Get canonical src pathname */ |
2610 | 0 | splitPathAtDirectory(srcfile, &dir, &srctail); |
2611 | |
|
2612 | 0 | #ifndef _WIN32 |
2613 | 0 | srcpath = pathJoin(dir, srctail); |
2614 | 0 | LEPT_FREE(dir); |
2615 | | |
2616 | | /* Generate output pathname */ |
2617 | 0 | if (!newtail || newtail[0] == '\0') |
2618 | 0 | newpath = pathJoin(newtemp, srctail); |
2619 | 0 | else |
2620 | 0 | newpath = pathJoin(newtemp, newtail); |
2621 | 0 | LEPT_FREE(srctail); |
2622 | | |
2623 | | /* Overwrite any existing file at 'newpath' */ |
2624 | 0 | ret = fileCopy(srcpath, newpath); |
2625 | | #else |
2626 | | srcpath = genPathname(dir, srctail); |
2627 | | LEPT_FREE(dir); |
2628 | | |
2629 | | /* Generate output pathname */ |
2630 | | if (!newtail || newtail[0] == '\0') |
2631 | | newpath = genPathname(newtemp, srctail); |
2632 | | else |
2633 | | newpath = genPathname(newtemp, newtail); |
2634 | | LEPT_FREE(srctail); |
2635 | | |
2636 | | /* Overwrite any existing file at 'newpath' */ |
2637 | | ret = CopyFileA(srcpath, newpath, FALSE) ? 0 : 1; |
2638 | | #endif /* !_WIN32 */ |
2639 | |
|
2640 | 0 | LEPT_FREE(srcpath); |
2641 | 0 | if (pnewpath) |
2642 | 0 | *pnewpath = newpath; |
2643 | 0 | else |
2644 | 0 | LEPT_FREE(newpath); |
2645 | 0 | return ret; |
2646 | 0 | } |
2647 | | |
2648 | | |
2649 | | /*--------------------------------------------------------------------* |
2650 | | * Special debug/test function for calling 'system' * |
2651 | | *--------------------------------------------------------------------*/ |
2652 | | #if defined(__APPLE__) |
2653 | | #include "TargetConditionals.h" |
2654 | | #endif /* __APPLE__ */ |
2655 | | |
2656 | | /*! |
2657 | | * \brief callSystemDebug() |
2658 | | * |
2659 | | * \param[in] cmd command to be exec'd |
2660 | | * \return 0 on success |
2661 | | * |
2662 | | * <pre> |
2663 | | * Notes: |
2664 | | * (1) The C library 'system' call is only made through this function. |
2665 | | * It only works in debug/test mode, where the global variable |
2666 | | * LeptDebugOK == TRUE. This variable is set to FALSE in the |
2667 | | * library as distributed, and calling this function will |
2668 | | * generate an error message. |
2669 | | * </pre> |
2670 | | */ |
2671 | | l_int32 |
2672 | | callSystemDebug(const char *cmd) |
2673 | 0 | { |
2674 | 0 | l_int32 ret; |
2675 | |
|
2676 | 0 | if (!cmd) { |
2677 | 0 | L_ERROR("cmd not defined\n", __func__); |
2678 | 0 | return 1; |
2679 | 0 | } |
2680 | 0 | if (LeptDebugOK == FALSE) { |
2681 | 0 | L_INFO("'system' calls are disabled\n", __func__); |
2682 | 0 | return 1; |
2683 | 0 | } |
2684 | | |
2685 | | #if defined(__APPLE__) /* iOS 11 does not support system() */ |
2686 | | |
2687 | | #if (defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1) /* Mac OS X */ |
2688 | | ret = system(cmd); |
2689 | | #elif TARGET_OS_IPHONE || defined(OS_IOS) /* iOS */ |
2690 | | L_ERROR("iOS 11 does not support system()\n", __func__); |
2691 | | #endif /* TARGET_OS_OSX */ |
2692 | | |
2693 | | #else /* ! __APPLE__ */ |
2694 | | |
2695 | 0 | ret = system(cmd); |
2696 | |
|
2697 | 0 | #endif /* __APPLE__ */ |
2698 | |
|
2699 | 0 | return ret; |
2700 | 0 | } |
2701 | | |
2702 | | |
2703 | | /*--------------------------------------------------------------------* |
2704 | | * General file name operations * |
2705 | | *--------------------------------------------------------------------*/ |
2706 | | /*! |
2707 | | * \brief splitPathAtDirectory() |
2708 | | * |
2709 | | * \param[in] pathname full path; can be a directory |
2710 | | * \param[out] pdir [optional] root directory name of |
2711 | | * input path, including trailing '/' |
2712 | | * \param[out] ptail [optional] path tail, which is either |
2713 | | * the file name within the root directory or |
2714 | | * the last sub-directory in the path |
2715 | | * \return 0 if OK, 1 on error |
2716 | | * |
2717 | | * <pre> |
2718 | | * Notes: |
2719 | | * (1) If you only want the tail, input null for the root directory ptr. |
2720 | | * (2) If you only want the root directory name, input null for the |
2721 | | * tail ptr. |
2722 | | * (3) This function makes decisions based only on the lexical |
2723 | | * structure of the input. Examples: |
2724 | | * /usr/tmp/abc.d --> dir: /usr/tmp/ tail: abc.d |
2725 | | * /usr/tmp/ --> dir: /usr/tmp/ tail: [empty string] |
2726 | | * /usr/tmp --> dir: /usr/ tail: tmp |
2727 | | * abc.d --> dir: [empty string] tail: abc.d |
2728 | | * (4 Consider the first example above: /usr/tmp/abc.d. |
2729 | | * Suppose you want the stem of the file, abc, without either |
2730 | | * the directory or the extension. This can be extracted in two steps: |
2731 | | * splitPathAtDirectory("usr/tmp/abc.d", NULL, &tail); |
2732 | | * [sets tail: "abc.d"] |
2733 | | * splitPathAtExtension(tail, &basename, NULL); |
2734 | | * [sets basename: "abc"] |
2735 | | * (5) The input can have either forward (unix) or backward (win) |
2736 | | * slash separators. The output has unix separators. |
2737 | | * Note that Win32 pathname functions generally accept both |
2738 | | * slash forms, but the Windows command line interpreter |
2739 | | * only accepts backward slashes, because forward slashes are |
2740 | | * used to demarcate switches (vs. dashes in unix). |
2741 | | * </pre> |
2742 | | */ |
2743 | | l_ok |
2744 | | splitPathAtDirectory(const char *pathname, |
2745 | | char **pdir, |
2746 | | char **ptail) |
2747 | 6.51k | { |
2748 | 6.51k | char *cpathname, *lastslash; |
2749 | | |
2750 | 6.51k | if (!pdir && !ptail) |
2751 | 0 | return ERROR_INT("null input for both strings", __func__, 1); |
2752 | 6.51k | if (pdir) *pdir = NULL; |
2753 | 6.51k | if (ptail) *ptail = NULL; |
2754 | 6.51k | if (!pathname) |
2755 | 0 | return ERROR_INT("pathname not defined", __func__, 1); |
2756 | | |
2757 | 6.51k | cpathname = stringNew(pathname); |
2758 | 6.51k | convertSepCharsInPath(cpathname, UNIX_PATH_SEPCHAR); |
2759 | 6.51k | lastslash = strrchr(cpathname, '/'); |
2760 | 6.51k | if (lastslash) { |
2761 | 6.51k | if (ptail) |
2762 | 6.51k | *ptail = stringNew(lastslash + 1); |
2763 | 6.51k | if (pdir) { |
2764 | 0 | *(lastslash + 1) = '\0'; |
2765 | 0 | *pdir = cpathname; |
2766 | 6.51k | } else { |
2767 | 6.51k | LEPT_FREE(cpathname); |
2768 | 6.51k | } |
2769 | 6.51k | } else { /* no directory */ |
2770 | 0 | if (pdir) |
2771 | 0 | *pdir = stringNew(""); |
2772 | 0 | if (ptail) |
2773 | 0 | *ptail = cpathname; |
2774 | 0 | else |
2775 | 0 | LEPT_FREE(cpathname); |
2776 | 0 | } |
2777 | | |
2778 | 6.51k | return 0; |
2779 | 6.51k | } |
2780 | | |
2781 | | |
2782 | | /*! |
2783 | | * \brief splitPathAtExtension() |
2784 | | * |
2785 | | * \param[in] pathname full path; can be a directory |
2786 | | * \param[out] pbasename [optional] pathname not including the |
2787 | | * last dot and characters after that |
2788 | | * \param[out] pextension [optional] path extension, which is |
2789 | | * the last dot and the characters after it. If |
2790 | | * there is no extension, it returns the empty string |
2791 | | * \return 0 if OK, 1 on error |
2792 | | * |
2793 | | * <pre> |
2794 | | * Notes: |
2795 | | * (1) If you only want the extension, input null for the basename ptr. |
2796 | | * (2) If you only want the basename without extension, input null |
2797 | | * for the extension ptr. |
2798 | | * (3) This function makes decisions based only on the lexical |
2799 | | * structure of the input. Examples: |
2800 | | * /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg |
2801 | | * /usr/tmp/.jpg --> basename: /usr/tmp/ ext: .jpg |
2802 | | * /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ ext: [empty str] |
2803 | | * ./.jpg --> basename: ./ ext: .jpg |
2804 | | * (4) The input can have either forward (unix) or backward (win) |
2805 | | * slash separators. The output has unix separators. |
2806 | | * (5) Note that basename, as used here, is different from the result |
2807 | | * of the unix program 'basename'. Here, basename is the entire |
2808 | | * pathname up to a final extension and its preceding dot. |
2809 | | * </pre> |
2810 | | */ |
2811 | | l_ok |
2812 | | splitPathAtExtension(const char *pathname, |
2813 | | char **pbasename, |
2814 | | char **pextension) |
2815 | 0 | { |
2816 | 0 | char *tail, *dir, *lastdot; |
2817 | 0 | char empty[4] = ""; |
2818 | |
|
2819 | 0 | if (!pbasename && !pextension) |
2820 | 0 | return ERROR_INT("null input for both strings", __func__, 1); |
2821 | 0 | if (pbasename) *pbasename = NULL; |
2822 | 0 | if (pextension) *pextension = NULL; |
2823 | 0 | if (!pathname) |
2824 | 0 | return ERROR_INT("pathname not defined", __func__, 1); |
2825 | | |
2826 | | /* Split out the directory first */ |
2827 | 0 | splitPathAtDirectory(pathname, &dir, &tail); |
2828 | | |
2829 | | /* Then look for a "." in the tail part. |
2830 | | * This way we ignore all "." in the directory. */ |
2831 | 0 | if ((lastdot = strrchr(tail, '.'))) { |
2832 | 0 | if (pextension) |
2833 | 0 | *pextension = stringNew(lastdot); |
2834 | 0 | if (pbasename) { |
2835 | 0 | *lastdot = '\0'; |
2836 | 0 | *pbasename = stringJoin(dir, tail); |
2837 | 0 | } |
2838 | 0 | } else { |
2839 | 0 | if (pextension) |
2840 | 0 | *pextension = stringNew(empty); |
2841 | 0 | if (pbasename) |
2842 | 0 | *pbasename = stringNew(pathname); |
2843 | 0 | } |
2844 | 0 | LEPT_FREE(dir); |
2845 | 0 | LEPT_FREE(tail); |
2846 | 0 | return 0; |
2847 | 0 | } |
2848 | | |
2849 | | |
2850 | | /*! |
2851 | | * \brief pathJoin() |
2852 | | * |
2853 | | * \param[in] dir [optional] can be null |
2854 | | * \param[in] fname [optional] can be null |
2855 | | * \return specially concatenated path, or NULL on error |
2856 | | * |
2857 | | * <pre> |
2858 | | * Notes: |
2859 | | * (1) Use unix-style pathname separators ('/'). |
2860 | | * (2) %fname can be the entire path, or part of the path containing |
2861 | | * at least one directory, or a tail without a directory, or NULL. |
2862 | | * (3) It produces a path that strips multiple slashes to a single |
2863 | | * slash, joins %dir and %fname by a slash, and has no trailing |
2864 | | * slashes (except in the cases where %dir == "/" and |
2865 | | * %fname == NULL, or v.v.). |
2866 | | * (4) If both %dir and %fname are null, produces an empty string. |
2867 | | * (5) Neither %dir nor %fname can begin with '..'. |
2868 | | * (6) The result is not canonicalized or tested for correctness: |
2869 | | * garbage in (e.g., /&%), garbage out. |
2870 | | * (7) Examples: |
2871 | | * //tmp// + //abc/ --> /tmp/abc |
2872 | | * tmp/ + /abc/ --> tmp/abc |
2873 | | * tmp/ + abc/ --> tmp/abc |
2874 | | * /tmp/ + /// --> /tmp |
2875 | | * /tmp/ + NULL --> /tmp |
2876 | | * // + /abc// --> /abc |
2877 | | * // + NULL --> / |
2878 | | * NULL + /abc/def/ --> /abc/def |
2879 | | * NULL + abc// --> abc |
2880 | | * NULL + // --> / |
2881 | | * NULL + NULL --> (empty string) |
2882 | | * "" + "" --> (empty string) |
2883 | | * "" + / --> / |
2884 | | * ".." + /etc/foo --> NULL |
2885 | | * /tmp + ".." --> NULL |
2886 | | * </pre> |
2887 | | */ |
2888 | | char * |
2889 | | pathJoin(const char *dir, |
2890 | | const char *fname) |
2891 | 0 | { |
2892 | 0 | const char *slash = "/"; |
2893 | 0 | char *str, *dest; |
2894 | 0 | l_int32 i, n1, n2, emptydir; |
2895 | 0 | size_t size; |
2896 | 0 | SARRAY *sa1, *sa2; |
2897 | 0 | L_BYTEA *ba; |
2898 | |
|
2899 | 0 | if (!dir && !fname) |
2900 | 0 | return stringNew(""); |
2901 | 0 | if (dir && strlen(dir) >= 2 && dir[0] == '.' && dir[1] == '.') |
2902 | 0 | return (char *)ERROR_PTR("dir starts with '..'", __func__, NULL); |
2903 | 0 | if (fname && strlen(fname) >= 2 && fname[0] == '.' && fname[1] == '.') |
2904 | 0 | return (char *)ERROR_PTR("fname starts with '..'", __func__, NULL); |
2905 | | |
2906 | 0 | sa1 = sarrayCreate(0); |
2907 | 0 | sa2 = sarrayCreate(0); |
2908 | 0 | ba = l_byteaCreate(4); |
2909 | | |
2910 | | /* Process %dir */ |
2911 | 0 | if (dir && strlen(dir) > 0) { |
2912 | 0 | if (dir[0] == '/') |
2913 | 0 | l_byteaAppendString(ba, slash); |
2914 | 0 | sarraySplitString(sa1, dir, "/"); /* removes all slashes */ |
2915 | 0 | n1 = sarrayGetCount(sa1); |
2916 | 0 | for (i = 0; i < n1; i++) { |
2917 | 0 | str = sarrayGetString(sa1, i, L_NOCOPY); |
2918 | 0 | l_byteaAppendString(ba, str); |
2919 | 0 | l_byteaAppendString(ba, slash); |
2920 | 0 | } |
2921 | 0 | } |
2922 | | |
2923 | | /* Special case to add leading slash: dir NULL or empty string */ |
2924 | 0 | emptydir = dir && strlen(dir) == 0; |
2925 | 0 | if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/') |
2926 | 0 | l_byteaAppendString(ba, slash); |
2927 | | |
2928 | | /* Process %fname */ |
2929 | 0 | if (fname && strlen(fname) > 0) { |
2930 | 0 | sarraySplitString(sa2, fname, "/"); |
2931 | 0 | n2 = sarrayGetCount(sa2); |
2932 | 0 | for (i = 0; i < n2; i++) { |
2933 | 0 | str = sarrayGetString(sa2, i, L_NOCOPY); |
2934 | 0 | l_byteaAppendString(ba, str); |
2935 | 0 | l_byteaAppendString(ba, slash); |
2936 | 0 | } |
2937 | 0 | } |
2938 | | |
2939 | | /* Remove trailing slash */ |
2940 | 0 | dest = (char *)l_byteaCopyData(ba, &size); |
2941 | 0 | if (size > 1 && dest[size - 1] == '/') |
2942 | 0 | dest[size - 1] = '\0'; |
2943 | |
|
2944 | 0 | sarrayDestroy(&sa1); |
2945 | 0 | sarrayDestroy(&sa2); |
2946 | 0 | l_byteaDestroy(&ba); |
2947 | 0 | return dest; |
2948 | 0 | } |
2949 | | |
2950 | | |
/*!
 * \brief   appendSubdirs()
 *
 * \param[in]    basedir
 * \param[in]    subdirs
 * \return  concatenated full directory path without trailing slash,
 *          or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Use unix pathname separators
 *      (2) Allocates a new string:  [basedir]/[subdirs]
 *      (3) Exactly one '/' separates the two parts: one is added if
 *          %basedir does not end with it, and a leading '/' on
 *          %subdirs is dropped.
 *      (4) An empty %basedir is accepted; the separator is still
 *          inserted, so the result begins with '/'.
 *      (5) The caller owns the returned string.
 * </pre>
 */
char *
appendSubdirs(const char  *basedir,
              const char  *subdirs)
{
    char    *newdir;
    size_t   len1, len2, size, newlen;

    if (!basedir || !subdirs)
        return (char *)ERROR_PTR("basedir and subdirs not both defined",
                                 __func__, NULL);

    len1 = strlen(basedir);
    len2 = strlen(subdirs);
    size = len1 + len2 + 8;  /* room for the separator and terminator */
    if ((newdir = (char *)LEPT_CALLOC(size, 1)) == NULL)
        return (char *)ERROR_PTR("newdir not made", __func__, NULL);
    stringCat(newdir, size, basedir);

    /* Add '/' if necessary.  The length test comes first, so that an
     * empty %basedir never indexes in front of the buffer. */
    if (len1 == 0 || newdir[len1 - 1] != '/')
        newdir[len1] = '/';

    if (subdirs[0] == '/')  /* add subdirs, stripping leading '/' */
        stringCat(newdir, size, subdirs + 1);
    else
        stringCat(newdir, size, subdirs);

    /* newdir is not empty here: it holds at least the separator */
    newlen = strlen(newdir);
    if (newdir[newlen - 1] == '/')  /* strip trailing '/' */
        newdir[newlen - 1] = '\0';

    return newdir;
}
2994 | | |
2995 | | |
2996 | | /*--------------------------------------------------------------------* |
2997 | | * Special file name operations * |
2998 | | *--------------------------------------------------------------------*/ |
2999 | | /*! |
3000 | | * \brief convertSepCharsInPath() |
3001 | | * |
3002 | | * \param[in] path |
3003 | | * \param[in] type UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR |
3004 | | * \return 0 if OK, 1 on error |
3005 | | * |
3006 | | * <pre> |
3007 | | * Notes: |
3008 | | * (1) In-place conversion. |
3009 | | * (2) Type is the resulting type: |
3010 | | * * UNIX_PATH_SEPCHAR: '\\' ==> '/' |
3011 | | * * WIN_PATH_SEPCHAR: '/' ==> '\\' |
3012 | | * (3) Virtually all path operations in leptonica use unix separators. |
3013 | | * (4) The backslash is a valid character in unix pathnames and should |
3014 | | * not be converted. Each backslash needs to be escaped with a |
3015 | | * preceding backslash for the shell, but the actual filename |
3016 | | * does not include these escape characters. |
3017 | | * </pre> |
3018 | | */ |
3019 | | l_ok |
3020 | | convertSepCharsInPath(char *path, |
3021 | | l_int32 type) |
3022 | 13.0k | { |
3023 | 13.0k | l_int32 i; |
3024 | 13.0k | size_t len; |
3025 | | |
3026 | 13.0k | if (!path) |
3027 | 0 | return ERROR_INT("path not defined", __func__, 1); |
3028 | 13.0k | if (type != UNIX_PATH_SEPCHAR && type != WIN_PATH_SEPCHAR) |
3029 | 0 | return ERROR_INT("invalid type", __func__, 1); |
3030 | | |
3031 | 13.0k | len = strlen(path); |
3032 | 13.0k | if (type == UNIX_PATH_SEPCHAR) { |
3033 | | #ifdef _WIN32 /* only convert on Windows */ |
3034 | | for (i = 0; i < len; i++) { |
3035 | | if (path[i] == '\\') |
3036 | | path[i] = '/'; |
3037 | | } |
3038 | | #endif /* _WIN32 */ |
3039 | 13.0k | } else { /* WIN_PATH_SEPCHAR */ |
3040 | 0 | for (i = 0; i < len; i++) { |
3041 | 0 | if (path[i] == '/') |
3042 | 0 | path[i] = '\\'; |
3043 | 0 | } |
3044 | 0 | } |
3045 | 13.0k | return 0; |
3046 | 13.0k | } |
3047 | | |
3048 | | |
3049 | | /*! |
3050 | | * \brief genPathname() |
3051 | | * |
3052 | | * \param[in] dir [optional] directory or full path name, |
3053 | | * with or without the trailing '/' |
3054 | | * \param[in] fname [optional] file name within a directory |
3055 | | * \return pathname either a directory or full path, or NULL on error |
3056 | | * |
3057 | | * <pre> |
3058 | | * Notes: |
3059 | | * (1) This function generates actual paths in the following ways: |
3060 | | * * from two sub-parts (e.g., a directory and a file name). |
3061 | | * * from a single path full path, placed in %dir, with |
3062 | | * %fname == NULL. |
3063 | | * * from the name of a file in the local directory placed in |
3064 | | * %fname, with %dir == NULL. |
3065 | | * * if in a "/tmp" directory and on iOS, macOS or Windows, |
3066 | | * the OS specific temp directory is used. |
3067 | | * (2) This does an automatic directory translation for operating |
3068 | | * systems that use a different path for /tmp. |
3069 | | * That path is determined |
3070 | | * * on Windows: by GetTempPath() |
3071 | | * * on macOS, iOS: by confstr() (see man page) |
3072 | | * (3) On unix, the TMPDIR variable is ignored. No rewriting |
3073 | | * of temp directories is permitted. |
3074 | | * (4) There are four cases for the input: |
3075 | | * (a) %dir is a directory and %fname is defined: result is a |
3076 | | * full path |
3077 | | * (b) %dir is a directory and %fname is null: result is a directory |
3078 | | * (c) %dir is a full path and %fname is null: result is a full path |
3079 | | * (d) %dir is null or an empty string: start in the current dir; |
3080 | | * result is a full path |
3081 | | * (5) In all cases, the resulting pathname is not terminated with a slash |
3082 | | * (6) The caller is responsible for freeing the returned pathname. |
3083 | | * </pre> |
3084 | | */ |
3085 | | char * |
3086 | | genPathname(const char *dir, |
3087 | | const char *fname) |
3088 | 6.51k | { |
3089 | | #if defined(REWRITE_TMP) |
3090 | | l_int32 rewrite_tmp = TRUE; |
3091 | | #else |
3092 | 6.51k | l_int32 rewrite_tmp = FALSE; |
3093 | 6.51k | #endif /* REWRITE_TMP */ |
3094 | 6.51k | char *cdir, *pathout; |
3095 | 6.51k | l_int32 dirlen, namelen; |
3096 | 6.51k | size_t size; |
3097 | | |
3098 | 6.51k | if (!dir && !fname) |
3099 | 0 | return (char *)ERROR_PTR("no input", __func__, NULL); |
3100 | | |
3101 | | /* Handle the case where we start from the current directory */ |
3102 | 6.51k | if (!dir || dir[0] == '\0') { |
3103 | 0 | if ((cdir = getcwd(NULL, 0)) == NULL) |
3104 | 0 | return (char *)ERROR_PTR("no current dir found", __func__, NULL); |
3105 | 6.51k | } else { |
3106 | 6.51k | if ((cdir = stringNew(dir)) == NULL) |
3107 | 0 | return (char *)ERROR_PTR("stringNew failed", __func__, NULL); |
3108 | 6.51k | } |
3109 | | |
3110 | | /* Convert to unix path separators, and remove the trailing |
3111 | | * slash in the directory, except when dir == "/" */ |
3112 | 6.51k | convertSepCharsInPath(cdir, UNIX_PATH_SEPCHAR); |
3113 | 6.51k | dirlen = strlen(cdir); |
3114 | 6.51k | if (cdir[dirlen - 1] == '/' && dirlen != 1) { |
3115 | 0 | cdir[dirlen - 1] = '\0'; |
3116 | 0 | dirlen--; |
3117 | 0 | } |
3118 | | |
3119 | 6.51k | namelen = (fname) ? strlen(fname) : 0; |
3120 | 6.51k | size = dirlen + namelen + 256; |
3121 | 6.51k | if ((pathout = (char *)LEPT_CALLOC(size, sizeof(char))) == NULL) { |
3122 | 0 | LEPT_FREE(cdir); |
3123 | 0 | return (char *)ERROR_PTR("pathout not made", __func__, NULL); |
3124 | 0 | } |
3125 | | |
3126 | | /* First handle %dir (which may be a full pathname). |
3127 | | * There is no path rewriting on unix, and on win32, we do not |
3128 | | * rewrite unless the specified directory is /tmp or |
3129 | | * a subdirectory of /tmp */ |
3130 | 6.51k | if (!rewrite_tmp || dirlen < 4 || |
3131 | 6.51k | (dirlen == 4 && strncmp(cdir, "/tmp", 4) != 0) || /* not in "/tmp" */ |
3132 | 6.51k | (dirlen > 4 && strncmp(cdir, "/tmp/", 5) != 0)) { /* not in "/tmp/" */ |
3133 | 6.51k | stringCopy(pathout, cdir, dirlen); |
3134 | 6.51k | } else { /* Rewrite with "/tmp" specified for the directory. */ |
3135 | | #if defined(__APPLE__) |
3136 | | size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, pathout, size); |
3137 | | if (n == 0 || n > size) { |
3138 | | /* Fall back to using /tmp */ |
3139 | | stringCopy(pathout, cdir, dirlen); |
3140 | | } else { |
3141 | | /* Add the rest of cdir */ |
3142 | | if (dirlen > 4) |
3143 | | stringCat(pathout, size, cdir + 4); |
3144 | | } |
3145 | | #elif defined(_WIN32) |
3146 | | l_int32 tmpdirlen; |
3147 | | char tmpdir[MAX_PATH]; |
3148 | | GetTempPathA(sizeof(tmpdir), tmpdir); /* get the Windows temp dir */ |
3149 | | tmpdirlen = strlen(tmpdir); |
3150 | | if (tmpdirlen > 0 && tmpdir[tmpdirlen - 1] == '\\') { |
3151 | | tmpdir[tmpdirlen - 1] = '\0'; /* trim the trailing '\' */ |
3152 | | } |
3153 | | tmpdirlen = strlen(tmpdir); |
3154 | | stringCopy(pathout, tmpdir, tmpdirlen); |
3155 | | |
3156 | | /* Add the rest of cdir */ |
3157 | | if (dirlen > 4) |
3158 | | stringCat(pathout, size, cdir + 4); |
3159 | | #endif /* _WIN32 */ |
3160 | 0 | } |
3161 | | |
3162 | | /* Now handle %fname */ |
3163 | 6.51k | if (fname && strlen(fname) > 0) { |
3164 | 0 | dirlen = strlen(pathout); |
3165 | 0 | pathout[dirlen] = '/'; |
3166 | 0 | stringCat(pathout, size, fname); |
3167 | 0 | } |
3168 | | |
3169 | 6.51k | LEPT_FREE(cdir); |
3170 | 6.51k | return pathout; |
3171 | 6.51k | } |
3172 | | |
3173 | | |
3174 | | /*! |
3175 | | * \brief makeTempDirname() |
3176 | | * |
3177 | | * \param[in] result preallocated on stack or heap and passed in |
3178 | | * \param[in] nbytes size of %result array, in bytes |
3179 | | * \param[in] subdir [optional]; can be NULL or an empty string |
3180 | | * \return 0 if OK, 1 on error |
3181 | | * |
3182 | | * <pre> |
3183 | | * Notes: |
3184 | | * (1) This generates the directory path for output temp files, |
3185 | | * written into %result with unix separators. |
3186 | | * (2) Caller allocates %result, large enough to hold the path, |
3187 | | * which is: |
3188 | | * /tmp/%subdir (unix) |
3189 | | * [Temp]/%subdir (Windows, macOS, iOS) |
3190 | | * where [Temp] is the OS path |
3191 | | * and %subdir is in general a set of nested subdirectories: |
3192 | | * dir1/dir2/.../dirN |
3193 | | * which in use would not typically exceed 2 levels. |
3194 | | * (3) Usage example: |
3195 | | * \code |
3196 | | * char result[256]; |
3197 | | * makeTempDirname(result, sizeof(result), "lept/golden"); |
3198 | | * \endcode |
3199 | | * </pre> |
3200 | | */ |
3201 | | l_ok |
3202 | | makeTempDirname(char *result, |
3203 | | size_t nbytes, |
3204 | | const char *subdir) |
3205 | 0 | { |
3206 | 0 | char *dir, *path; |
3207 | 0 | l_int32 ret = 0; |
3208 | 0 | size_t pathlen; |
3209 | |
|
3210 | 0 | if (!result) |
3211 | 0 | return ERROR_INT("result not defined", __func__, 1); |
3212 | 0 | if (subdir && ((subdir[0] == '.') || (subdir[0] == '/'))) |
3213 | 0 | return ERROR_INT("subdir not an actual subdirectory", __func__, 1); |
3214 | | |
3215 | 0 | memset(result, 0, nbytes); |
3216 | |
|
3217 | 0 | dir = pathJoin("/tmp", subdir); |
3218 | |
|
3219 | | #if defined(REWRITE_TMP) |
3220 | | path = genPathname(dir, NULL); |
3221 | | #else |
3222 | 0 | path = stringNew(dir); |
3223 | 0 | #endif /* ~ _WIN32 */ |
3224 | 0 | pathlen = strlen(path); |
3225 | 0 | if (pathlen < nbytes - 1) { |
3226 | 0 | stringCopy(result, path, nbytes); |
3227 | 0 | } else { |
3228 | 0 | L_ERROR("result array too small for path\n", __func__); |
3229 | 0 | ret = 1; |
3230 | 0 | } |
3231 | |
|
3232 | 0 | LEPT_FREE(dir); |
3233 | 0 | LEPT_FREE(path); |
3234 | 0 | return ret; |
3235 | 0 | } |
3236 | | |
3237 | | |
3238 | | /*! |
3239 | | * \brief modifyTrailingSlash() |
3240 | | * |
3241 | | * \param[in] path preallocated on stack or heap and passed in |
3242 | | * \param[in] nbytes size of %path array, in bytes |
3243 | | * \param[in] flag L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH |
3244 | | * \return 0 if OK, 1 on error |
3245 | | * |
3246 | | * <pre> |
3247 | | * Notes: |
3248 | | * (1) This carries out the requested action if necessary. |
3249 | | * </pre> |
3250 | | */ |
3251 | | l_ok |
3252 | | modifyTrailingSlash(char *path, |
3253 | | size_t nbytes, |
3254 | | l_int32 flag) |
3255 | 0 | { |
3256 | 0 | char lastchar; |
3257 | 0 | size_t len; |
3258 | |
|
3259 | 0 | if (!path) |
3260 | 0 | return ERROR_INT("path not defined", __func__, 1); |
3261 | 0 | if (flag != L_ADD_TRAIL_SLASH && flag != L_REMOVE_TRAIL_SLASH) |
3262 | 0 | return ERROR_INT("invalid flag", __func__, 1); |
3263 | | |
3264 | 0 | len = strlen(path); |
3265 | 0 | lastchar = path[len - 1]; |
3266 | 0 | if (flag == L_ADD_TRAIL_SLASH && lastchar != '/' && len < nbytes - 2) { |
3267 | 0 | path[len] = '/'; |
3268 | 0 | path[len + 1] = '\0'; |
3269 | 0 | } else if (flag == L_REMOVE_TRAIL_SLASH && lastchar == '/') { |
3270 | 0 | path[len - 1] = '\0'; |
3271 | 0 | } |
3272 | 0 | return 0; |
3273 | 0 | } |
3274 | | |
3275 | | |
3276 | | /*! |
3277 | | * \brief l_makeTempFilename() |
3278 | | * |
3279 | | * \return fname : heap allocated filename; returns NULL on failure. |
3280 | | * |
3281 | | * <pre> |
3282 | | * Notes: |
3283 | | * (1) On unix, this makes a filename of the form |
3284 | | * "/tmp/lept.XXXXXX", |
3285 | | * where each X is a random character. |
3286 | | * (2) On Windows, this makes a filename of the form |
3287 | | * "/[Temp]/lp.XXXXXX". |
3288 | | * (3) On all systems, this fails if the file is not writable. |
3289 | | * (4) Safest usage is to write to a subdirectory in debug code. |
3290 | | * (5) The returned filename must be freed by the caller, using lept_free. |
3291 | | * (6) The tail of the filename has a '.', so that cygwin interprets |
3292 | | * the file as having an extension. Otherwise, cygwin assumes it |
3293 | | * is an executable and appends ".exe" to the filename. |
3294 | | * (7) On unix, whenever possible use tmpfile() instead. tmpfile() |
3295 | | * hides the file name, returns a stream opened for write, |
3296 | | * and deletes the temp file when the stream is closed. |
3297 | | * </pre> |
3298 | | */ |
3299 | | char * |
3300 | | l_makeTempFilename(void) |
3301 | 0 | { |
3302 | 0 | char dirname[240]; |
3303 | |
|
3304 | 0 | if (makeTempDirname(dirname, sizeof(dirname), NULL) == 1) |
3305 | 0 | return (char *)ERROR_PTR("failed to make dirname", __func__, NULL); |
3306 | | |
3307 | 0 | #ifndef _WIN32 |
3308 | 0 | { |
3309 | 0 | char *pattern; |
3310 | 0 | l_int32 fd; |
3311 | 0 | pattern = stringConcatNew(dirname, "/lept.XXXXXX", NULL); |
3312 | 0 | fd = mkstemp(pattern); |
3313 | 0 | if (fd == -1) { |
3314 | 0 | LEPT_FREE(pattern); |
3315 | 0 | return (char *)ERROR_PTR("mkstemp failed", __func__, NULL); |
3316 | 0 | } |
3317 | 0 | close(fd); |
3318 | 0 | return pattern; |
3319 | 0 | } |
3320 | | #else |
3321 | | { |
3322 | | char fname[MAX_PATH]; |
3323 | | FILE *fp; |
3324 | | if (GetTempFileNameA(dirname, "lp.", 0, fname) == 0) |
3325 | | return (char *)ERROR_PTR("GetTempFileName failed", __func__, NULL); |
3326 | | if ((fp = fopen(fname, "wb")) == NULL) |
3327 | | return (char *)ERROR_PTR("file cannot be written to", __func__, NULL); |
3328 | | fclose(fp); |
3329 | | return stringNew(fname); |
3330 | | } |
3331 | | #endif /* ~ _WIN32 */ |
3332 | 0 | } |
3333 | | |
3334 | | |
3335 | | /*! |
3336 | | * \brief extractNumberFromFilename() |
3337 | | * |
3338 | | * \param[in] fname |
3339 | | * \param[in] numpre number of characters before the digits to be found |
3340 | | * \param[in] numpost number of characters after the digits to be found |
3341 | | * \return num number embedded in the filename; -1 on error or if |
3342 | | * not found |
3343 | | * |
3344 | | * <pre> |
3345 | | * Notes: |
3346 | | * (1) The number is to be found in the basename, which is the |
3347 | | * filename without either the directory or the last extension. |
3348 | | * (2) When a number is found, it is non-negative. If no number |
3349 | | * is found, this returns -1, without an error message. The |
3350 | | * caller needs to check. |
3351 | | * </pre> |
3352 | | */ |
3353 | | l_int32 |
3354 | | extractNumberFromFilename(const char *fname, |
3355 | | l_int32 numpre, |
3356 | | l_int32 numpost) |
3357 | 0 | { |
3358 | 0 | char *tail, *basename; |
3359 | 0 | l_int32 len, nret, num; |
3360 | |
|
3361 | 0 | if (!fname) |
3362 | 0 | return ERROR_INT("fname not defined", __func__, -1); |
3363 | | |
3364 | 0 | splitPathAtDirectory(fname, NULL, &tail); |
3365 | 0 | splitPathAtExtension(tail, &basename, NULL); |
3366 | 0 | LEPT_FREE(tail); |
3367 | |
|
3368 | 0 | len = strlen(basename); |
3369 | 0 | if (numpre + numpost > len - 1) { |
3370 | 0 | LEPT_FREE(basename); |
3371 | 0 | return ERROR_INT("numpre + numpost too big", __func__, -1); |
3372 | 0 | } |
3373 | | |
3374 | 0 | basename[len - numpost] = '\0'; |
3375 | 0 | nret = sscanf(basename + numpre, "%d", &num); |
3376 | 0 | LEPT_FREE(basename); |
3377 | |
|
3378 | 0 | if (nret == 1) |
3379 | 0 | return num; |
3380 | 0 | else |
3381 | 0 | return -1; /* not found */ |
3382 | 0 | } |