Coverage Report

Created: 2023-01-17 06:24

/src/htslib/cram/mFILE.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2005-2006, 2008-2009, 2013, 2015, 2017-2019 Genome Research Ltd.
3
Author: James Bonfield <jkb@sanger.ac.uk>
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are met:
7
8
   1. Redistributions of source code must retain the above copyright notice,
9
this list of conditions and the following disclaimer.
10
11
   2. Redistributions in binary form must reproduce the above copyright notice,
12
this list of conditions and the following disclaimer in the documentation
13
and/or other materials provided with the distribution.
14
15
   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16
Institute nor the names of its contributors may be used to endorse or promote
17
products derived from this software without specific prior written permission.
18
19
THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
32
#include <config.h>
33
34
#include <stdio.h>
35
#include <stdlib.h>
36
#include <errno.h>
37
#include <string.h>
38
#include <sys/types.h>
39
#include <sys/stat.h>
40
#include <fcntl.h>
41
#include <unistd.h>
42
#include <stdarg.h>
43
44
#include "../htslib/hts_log.h"
45
#include "os.h"
46
#include "mFILE.h"
47
48
#ifdef HAVE_MMAP
49
#include <sys/mman.h>
50
#endif
51
52
/*
53
 * This file contains memory-based versions of the most commonly used
54
 * (by io_lib) stdio functions.
55
 *
56
 * Actual file IO takes place either on opening or closing an mFILE.
57
 *
58
 * Coupled to this are a bunch of rather scary macros which can be obtained
59
 * by including stdio_hack.h. It is recommended though that you use mFILE.h
60
 * instead and replace fopen with mfopen (etc). This is more or less
61
 * mandatory if you wish to use both FILE and mFILE structs in a single file.
62
 */
63
64
static mFILE *m_channel[3];  /* stdin, stdout and stderr fakes */
65
66
/*
 * Reads the entirety of fp into a newly allocated buffer.
 *
 * fp     - stream to read from, starting at its current position.
 * fn     - optional filename associated with fp. When it can be stat()ed
 *          and reports a non-zero size, that size is used to allocate the
 *          buffer once and to stop reading as soon as that many bytes have
 *          arrived. Otherwise (NULL, stat failure, or a reported size of
 *          zero) data is read in 8192 byte steps until EOF.
 * size   - receives the number of bytes read on success; it is not written
 *          on failure.
 * binary - only consulted on Windows, where it selects binary or text mode
 *          for the underlying descriptor.
 *
 * Returns a malloced buffer on success of length *size (caller frees)
 *         NULL on failure (out of memory or a read error on fp)
 */
static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
    struct stat sb;
    char *data = NULL;
    size_t allocated = 0, used = 0;
    size_t bufsize = 8192;
    size_t expected = 0;        /* 0 => size unknown, read until EOF */

#ifdef _WIN32
    if (binary)
        _setmode(_fileno(fp), _O_BINARY);
    else
        _setmode(_fileno(fp), _O_TEXT);
#else
    (void) binary;
#endif

    if (fn && -1 != stat(fn, &sb) && sb.st_size > 0) {
        expected = (size_t) sb.st_size;
        /* Refuse files whose size cannot be represented as a size_t */
        if ((off_t) expected != sb.st_size)
            return NULL;
        data = malloc(expected);
        if (!data)
            return NULL;
        allocated = bufsize = expected;
    }

    do {
        size_t len;

        if (used + bufsize > allocated) {
            char *datan;

            allocated += bufsize;
            datan = realloc(data, allocated);
            if (!datan) {
                free(data);
                return NULL;
            }
            data = datan;
        }

        len = fread(data + used, 1, allocated - used, fp);
        used += len;

        /*
         * A read error leaves feof() false, so without this check the
         * loop would spin forever when the size is unknown.
         */
        if (ferror(fp)) {
            free(data);
            return NULL;
        }
    } while (!feof(fp) && (expected == 0 || used < expected));

    *size = used;

    return data;
}
118
119
120
#ifdef HAVE_MMAP
121
/*
122
 * mmaps in the file, but only for reading currently.
123
 *
124
 * Returns 0 on success
125
 *        -1 on failure
126
 */
127
0
int mfmmap(mFILE *mf, FILE *fp, const char *fn) {
128
0
    struct stat sb;
129
130
0
    if (stat(fn, &sb) != 0)
131
0
        return -1;
132
133
0
    mf->size = sb.st_size;
134
0
    mf->data = mmap(NULL, mf->size, PROT_READ, MAP_SHARED,
135
0
                    fileno(fp), 0);
136
137
0
    if (!mf->data || mf->data == (void *)-1)
138
0
        return -1;
139
140
0
    mf->alloced = 0;
141
0
    return 0;
142
0
}
143
#endif
144
145
146
/*
147
 * Creates and returns m_channel[0].
148
 * We initialise this on the first attempted read, which then slurps in
149
 * all of stdin until EOF is met.
150
 */
151
0
mFILE *mstdin(void) {
152
0
    if (m_channel[0])
153
0
        return m_channel[0];
154
155
0
    m_channel[0] = mfcreate(NULL, 0);
156
0
    if (NULL == m_channel[0]) return NULL;
157
0
    m_channel[0]->fp = stdin;
158
0
    return m_channel[0];
159
0
}
160
161
0
static void init_mstdin(void) {
162
0
    static int done_stdin = 0;
163
0
    if (done_stdin)
164
0
        return;
165
166
0
    m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1);
167
0
    m_channel[0]->mode = MF_READ;
168
0
    done_stdin = 1;
169
0
}
170
171
/*
172
 * Creates and returns m_channel[1]. This is the fake for stdout. It starts as
173
 * an empty buffer which is physically written out only when mfflush or
174
 * mfclose are called.
175
 */
176
0
mFILE *mstdout(void) {
177
0
    if (m_channel[1])
178
0
        return m_channel[1];
179
180
0
    m_channel[1] = mfcreate(NULL, 0);
181
0
    if (NULL == m_channel[1]) return NULL;
182
0
    m_channel[1]->fp = stdout;
183
0
    m_channel[1]->mode = MF_WRITE;
184
0
    return m_channel[1];
185
0
}
186
187
/*
188
 * Stderr as an mFILE.
189
 * The code handles stderr by returning m_channel[2], but also checking
190
 * for stderr in fprintf (the common usage of it) to auto-flush.
191
 */
192
0
mFILE *mstderr(void) {
193
0
    if (m_channel[2])
194
0
        return m_channel[2];
195
196
0
    m_channel[2] = mfcreate(NULL, 0);
197
0
    if (NULL == m_channel[2]) return NULL;
198
0
    m_channel[2]->fp = stderr;
199
0
    m_channel[2]->mode = MF_WRITE;
200
0
    return m_channel[2];
201
0
}
202
203
204
/*
205
 * For creating existing mFILE pointers directly from memory buffers.
206
 */
207
0
mFILE *mfcreate(char *data, int size) {
208
0
    mFILE *mf = (mFILE *)malloc(sizeof(*mf));
209
0
    if (NULL == mf) return NULL;
210
0
    mf->fp = NULL;
211
0
    mf->data = data;
212
0
    mf->alloced = size;
213
0
    mf->size = size;
214
0
    mf->eof = 0;
215
0
    mf->offset = 0;
216
0
    mf->flush_pos = 0;
217
0
    mf->mode = MF_READ | MF_WRITE;
218
0
    return mf;
219
0
}
220
221
/*
222
 * Recreate an existing mFILE to house new data/size.
223
 * It also rewinds the file.
224
 */
225
0
void mfrecreate(mFILE *mf, char *data, int size) {
226
0
    if (mf->data)
227
0
        free(mf->data);
228
0
    mf->data = data;
229
0
    mf->size = size;
230
0
    mf->alloced = size;
231
0
    mf->eof = 0;
232
0
    mf->offset = 0;
233
0
    mf->flush_pos = 0;
234
0
}
235
236
237
/*
238
 * Creates a new mFILE to contain the contents of the FILE pointer.
239
 * This mFILE is purely for in-memory operations and has no links to the
240
 * original FILE* it came from. It also doesn't close the FILE pointer.
241
 * Consider using mfreopen() is you need different behaviour.
242
 *
243
 * Returns mFILE * on success
244
 *         NULL on failure.
245
 */
246
0
mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) {
247
0
    mFILE *mf;
248
249
    /* Open using mfreopen() */
250
0
    if (NULL == (mf = mfreopen(path, mode_str, fp)))
251
0
        return NULL;
252
253
    /* Disassociate from the input stream */
254
0
    mf->fp = NULL;
255
256
0
    return mf;
257
0
}
258
259
/*
260
 * Converts a FILE * to an mFILE *.
261
 * Use this for wrapper functions to turn external prototypes requiring
262
 * FILE * as an argument into internal code using mFILE *.
263
 */
264
0
mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
265
0
    mFILE *mf;
266
0
    int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0;
267
268
    /* Parse mode:
269
     * r = read file contents (if truncated => don't read)
270
     * w = write on close
271
     * a = position at end of buffer
272
     * x = position at same location as the original fp, don't seek on flush
273
     * + = for update (read and write)
274
     * m = mmap (read only)
275
     */
276
0
    if (strchr(mode_str, 'r'))
277
0
        r = 1, mode |= MF_READ;
278
0
    if (strchr(mode_str, 'w'))
279
0
        w = 1, mode |= MF_WRITE | MF_TRUNC;
280
0
    if (strchr(mode_str, 'a'))
281
0
        w = a = 1, mode |= MF_WRITE | MF_APPEND;
282
0
    if (strchr(mode_str, 'b'))
283
0
        b = 1, mode |= MF_BINARY;
284
0
    if (strchr(mode_str, 'x'))
285
0
        x = 1;
286
0
    if (strchr(mode_str, '+')) {
287
0
        w = 1, mode |= MF_READ | MF_WRITE;
288
0
        if (a)
289
0
            r = 1;
290
0
    }
291
0
#ifdef HAVE_MMAP
292
0
    if (strchr(mode_str, 'm'))
293
0
        if (!w) mode |= MF_MMAP;
294
0
#endif
295
296
0
    if (r) {
297
0
        mf = mfcreate(NULL, 0);
298
0
        if (NULL == mf) return NULL;
299
0
        if (!(mode & MF_TRUNC)) {
300
0
#ifdef HAVE_MMAP
301
0
            if (mode & MF_MMAP) {
302
0
                if (mfmmap(mf, fp, path) == -1) {
303
0
                    mf->data = NULL;
304
0
                    mode &= ~MF_MMAP;
305
0
                }
306
0
            }
307
0
#endif
308
0
            if (!mf->data) {
309
0
                mf->data = mfload(fp, path, &mf->size, b);
310
0
                if (!mf->data) {
311
0
                    free(mf);
312
0
                    return NULL;
313
0
                }
314
0
                mf->alloced = mf->size;
315
0
                if (!a)
316
0
                    fseek(fp, 0, SEEK_SET);
317
0
            }
318
0
        }
319
0
    } else if (w) {
320
        /* Write - initialise the data structures */
321
0
        mf = mfcreate(NULL, 0);
322
0
        if (NULL == mf) return NULL;
323
0
    } else {
324
0
        hts_log_error("Must specify either r, w or a for mode");
325
0
        return NULL;
326
0
    }
327
0
    mf->fp = fp;
328
0
    mf->mode = mode;
329
330
0
    if (x) {
331
0
        mf->mode |= MF_MODEX;
332
0
    }
333
334
0
    if (a) {
335
0
        mf->flush_pos = mf->size;
336
0
        fseek(fp, 0, SEEK_END);
337
0
    }
338
339
0
    return mf;
340
0
}
341
342
/*
343
 * Opens a file. If we have read access (r or a+) then it loads the entire
344
 * file into memory. If We have write access then the pathname is stored.
345
 * We do not actually write until an mfclose, which then checks this pathname.
346
 */
347
0
mFILE *mfopen(const char *path, const char *mode) {
348
0
    FILE *fp;
349
350
0
    if (NULL == (fp = fopen(path, mode)))
351
0
        return NULL;
352
0
    return mfreopen(path, mode, fp);
353
0
}
354
355
/*
356
 * Closes an mFILE. If the filename is known (implying write access) then this
357
 * also writes the data to disk.
358
 *
359
 * Stdout is handled by calling mfflush which writes to stdout if appropriate.
360
 */
361
0
int mfclose(mFILE *mf) {
362
0
    if (!mf)
363
0
        return -1;
364
365
0
    mfflush(mf);
366
367
0
#ifdef HAVE_MMAP
368
0
    if ((mf->mode & MF_MMAP) && mf->data) {
369
        /* Mmaped */
370
0
        munmap(mf->data, mf->size);
371
0
        mf->data = NULL;
372
0
    }
373
0
#endif
374
375
0
    if (mf->fp)
376
0
        fclose(mf->fp);
377
378
0
    mfdestroy(mf);
379
380
0
    return 0;
381
0
}
382
383
/*
384
 * Closes the file pointer contained within the mFILE without destroying
385
 * the in-memory data.
386
 *
387
 * Attempting to do this on an mmaped buffer is an error.
388
 */
389
0
int mfdetach(mFILE *mf) {
390
0
    if (!mf)
391
0
        return -1;
392
393
0
    mfflush(mf);
394
0
    if (mf->mode & MF_MMAP)
395
0
        return -1;
396
397
0
    if (mf->fp) {
398
0
        fclose(mf->fp);
399
0
        mf->fp = NULL;
400
0
    }
401
402
0
    return 0;
403
0
}
404
405
/*
406
 * Destroys an mFILE structure but does not flush or close it
407
 */
408
0
int mfdestroy(mFILE *mf) {
409
0
    if (!mf)
410
0
        return -1;
411
412
0
    if (mf->data)
413
0
        free(mf->data);
414
0
    free(mf);
415
416
0
    return 0;
417
0
}
418
419
/*
420
 * Steals that data out of an mFILE.  The mFILE itself will be closed.
421
 * It is up to the caller to free the stolen buffer.  If size_out is
422
 * not NULL, mf->size will be stored in it.
423
 * This is more-or-less the opposite of mfcreate().
424
 *
425
 * Note, we cannot steal the allocated buffer from an mmaped mFILE.
426
 */
427
428
0
void *mfsteal(mFILE *mf, size_t *size_out) {
429
0
    void *data;
430
431
0
    if (!mf) return NULL;
432
433
0
    data = mf->data;
434
435
0
    if (NULL != size_out) *size_out = mf->size;
436
437
0
    if (mfdetach(mf) != 0)
438
0
        return NULL;
439
440
0
    mf->data = NULL;
441
0
    mfdestroy(mf);
442
443
0
    return data;
444
0
}
445
446
/*
447
 * Seek/tell functions. Nothing more than updating and reporting an
448
 * in-memory index. NB we can seek on stdin or stdout even provided we
449
 * haven't been flushing.
450
 */
451
0
int mfseek(mFILE *mf, long offset, int whence) {
452
0
    switch (whence) {
453
0
    case SEEK_SET:
454
0
        mf->offset = offset;
455
0
        break;
456
0
    case SEEK_CUR:
457
0
        mf->offset += offset;
458
0
        break;
459
0
    case SEEK_END:
460
0
        mf->offset = mf->size + offset;
461
0
        break;
462
0
    default:
463
0
        errno = EINVAL;
464
0
        return -1;
465
0
    }
466
467
0
    mf->eof = 0;
468
0
    return 0;
469
0
}
470
471
0
/* Reports the current in-memory offset of mf. */
long mftell(mFILE *mf) {
    long pos = mf->offset;

    return pos;
}
474
475
0
/* Moves mf back to offset 0 and clears its EOF flag. */
void mrewind(mFILE *mf) {
    mf->eof = 0;
    mf->offset = 0;
}
479
480
/*
481
 * mftruncate is not directly a translation of ftruncate as the latter
482
 * takes a file descriptor instead of a FILE *. It performs the analogous
483
 * role though.
484
 *
485
 * If offset is -1 then the file is truncated to be the current file
486
 * offset.
487
 */
488
0
void mftruncate(mFILE *mf, long offset) {
489
0
    mf->size = offset != -1 ? offset : mf->offset;
490
0
    if (mf->offset > mf->size)
491
0
        mf->offset = mf->size;
492
0
}
493
494
0
/* Returns the EOF flag of mf, as set by the read functions. */
int mfeof(mFILE *mf) {
    int at_eof = mf->eof;

    return at_eof;
}
497
498
/*
499
 * mFILE read/write functions. Basically these turn fread/fwrite syntax
500
 * into memcpy statements, with appropriate memory handling for writing.
501
 */
502
0
size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
503
0
    size_t len;
504
0
    char *cptr = (char *)ptr;
505
506
0
    if (mf == m_channel[0]) init_mstdin();
507
508
0
    if (mf->size <= mf->offset)
509
0
        return 0;
510
511
0
    len = size * nmemb <= mf->size - mf->offset
512
0
        ? size * nmemb
513
0
        : mf->size - mf->offset;
514
0
    if (!size)
515
0
        return 0;
516
517
0
    memcpy(cptr, &mf->data[mf->offset], len);
518
0
    mf->offset += len;
519
520
0
    if (len != size * nmemb) {
521
0
        mf->eof = 1;
522
0
    }
523
524
0
    return len / size;
525
0
}
526
527
0
/*
 * Writes nmemb items of 'size' bytes from ptr into mf at the current
 * offset (or at the end of the data in append mode), growing the buffer
 * as required and advancing the offset.
 *
 * If the offset lies beyond the current end of data, the gap is filled
 * with zero bytes so that no uninitialised memory becomes part of the
 * file.
 *
 * Returns nmemb on success
 *         0 if mf is not writable, the request is too large to be
 *         represented, or memory could not be allocated
 */
size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
    size_t bytes, needed;

    if (!(mf->mode & MF_WRITE))
        return 0;

    /* Guard size * nmemb against overflow */
    if (size != 0 && nmemb > (size_t)-1 / size)
        return 0;
    bytes = size * nmemb;

    /* Append mode => forced all writes to end of file */
    if (mf->mode & MF_APPEND)
        mf->offset = mf->size;

    /* Guard offset + bytes against overflow */
    if (bytes > (size_t)-1 - mf->offset)
        return 0;
    needed = mf->offset + bytes;

    /* Make sure we have enough room: double until it fits, realloc once */
    if (needed > mf->alloced) {
        size_t new_alloced = mf->alloced ? mf->alloced : 1024;
        void *new_data;

        while (new_alloced < needed) {
            if (new_alloced > (size_t)-1 / 2) {
                new_alloced = needed;
                break;
            }
            new_alloced *= 2;
        }

        new_data = realloc(mf->data, new_alloced);
        if (NULL == new_data) return 0;
        mf->alloced = new_alloced;
        mf->data    = new_data;
    }

    /* Record where we need to reflush from */
    if (mf->offset < mf->flush_pos)
        mf->flush_pos = mf->offset;

    /* Zero any hole left by seeking past the end before writing */
    if (mf->offset > mf->size)
        memset(&mf->data[mf->size], 0, mf->offset - mf->size);

    /* Copy the data over */
    if (bytes > 0)
        memcpy(&mf->data[mf->offset], ptr, bytes);
    mf->offset += bytes;
    if (mf->size < mf->offset)
        mf->size = mf->offset;

    return nmemb;
}
556
557
0
/*
 * Reads one byte from mf, loading the stdin fake on first use.
 *
 * Returns the byte as an unsigned char value on success
 *         -1 at end of data, with the EOF flag set
 */
int mfgetc(mFILE *mf) {
    if (mf == m_channel[0]) init_mstdin();

    if (mf->offset >= mf->size) {
        mf->eof = 1;
        return -1;
    }

    return (unsigned char)mf->data[mf->offset++];
}
566
567
0
/*
 * Steps mf back by one byte and stores c at that position, overwriting
 * whatever the buffer held there.
 *
 * Returns c on success
 *         -1 if mf is already at offset 0 (the EOF flag is set as well)
 */
int mungetc(int c, mFILE *mf) {
    if (mf->offset > 0) {
        mf->offset--;
        mf->data[mf->offset] = c;
        return c;
    }

    mf->eof = 1;
    return -1;
}
576
577
0
/*
 * Reads at most size-1 bytes from mf into s, stopping after a newline
 * (which is kept) or at end of data, and NUL-terminates the result. The
 * stdin fake is loaded on first use. Reaching the end of data sets the
 * EOF flag.
 *
 * Returns s if at least one byte was read
 *         NULL if nothing was read, or if size is not positive (in which
 *         case s is not touched, as it has no room even for the NUL)
 */
char *mfgets(char *s, int size, mFILE *mf) {
    int i = 0;

    if (mf == m_channel[0]) init_mstdin();

    if (size <= 0)
        return NULL;

    while (i < size - 1) {
        if (mf->offset >= mf->size) {
            mf->eof = 1;
            break;
        }

        s[i] = mf->data[mf->offset++];
        if (s[i++] == '\n')
            break;
    }

    s[i] = 0;
    return i ? s : NULL;
}
596
597
/*
598
 * Flushes an mFILE. If this is a real open of a file in write mode then
599
 * mFILE->fp will be set. We then write out any new data in mFILE since the
600
 * last flush. We cannot tell what may have been modified as we don't keep
601
 * track of that, so we typically rewrite out the entire file contents between
602
 * the last flush_pos and the end of file.
603
 *
604
 * For stderr/stdout we also reset the offsets so we cannot modify things
605
 * we've already output.
606
 */
607
0
int mfflush(mFILE *mf) {
608
0
    if (!mf->fp)
609
0
        return 0;
610
611
    /* FIXME: only do this when opened in write mode */
612
0
    if (mf == m_channel[1] || mf == m_channel[2]) {
613
0
        if (mf->flush_pos < mf->size) {
614
0
            size_t bytes = mf->size - mf->flush_pos;
615
0
            if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
616
0
                return -1;
617
0
            if (0 != fflush(mf->fp))
618
0
                return -1;
619
0
        }
620
621
        /* Stdout & stderr are non-seekable streams so throw away the data */
622
0
        mf->offset = mf->size = mf->flush_pos = 0;
623
0
    }
624
625
    /* only flush when opened in write mode */
626
0
    if (mf->mode & MF_WRITE) {
627
0
        if (mf->flush_pos < mf->size) {
628
0
            size_t bytes = mf->size - mf->flush_pos;
629
0
            if (!(mf->mode & MF_MODEX)) {
630
0
                fseek(mf->fp, mf->flush_pos, SEEK_SET);
631
0
            }
632
0
            if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
633
0
                return -1;
634
0
            if (0 != fflush(mf->fp))
635
0
                return -1;
636
0
        }
637
0
        if (ftell(mf->fp) != -1 &&
638
0
            ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1)
639
0
            return -1;
640
0
        mf->flush_pos = mf->size;
641
0
    }
642
643
0
    return 0;
644
0
}
645
646
/*
647
 * Converts an mFILE from binary to ascii mode by replacing all
648
 * cr-nl with nl.
649
 *
650
 * Primarily used on windows when we've uncompressed a binary file which
651
 * happens to be a text file (eg Experiment File). Previously we would have
652
 * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).
653
 *
654
 * Side effect: resets offset and flush_pos back to the start.
655
 */
656
0
void mfascii(mFILE *mf) {
657
0
    size_t p1, p2;
658
659
0
    for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {
660
0
        if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') {
661
0
            p2--; /* delete the \r */
662
0
        }
663
0
        mf->data[p2] = mf->data[p1];
664
0
    }
665
0
    mf->size = p2;
666
667
0
    mf->offset = mf->flush_pos = 0;
668
0
}