Coverage Report

Created: 2025-07-11 06:53

/src/htslib/region.c
Line
Count
Source (jump to first uncovered line)
1
/*  region.c -- Functions to create and free region lists
2
3
    Copyright (C) 2019 Genome Research Ltd.
4
5
    Author: Valeriu Ohan <vo2@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include "htslib/hts.h"
29
#include "htslib/khash.h"
30
31
typedef struct reglist
32
{
33
    uint32_t n, m;
34
    hts_pair_pos_t *a;
35
    int tid;
36
} reglist_t;
37
38
KHASH_MAP_INIT_INT(reg, reglist_t)
39
typedef kh_reg_t reghash_t;
40
41
static int compare_hts_pair_pos_t (const void *av, const void *bv)
42
0
{
43
0
    hts_pair_pos_t *a = (hts_pair_pos_t *) av;
44
0
    hts_pair_pos_t *b = (hts_pair_pos_t *) bv;
45
0
    if (a->beg < b->beg) return -1;
46
0
    if (a->beg > b->beg) return  1;
47
0
    if (a->end < b->end) return -1;
48
0
    if (a->end > b->end) return  1;
49
50
0
    return 0;
51
0
}
52
53
#if 0
54
/**
55
 * Good to have around for debugging
56
 */
57
static void reg_print(reghash_t *h) {
58
    reglist_t *p;
59
    khint_t k;
60
    uint32_t i;
61
    khint32_t key;
62
63
    if (!h) {
64
        fprintf(stderr, "Hash table is empty!\n");
65
        return;
66
    }
67
    for (k = kh_begin(h); k < kh_end(h); k++) {
68
        if (kh_exist(h,k)) {
69
            key = kh_key(h,k);
70
            fprintf(stderr, "Region: key %u tid %d\n", key, p->tid);
71
            if ((p = &kh_val(h,k)) != NULL && p->n > 0) {
72
                for (i=0; i<p->n; i++) {
73
                    fprintf(stderr, "\tinterval[%d]: %"PRIhts_pos"-%"PRIhts_pos"\n", i,
74
                            p->a[i].beg, p->a[i].end);
75
                }
76
            } else {
77
                fprintf(stderr, "Region key %u has no intervals!\n", key);
78
            }
79
        }
80
    }
81
}
82
#endif
83
84
/**
85
 * Sort and merge overlapping or adjacent intervals.
86
 */
87
0
static int reg_compact(reghash_t *h) {
88
0
    khint_t i;
89
0
    uint32_t j, new_n;
90
0
    reglist_t *p;
91
0
    int count = 0;
92
93
0
    if (!h)
94
0
        return 0;
95
96
0
    for (i = kh_begin(h); i < kh_end(h); i++) {
97
0
        if (!kh_exist(h,i) || !(p = &kh_val(h,i)) || !(p->n))
98
0
            continue;
99
100
0
        qsort(p->a, p->n, sizeof(p->a[0]), compare_hts_pair_pos_t);
101
0
        for (new_n = 0, j = 1; j < p->n; j++) {
102
0
            if (p->a[new_n].end < p->a[j].beg) {
103
0
                p->a[++new_n].beg = p->a[j].beg;
104
0
                p->a[new_n].end = p->a[j].end;
105
0
            } else {
106
0
                if (p->a[new_n].end < p->a[j].end)
107
0
                    p->a[new_n].end = p->a[j].end;
108
0
            }
109
0
        }
110
0
        ++new_n;
111
0
        if (p->n > new_n) {
112
            // Shrink array to required size.
113
0
            hts_pair_pos_t *new_a = realloc(p->a, new_n * sizeof(p->a[0]));
114
0
            if (new_a) p->a = new_a;
115
0
        }
116
0
        p->n = new_n;
117
0
        count++;
118
0
    }
119
120
0
    return count;
121
0
}
122
123
0
static int reg_insert(reghash_t *h, int tid, hts_pos_t beg, hts_pos_t end) {
124
125
0
    khint_t k;
126
0
    reglist_t *p;
127
128
0
    if (!h)
129
0
        return -1;
130
131
    // Put reg in the hash table if not already there
132
0
    k = kh_get(reg, h, tid);
133
0
    if (k == kh_end(h)) { // absent from the hash table
134
0
        int ret;
135
0
        k = kh_put(reg, h, tid, &ret);
136
0
        if (-1 == ret) {
137
0
            return -1;
138
0
        }
139
0
        memset(&kh_val(h, k), 0, sizeof(reglist_t));
140
0
        kh_val(h, k).tid = tid;
141
0
    }
142
0
    p = &kh_val(h, k);
143
144
    // Add beg and end to the list
145
0
    if (p->n == p->m) {
146
0
        uint32_t new_m = p->m ? p->m<<1 : 4;
147
0
        if (new_m == 0) return -1;
148
0
        hts_pair_pos_t *new_a = realloc(p->a, new_m * sizeof(p->a[0]));
149
0
        if (new_a == NULL) return -1;
150
0
        p->m = new_m;
151
0
        p->a = new_a;
152
0
    }
153
0
    p->a[p->n].beg = beg;
154
0
    p->a[p->n++].end = end;
155
156
0
    return 0;
157
0
}
158
159
0
static void reg_destroy(reghash_t *h) {
160
161
0
    khint_t k;
162
163
0
    if (!h)
164
0
        return;
165
166
0
    for (k = 0; k < kh_end(h); ++k) {
167
0
        if (kh_exist(h, k)) {
168
0
            free(kh_val(h, k).a);
169
0
        }
170
0
    }
171
0
    kh_destroy(reg, h);
172
0
}
173
174
/**
175
 * Take a char array of reg:interval elements and produce a hts_reglis_t with r_count elements.
176
 */
177
0
hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr,  hts_name2id_f getid) {
178
179
0
    if (!argv || argc < 1)
180
0
        return NULL;
181
182
0
    reghash_t *h = NULL;
183
0
    reglist_t *p;
184
0
    hts_reglist_t *h_reglist = NULL;
185
186
0
    khint_t k;
187
0
    int i, l_count = 0, tid;
188
0
    const char *q;
189
0
    hts_pos_t beg, end;
190
191
    /* First, transform the char array into a hash table */
192
0
    h = kh_init(reg);
193
0
    if (!h) {
194
0
        hts_log_error("Error when creating the region hash table");
195
0
        return NULL;
196
0
    }
197
198
0
    for (i=0; i<argc; i++) {
199
0
        if (!strcmp(argv[i], ".")) {
200
0
            q = argv[i] + 1;
201
0
            tid = HTS_IDX_START; beg = 0; end = INT64_MAX;
202
0
        } else if (!strcmp(argv[i], "*")) {
203
0
            q = argv[i] + 1;
204
0
            tid = HTS_IDX_NOCOOR; beg = 0; end = INT64_MAX;
205
0
        } else {
206
0
            q = hts_parse_region(argv[i], &tid, &beg, &end, getid, hdr,
207
0
                                 HTS_PARSE_THOUSANDS_SEP);
208
0
        }
209
0
        if (!q) {
210
0
            if (tid < -1) {
211
0
                hts_log_error("Failed to parse header");
212
0
                goto fail;
213
0
            } else {
214
                // not parsable as a region
215
0
                hts_log_warning("Region '%s' specifies an unknown reference name. Continue anyway", argv[i]);
216
0
                continue;
217
0
            }
218
0
        }
219
220
0
        if (reg_insert(h, tid, beg, end) != 0) {
221
0
            hts_log_error("Error when inserting region='%s' in the bed hash table at address=%p", argv[i], (void *) h);
222
0
            goto fail;
223
0
        }
224
0
    }
225
226
0
    *r_count = reg_compact(h);
227
0
    if (!*r_count)
228
0
        goto fail;
229
230
    /* Transform the hash table into a list */
231
0
    h_reglist = (hts_reglist_t *)calloc(*r_count, sizeof(hts_reglist_t));
232
0
    if (!h_reglist)
233
0
        goto fail;
234
235
0
    for (k = kh_begin(h); k < kh_end(h) && l_count < *r_count; k++) {
236
0
        if (!kh_exist(h,k) || !(p = &kh_val(h,k)))
237
0
            continue;
238
239
0
        h_reglist[l_count].tid = p->tid;
240
0
        h_reglist[l_count].intervals = p->a;
241
0
        h_reglist[l_count].count = p->n;
242
0
        p->a = NULL; // As we stole it.
243
244
        // After reg_compact(), list is ordered and non-overlapping, so...
245
0
        if (p->n > 0) {
246
0
            h_reglist[l_count].min_beg = h_reglist[l_count].intervals[0].beg;
247
0
            h_reglist[l_count].max_end = h_reglist[l_count].intervals[p->n - 1].end;
248
0
        } else {
249
0
            h_reglist[l_count].min_beg = 0;
250
0
            h_reglist[l_count].max_end = 0;
251
0
        }
252
253
0
        l_count++;
254
0
    }
255
0
    reg_destroy(h);
256
257
0
    return h_reglist;
258
259
0
fail:
260
0
    reg_destroy(h);
261
0
    if(h_reglist) hts_reglist_free(h_reglist, l_count);
262
263
0
    return NULL;
264
0
}
265
266
0
void hts_reglist_free(hts_reglist_t *reglist, int count) {
267
268
0
    int i;
269
0
    if(reglist) {
270
0
        for (i = 0; i < count; i++) {
271
0
            if (reglist[i].intervals)
272
0
                free(reglist[i].intervals);
273
0
        }
274
0
        free(reglist);
275
0
    }
276
0
}