_ZN7roaring7RoaringC2EmPKj:
   85|  13.0k|    Roaring(size_t n, const uint32_t *data) : Roaring() {
   86|  13.0k|        api::roaring_bitmap_add_many(&roaring, n, data);
   87|  13.0k|    }
_ZN7roaring7RoaringC2Ev:
   76|  19.5k|    Roaring() : roaring{} {
   77|       |        // The empty constructor roaring{} silences warnings from pedantic
   78|       |        // static analyzers.
   79|  19.5k|        api::roaring_bitmap_init_cleared(&roaring);
   80|  19.5k|    }
_ZN7roaring7RoaringD2Ev:
  907|  58.6k|    ~Roaring() {
  908|  58.6k|        if (!(roaring.high_low_container.flags & ROARING_FLAG_FROZEN)) {
  ------------------
  |  |   47|  58.6k|#define ROARING_FLAG_FROZEN UINT8_C(0x2)
  ------------------
  |  Branch (908:13): [True: 58.6k, False: 0]
  ------------------
  909|  58.6k|            api::roaring_bitmap_clear(&roaring);
  910|  58.6k|        } else {
  911|       |            // The roaring member variable copies the `roaring_bitmap_t` and
  912|       |            // nested `roaring_array_t` structures by value and is freed in the
  913|       |            // constructor, however the underlying memory arena used for the
  914|       |            // container data is not freed with it. Here we derive the arena
  915|       |            // pointer from the second arena allocation in
  916|       |            // `roaring_bitmap_frozen_view` and free it as well.
  917|      0|            roaring_bitmap_free(
  918|      0|                (roaring_bitmap_t *)((char *)
  919|      0|                                         roaring.high_low_container.containers -
  920|      0|                                     sizeof(roaring_bitmap_t)));
  921|      0|        }
  922|  58.6k|    }
_ZN7roaring7Roaring11runOptimizeEv:
  487|  13.0k|    bool runOptimize() noexcept {
  488|  13.0k|        return api::roaring_bitmap_run_optimize(&roaring);
  489|  13.0k|    }
_ZN7roaring7Roaring11shrinkToFitEv:
  495|  6.51k|    size_t shrinkToFit() noexcept {
  496|  6.51k|        return api::roaring_bitmap_shrink_to_fit(&roaring);
  497|  6.51k|    }
_ZN7roaring7Roaring3addEj:
  198|  6.51k|    void add(uint32_t x) noexcept { api::roaring_bitmap_add(&roaring, x); }
_ZN7roaring7Roaring10addCheckedEj:
  205|  6.51k|    bool addChecked(uint32_t x) noexcept {
  206|  6.51k|        return api::roaring_bitmap_add_checked(&roaring, x);
  207|  6.51k|    }
_ZN7roaring7Roaring8addRangeEmm:
  212|  6.51k|    void addRange(const uint64_t min, const uint64_t max) noexcept {
  213|  6.51k|        return api::roaring_bitmap_add_range(&roaring, min, max);
  214|  6.51k|    }
_ZN7roaring7Roaring7addManyEmPKj:
  226|  6.51k|    void addMany(size_t n_args, const uint32_t *vals) noexcept {
  227|  6.51k|        api::roaring_bitmap_add_many(&roaring, n_args, vals);
  228|  6.51k|    }
_ZN7roaring7Roaring6removeEj:
  258|  6.51k|    void remove(uint32_t x) noexcept {
  259|  6.51k|        api::roaring_bitmap_remove(&roaring, x);
  260|  6.51k|    }
_ZN7roaring7Roaring13removeCheckedEj:
  267|  6.51k|    bool removeChecked(uint32_t x) noexcept {
  268|  6.51k|        return api::roaring_bitmap_remove_checked(&roaring, x);
  269|  6.51k|    }
_ZN7roaring7Roaring11removeRangeEmm:
  274|  6.51k|    void removeRange(uint64_t min, uint64_t max) noexcept {
  275|  6.51k|        return api::roaring_bitmap_remove_range(&roaring, min, max);
  276|  6.51k|    }
_ZN7roaring7Roaring17removeRangeClosedEjj:
  281|  6.51k|    void removeRangeClosed(uint32_t min, uint32_t max) noexcept {
  282|  6.51k|        return api::roaring_bitmap_remove_range_closed(&roaring, min, max);
  283|  6.51k|    }
_ZNK7roaring7Roaring7maximumEv:
  315|  6.51k|    uint32_t maximum() const noexcept {
  316|  6.51k|        return api::roaring_bitmap_maximum(&roaring);
  317|  6.51k|    }
_ZNK7roaring7Roaring7minimumEv:
  322|  6.51k|    uint32_t minimum() const noexcept {
  323|  6.51k|        return api::roaring_bitmap_minimum(&roaring);
  324|  6.51k|    }
_ZNK7roaring7Roaring8containsEj:
  329|  6.51k|    bool contains(uint32_t x) const noexcept {
  330|  6.51k|        return api::roaring_bitmap_contains(&roaring, x);
  331|  6.51k|    }
_ZNK7roaring7Roaring13containsRangeEmm:
  336|  6.51k|    bool containsRange(const uint64_t x, const uint64_t y) const noexcept {
  337|  6.51k|        return api::roaring_bitmap_contains_range(&roaring, x, y);
  338|  6.51k|    }
_ZNK7roaring7Roaring6selectEjPj:
  520|  6.51k|    bool select(uint32_t rnk, uint32_t *element) const noexcept {
  521|  6.51k|        return api::roaring_bitmap_select(&roaring, rnk, element);
  522|  6.51k|    }
_ZNK7roaring7Roaring9intersectERKS0_:
  534|  6.51k|    bool intersect(const Roaring &r) const noexcept {
  535|  6.51k|        return api::roaring_bitmap_intersect(&roaring, &r.roaring);
  536|  6.51k|    }
_ZNK7roaring7Roaring13jaccard_indexERKS0_:
  545|  6.51k|    double jaccard_index(const Roaring &r) const noexcept {
  546|  6.51k|        return api::roaring_bitmap_jaccard_index(&roaring, &r.roaring);
  547|  6.51k|    }
_ZNK7roaring7Roaring14or_cardinalityERKS0_:
  552|  6.51k|    uint64_t or_cardinality(const Roaring &r) const noexcept {
  553|  6.51k|        return api::roaring_bitmap_or_cardinality(&roaring, &r.roaring);
  554|  6.51k|    }
_ZNK7roaring7Roaring18andnot_cardinalityERKS0_:
  559|  6.51k|    uint64_t andnot_cardinality(const Roaring &r) const noexcept {
  560|  6.51k|        return api::roaring_bitmap_andnot_cardinality(&roaring, &r.roaring);
  561|  6.51k|    }
_ZNK7roaring7Roaring15xor_cardinalityERKS0_:
  567|  6.51k|    uint64_t xor_cardinality(const Roaring &r) const noexcept {
  568|  6.51k|        return api::roaring_bitmap_xor_cardinality(&roaring, &r.roaring);
  569|  6.51k|    }
_ZNK7roaring7Roaring4rankEj:
  579|  6.51k|    uint64_t rank(uint32_t x) const noexcept {
  580|  6.51k|        return api::roaring_bitmap_rank(&roaring, x);
  581|  6.51k|    }
_ZNK7roaring7Roaring14getSizeInBytesEb:
  726|  13.0k|    size_t getSizeInBytes(bool portable = true) const noexcept {
  727|  13.0k|        if (portable) {
  ------------------
  |  Branch (727:13): [True: 13.0k, False: 0]
  ------------------
  728|  13.0k|            return api::roaring_bitmap_portable_size_in_bytes(&roaring);
  729|  13.0k|        } else {
  730|      0|            return api::roaring_bitmap_size_in_bytes(&roaring);
  731|      0|        }
  732|  13.0k|    }
_ZNK7roaring7RoaringanERKS0_:
  788|  6.51k|    Roaring operator&(const Roaring &o) const {
  789|  6.51k|        roaring_bitmap_t *r = api::roaring_bitmap_and(&roaring, &o.roaring);
  790|  6.51k|        if (r == NULL) {
  ------------------
  |  Branch (790:13): [True: 0, False: 6.51k]
  ------------------
  791|      0|            ROARING_TERMINATE("failed materalization in and");
  ------------------
  |  |   27|      0|#define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  ------------------
  792|      0|        }
  793|  6.51k|        return Roaring(r);
  794|  6.51k|    }
_ZN7roaring7RoaringC2EPNS_3api16roaring_bitmap_sE:
  102|  32.5k|    explicit Roaring(roaring_bitmap_t *s) noexcept : roaring(*s) {
  103|  32.5k|        roaring_free(s);  // deallocate the passed-in pointer
  104|  32.5k|    }
_ZNK7roaring7RoaringmiERKS0_:
  801|  6.51k|    Roaring operator-(const Roaring &o) const {
  802|  6.51k|        roaring_bitmap_t *r = api::roaring_bitmap_andnot(&roaring, &o.roaring);
  803|  6.51k|        if (r == NULL) {
  ------------------
  |  Branch (803:13): [True: 0, False: 6.51k]
  ------------------
  804|      0|            ROARING_TERMINATE("failed materalization in andnot");
  ------------------
  |  |   27|      0|#define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  ------------------
  805|      0|        }
  806|  6.51k|        return Roaring(r);
  807|  6.51k|    }
_ZNK7roaring7RoaringorERKS0_:
  814|  6.51k|    Roaring operator|(const Roaring &o) const {
  815|  6.51k|        roaring_bitmap_t *r = api::roaring_bitmap_or(&roaring, &o.roaring);
  816|  6.51k|        if (r == NULL) {
  ------------------
  |  Branch (816:13): [True: 0, False: 6.51k]
  ------------------
  817|      0|            ROARING_TERMINATE("failed materalization in or");
  ------------------
  |  |   27|      0|#define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  ------------------
  818|      0|        }
  819|  6.51k|        return Roaring(r);
  820|  6.51k|    }
_ZNK7roaring7RoaringeoERKS0_:
  827|  6.51k|    Roaring operator^(const Roaring &o) const {
  828|  6.51k|        roaring_bitmap_t *r = api::roaring_bitmap_xor(&roaring, &o.roaring);
  829|  6.51k|        if (r == NULL) {
  ------------------
  |  Branch (829:13): [True: 0, False: 6.51k]
  ------------------
  830|      0|            ROARING_TERMINATE("failed materalization in xor");
  ------------------
  |  |   27|      0|#define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  ------------------
  831|      0|        }
  832|  6.51k|        return Roaring(r);
  833|  6.51k|    }
_ZN7roaring7RoaringoRERKS0_:
  375|  6.51k|    Roaring &operator|=(const Roaring &r) noexcept {
  376|  6.51k|        api::roaring_bitmap_or_inplace(&roaring, &r.roaring);
  377|  6.51k|        return *this;
  378|  6.51k|    }
_ZN7roaring7RoaringaNERKS0_:
  353|  6.51k|    Roaring &operator&=(const Roaring &r) noexcept {
  354|  6.51k|        api::roaring_bitmap_and_inplace(&roaring, &r.roaring);
  355|  6.51k|        return *this;
  356|  6.51k|    }
_ZN7roaring7RoaringmIERKS0_:
  363|  6.51k|    Roaring &operator-=(const Roaring &r) noexcept {
  364|  6.51k|        api::roaring_bitmap_andnot_inplace(&roaring, &r.roaring);
  365|  6.51k|        return *this;
  366|  6.51k|    }
_ZN7roaring7RoaringeOERKS0_:
  385|  6.51k|    Roaring &operator^=(const Roaring &r) noexcept {
  386|  6.51k|        api::roaring_bitmap_xor_inplace(&roaring, &r.roaring);
  387|  6.51k|        return *this;
  388|  6.51k|    }
_ZNK7roaring7RoaringeqERKS0_:
  452|  13.0k|    bool operator==(const Roaring &r) const noexcept {
  453|  13.0k|        return api::roaring_bitmap_equals(&roaring, &r.roaring);
  454|  13.0k|    }
_ZNK7roaring7Roaring11cardinalityEv:
  398|  6.51k|    uint64_t cardinality() const noexcept {
  399|  6.51k|        return api::roaring_bitmap_get_cardinality(&roaring);
  400|  6.51k|    }
_ZNK7roaring7Roaring7isEmptyEv:
  405|  13.0k|    bool isEmpty() const noexcept {
  406|  13.0k|        return api::roaring_bitmap_is_empty(&roaring);
  407|  13.0k|    }
_ZNK7roaring7Roaring13toUint32ArrayEPj:
  438|  6.51k|    void toUint32Array(uint32_t *ans) const noexcept {
  439|  6.51k|        api::roaring_bitmap_to_uint32_array(&roaring, ans);
  440|  6.51k|    }
_ZNK7roaring7Roaring8isSubsetERKS0_:
  422|  6.51k|    bool isSubset(const Roaring &r) const noexcept {
  423|  6.51k|        return api::roaring_bitmap_is_subset(&roaring, &r.roaring);
  424|  6.51k|    }
_ZNK7roaring7Roaring14isStrictSubsetERKS0_:
  429|  6.51k|    bool isStrictSubset(const Roaring &r) const noexcept {
  430|  6.51k|        return api::roaring_bitmap_is_strict_subset(&roaring, &r.roaring);
  431|  6.51k|    }
_ZN7roaring7Roaring4flipEmm:
  460|  6.51k|    void flip(uint64_t range_start, uint64_t range_end) noexcept {
  461|  6.51k|        api::roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
  462|  6.51k|    }
_ZN7roaring7Roaring10flipClosedEjj:
  468|  6.51k|    void flipClosed(uint32_t range_start, uint32_t range_end) noexcept {
  469|  6.51k|        api::roaring_bitmap_flip_inplace_closed(&roaring, range_start,
  470|  6.51k|                                                range_end);
  471|  6.51k|    }
_ZN7roaring7Roaring20removeRunCompressionEv:
  477|  6.51k|    bool removeRunCompression() noexcept {
  478|  6.51k|        return api::roaring_bitmap_remove_run_compression(&roaring);
  479|  6.51k|    }
_ZN7roaring7RoaringC2ERKS0_:
  110|  6.51k|    Roaring(const Roaring &r) : Roaring() {
  111|  6.51k|        if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) {
  ------------------
  |  Branch (111:13): [True: 0, False: 6.51k]
  ------------------
  112|      0|            ROARING_TERMINATE("failed roaring_bitmap_overwrite in constructor");
  ------------------
  |  |   27|      0|#define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  ------------------
  113|      0|        }
  114|  6.51k|        api::roaring_bitmap_set_copy_on_write(
  115|  6.51k|            &roaring, api::roaring_bitmap_get_copy_on_write(&r.roaring));
  116|  6.51k|    }
_ZN7roaring7RoaringC2EOS0_:
  122|  6.51k|    Roaring(Roaring &&r) noexcept : roaring(r.roaring) {
  123|       |        //
  124|       |        // !!! This clones the bits of the roaring structure to a new location
  125|       |        // and then overwrites the old bits...assuming that this will still
  126|       |        // work.  There are scenarios where this could break; e.g. if some of
  127|       |        // those bits were pointers into the structure memory itself.  If such
  128|       |        // things were possible, a roaring_bitmap_move() API would be needed.
  129|       |        //
  130|  6.51k|        api::roaring_bitmap_init_cleared(&r.roaring);
  131|  6.51k|    }
_ZN7roaring7RoaringaSERKS0_:
  152|  6.51k|    Roaring &operator=(const Roaring &r) {
  153|  6.51k|        if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) {
  ------------------
  |  Branch (153:13): [True: 0, False: 6.51k]
  ------------------
  154|      0|            ROARING_TERMINATE("failed memory alloc in assignment");
  ------------------
  |  |   27|      0|#define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  ------------------
  155|      0|        }
  156|  6.51k|        api::roaring_bitmap_set_copy_on_write(
  157|  6.51k|            &roaring, api::roaring_bitmap_get_copy_on_write(&r.roaring));
  158|  6.51k|        return *this;
  159|  6.51k|    }
_ZN7roaring7RoaringaSEOS0_:
  165|  6.51k|    Roaring &operator=(Roaring &&r) noexcept {
  166|  6.51k|        api::roaring_bitmap_clear(&roaring);  // free this class's allocations
  167|       |
  168|       |        // !!! See notes in the Move Constructor regarding roaring_bitmap_move()
  169|       |        //
  170|  6.51k|        roaring = r.roaring;
  171|  6.51k|        api::roaring_bitmap_init_cleared(&r.roaring);
  172|       |
  173|  6.51k|        return *this;
  174|  6.51k|    }
_ZN7roaring7Roaring8readSafeEPKcm:
  700|  13.0k|    static Roaring readSafe(const char *buf, size_t maxbytes) {
  701|  13.0k|        roaring_bitmap_t *r =
  702|  13.0k|            api::roaring_bitmap_portable_deserialize_safe(buf, maxbytes);
  703|  13.0k|        if (r == NULL) {
  ------------------
  |  Branch (703:13): [True: 6.50k, False: 6.52k]
  ------------------
  704|  6.50k|            ROARING_TERMINATE("failed alloc while reading");
  ------------------
  |  |   27|  6.50k|#define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  ------------------
  705|  6.50k|        }
  706|  6.52k|        return Roaring(r);
  707|  13.0k|    }
_ZNK7roaring7Roaring5writeEPcb:
  643|  6.51k|    size_t write(char *buf, bool portable = true) const noexcept {
  644|  6.51k|        if (portable) {
  ------------------
  |  Branch (644:13): [True: 6.51k, False: 0]
  ------------------
  645|  6.51k|            return api::roaring_bitmap_portable_serialize(&roaring, buf);
  646|  6.51k|        } else {
  647|      0|            return api::roaring_bitmap_serialize(&roaring, buf);
  648|      0|        }
  649|  6.51k|    }
_ZNK7roaring7Roaring8toStringEv:
  850|  6.51k|    std::string toString() const noexcept {
  851|  6.51k|        struct iter_data {
  852|  6.51k|            std::string str{};  // The empty constructor silences warnings from
  853|       |                                // pedantic static analyzers.
  854|  6.51k|            char first_char = '{';
  855|  6.51k|        } outer_iter_data;
  856|  6.51k|        if (!isEmpty()) {
  ------------------
  |  Branch (856:13): [True: 6.50k, False: 14]
  ------------------
  857|  6.50k|            iterate(
  858|  6.50k|                [](uint32_t value, void *inner_iter_data) -> bool {
  859|  6.50k|                    ((iter_data *)inner_iter_data)->str +=
  860|  6.50k|                        ((iter_data *)inner_iter_data)->first_char;
  861|  6.50k|                    ((iter_data *)inner_iter_data)->str +=
  862|  6.50k|                        std::to_string(value);
  863|  6.50k|                    ((iter_data *)inner_iter_data)->first_char = ',';
  864|  6.50k|                    return true;
  865|  6.50k|                },
  866|  6.50k|                (void *)&outer_iter_data);
  867|  6.50k|        } else
  868|     14|            outer_iter_data.str = '{';
  869|  6.51k|        outer_iter_data.str += '}';
  870|  6.51k|        return outer_iter_data.str;
  871|  6.51k|    }
_ZNK7roaring7Roaring7iterateEPFbjPvES1_:
  508|  6.50k|    void iterate(api::roaring_iterator iterator, void *ptr) const {
  509|  6.50k|        api::roaring_iterate(&roaring, iterator, ptr);
  510|  6.50k|    }
_ZZNK7roaring7Roaring8toStringEvENKUljPvE_clEjS1_:
  858|  3.91G|                [](uint32_t value, void *inner_iter_data) -> bool {
  859|  3.91G|                    ((iter_data *)inner_iter_data)->str +=
  860|  3.91G|                        ((iter_data *)inner_iter_data)->first_char;
  861|  3.91G|                    ((iter_data *)inner_iter_data)->str +=
  862|  3.91G|                        std::to_string(value);
  863|  3.91G|                    ((iter_data *)inner_iter_data)->first_char = ',';
  864|  3.91G|                    return true;
  865|  3.91G|                },
_ZNK7roaring7Roaring5beginEv:
 1067|  13.0k|inline RoaringSetBitBiDirectionalIterator Roaring::begin() const {
 1068|  13.0k|    return RoaringSetBitBiDirectionalIterator(*this);
 1069|  13.0k|}
_ZN7roaring34RoaringSetBitBiDirectionalIteratorC2ERKNS_7RoaringEb:
  961|  13.0k|                                                bool exhausted = false) {
  962|  13.0k|        if (exhausted) {
  ------------------
  |  Branch (962:13): [True: 1, False: 13.0k]
  ------------------
  963|      1|            i.parent = &parent.roaring;
  964|      1|            i.container_index = INT32_MAX;
  965|      1|            i.has_value = false;
  966|      1|            i.current_value = UINT32_MAX;
  967|  13.0k|        } else {
  968|  13.0k|            api::roaring_iterator_init(&parent.roaring, &i);
  969|  13.0k|        }
  970|  13.0k|    }
_ZNK7roaring34RoaringSetBitBiDirectionalIteratorneERKS0_:
 1058|   342k|    bool operator!=(const RoaringSetBitBiDirectionalIterator &o) const {
 1059|   342k|        return i.current_value != *o || i.has_value != o.i.has_value;
  ------------------
  |  Branch (1059:16): [True: 336k, False: 6.51k]
  |  Branch (1059:41): [True: 0, False: 6.51k]
  ------------------
 1060|   342k|    }
_ZNK7roaring34RoaringSetBitBiDirectionalIteratordeEv:
  975|   342k|    value_type operator*() const { return i.current_value; }
_ZNK7roaring7Roaring3endEv:
 1071|   342k|inline RoaringSetBitBiDirectionalIterator &Roaring::end() const {
 1072|   342k|    static RoaringSetBitBiDirectionalIterator e(*this, true);
 1073|   342k|    return e;
 1074|   342k|}
_ZN7roaring34RoaringSetBitBiDirectionalIteratorppEi:
 1006|   336k|    type_of_iterator operator++(int) {  // i++, must return orig. value
 1007|   336k|        RoaringSetBitBiDirectionalIterator orig(*this);
 1008|   336k|        api::roaring_uint32_iterator_advance(&i);
 1009|   336k|        return orig;
 1010|   336k|    }
_ZN7roaring34RoaringSetBitBiDirectionalIterator13equalorlargerEj:
 1021|  6.51k|    CROARING_DEPRECATED void equalorlarger(uint32_t val) {
 1022|  6.51k|        api::roaring_uint32_iterator_move_equalorlarger(&i, val);
 1023|  6.51k|    }

binarySearch:
   52|  7.44M|                            uint16_t ikey) {
   53|  7.44M|    const int32_t gap = 16;
   54|  7.44M|    if (lenarray < gap) {
  ------------------
  |  Branch (54:9): [True: 2.89M, False: 4.55M]
  ------------------
   55|  4.42M|        for (int32_t j = 0; j < lenarray; j++) {
  ------------------
  |  Branch (55:29): [True: 4.41M, False: 7.63k]
  ------------------
   56|  4.41M|            if (array[j] >= ikey) {
  ------------------
  |  Branch (56:17): [True: 2.89M, False: 1.52M]
  ------------------
   57|  2.89M|                return (array[j] == ikey) ? j : -(j + 1);
  ------------------
  |  Branch (57:24): [True: 2.73M, False: 153k]
  ------------------
   58|  2.89M|            }
   59|  4.41M|        }
   60|  7.63k|        return -(lenarray + 1);
   61|  2.89M|    }
   62|  4.55M|    const int32_t num_blocks = lenarray / gap;
   63|  4.55M|    int32_t base = 0;
   64|  4.55M|    int32_t n = num_blocks;
   65|  9.14M|    while (n > 3) {
  ------------------
  |  Branch (65:12): [True: 4.59M, False: 4.55M]
  ------------------
   66|  4.59M|        int32_t quarter = n >> 2;
   67|       |
   68|  4.59M|        int32_t k1 = array[(base + quarter + 1) * gap - 1];
   69|  4.59M|        int32_t k2 = array[(base + 2 * quarter + 1) * gap - 1];
   70|  4.59M|        int32_t k3 = array[(base + 3 * quarter + 1) * gap - 1];
   71|       |
   72|  4.59M|        int32_t c1 = (k1 < ikey);
   73|  4.59M|        int32_t c2 = (k2 < ikey);
   74|  4.59M|        int32_t c3 = (k3 < ikey);
   75|       |
   76|  4.59M|        base += (c1 + c2 + c3) * quarter;
   77|  4.59M|        n -= 3 * quarter;
   78|  4.59M|    }
   79|  8.42M|    while (n > 1) {
  ------------------
  |  Branch (79:12): [True: 3.87M, False: 4.55M]
  ------------------
   80|  3.87M|        int32_t half = n >> 1;
   81|  3.87M|        base = (array[(base + half + 1) * gap - 1] < ikey) ? base + half : base;
  ------------------
  |  Branch (81:16): [True: 550k, False: 3.32M]
  ------------------
   82|  3.87M|        n -= half;
   83|  3.87M|    }
   84|  4.55M|    int32_t lo = (array[(base + 1) * gap - 1] < ikey) ? base + 1 : base;
  ------------------
  |  Branch (84:18): [True: 2.13M, False: 2.41M]
  ------------------
   85|       |
   86|  4.55M|    if (lo < num_blocks) {
  ------------------
  |  Branch (86:9): [True: 4.22M, False: 329k]
  ------------------
   87|  4.22M|        const int32_t start = lo * gap;
   88|  4.22M|#if defined(CROARING_IS_X64)
   89|       |        // SSE2: subs_epu16 yields zero where lane >= ikey. movemask of an
   90|       |        // epi16 compare gives 2 bits per lane; ctz>>1 = lane index. Scan
   91|       |        // the first 8 lanes first and exit early when they contain the
   92|       |        // answer; otherwise the block-narrowing invariant guarantees the
   93|       |        // second-half mask is non-zero.
   94|  4.22M|        __m128i needle = _mm_set1_epi16((short)ikey);
   95|  4.22M|        __m128i zero = _mm_setzero_si128();
   96|  4.22M|        __m128i v0 = _mm_loadu_si128((const __m128i *)(array + start));
   97|  4.22M|        __m128i ge0 = _mm_cmpeq_epi16(_mm_subs_epu16(needle, v0), zero);
   98|  4.22M|        unsigned m0 = (unsigned)_mm_movemask_epi8(ge0);
   99|  4.22M|        if (m0 != 0) {
  ------------------
  |  Branch (99:13): [True: 2.99M, False: 1.22M]
  ------------------
  100|  2.99M|            int32_t j = start + (int32_t)(roaring_trailing_zeroes(m0) >> 1);
  101|  2.99M|            return (array[j] == ikey) ? j : -(j + 1);
  ------------------
  |  Branch (101:20): [True: 2.22M, False: 777k]
  ------------------
  102|  2.99M|        }
  103|  1.22M|        __m128i v1 = _mm_loadu_si128((const __m128i *)(array + start + 8));
  104|  1.22M|        __m128i ge1 = _mm_cmpeq_epi16(_mm_subs_epu16(needle, v1), zero);
  105|  1.22M|        unsigned m1 = (unsigned)_mm_movemask_epi8(ge1);
  106|  1.22M|        int32_t j = start + 8 + (int32_t)(roaring_trailing_zeroes(m1) >> 1);
  107|  1.22M|        return (array[j] == ikey) ? j : -(j + 1);
  ------------------
  |  Branch (107:16): [True: 532k, False: 689k]
  ------------------
  108|       |#else
  109|       |        const int32_t end = start + gap;
  110|       |        for (int32_t j = start; j < end; j++) {
  111|       |            if (array[j] >= ikey) {
  112|       |                return (array[j] == ikey) ? j : -(j + 1);
  113|       |            }
  114|       |        }
  115|       |        // Unreachable: the narrowing guarantees the last element of the
  116|       |        // selected block is >= ikey.
  117|       |        return -(end + 1);
  118|       |#endif
  119|  4.22M|    }
  120|       |
  121|  1.86M|    for (int32_t j = num_blocks * gap; j < lenarray; j++) {
  ------------------
  |  Branch (121:40): [True: 1.85M, False: 10.9k]
  ------------------
  122|  1.85M|        if (array[j] >= ikey) {
  ------------------
  |  Branch (122:13): [True: 318k, False: 1.53M]
  ------------------
  123|   318k|            return (array[j] == ikey) ? j : -(j + 1);
  ------------------
  |  Branch (123:20): [True: 179k, False: 139k]
  ------------------
  124|   318k|        }
  125|  1.85M|    }
  126|  10.9k|    return -(lenarray + 1);
  127|   329k|}
roaring.c:advanceUntil:
  137|    504|                                   int32_t length, uint16_t min) {
  138|    504|    int32_t lower = pos + 1;
  139|       |
  140|    504|    if ((lower >= length) || (array[lower] >= min)) {
  ------------------
  |  Branch (140:9): [True: 504, False: 0]
  |  Branch (140:30): [True: 0, False: 0]
  ------------------
  141|    504|        return lower;
  142|    504|    }
  143|       |
  144|      0|    int32_t spansize = 1;
  145|       |
  146|      0|    while ((lower + spansize < length) && (array[lower + spansize] < min)) {
  ------------------
  |  Branch (146:12): [True: 0, False: 0]
  |  Branch (146:43): [True: 0, False: 0]
  ------------------
  147|      0|        spansize <<= 1;
  148|      0|    }
  149|      0|    int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
  ------------------
  |  Branch (149:21): [True: 0, False: 0]
  ------------------
  150|       |
  151|      0|    if (array[upper] == min) {
  ------------------
  |  Branch (151:9): [True: 0, False: 0]
  ------------------
  152|      0|        return upper;
  153|      0|    }
  154|      0|    if (array[upper] < min) {
  ------------------
  |  Branch (154:9): [True: 0, False: 0]
  ------------------
  155|       |        // means
  156|       |        // array
  157|       |        // has no
  158|       |        // item
  159|       |        // >= min
  160|       |        // pos = array.length;
  161|      0|        return length;
  162|      0|    }
  163|       |
  164|       |    // we know that the next-smallest span was too small
  165|      0|    lower += (spansize >> 1);
  166|       |
  167|      0|    int32_t mid = 0;
  168|      0|    while (lower + 1 != upper) {
  ------------------
  |  Branch (168:12): [True: 0, False: 0]
  ------------------
  169|      0|        mid = (lower + upper) >> 1;
  170|      0|        if (array[mid] == min) {
  ------------------
  |  Branch (170:13): [True: 0, False: 0]
  ------------------
  171|      0|            return mid;
  172|      0|        } else if (array[mid] < min) {
  ------------------
  |  Branch (172:20): [True: 0, False: 0]
  ------------------
  173|      0|            lower = mid;
  174|      0|        } else {
  175|      0|            upper = mid;
  176|      0|        }
  177|      0|    }
  178|      0|    return upper;
  179|      0|}
roaring.c:count_greater:
  197|  9.35k|                                    uint16_t ikey) {
  198|  9.35k|    if (lenarray == 0) return 0;
  ------------------
  |  Branch (198:9): [True: 0, False: 9.35k]
  ------------------
  199|  9.35k|    int32_t pos = binarySearch(array, lenarray, ikey);
  200|  9.35k|    if (pos >= 0) {
  ------------------
  |  Branch (200:9): [True: 2.07k, False: 7.27k]
  ------------------
  201|  2.07k|        return lenarray - (pos + 1);
  202|  7.27k|    } else {
  203|  7.27k|        return lenarray - (-pos - 1);
  204|  7.27k|    }
  205|  9.35k|}
roaring.c:count_less:
  186|  16.0k|                                 uint16_t ikey) {
  187|  16.0k|    if (lenarray == 0) return 0;
  ------------------
  |  Branch (187:9): [True: 3.06k, False: 13.0k]
  ------------------
  188|  13.0k|    int32_t pos = binarySearch(array, lenarray, ikey);
  189|  13.0k|    return pos >= 0 ? pos : -(pos + 1);
  ------------------
  |  Branch (189:12): [True: 9.80k, False: 3.22k]
  ------------------
  190|  16.0k|}
array_util.c:advanceUntil:
  137|  19.8k|                                   int32_t length, uint16_t min) {
  138|  19.8k|    int32_t lower = pos + 1;
  139|       |
  140|  19.8k|    if ((lower >= length) || (array[lower] >= min)) {
  ------------------
  |  Branch (140:9): [True: 22, False: 19.7k]
  |  Branch (140:30): [True: 6.70k, False: 13.0k]
  ------------------
  141|  6.73k|        return lower;
  142|  6.73k|    }
  143|       |
  144|  13.0k|    int32_t spansize = 1;
  145|       |
  146|  57.2k|    while ((lower + spansize < length) && (array[lower + spansize] < min)) {
  ------------------
  |  Branch (146:12): [True: 55.8k, False: 1.38k]
  |  Branch (146:43): [True: 44.1k, False: 11.7k]
  ------------------
  147|  44.1k|        spansize <<= 1;
  148|  44.1k|    }
  149|  13.0k|    int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
  ------------------
  |  Branch (149:21): [True: 11.7k, False: 1.38k]
  ------------------
  150|       |
  151|  13.0k|    if (array[upper] == min) {
  ------------------
  |  Branch (151:9): [True: 3.00k, False: 10.0k]
  ------------------
  152|  3.00k|        return upper;
  153|  3.00k|    }
  154|  10.0k|    if (array[upper] < min) {
  ------------------
  |  Branch (154:9): [True: 231, False: 9.84k]
  ------------------
  155|       |        // means
  156|       |        // array
  157|       |        // has no
  158|       |        // item
  159|       |        // >= min
  160|       |        // pos = array.length;
  161|    231|        return length;
  162|    231|    }
  163|       |
  164|       |    // we know that the next-smallest span was too small
  165|  9.84k|    lower += (spansize >> 1);
  166|       |
  167|  9.84k|    int32_t mid = 0;
  168|  30.7k|    while (lower + 1 != upper) {
  ------------------
  |  Branch (168:12): [True: 26.2k, False: 4.48k]
  ------------------
  169|  26.2k|        mid = (lower + upper) >> 1;
  170|  26.2k|        if (array[mid] == min) {
  ------------------
  |  Branch (170:13): [True: 5.36k, False: 20.9k]
  ------------------
  171|  5.36k|            return mid;
  172|  20.9k|        } else if (array[mid] < min) {
  ------------------
  |  Branch (172:20): [True: 10.4k, False: 10.4k]
  ------------------
  173|  10.4k|            lower = mid;
  174|  10.4k|        } else {
  175|  10.4k|            upper = mid;
  176|  10.4k|        }
  177|  26.2k|    }
  178|  4.48k|    return upper;
  179|  9.84k|}
mixed_intersection.c:advanceUntil:
  137|   137k|                                   int32_t length, uint16_t min) {
  138|   137k|    int32_t lower = pos + 1;
  139|       |
  140|   137k|    if ((lower >= length) || (array[lower] >= min)) {
  ------------------
  |  Branch (140:9): [True: 521, False: 136k]
  |  Branch (140:30): [True: 77.9k, False: 58.6k]
  ------------------
  141|  78.4k|        return lower;
  142|  78.4k|    }
  143|       |
  144|  58.6k|    int32_t spansize = 1;
  145|       |
  146|   140k|    while ((lower + spansize < length) && (array[lower + spansize] < min)) {
  ------------------
  |  Branch (146:12): [True: 138k, False: 1.17k]
  |  Branch (146:43): [True: 81.4k, False: 57.4k]
  ------------------
  147|  81.4k|        spansize <<= 1;
  148|  81.4k|    }
  149|  58.6k|    int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
  ------------------
  |  Branch (149:21): [True: 57.4k, False: 1.17k]
  ------------------
  150|       |
  151|  58.6k|    if (array[upper] == min) {
  ------------------
  |  Branch (151:9): [True: 28.2k, False: 30.3k]
  ------------------
  152|  28.2k|        return upper;
  153|  28.2k|    }
  154|  30.3k|    if (array[upper] < min) {
  ------------------
  |  Branch (154:9): [True: 568, False: 29.8k]
  ------------------
  155|       |        // means
  156|       |        // array
  157|       |        // has no
  158|       |        // item
  159|       |        // >= min
  160|       |        // pos = array.length;
  161|    568|        return length;
  162|    568|    }
  163|       |
  164|       |    // we know that the next-smallest span was too small
  165|  29.8k|    lower += (spansize >> 1);
  166|       |
  167|  29.8k|    int32_t mid = 0;
  168|  53.2k|    while (lower + 1 != upper) {
  ------------------
  |  Branch (168:12): [True: 36.6k, False: 16.5k]
  ------------------
  169|  36.6k|        mid = (lower + upper) >> 1;
  170|  36.6k|        if (array[mid] == min) {
  ------------------
  |  Branch (170:13): [True: 13.2k, False: 23.4k]
  ------------------
  171|  13.2k|            return mid;
  172|  23.4k|        } else if (array[mid] < min) {
  ------------------
  |  Branch (172:20): [True: 8.76k, False: 14.6k]
  ------------------
  173|  8.76k|            lower = mid;
  174|  14.6k|        } else {
  175|  14.6k|            upper = mid;
  176|  14.6k|        }
  177|  36.6k|    }
  178|  16.5k|    return upper;
  179|  29.8k|}
mixed_subset.c:advanceUntil:
  137|    398|                                   int32_t length, uint16_t min) {
  138|    398|    int32_t lower = pos + 1;
  139|       |
  140|    398|    if ((lower >= length) || (array[lower] >= min)) {
  ------------------
  |  Branch (140:9): [True: 0, False: 398]
  |  Branch (140:30): [True: 202, False: 196]
  ------------------
  141|    202|        return lower;
  142|    202|    }
  143|       |
  144|    196|    int32_t spansize = 1;
  145|       |
  146|    950|    while ((lower + spansize < length) && (array[lower + spansize] < min)) {
  ------------------
  |  Branch (146:12): [True: 872, False: 78]
  |  Branch (146:43): [True: 754, False: 118]
  ------------------
  147|    754|        spansize <<= 1;
  148|    754|    }
  149|    196|    int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
  ------------------
  |  Branch (149:21): [True: 118, False: 78]
  ------------------
  150|       |
  151|    196|    if (array[upper] == min) {
  ------------------
  |  Branch (151:9): [True: 0, False: 196]
  ------------------
  152|      0|        return upper;
  153|      0|    }
  154|    196|    if (array[upper] < min) {
  ------------------
  |  Branch (154:9): [True: 10, False: 186]
  ------------------
  155|       |        // means
  156|       |        // array
  157|       |        // has no
  158|       |        // item
  159|       |        // >= min
  160|       |        // pos = array.length;
  161|     10|        return length;
  162|     10|    }
  163|       |
  164|       |    // we know that the next-smallest span was too small
  165|    186|    lower += (spansize >> 1);
  166|       |
  167|    186|    int32_t mid = 0;
  168|    666|    while (lower + 1 != upper) {
  ------------------
  |  Branch (168:12): [True: 480, False: 186]
  ------------------
  169|    480|        mid = (lower + upper) >> 1;
  170|    480|        if (array[mid] == min) {
  ------------------
  |  Branch (170:13): [True: 0, False: 480]
  ------------------
  171|      0|            return mid;
  172|    480|        } else if (array[mid] < min) {
  ------------------
  |  Branch (172:20): [True: 260, False: 220]
  ------------------
  173|    260|            lower = mid;
  174|    260|        } else {
  175|    220|            upper = mid;
  176|    220|        }
  177|    480|    }
  178|    186|    return upper;
  179|    186|}
mixed_andnot.c:advanceUntil:
  137|  96.7k|                                   int32_t length, uint16_t min) {
  138|  96.7k|    int32_t lower = pos + 1;
  139|       |
  140|  96.7k|    if ((lower >= length) || (array[lower] >= min)) {
  ------------------
  |  Branch (140:9): [True: 5.20k, False: 91.5k]
  |  Branch (140:30): [True: 78.6k, False: 12.9k]
  ------------------
  141|  83.8k|        return lower;
  142|  83.8k|    }
  143|       |
  144|  12.9k|    int32_t spansize = 1;
  145|       |
  146|  26.3k|    while ((lower + spansize < length) && (array[lower + spansize] < min)) {
  ------------------
  |  Branch (146:12): [True: 25.8k, False: 514]
  |  Branch (146:43): [True: 13.4k, False: 12.4k]
  ------------------
  147|  13.4k|        spansize <<= 1;
  148|  13.4k|    }
  149|  12.9k|    int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
  ------------------
  |  Branch (149:21): [True: 12.4k, False: 514]
  ------------------
  150|       |
  151|  12.9k|    if (array[upper] == min) {
  ------------------
  |  Branch (151:9): [True: 6.24k, False: 6.67k]
  ------------------
  152|  6.24k|        return upper;
  153|  6.24k|    }
  154|  6.67k|    if (array[upper] < min) {
  ------------------
  |  Branch (154:9): [True: 476, False: 6.19k]
  ------------------
  155|       |        // means
  156|       |        // array
  157|       |        // has no
  158|       |        // item
  159|       |        // >= min
  160|       |        // pos = array.length;
  161|    476|        return length;
  162|    476|    }
  163|       |
  164|       |    // we know that the next-smallest span was too small
  165|  6.19k|    lower += (spansize >> 1);
  166|       |
  167|  6.19k|    int32_t mid = 0;
  168|  9.65k|    while (lower + 1 != upper) {
  ------------------
  |  Branch (168:12): [True: 5.67k, False: 3.97k]
  ------------------
  169|  5.67k|        mid = (lower + upper) >> 1;
  170|  5.67k|        if (array[mid] == min) {
  ------------------
  |  Branch (170:13): [True: 2.22k, False: 3.45k]
  ------------------
  171|  2.22k|            return mid;
  172|  3.45k|        } else if (array[mid] < min) {
  ------------------
  |  Branch (172:20): [True: 1.22k, False: 2.22k]
  ------------------
  173|  1.22k|            lower = mid;
  174|  2.22k|        } else {
  175|  2.22k|            upper = mid;
  176|  2.22k|        }
  177|  5.67k|    }
  178|  3.97k|    return upper;
  179|  6.19k|}

roaring.c:bitset_lenrange_cardinality:
   63|  1.35k|                                              uint32_t lenminusone) {
   64|  1.35k|    uint32_t firstword = start / 64;
   65|  1.35k|    uint32_t endword = (start + lenminusone) / 64;
   66|  1.35k|    if (firstword == endword) {
  ------------------
  |  Branch (66:9): [True: 957, False: 395]
  ------------------
   67|    957|        return roaring_hamming(words[firstword] &
   68|    957|                               ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
   69|    957|                                   << (start % 64));
   70|    957|    }
   71|    395|    int answer =
   72|    395|        roaring_hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
   73|   197k|    for (uint32_t i = firstword + 1; i < endword; i++) {
  ------------------
  |  Branch (73:38): [True: 197k, False: 395]
  ------------------
   74|   197k|        answer += roaring_hamming(words[i]);
   75|   197k|    }
   76|    395|    answer += roaring_hamming(words[endword] &
   77|       |                              (~UINT64_C(0)) >>
   78|    395|                                  (((~start + 1) - lenminusone - 1) % 64));
   79|    395|    return answer;
   80|  1.35k|}
roaring.c:bitset_reset_range:
  147|  1.31k|                                      uint32_t end) {
  148|  1.31k|    if (start == end) return;
  ------------------
  |  Branch (148:9): [True: 0, False: 1.31k]
  ------------------
  149|  1.31k|    uint32_t firstword = start / 64;
  150|  1.31k|    uint32_t endword = (end - 1) / 64;
  151|  1.31k|    if (firstword == endword) {
  ------------------
  |  Branch (151:9): [True: 957, False: 361]
  ------------------
  152|    957|        words[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) &
  153|    957|                              ((~UINT64_C(0)) >> ((~end + 1) % 64)));
  154|    957|        return;
  155|    957|    }
  156|    361|    words[firstword] &= ~((~UINT64_C(0)) << (start % 64));
  157|   169k|    for (uint32_t i = firstword + 1; i < endword; i++) {
  ------------------
  |  Branch (157:38): [True: 168k, False: 361]
  ------------------
  158|   168k|        words[i] = UINT64_C(0);
  159|   168k|    }
  160|       |    words[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64));
  161|    361|}
roaring.c:bitset_set_lenrange:
  112|  1.54k|                                       uint32_t lenminusone) {
  113|  1.54k|    uint32_t firstword = start / 64;
  114|  1.54k|    uint32_t endword = (start + lenminusone) / 64;
  115|  1.54k|    if (firstword == endword) {
  ------------------
  |  Branch (115:9): [True: 0, False: 1.54k]
  ------------------
  116|      0|        words[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
  117|      0|                            << (start % 64);
  118|      0|        return;
  119|      0|    }
  120|  1.54k|    uint64_t temp = words[endword];
  121|  1.54k|    words[firstword] |= (~UINT64_C(0)) << (start % 64);
  122|   454k|    for (uint32_t i = firstword + 1; i < endword; i += 2)
  ------------------
  |  Branch (122:38): [True: 453k, False: 1.54k]
  ------------------
  123|   453k|        words[i] = words[i + 1] = ~UINT64_C(0);
  124|  1.54k|    words[endword] =
  125|       |        temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64);
  126|  1.54k|}
bitset.c:avx2_harley_seal_popcount256:
  340|  3.44k|                                                    const uint64_t size) {
  341|  3.44k|    __m256i total = _mm256_setzero_si256();
  342|  3.44k|    __m256i ones = _mm256_setzero_si256();
  343|  3.44k|    __m256i twos = _mm256_setzero_si256();
  344|  3.44k|    __m256i fours = _mm256_setzero_si256();
  345|  3.44k|    __m256i eights = _mm256_setzero_si256();
  346|  3.44k|    __m256i sixteens = _mm256_setzero_si256();
  347|  3.44k|    __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;
  348|       |
  349|  3.44k|    const uint64_t limit = size - size % 16;
  350|  3.44k|    uint64_t i = 0;
  351|       |
  352|  58.6k|    for (; i < limit; i += 16) {
  ------------------
  |  Branch (352:12): [True: 55.1k, False: 3.44k]
  ------------------
  353|  55.1k|        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i),
  354|  55.1k|            _mm256_lddqu_si256(data + i + 1));
  355|  55.1k|        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2),
  356|  55.1k|            _mm256_lddqu_si256(data + i + 3));
  357|  55.1k|        CSA(&foursA, &twos, twos, twosA, twosB);
  358|  55.1k|        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4),
  359|  55.1k|            _mm256_lddqu_si256(data + i + 5));
  360|  55.1k|        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6),
  361|  55.1k|            _mm256_lddqu_si256(data + i + 7));
  362|  55.1k|        CSA(&foursB, &twos, twos, twosA, twosB);
  363|  55.1k|        CSA(&eightsA, &fours, fours, foursA, foursB);
  364|  55.1k|        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8),
  365|  55.1k|            _mm256_lddqu_si256(data + i + 9));
  366|  55.1k|        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10),
  367|  55.1k|            _mm256_lddqu_si256(data + i + 11));
  368|  55.1k|        CSA(&foursA, &twos, twos, twosA, twosB);
  369|  55.1k|        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12),
  370|  55.1k|            _mm256_lddqu_si256(data + i + 13));
  371|  55.1k|        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14),
  372|  55.1k|            _mm256_lddqu_si256(data + i + 15));
  373|  55.1k|        CSA(&foursB, &twos, twos, twosA, twosB);
  374|  55.1k|        CSA(&eightsB, &fours, fours, foursA, foursB);
  375|  55.1k|        CSA(&sixteens, &eights, eights, eightsA, eightsB);
  376|       |
  377|  55.1k|        total = _mm256_add_epi64(total, popcount256(sixteens));
  378|  55.1k|    }
  379|       |
  380|  3.44k|    total = _mm256_slli_epi64(total, 4);  // * 16
  381|  3.44k|    total = _mm256_add_epi64(
  382|  3.44k|        total, _mm256_slli_epi64(popcount256(eights), 3));  // += 8 * ...
  383|  3.44k|    total = _mm256_add_epi64(
  384|  3.44k|        total, _mm256_slli_epi64(popcount256(fours), 2));  // += 4 * ...
  385|  3.44k|    total = _mm256_add_epi64(
  386|  3.44k|        total, _mm256_slli_epi64(popcount256(twos), 1));  // += 2 * ...
  387|  3.44k|    total = _mm256_add_epi64(total, popcount256(ones));
  388|  3.44k|    for (; i < size; i++)
  ------------------
  |  Branch (388:12): [True: 0, False: 3.44k]
  ------------------
  389|      0|        total =
  390|      0|            _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i)));
  391|       |
  392|  3.44k|    return (uint64_t)(_mm256_extract_epi64(total, 0)) +
  393|  3.44k|           (uint64_t)(_mm256_extract_epi64(total, 1)) +
  394|  3.44k|           (uint64_t)(_mm256_extract_epi64(total, 2)) +
  395|       |           (uint64_t)(_mm256_extract_epi64(total, 3));
  396|  3.44k|}
bitset.c:CSA:
  328|  1.55M|                       __m256i c) {
  329|  1.55M|    const __m256i u = _mm256_xor_si256(a, b);
  330|  1.55M|    *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c));
  331|  1.55M|    *l = _mm256_xor_si256(u, c);
  332|  1.55M|}
bitset.c:popcount256:
  292|   129k|static inline __m256i popcount256(__m256i v) {
  293|   129k|    const __m256i lookuppos = _mm256_setr_epi8(
  294|   129k|        /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2,
  295|   129k|        /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3,
  296|   129k|        /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3,
  297|   129k|        /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4,
  298|       |
  299|       |        /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2,
  300|   129k|        /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3,
  301|   129k|        /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3,
  302|   129k|        /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4);
  303|   129k|    const __m256i lookupneg = _mm256_setr_epi8(
  304|   129k|        /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2,
  305|   129k|        /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3,
  306|   129k|        /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3,
  307|   129k|        /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4,
  308|       |
  309|       |        /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2,
  310|   129k|        /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3,
  311|   129k|        /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3,
  312|   129k|        /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4);
  313|   129k|    const __m256i low_mask = _mm256_set1_epi8(0x0f);
  314|       |
  315|   129k|    const __m256i lo = _mm256_and_si256(v, low_mask);
  316|   129k|    const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask);
  317|   129k|    const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo);
  318|   129k|    const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi);
  319|   129k|    return _mm256_sad_epu8(popcnt1, popcnt2);
  320|   129k|}
convert.c:bitset_set_lenrange:
  112|   107k|                                       uint32_t lenminusone) {
  113|   107k|    uint32_t firstword = start / 64;
  114|   107k|    uint32_t endword = (start + lenminusone) / 64;
  115|   107k|    if (firstword == endword) {
  ------------------
  |  Branch (115:9): [True: 34.9k, False: 72.9k]
  ------------------
  116|  34.9k|        words[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
  117|  34.9k|                            << (start % 64);
  118|  34.9k|        return;
  119|  34.9k|    }
  120|  72.9k|    uint64_t temp = words[endword];
  121|  72.9k|    words[firstword] |= (~UINT64_C(0)) << (start % 64);
  122|  36.0M|    for (uint32_t i = firstword + 1; i < endword; i += 2)
  ------------------
  |  Branch (122:38): [True: 35.9M, False: 72.9k]
  ------------------
  123|  35.9M|        words[i] = words[i + 1] = ~UINT64_C(0);
  124|  72.9k|    words[endword] =
  125|       |        temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64);
  126|  72.9k|}
mixed_intersection.c:bitset_lenrange_cardinality:
   63|   217k|                                              uint32_t lenminusone) {
   64|   217k|    uint32_t firstword = start / 64;
   65|   217k|    uint32_t endword = (start + lenminusone) / 64;
   66|   217k|    if (firstword == endword) {
  ------------------
  |  Branch (66:9): [True: 211k, False: 6.12k]
  ------------------
   67|   211k|        return roaring_hamming(words[firstword] &
   68|   211k|                               ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
   69|   211k|                                   << (start % 64));
   70|   211k|    }
   71|  6.12k|    int answer =
   72|  6.12k|        roaring_hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
   73|  6.40k|    for (uint32_t i = firstword + 1; i < endword; i++) {
  ------------------
  |  Branch (73:38): [True: 276, False: 6.12k]
  ------------------
   74|    276|        answer += roaring_hamming(words[i]);
   75|    276|    }
   76|  6.12k|    answer += roaring_hamming(words[endword] &
   77|       |                              (~UINT64_C(0)) >>
   78|  6.12k|                                  (((~start + 1) - lenminusone - 1) % 64));
   79|  6.12k|    return answer;
   80|   217k|}
mixed_intersection.c:bitset_lenrange_empty:
   86|  4.53k|                                         uint32_t lenminusone) {
   87|  4.53k|    uint32_t firstword = start / 64;
   88|  4.53k|    uint32_t endword = (start + lenminusone) / 64;
   89|  4.53k|    if (firstword == endword) {
  ------------------
  |  Branch (89:9): [True: 4.23k, False: 298]
  ------------------
   90|  4.23k|        return (words[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
   91|  4.23k|                                       << (start % 64)) == 0;
   92|  4.23k|    }
   93|    298|    if (((words[firstword] & ((~UINT64_C(0)) << (start % 64)))) != 0) {
  ------------------
  |  Branch (93:9): [True: 105, False: 193]
  ------------------
   94|    105|        return false;
   95|    105|    }
   96|    204|    for (uint32_t i = firstword + 1; i < endword; i++) {
  ------------------
  |  Branch (96:38): [True: 13, False: 191]
  ------------------
   97|     13|        if (words[i] != 0) {
  ------------------
  |  Branch (97:13): [True: 2, False: 11]
  ------------------
   98|      2|            return false;
   99|      2|        }
  100|     13|    }
  101|    191|    if ((words[endword] &
  ------------------
  |  Branch (101:9): [True: 104, False: 87]
  ------------------
  102|    191|         (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) {
  103|    104|        return false;
  104|    104|    }
  105|     87|    return true;
  106|    191|}
mixed_union.c:bitset_set_lenrange:
  112|   108k|                                       uint32_t lenminusone) {
  113|   108k|    uint32_t firstword = start / 64;
  114|   108k|    uint32_t endword = (start + lenminusone) / 64;
  115|   108k|    if (firstword == endword) {
  ------------------
  |  Branch (115:9): [True: 105k, False: 3.06k]
  ------------------
  116|   105k|        words[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
  117|   105k|                            << (start % 64);
  118|   105k|        return;
  119|   105k|    }
  120|  3.06k|    uint64_t temp = words[endword];
  121|  3.06k|    words[firstword] |= (~UINT64_C(0)) << (start % 64);
  122|  3.17k|    for (uint32_t i = firstword + 1; i < endword; i += 2)
  ------------------
  |  Branch (122:38): [True: 108, False: 3.06k]
  ------------------
  123|    108|        words[i] = words[i + 1] = ~UINT64_C(0);
  124|  3.06k|    words[endword] =
  125|       |        temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64);
  126|  3.06k|}
mixed_negation.c:bitset_flip_range:
  132|  1.54k|                                     uint32_t end) {
  133|  1.54k|    if (start == end) return;
  ------------------
  |  Branch (133:9): [True: 0, False: 1.54k]
  ------------------
  134|  1.54k|    uint32_t firstword = start / 64;
  135|  1.54k|    uint32_t endword = (end - 1) / 64;
  136|  1.54k|    words[firstword] ^= ~((~UINT64_C(0)) << (start % 64));
  137|   199k|    for (uint32_t i = firstword; i < endword; i++) {
  ------------------
  |  Branch (137:34): [True: 197k, False: 1.54k]
  ------------------
  138|   197k|        words[i] = ~words[i];
  139|   197k|    }
  140|       |    words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64));
  141|  1.54k|}
mixed_xor.c:bitset_flip_range:
  132|  58.0k|                                     uint32_t end) {
  133|  58.0k|    if (start == end) return;
  ------------------
  |  Branch (133:9): [True: 0, False: 58.0k]
  ------------------
  134|  58.0k|    uint32_t firstword = start / 64;
  135|  58.0k|    uint32_t endword = (end - 1) / 64;
  136|  58.0k|    words[firstword] ^= ~((~UINT64_C(0)) << (start % 64));
  137|   120k|    for (uint32_t i = firstword; i < endword; i++) {
  ------------------
  |  Branch (137:34): [True: 62.6k, False: 58.0k]
  ------------------
  138|  62.6k|        words[i] = ~words[i];
  139|  62.6k|    }
  140|       |    words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64));
  141|  58.0k|}

array_container_contains:
  343|  3.38k|                                     uint16_t pos) {
  344|       |    /**
  345|       |     * SIMD Quad algorithm
  346|       |     * Daniel Lemire, "You can beat the binary search," in Daniel Lemire's blog,
  347|       |     *  April 27, 2026,
  348|       |     * https://lemire.me/blog/2026/04/27/you-can-beat-the-binary-search/.
  349|       |     */
  350|  3.38k|    const int32_t gap = 16;
  351|  3.38k|    const uint16_t *carr = arr->array;
  352|  3.38k|    int32_t cardinality = arr->cardinality;
  353|  3.38k|    if (cardinality < gap) {
  ------------------
  |  Branch (353:9): [True: 975, False: 2.41k]
  ------------------
  354|  1.07k|        for (int32_t j = 0; j < cardinality; j++) {
  ------------------
  |  Branch (354:29): [True: 1.03k, False: 32]
  ------------------
  355|  1.03k|            if (carr[j] >= pos) return carr[j] == pos;
  ------------------
  |  Branch (355:17): [True: 943, False: 95]
  ------------------
  356|  1.03k|        }
  357|     32|        return false;
  358|    975|    }
  359|  2.41k|    int32_t num_blocks = cardinality / gap;
  360|  2.41k|    int32_t base = 0;
  361|  2.41k|    int32_t n = num_blocks;
  362|  5.60k|    while (n > 3) {
  ------------------
  |  Branch (362:12): [True: 3.18k, False: 2.41k]
  ------------------
  363|  3.18k|        int32_t quarter = n >> 2;
  364|       |
  365|  3.18k|        int32_t k1 = carr[(base + quarter + 1) * gap - 1];
  366|  3.18k|        int32_t k2 = carr[(base + 2 * quarter + 1) * gap - 1];
  367|  3.18k|        int32_t k3 = carr[(base + 3 * quarter + 1) * gap - 1];
  368|       |
  369|  3.18k|        int32_t c1 = (k1 < pos);
  370|  3.18k|        int32_t c2 = (k2 < pos);
  371|  3.18k|        int32_t c3 = (k3 < pos);
  372|       |
  373|  3.18k|        base += (c1 + c2 + c3) * quarter;
  374|  3.18k|        n -= 3 * quarter;
  375|  3.18k|    }
  376|  4.23k|    while (n > 1) {
  ------------------
  |  Branch (376:12): [True: 1.81k, False: 2.41k]
  ------------------
  377|  1.81k|        int32_t half = n >> 1;
  378|  1.81k|        base = (carr[(base + half + 1) * gap - 1] < pos) ? base + half : base;
  ------------------
  |  Branch (378:16): [True: 69, False: 1.75k]
  ------------------
  379|  1.81k|        n -= half;
  380|  1.81k|    }
  381|  2.41k|    int32_t lo = (carr[(base + 1) * gap - 1] < pos) ? base + 1 : base;
  ------------------
  |  Branch (381:18): [True: 126, False: 2.28k]
  ------------------
  382|       |
  383|  2.41k|    if (lo < num_blocks) {
  ------------------
  |  Branch (383:9): [True: 2.33k, False: 75]
  ------------------
  384|  2.33k|        const uint16_t *blk = carr + lo * gap;
  385|       |#ifdef CROARING_USENEON
  386|       |        uint16x8_t needle = vdupq_n_u16(pos);
  387|       |        uint16x8_t v0 = vld1q_u16(blk);
  388|       |        uint16x8_t v1 = vld1q_u16(blk + 8);
  389|       |        uint16x8_t hit =
  390|       |            vorrq_u16(vceqq_u16(v0, needle), vceqq_u16(v1, needle));
  391|       |        return vmaxvq_u16(hit) != 0;
  392|       |#elif defined(CROARING_IS_X64)
  393|       |        __m128i needle = _mm_set1_epi16((short)pos);
  394|  2.33k|        __m128i v0 = _mm_loadu_si128((const __m128i *)blk);
  395|  2.33k|        __m128i v1 = _mm_loadu_si128((const __m128i *)(blk + 8));
  396|  2.33k|        __m128i hit = _mm_or_si128(_mm_cmpeq_epi16(v0, needle),
  397|  2.33k|                                   _mm_cmpeq_epi16(v1, needle));
  398|  2.33k|        return _mm_movemask_epi8(hit) != 0;
  399|       |#else
  400|       |        for (int32_t j = 0; j < gap; j++) {
  401|       |            if (blk[j] >= pos) return blk[j] == pos;
  402|       |        }
  403|       |        return false;
  404|       |#endif
  405|  2.33k|    }
  406|       |
  407|    606|    for (int32_t j = num_blocks * gap; j < cardinality; j++) {
  ------------------
  |  Branch (407:40): [True: 554, False: 52]
  ------------------
  408|    554|        uint16_t v = carr[j];
  409|    554|        if (v >= pos) return (v == pos);
  ------------------
  |  Branch (409:13): [True: 23, False: 531]
  ------------------
  410|    554|    }
  411|     52|    return false;
  412|     75|}
array_container_minimum:
  445|  3.67k|inline uint16_t array_container_minimum(const array_container_t *arr) {
  446|  3.67k|    if (arr->cardinality == 0) return 0;
  ------------------
  |  Branch (446:9): [True: 0, False: 3.67k]
  ------------------
  447|  3.67k|    return arr->array[0];
  448|  3.67k|}
array_container_maximum:
  451|  13.5k|inline uint16_t array_container_maximum(const array_container_t *arr) {
  452|  13.5k|    if (arr->cardinality == 0) return 0;
  ------------------
  |  Branch (452:9): [True: 0, False: 13.5k]
  ------------------
  453|  13.5k|    return arr->array[arr->cardinality - 1];
  454|  13.5k|}
array_container_rank:
  457|  3.87k|inline int array_container_rank(const array_container_t *arr, uint16_t x) {
  458|  3.87k|    const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
  459|  3.87k|    const bool is_present = idx >= 0;
  460|  3.87k|    if (is_present) {
  ------------------
  |  Branch (460:9): [True: 3.26k, False: 606]
  ------------------
  461|  3.26k|        return idx + 1;
  462|  3.26k|    } else {
  463|    606|        return -idx - 1;
  464|    606|    }
  465|  3.87k|}
array_container_index_equalorlarger:
  507|  4.54k|                                               uint16_t x) {
  508|  4.54k|    const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
  509|  4.54k|    const bool is_present = idx >= 0;
  510|  4.54k|    if (is_present) {
  ------------------
  |  Branch (510:9): [True: 4.15k, False: 395]
  ------------------
  511|  4.15k|        return idx;
  512|  4.15k|    } else {
  513|    395|        int32_t candidate = -idx - 1;
  514|    395|        if (candidate < arr->cardinality) return candidate;
  ------------------
  |  Branch (514:13): [True: 395, False: 0]
  ------------------
  515|      0|        return -1;
  516|    395|    }
  517|  4.54k|}
roaring.c:array_container_add_range_nvals:
  527|    609|                                                   int32_t nvals_greater) {
  528|    609|    int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
  529|    609|    if (union_cardinality > array->capacity) {
  ------------------
  |  Branch (529:9): [True: 561, False: 48]
  ------------------
  530|    561|        array_container_grow(array, union_cardinality, true);
  531|    561|    }
  532|    609|    memmove(&(array->array[union_cardinality - nvals_greater]),
  533|    609|            &(array->array[array->cardinality - nvals_greater]),
  534|    609|            nvals_greater * sizeof(uint16_t));
  535|  1.49M|    for (uint32_t i = 0; i <= max - min; i++) {
  ------------------
  |  Branch (535:26): [True: 1.49M, False: 609]
  ------------------
  536|  1.49M|        array->array[nvals_less + i] = (uint16_t)(min + i);
  537|  1.49M|    }
  538|    609|    array->cardinality = union_cardinality;
  539|    609|}
roaring.c:array_container_remove_range:
  557|  3.27k|                                                uint32_t pos, uint32_t count) {
  558|  3.27k|    if (count != 0) {
  ------------------
  |  Branch (558:9): [True: 340, False: 2.93k]
  ------------------
  559|    340|        memmove(&(array->array[pos]), &(array->array[pos + count]),
  560|    340|                (array->cardinality - pos - count) * sizeof(uint16_t));
  561|    340|        array->cardinality -= count;
  562|    340|    }
  563|  3.27k|}
roaring.c:array_container_cardinality:
   82|  73.1k|static inline int array_container_cardinality(const array_container_t *array) {
   83|  73.1k|    return array->cardinality;
   84|  73.1k|}
roaring.c:array_container_try_add:
  293|  7.43M|                                          int32_t max_cardinality) {
  294|  7.43M|    const int32_t cardinality = arr->cardinality;
  295|       |
  296|       |    // best case, we can append.
  297|  7.43M|    if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) &&
  ------------------
  |  Branch (297:10): [True: 16.4k, False: 7.41M]
  |  Branch (297:40): [True: 52.9k, False: 7.36M]
  ------------------
  298|  69.3k|        cardinality < max_cardinality) {
  ------------------
  |  Branch (298:9): [True: 69.3k, False: 0]
  ------------------
  299|  69.3k|        array_container_append(arr, value);
  300|  69.3k|        return 1;
  301|  69.3k|    }
  302|       |
  303|  7.36M|    const int32_t loc = binarySearch(arr->array, cardinality, value);
  304|       |
  305|  7.36M|    if (loc >= 0) {
  ------------------
  |  Branch (305:9): [True: 5.62M, False: 1.73M]
  ------------------
  306|  5.62M|        return 0;
  307|  5.62M|    } else if (cardinality < max_cardinality) {
  ------------------
  |  Branch (307:16): [True: 1.73M, False: 127]
  ------------------
  308|  1.73M|        if (array_container_full(arr)) {
  ------------------
  |  Branch (308:13): [True: 77.8k, False: 1.65M]
  ------------------
  309|  77.8k|            array_container_grow(arr, arr->capacity + 1, true);
  310|  77.8k|        }
  311|  1.73M|        const int32_t insert_idx = -loc - 1;
  312|  1.73M|        memmove(arr->array + insert_idx + 1, arr->array + insert_idx,
  313|  1.73M|                (cardinality - insert_idx) * sizeof(uint16_t));
  314|  1.73M|        arr->array[insert_idx] = value;
  315|  1.73M|        arr->cardinality++;
  316|  1.73M|        return 1;
  317|  1.73M|    } else {
  318|    127|        return -1;
  319|    127|    }
  320|  7.36M|}
roaring.c:array_container_empty:
  100|  7.43M|static inline bool array_container_empty(const array_container_t *array) {
  101|  7.43M|    return array->cardinality == 0;
  102|  7.43M|}
roaring.c:array_container_append:
  274|  69.3k|                                          uint16_t pos) {
  275|  69.3k|    const int32_t capacity = arr->capacity;
  276|       |
  277|  69.3k|    if (array_container_full(arr)) {
  ------------------
  |  Branch (277:9): [True: 33.2k, False: 36.0k]
  ------------------
  278|  33.2k|        array_container_grow(arr, capacity + 1, true);
  279|  33.2k|    }
  280|       |
  281|  69.3k|    arr->array[arr->cardinality++] = pos;
  282|  69.3k|}
roaring.c:array_container_full:
  106|  1.80M|static inline bool array_container_full(const array_container_t *array) {
  107|  1.80M|    return array->cardinality == array->capacity;
  108|  1.80M|}
roaring.c:array_container_remove:
  329|  6.21k|                                          uint16_t pos) {
  330|  6.21k|    const int32_t idx = binarySearch(arr->array, arr->cardinality, pos);
  331|  6.21k|    const bool is_present = idx >= 0;
  332|  6.21k|    if (is_present) {
  ------------------
  |  Branch (332:9): [True: 3.12k, False: 3.08k]
  ------------------
  333|  3.12k|        memmove(arr->array + idx, arr->array + idx + 1,
  334|  3.12k|                (arr->cardinality - idx - 1) * sizeof(uint16_t));
  335|  3.12k|        arr->cardinality--;
  336|  3.12k|    }
  337|       |
  338|  6.21k|    return is_present;
  339|  6.21k|}
roaring.c:array_container_nonzero_cardinality:
   87|   105k|    const array_container_t *array) {
   88|   105k|    return array->cardinality > 0;
   89|   105k|}
roaring.c:array_container_equals:
  230|  9.78k|                                          const array_container_t *container2) {
  231|  9.78k|    if (container1->cardinality != container2->cardinality) {
  ------------------
  |  Branch (231:9): [True: 2.31k, False: 7.46k]
  ------------------
  232|  2.31k|        return false;
  233|  2.31k|    }
  234|  7.46k|    return memequals(container1->array, container2->array,
  235|  7.46k|                     container1->cardinality * 2);
  236|  9.78k|}
roaring.c:array_container_select:
  252|  3.87k|                                          uint32_t *element) {
  253|  3.87k|    int card = array_container_cardinality(container);
  254|  3.87k|    if (*start_rank + card <= rank) {
  ------------------
  |  Branch (254:9): [True: 342, False: 3.53k]
  ------------------
  255|    342|        *start_rank += card;
  256|    342|        return false;
  257|  3.53k|    } else {
  258|  3.53k|        *element = container->array[rank - *start_rank];
  259|       |        return true;
  260|  3.53k|    }
  261|  3.87k|}
roaring.c:array_container_contains_range:
  421|     94|                                                  uint32_t range_end) {
  422|     94|    const int32_t range_count = range_end - range_start;
  423|     94|    const uint16_t rs_included = (uint16_t)range_start;
  424|     94|    const uint16_t re_included = (uint16_t)(range_end - 1);
  425|       |
  426|       |    // Empty range is always included
  427|     94|    if (range_count <= 0) {
  ------------------
  |  Branch (427:9): [True: 0, False: 94]
  ------------------
  428|      0|        return true;
  429|      0|    }
  430|     94|    if (range_count > arr->cardinality) {
  ------------------
  |  Branch (430:9): [True: 41, False: 53]
  ------------------
  431|     41|        return false;
  432|     41|    }
  433|       |
  434|     53|    const int32_t start =
  435|     53|        binarySearch(arr->array, arr->cardinality, rs_included);
  436|       |    // If this sorted array contains all items in the range:
  437|       |    // * the start item must be found
  438|       |    // * the last item in range range_count must exist, and be the expected end
  439|       |    // value
  440|     53|    return (start >= 0) && (arr->cardinality >= start + range_count) &&
  ------------------
  |  Branch (440:12): [True: 35, False: 18]
  |  Branch (440:28): [True: 29, False: 6]
  ------------------
  441|     29|           (arr->array[start + range_count - 1] == re_included);
  ------------------
  |  Branch (441:12): [True: 8, False: 21]
  ------------------
  442|     94|}
roaring_array.c:array_container_size_in_bytes:
  221|  18.5k|    const array_container_t *container) {
  222|  18.5k|    return container->cardinality * sizeof(uint16_t);
  223|  18.5k|}
roaring_array.c:array_container_cardinality:
   82|  7.31k|static inline int array_container_cardinality(const array_container_t *array) {
   83|  7.31k|    return array->cardinality;
   84|  7.31k|}
array.c:array_container_size_in_bytes:
  221|  14.6k|    const array_container_t *container) {
  222|  14.6k|    return container->cardinality * sizeof(uint16_t);
  223|  14.6k|}
convert.c:array_container_cardinality:
   82|  15.3k|static inline int array_container_cardinality(const array_container_t *array) {
   83|  15.3k|    return array->cardinality;
   84|  15.3k|}
convert.c:array_container_serialized_size_in_bytes:
  173|   151k|static inline int32_t array_container_serialized_size_in_bytes(int32_t card) {
  174|   151k|    return card * sizeof(uint16_t);
  175|   151k|}
mixed_intersection.c:array_container_empty:
  100|     30|static inline bool array_container_empty(const array_container_t *array) {
  101|     30|    return array->cardinality == 0;
  102|     30|}
mixed_andnot.c:array_container_cardinality:
   82|    378|static inline int array_container_cardinality(const array_container_t *array) {
   83|    378|    return array->cardinality;
   84|    378|}

bitset_container_get:
  193|  1.08M|                                 uint16_t pos) {
  194|  1.08M|    const uint64_t word = bitset->words[pos >> 6];
  195|  1.08M|    return (word >> (pos & 63)) & 1;
  196|  1.08M|}
bitset_container_contains:
  232|   943k|                                      uint16_t pos) {
  233|   943k|    return bitset_container_get(bitset, pos);
  234|   943k|}
roaring.c:bitset_container_cardinality:
  248|  11.1k|    const bitset_container_t *bitset) {
  249|  11.1k|    return bitset->cardinality;
  250|  11.1k|}
roaring.c:bitset_container_set:
  147|   325k|                                        uint16_t pos) {
  148|   325k|    const uint64_t old_word = bitset->words[pos >> 6];
  149|   325k|    const int index = pos & 63;
  150|       |    const uint64_t new_word = old_word | (UINT64_C(1) << index);
  151|   325k|    bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index);
  152|   325k|    bitset->words[pos >> 6] = new_word;
  153|   325k|}
roaring.c:bitset_container_add:
  168|    127|                                        uint16_t pos) {
  169|    127|    const uint64_t old_word = bitset->words[pos >> 6];
  170|    127|    const int index = pos & 63;
  171|       |    const uint64_t new_word = old_word | (UINT64_C(1) << index);
  172|    127|    const uint64_t increment = (old_word ^ new_word) >> index;
  173|    127|    bitset->cardinality += (uint32_t)increment;
  174|    127|    bitset->words[pos >> 6] = new_word;
  175|    127|    return increment > 0;
  176|    127|}
roaring.c:bitset_container_remove:
  181|  2.26k|                                           uint16_t pos) {
  182|  2.26k|    const uint64_t old_word = bitset->words[pos >> 6];
  183|  2.26k|    const int index = pos & 63;
  184|       |    const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
  185|  2.26k|    const uint64_t increment = (old_word ^ new_word) >> index;
  186|  2.26k|    bitset->cardinality -= (uint32_t)increment;
  187|  2.26k|    bitset->words[pos >> 6] = new_word;
  188|  2.26k|    return increment > 0;
  189|  2.26k|}
roaring.c:bitset_container_const_nonzero_cardinality:
  281|  6.98k|    const bitset_container_t *bitset) {
  282|  6.98k|    return !bitset_container_empty(bitset);
  283|  6.98k|}
roaring.c:bitset_container_empty:
  268|  6.98k|static inline bool bitset_container_empty(const bitset_container_t *bitset) {
  269|  6.98k|    if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
  ------------------
  |  Branch (269:9): [True: 0, False: 6.98k]
  ------------------
  270|      0|        for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) {
  ------------------
  |  Branch (270:25): [True: 0, False: 0]
  ------------------
  271|      0|            if ((bitset->words[i]) != 0) return false;
  ------------------
  |  Branch (271:17): [True: 0, False: 0]
  ------------------
  272|      0|        }
  273|      0|        return true;
  274|      0|    }
  275|  6.98k|    return bitset->cardinality == 0;
  276|  6.98k|}
roaring.c:bitset_container_get_range:
  206|    239|                                              uint32_t pos_end) {
  207|    239|    const uint32_t start = pos_start >> 6;
  208|    239|    const uint32_t end = pos_end >> 6;
  209|       |
  210|    239|    const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1);
  211|    239|    const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1;
  212|       |
  213|    239|    if (start == end)
  ------------------
  |  Branch (213:9): [True: 51, False: 188]
  ------------------
  214|     51|        return ((bitset->words[end] & first & last) == (first & last));
  215|    188|    if ((bitset->words[start] & first) != first) return false;
  ------------------
  |  Branch (215:9): [True: 63, False: 125]
  ------------------
  216|       |
  217|    125|    if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) &&
  ------------------
  |  Branch (217:9): [True: 109, False: 16]
  ------------------
  218|    109|        ((bitset->words[end] & last) != last)) {
  ------------------
  |  Branch (218:9): [True: 95, False: 14]
  ------------------
  219|     95|        return false;
  220|     95|    }
  221|       |
  222|     30|    for (uint32_t i = start + 1;
  223|  10.2k|         (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i) {
  ------------------
  |  Branch (223:10): [True: 10.2k, False: 13]
  |  Branch (223:50): [True: 10.2k, False: 12]
  ------------------
  224|  10.2k|        if (bitset->words[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false;
  ------------------
  |  Branch (224:13): [True: 5, False: 10.2k]
  ------------------
  225|  10.2k|    }
  226|       |
  227|     25|    return true;
  228|     30|}
roaring_array.c:bitset_container_size_in_bytes:
  468|   145k|    const bitset_container_t *container) {
  469|   145k|    (void)container;
  470|   145k|    return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
  471|   145k|}
roaring_array.c:bitset_container_cardinality:
  248|  72.5k|    const bitset_container_t *bitset) {
  249|  72.5k|    return bitset->cardinality;
  250|  72.5k|}
bitset.c:bitset_container_size_in_bytes:
  468|   145k|    const bitset_container_t *container) {
  469|   145k|    (void)container;
  470|   145k|    return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
  471|   145k|}
convert.c:bitset_container_set:
  147|  2.33M|                                        uint16_t pos) {
  148|  2.33M|    const uint64_t old_word = bitset->words[pos >> 6];
  149|  2.33M|    const int index = pos & 63;
  150|       |    const uint64_t new_word = old_word | (UINT64_C(1) << index);
  151|  2.33M|    bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index);
  152|  2.33M|    bitset->words[pos >> 6] = new_word;
  153|  2.33M|}
convert.c:bitset_container_serialized_size_in_bytes:
  427|   138k|static inline int32_t bitset_container_serialized_size_in_bytes(void) {
  428|   138k|    return BITSET_CONTAINER_SIZE_IN_WORDS * 8;
  429|   138k|}

container_iterator_next:
 2456|   336k|                                    uint16_t *value) {
 2457|   336k|    switch (typecode) {
 2458|      0|        case BITSET_CONTAINER_TYPE: {
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2458:9): [True: 0, False: 336k]
  ------------------
 2459|      0|            const bitset_container_t *bc = const_CAST_bitset(c);
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2460|      0|            it->index++;
 2461|       |
 2462|      0|            uint32_t wordindex = it->index / 64;
 2463|      0|            if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) {
  ------------------
  |  Branch (2463:17): [True: 0, False: 0]
  ------------------
 2464|      0|                return false;
 2465|      0|            }
 2466|       |
 2467|      0|            uint64_t word =
 2468|      0|                bc->words[wordindex] & (UINT64_MAX << (it->index % 64));
 2469|       |            // next part could be optimized/simplified
 2470|      0|            while (word == 0 &&
  ------------------
  |  Branch (2470:20): [True: 0, False: 0]
  ------------------
 2471|      0|                   (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
  ------------------
  |  Branch (2471:20): [True: 0, False: 0]
  ------------------
 2472|      0|                wordindex++;
 2473|      0|                word = bc->words[wordindex];
 2474|      0|            }
 2475|      0|            if (word != 0) {
  ------------------
  |  Branch (2475:17): [True: 0, False: 0]
  ------------------
 2476|      0|                it->index = wordindex * 64 + roaring_trailing_zeroes(word);
 2477|      0|                *value = it->index;
 2478|      0|                return true;
 2479|      0|            }
 2480|      0|            return false;
 2481|      0|        }
 2482|   295k|        case ARRAY_CONTAINER_TYPE: {
  ------------------
  |  |   49|   295k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2482:9): [True: 295k, False: 40.7k]
  ------------------
 2483|   295k|            const array_container_t *ac = const_CAST_array(c);
  ------------------
  |  |   55|   295k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|   295k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2484|   295k|            it->index++;
 2485|   295k|            if (it->index < ac->cardinality) {
  ------------------
  |  Branch (2485:17): [True: 289k, False: 5.80k]
  ------------------
 2486|   289k|                *value = ac->array[it->index];
 2487|   289k|                return true;
 2488|   289k|            }
 2489|  5.80k|            return false;
 2490|   295k|        }
 2491|  40.7k|        case RUN_CONTAINER_TYPE: {
  ------------------
  |  |   50|  40.7k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2491:9): [True: 40.7k, False: 295k]
  ------------------
 2492|  40.7k|            if (*value == UINT16_MAX) {  // Avoid overflow to zero
  ------------------
  |  Branch (2492:17): [True: 0, False: 40.7k]
  ------------------
 2493|      0|                return false;
 2494|      0|            }
 2495|       |
 2496|  40.7k|            const run_container_t *rc = const_CAST_run(c);
  ------------------
  |  |   78|  40.7k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  40.7k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2497|  40.7k|            uint32_t limit =
 2498|  40.7k|                rc->runs[it->index].value + rc->runs[it->index].length;
 2499|  40.7k|            if (*value < limit) {
  ------------------
  |  Branch (2499:17): [True: 26.2k, False: 14.5k]
  ------------------
 2500|  26.2k|                (*value)++;
 2501|  26.2k|                return true;
 2502|  26.2k|            }
 2503|       |
 2504|  14.5k|            it->index++;
 2505|  14.5k|            if (it->index < rc->n_runs) {
  ------------------
  |  Branch (2505:17): [True: 14.0k, False: 485]
  ------------------
 2506|  14.0k|                *value = rc->runs[it->index].value;
 2507|  14.0k|                return true;
 2508|  14.0k|            }
 2509|    485|            return false;
 2510|  14.5k|        }
 2511|      0|        default:
  ------------------
  |  Branch (2511:9): [True: 0, False: 336k]
  ------------------
 2512|      0|            assert(false);
 2513|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2514|      0|            return false;
 2515|   336k|    }
 2516|   336k|}
roaring.c:container_from_range:
  317|  74.5k|                                                uint32_t max, uint16_t step) {
  318|  74.5k|    if (step == 0) return NULL;  // being paranoid
  ------------------
  |  Branch (318:9): [True: 0, False: 74.5k]
  ------------------
  319|  74.5k|    if (step == 1) {
  ------------------
  |  Branch (319:9): [True: 74.5k, False: 0]
  ------------------
  320|  74.5k|        return container_range_of_ones(min, max, type);
  321|       |        // Note: the result is not always a run (need to check the cardinality)
  322|       |        //*type = RUN_CONTAINER_TYPE;
  323|       |        // return run_container_create_range(min, max);
  324|  74.5k|    }
  325|      0|    int size = (max - min + step - 1) / step;
  326|      0|    if (size <= DEFAULT_MAX_SIZE) {  // array container
  ------------------
  |  Branch (326:9): [True: 0, False: 0]
  ------------------
  327|      0|        *type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  328|      0|        array_container_t *array = array_container_create_given_capacity(size);
  329|      0|        array_container_add_from_range(array, min, max, step);
  330|      0|        assert(array->cardinality == size);
  331|      0|        return array;
  332|      0|    } else {  // bitset container
  333|      0|        *type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  334|      0|        bitset_container_t *bitset = bitset_container_create();
  335|      0|        bitset_container_add_from_range(bitset, min, max, step);
  336|       |        assert(bitset->cardinality == size);
  337|      0|        return bitset;
  338|      0|    }
  339|      0|}
roaring.c:container_range_of_ones:
  302|  91.2k|                                                   uint8_t *result_type) {
  303|  91.2k|    assert(range_end >= range_start);
  304|  91.2k|    uint64_t cardinality = range_end - range_start + 1;
  305|  91.2k|    if (cardinality <= 2) {
  ------------------
  |  Branch (305:9): [True: 149, False: 91.1k]
  ------------------
  306|    149|        *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    149|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  307|    149|        return array_container_create_range(range_start, range_end);
  308|  91.1k|    } else {
  309|  91.1k|        *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|  91.1k|#define RUN_CONTAINER_TYPE 3
  ------------------
  310|  91.1k|        return run_container_create_range(range_start, range_end);
  311|  91.1k|    }
  312|  91.2k|}
roaring.c:container_add_range:
 2282|  3.27k|                                               uint8_t *result_type) {
 2283|       |    // NB: when selecting new container type, we perform only inexpensive checks
 2284|  3.27k|    switch (type) {
 2285|      0|        case BITSET_CONTAINER_TYPE: {
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2285:9): [True: 0, False: 3.27k]
  ------------------
 2286|      0|            bitset_container_t *bitset = CAST_bitset(c);
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2287|       |
 2288|      0|            int32_t union_cardinality = 0;
 2289|      0|            union_cardinality += bitset->cardinality;
 2290|      0|            union_cardinality += max - min + 1;
 2291|      0|            union_cardinality -=
 2292|      0|                bitset_lenrange_cardinality(bitset->words, min, max - min);
 2293|       |
 2294|      0|            if (union_cardinality == INT32_C(0x10000)) {
  ------------------
  |  Branch (2294:17): [True: 0, False: 0]
  ------------------
 2295|      0|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
 2296|      0|                return run_container_create_range(0, INT32_C(0x10000));
 2297|      0|            } else {
 2298|      0|                *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 2299|      0|                bitset_set_lenrange(bitset->words, min, max - min);
 2300|      0|                bitset->cardinality = union_cardinality;
 2301|      0|                return bitset;
 2302|      0|            }
 2303|      0|        }
 2304|  2.84k|        case ARRAY_CONTAINER_TYPE: {
  ------------------
  |  |   49|  2.84k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2304:9): [True: 2.84k, False: 435]
  ------------------
 2305|  2.84k|            array_container_t *array = CAST_array(c);
  ------------------
  |  |   54|  2.84k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.84k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2306|       |
 2307|  2.84k|            int32_t nvals_greater =
 2308|  2.84k|                count_greater(array->array, array->cardinality, (uint16_t)max);
 2309|  2.84k|            int32_t nvals_less =
 2310|  2.84k|                count_less(array->array, array->cardinality - nvals_greater,
 2311|  2.84k|                           (uint16_t)min);
 2312|  2.84k|            int32_t union_cardinality =
 2313|  2.84k|                nvals_less + (max - min + 1) + nvals_greater;
 2314|       |
 2315|  2.84k|            if (union_cardinality == INT32_C(0x10000)) {
  ------------------
  |  Branch (2315:17): [True: 690, False: 2.15k]
  ------------------
 2316|    690|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|    690|#define RUN_CONTAINER_TYPE 3
  ------------------
 2317|    690|                return run_container_create_range(0, INT32_C(0x10000));
 2318|  2.15k|            } else if (union_cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (2318:24): [True: 609, False: 1.54k]
  ------------------
 2319|    609|                *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    609|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 2320|    609|                array_container_add_range_nvals(array, min, max, nvals_less,
 2321|    609|                                                nvals_greater);
 2322|    609|                return array;
 2323|  1.54k|            } else {
 2324|  1.54k|                *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|  1.54k|#define BITSET_CONTAINER_TYPE 1
  ------------------
 2325|  1.54k|                bitset_container_t *bitset = bitset_container_from_array(array);
 2326|  1.54k|                bitset_set_lenrange(bitset->words, min, max - min);
 2327|  1.54k|                bitset->cardinality = union_cardinality;
 2328|  1.54k|                return bitset;
 2329|  1.54k|            }
 2330|  2.84k|        }
 2331|    435|        case RUN_CONTAINER_TYPE: {
  ------------------
  |  |   50|    435|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2331:9): [True: 435, False: 2.84k]
  ------------------
 2332|    435|            run_container_t *run = CAST_run(c);
  ------------------
  |  |   77|    435|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    435|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2333|       |
 2334|    435|            int32_t nruns_greater =
 2335|    435|                rle16_count_greater(run->runs, run->n_runs, (uint16_t)max);
 2336|    435|            int32_t nruns_less = rle16_count_less(
 2337|    435|                run->runs, run->n_runs - nruns_greater, (uint16_t)min);
 2338|       |
 2339|    435|            int32_t run_size_bytes =
 2340|    435|                (nruns_less + 1 + nruns_greater) * sizeof(rle16_t);
 2341|    435|            int32_t bitset_size_bytes =
 2342|    435|                BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
 2343|       |
 2344|    435|            if (run_size_bytes <= bitset_size_bytes) {
  ------------------
  |  Branch (2344:17): [True: 435, False: 0]
  ------------------
 2345|    435|                run_container_add_range_nruns(run, min, max, nruns_less,
 2346|    435|                                              nruns_greater);
 2347|    435|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|    435|#define RUN_CONTAINER_TYPE 3
  ------------------
 2348|    435|                return run;
 2349|    435|            } else {
 2350|      0|                return container_from_run_range(run, min, max, result_type);
 2351|      0|            }
 2352|    435|        }
 2353|      0|        default:
  ------------------
  |  Branch (2353:9): [True: 0, False: 3.27k]
  ------------------
 2354|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2355|  3.27k|    }
 2356|  3.27k|}
roaring.c:container_remove_range:
 2371|  22.6k|                                                  uint8_t *result_type) {
 2372|  22.6k|    switch (type) {
 2373|  1.35k|        case BITSET_CONTAINER_TYPE: {
  ------------------
  |  |   48|  1.35k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2373:9): [True: 1.35k, False: 21.2k]
  ------------------
 2374|  1.35k|            bitset_container_t *bitset = CAST_bitset(c);
  ------------------
  |  |   52|  1.35k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.35k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2375|       |
 2376|  1.35k|            int32_t result_cardinality =
 2377|  1.35k|                bitset->cardinality -
 2378|  1.35k|                bitset_lenrange_cardinality(bitset->words, min, max - min);
 2379|       |
 2380|  1.35k|            if (result_cardinality == 0) {
  ------------------
  |  Branch (2380:17): [True: 34, False: 1.31k]
  ------------------
 2381|     34|                return NULL;
 2382|  1.31k|            } else if (result_cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (2382:24): [True: 111, False: 1.20k]
  ------------------
 2383|    111|                *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    111|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 2384|    111|                bitset_reset_range(bitset->words, min, max + 1);
 2385|    111|                bitset->cardinality = result_cardinality;
 2386|    111|                return array_container_from_bitset(bitset);
 2387|  1.20k|            } else {
 2388|  1.20k|                *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|  1.20k|#define BITSET_CONTAINER_TYPE 1
  ------------------
 2389|  1.20k|                bitset_reset_range(bitset->words, min, max + 1);
 2390|  1.20k|                bitset->cardinality = result_cardinality;
 2391|  1.20k|                return bitset;
 2392|  1.20k|            }
 2393|  1.35k|        }
 2394|  3.65k|        case ARRAY_CONTAINER_TYPE: {
  ------------------
  |  |   49|  3.65k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2394:9): [True: 3.65k, False: 18.9k]
  ------------------
 2395|  3.65k|            array_container_t *array = CAST_array(c);
  ------------------
  |  |   54|  3.65k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  3.65k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2396|       |
 2397|  3.65k|            int32_t nvals_greater =
 2398|  3.65k|                count_greater(array->array, array->cardinality, (uint16_t)max);
 2399|  3.65k|            int32_t nvals_less =
 2400|  3.65k|                count_less(array->array, array->cardinality - nvals_greater,
 2401|  3.65k|                           (uint16_t)min);
 2402|  3.65k|            int32_t result_cardinality = nvals_less + nvals_greater;
 2403|       |
 2404|  3.65k|            if (result_cardinality == 0) {
  ------------------
  |  Branch (2404:17): [True: 374, False: 3.27k]
  ------------------
 2405|    374|                return NULL;
 2406|  3.27k|            } else {
 2407|  3.27k|                *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  3.27k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 2408|  3.27k|                array_container_remove_range(
 2409|  3.27k|                    array, nvals_less, array->cardinality - result_cardinality);
 2410|  3.27k|                return array;
 2411|  3.27k|            }
 2412|  3.65k|        }
 2413|  17.6k|        case RUN_CONTAINER_TYPE: {
  ------------------
  |  |   50|  17.6k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2413:9): [True: 17.6k, False: 5.00k]
  ------------------
 2414|  17.6k|            run_container_t *run = CAST_run(c);
  ------------------
  |  |   77|  17.6k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  17.6k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2415|       |
 2416|  17.6k|            if (run->n_runs == 0) {
  ------------------
  |  Branch (2416:17): [True: 0, False: 17.6k]
  ------------------
 2417|      0|                return NULL;
 2418|      0|            }
 2419|  17.6k|            if (min <= run_container_minimum(run) &&
  ------------------
  |  Branch (2419:17): [True: 17.2k, False: 371]
  ------------------
 2420|  17.2k|                max >= run_container_maximum(run)) {
  ------------------
  |  Branch (2420:17): [True: 15.7k, False: 1.53k]
  ------------------
 2421|  15.7k|                return NULL;
 2422|  15.7k|            }
 2423|       |
 2424|  1.90k|            run_container_remove_range(run, min, max);
 2425|  1.90k|            return convert_run_to_efficient_container(run, result_type);
 2426|  17.6k|        }
 2427|      0|        default:
  ------------------
  |  Branch (2427:9): [True: 0, False: 22.6k]
  ------------------
 2428|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2429|  22.6k|    }
 2430|  22.6k|}
roaring.c:container_get_cardinality:
  244|   395k|                                            uint8_t typecode) {
  245|   395k|    c = container_unwrap_shared(c, &typecode);
  246|   395k|    switch (typecode) {
  ------------------
  |  Branch (246:13): [True: 395k, False: 0]
  ------------------
  247|  9.79k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  9.79k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (247:9): [True: 9.79k, False: 385k]
  ------------------
  248|  9.79k|            return bitset_container_cardinality(const_CAST_bitset(c));
  ------------------
  |  |   53|  9.79k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  9.79k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  249|  65.3k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  65.3k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (249:9): [True: 65.3k, False: 330k]
  ------------------
  250|  65.3k|            return array_container_cardinality(const_CAST_array(c));
  ------------------
  |  |   55|  65.3k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  65.3k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  251|   320k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|   320k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (251:9): [True: 320k, False: 75.1k]
  ------------------
  252|   320k|            return run_container_cardinality(const_CAST_run(c));
  ------------------
  |  |   78|   320k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|   320k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  253|   395k|    }
  254|   395k|    assert(false);
  255|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  256|      0|    return 0;  // unreached
  257|   395k|}
roaring.c:get_container_type:
  129|  79.8k|static inline uint8_t get_container_type(const container_t *c, uint8_t type) {
  130|  79.8k|    if (type == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|  79.8k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (130:9): [True: 0, False: 79.8k]
  ------------------
  131|      0|        return const_CAST_shared(c)->typecode;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  132|  79.8k|    } else {
  133|  79.8k|        return type;
  134|  79.8k|    }
  135|  79.8k|}
roaring.c:get_writable_copy_if_shared:
  146|  8.41M|                                                       uint8_t *type) {
  147|  8.41M|    if (*type == SHARED_CONTAINER_TYPE) {  // shared, return enclosed container
  ------------------
  |  |   51|  8.41M|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (147:9): [True: 0, False: 8.41M]
  ------------------
  148|      0|        return shared_container_extract_copy(CAST_shared(c), type);
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  149|  8.41M|    } else {
  150|  8.41M|        return c;  // not shared, so return as-is
  151|  8.41M|    }
  152|  8.41M|}
roaring.c:container_add:
  492|  8.17M|    uint8_t *new_typecode) {
  493|  8.17M|    c = get_writable_copy_if_shared(c, &typecode);
  494|  8.17M|    switch (typecode) {
  495|   325k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|   325k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (495:9): [True: 325k, False: 7.85M]
  ------------------
  496|   325k|            bitset_container_set(CAST_bitset(c), val);
  ------------------
  |  |   52|   325k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|   325k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  497|   325k|            *new_typecode = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|   325k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  498|   325k|            return c;
  499|  7.43M|        case ARRAY_CONTAINER_TYPE: {
  ------------------
  |  |   49|  7.43M|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (499:9): [True: 7.43M, False: 746k]
  ------------------
  500|  7.43M|            array_container_t *ac = CAST_array(c);
  ------------------
  |  |   54|  7.43M|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  7.43M|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  501|  7.43M|            if (array_container_try_add(ac, val, DEFAULT_MAX_SIZE) != -1) {
  ------------------
  |  Branch (501:17): [True: 7.43M, False: 127]
  ------------------
  502|  7.43M|                *new_typecode = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  7.43M|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  503|  7.43M|                return ac;
  504|  7.43M|            } else {
  505|    127|                bitset_container_t *bitset = bitset_container_from_array(ac);
  506|    127|                bitset_container_add(bitset, val);
  507|    127|                *new_typecode = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|    127|#define BITSET_CONTAINER_TYPE 1
  ------------------
  508|    127|                return bitset;
  509|    127|            }
  510|  7.43M|        } break;
  511|   420k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|   420k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (511:9): [True: 420k, False: 7.75M]
  ------------------
  512|       |            // per Java, no container type adjustments are done (revisit?)
  513|   420k|            run_container_add(CAST_run(c), val);
  ------------------
  |  |   77|   420k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|   420k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  514|   420k|            *new_typecode = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|   420k|#define RUN_CONTAINER_TYPE 3
  ------------------
  515|   420k|            return c;
  516|      0|        default:
  ------------------
  |  Branch (516:9): [True: 0, False: 8.17M]
  ------------------
  517|      0|            assert(false);
  518|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  519|      0|            return NULL;
  520|  8.17M|    }
  521|  8.17M|}
roaring.c:container_remove:
  533|  11.6k|    uint8_t *new_typecode) {
  534|  11.6k|    c = get_writable_copy_if_shared(c, &typecode);
  535|  11.6k|    switch (typecode) {
  536|  2.26k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  2.26k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (536:9): [True: 2.26k, False: 9.40k]
  ------------------
  537|  2.26k|            if (bitset_container_remove(CAST_bitset(c), val)) {
  ------------------
  |  |   52|  2.26k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.26k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (537:17): [True: 1.35k, False: 910]
  ------------------
  538|  1.35k|                int card = bitset_container_cardinality(CAST_bitset(c));
  ------------------
  |  |   52|  1.35k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.35k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  539|  1.35k|                if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (539:21): [True: 17, False: 1.33k]
  ------------------
  540|     17|                    *new_typecode = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|     17|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  541|     17|                    return array_container_from_bitset(CAST_bitset(c));
  ------------------
  |  |   52|     17|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     17|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  542|     17|                }
  543|  1.35k|            }
  544|  2.24k|            *new_typecode = typecode;
  545|  2.24k|            return c;
  546|  6.21k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  6.21k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (546:9): [True: 6.21k, False: 5.45k]
  ------------------
  547|  6.21k|            *new_typecode = typecode;
  548|  6.21k|            array_container_remove(CAST_array(c), val);
  ------------------
  |  |   54|  6.21k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  6.21k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  549|  6.21k|            return c;
  550|  3.19k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  3.19k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (550:9): [True: 3.19k, False: 8.47k]
  ------------------
  551|       |            // per Java, no container type adjustments are done (revisit?)
  552|  3.19k|            run_container_remove(CAST_run(c), val);
  ------------------
  |  |   77|  3.19k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  3.19k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  553|  3.19k|            *new_typecode = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|  3.19k|#define RUN_CONTAINER_TYPE 3
  ------------------
  554|  3.19k|            return c;
  555|      0|        default:
  ------------------
  |  Branch (555:9): [True: 0, False: 11.6k]
  ------------------
  556|      0|            assert(false);
  557|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  558|      0|            return NULL;
  559|  11.6k|    }
  560|  11.6k|}
roaring.c:container_nonzero_cardinality:
  437|   178k|                                                 uint8_t typecode) {
  438|   178k|    c = container_unwrap_shared(c, &typecode);
  439|   178k|    switch (typecode) {
  ------------------
  |  Branch (439:13): [True: 178k, False: 0]
  ------------------
  440|  6.98k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  6.98k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (440:9): [True: 6.98k, False: 171k]
  ------------------
  441|  6.98k|            return bitset_container_const_nonzero_cardinality(
  442|  6.98k|                const_CAST_bitset(c));
  ------------------
  |  |   53|  6.98k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  6.98k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  443|   105k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|   105k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (443:9): [True: 105k, False: 73.1k]
  ------------------
  444|   105k|            return array_container_nonzero_cardinality(const_CAST_array(c));
  ------------------
  |  |   55|   105k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|   105k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  445|  66.1k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  66.1k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (445:9): [True: 66.1k, False: 112k]
  ------------------
  446|  66.1k|            return run_container_nonzero_cardinality(const_CAST_run(c));
  ------------------
  |  |   78|  66.1k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  66.1k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  447|   178k|    }
  448|   178k|    assert(false);
  449|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  450|      0|    return 0;  // unreached
  451|   178k|}
roaring.c:container_and:
  728|  6.40k|                                         uint8_t *result_type) {
  729|  6.40k|    c1 = container_unwrap_shared(c1, &type1);
  730|  6.40k|    c2 = container_unwrap_shared(c2, &type2);
  731|  6.40k|    container_t *result = NULL;
  732|  6.40k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  6.40k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
  733|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (733:9): [True: 0, False: 6.40k]
  ------------------
  734|      0|            *result_type =
  735|      0|                bitset_bitset_container_intersection(
  ------------------
  |  Branch (735:17): [True: 0, False: 0]
  ------------------
  736|      0|                    const_CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                  const_CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  737|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  738|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  739|      0|            return result;
  740|       |
  741|  2.40k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  2.40k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (741:9): [True: 2.40k, False: 4.00k]
  ------------------
  742|  2.40k|            result = array_container_create();
  743|  2.40k|            array_container_intersection(
  744|  2.40k|                const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
  ------------------
  |  |   54|  2.40k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  745|  2.40k|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  746|  2.40k|            return result;
  747|       |
  748|    725|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    725|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (748:9): [True: 725, False: 5.67k]
  ------------------
  749|    725|            result = run_container_create();
  750|    725|            run_container_intersection(const_CAST_run(c1), const_CAST_run(c2),
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          run_container_intersection(const_CAST_run(c1), const_CAST_run(c2),
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  751|    725|                                       CAST_run(result));
  ------------------
  |  |   77|    725|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  752|    725|            return convert_run_to_efficient_container_and_free(CAST_run(result),
  ------------------
  |  |   77|    725|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  753|    725|                                                               result_type);
  754|       |
  755|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (755:9): [True: 0, False: 6.40k]
  ------------------
  756|      0|            result = array_container_create();
  757|      0|            array_bitset_container_intersection(const_CAST_array(c2),
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  758|      0|                                                const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  759|      0|                                                CAST_array(result));
  ------------------
  |  |   54|      0|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  760|      0|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  761|      0|            return result;
  762|       |
  763|    608|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|    608|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    608|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    608|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (763:9): [True: 608, False: 5.79k]
  ------------------
  764|    608|            result = array_container_create();
  765|    608|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|    608|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  766|    608|            array_bitset_container_intersection(const_CAST_array(c1),
  ------------------
  |  |   55|    608|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  767|    608|                                                const_CAST_bitset(c2),
  ------------------
  |  |   53|    608|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  768|    608|                                                CAST_array(result));
  ------------------
  |  |   54|    608|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  769|    608|            return result;
  770|       |
  771|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (771:9): [True: 0, False: 6.40k]
  ------------------
  772|      0|            *result_type =
  773|      0|                run_bitset_container_intersection(
  ------------------
  |  Branch (773:17): [True: 0, False: 0]
  ------------------
  774|      0|                    const_CAST_run(c2), const_CAST_bitset(c1), &result)
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                  const_CAST_run(c2), const_CAST_bitset(c1), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  775|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  776|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  777|      0|            return result;
  778|       |
  779|    650|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|    650|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    650|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (779:9): [True: 650, False: 5.75k]
  ------------------
  780|    650|            *result_type =
  781|    650|                run_bitset_container_intersection(
  ------------------
  |  Branch (781:17): [True: 0, False: 650]
  ------------------
  782|    650|                    const_CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                  const_CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|    650|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  783|    650|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  784|    650|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.30k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  785|    650|            return result;
  786|       |
  787|    784|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    784|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    784|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    784|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (787:9): [True: 784, False: 5.61k]
  ------------------
  788|    784|            result = array_container_create();
  789|    784|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|    784|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  790|    784|            array_run_container_intersection(
  791|    784|                const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
  ------------------
  |  |   55|    784|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
  ------------------
  |  |   78|    784|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
  ------------------
  |  |   54|    784|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  792|    784|            return result;
  793|       |
  794|  1.23k|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|  1.23k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.23k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  1.23k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (794:9): [True: 1.23k, False: 5.16k]
  ------------------
  795|  1.23k|            result = array_container_create();
  796|  1.23k|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|  1.23k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  797|  1.23k|            array_run_container_intersection(
  798|  1.23k|                const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
  ------------------
  |  |   55|  1.23k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
  ------------------
  |  |   78|  1.23k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
  ------------------
  |  |   54|  1.23k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  799|  1.23k|            return result;
  800|       |
  801|      0|        default:
  ------------------
  |  Branch (801:9): [True: 0, False: 6.40k]
  ------------------
  802|      0|            assert(false);
  803|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  804|      0|            return NULL;
  805|  6.40k|    }
  806|  6.40k|}
roaring.c:container_iand:
  923|  68.3k|                                          uint8_t *result_type) {
  924|  68.3k|    c1 = get_writable_copy_if_shared(c1, &type1);
  925|  68.3k|    c2 = container_unwrap_shared(c2, &type2);
  926|  68.3k|    container_t *result = NULL;
  927|  68.3k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  68.3k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
  928|  1.50k|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|  1.50k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  1.50k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  1.50k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (928:9): [True: 1.50k, False: 66.8k]
  ------------------
  929|  1.50k|            *result_type = bitset_bitset_container_intersection_inplace(
  ------------------
  |  Branch (929:28): [True: 1.50k, False: 0]
  ------------------
  930|  1.50k|                               CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   52|  1.50k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.50k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|  1.50k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.50k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  931|  1.50k|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|  1.50k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  932|  1.50k|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.50k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  933|  1.50k|            return result;
  934|       |
  935|  4.95k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  4.95k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  4.95k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  4.95k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (935:9): [True: 4.95k, False: 63.3k]
  ------------------
  936|  4.95k|            array_container_intersection_inplace(CAST_array(c1),
  ------------------
  |  |   54|  4.95k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  4.95k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  937|  4.95k|                                                 const_CAST_array(c2));
  ------------------
  |  |   55|  4.95k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  4.95k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  938|  4.95k|            *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  4.95k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  939|  4.95k|            return c1;
  940|       |
  941|  60.5k|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|  60.5k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  60.5k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  60.5k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (941:9): [True: 60.5k, False: 7.74k]
  ------------------
  942|  60.5k|            result = run_container_create();
  943|  60.5k|            run_container_intersection(const_CAST_run(c1), const_CAST_run(c2),
  ------------------
  |  |   78|  60.5k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  60.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          run_container_intersection(const_CAST_run(c1), const_CAST_run(c2),
  ------------------
  |  |   78|  60.5k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  60.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  944|  60.5k|                                       CAST_run(result));
  ------------------
  |  |   77|  60.5k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  60.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  945|       |            // as of January 2016, Java code used non-in-place intersection for
  946|       |            // two runcontainers
  947|  60.5k|            return convert_run_to_efficient_container_and_free(CAST_run(result),
  ------------------
  |  |   77|  60.5k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  60.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  948|  60.5k|                                                               result_type);
  949|       |
  950|     44|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|     44|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|     44|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|     44|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (950:9): [True: 44, False: 68.2k]
  ------------------
  951|       |            // c1 is a bitmap so no inplace possible
  952|     44|            result = array_container_create();
  953|     44|            array_bitset_container_intersection(const_CAST_array(c2),
  ------------------
  |  |   55|     44|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     44|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  954|     44|                                                const_CAST_bitset(c1),
  ------------------
  |  |   53|     44|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     44|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  955|     44|                                                CAST_array(result));
  ------------------
  |  |   54|     44|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     44|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  956|     44|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|     44|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  957|     44|            return result;
  958|       |
  959|      0|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (959:9): [True: 0, False: 68.3k]
  ------------------
  960|      0|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  961|      0|            array_bitset_container_intersection(
  962|      0|                const_CAST_array(c1), const_CAST_bitset(c2),
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_bitset(c2),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  963|      0|                CAST_array(c1));  // result is allowed to be same as c1
  ------------------
  |  |   54|      0|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  964|      0|            return c1;
  965|       |
  966|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (966:9): [True: 0, False: 68.3k]
  ------------------
  967|       |            // will attempt in-place computation
  968|      0|            *result_type = run_bitset_container_intersection(
  ------------------
  |  Branch (968:28): [True: 0, False: 0]
  ------------------
  969|      0|                               const_CAST_run(c2), const_CAST_bitset(c1), &c1)
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             const_CAST_run(c2), const_CAST_bitset(c1), &c1)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  970|      0|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  971|      0|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  972|      0|            return c1;
  973|       |
  974|      0|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (974:9): [True: 0, False: 68.3k]
  ------------------
  975|      0|            *result_type =
  976|      0|                run_bitset_container_intersection(
  ------------------
  |  Branch (976:17): [True: 0, False: 0]
  ------------------
  977|      0|                    const_CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                  const_CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  978|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  979|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  980|      0|            return result;
  981|       |
  982|    196|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    196|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    196|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    196|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (982:9): [True: 196, False: 68.1k]
  ------------------
  983|    196|            result = array_container_create();
  984|    196|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|    196|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  985|    196|            array_run_container_intersection(
  986|    196|                const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
  ------------------
  |  |   55|    196|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    196|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
  ------------------
  |  |   78|    196|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    196|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_run(c2), CAST_array(result));
  ------------------
  |  |   54|    196|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    196|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  987|    196|            return result;
  988|       |
  989|  1.03k|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|  1.03k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.03k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  1.03k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (989:9): [True: 1.03k, False: 67.2k]
  ------------------
  990|  1.03k|            result = array_container_create();
  991|  1.03k|            *result_type = ARRAY_CONTAINER_TYPE;  // never bitset
  ------------------
  |  |   49|  1.03k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  992|  1.03k|            array_run_container_intersection(
  993|  1.03k|                const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
  ------------------
  |  |   55|  1.03k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.03k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
  ------------------
  |  |   78|  1.03k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.03k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_run(c1), CAST_array(result));
  ------------------
  |  |   54|  1.03k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.03k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  994|  1.03k|            return result;
  995|       |
  996|      0|        default:
  ------------------
  |  Branch (996:9): [True: 0, False: 68.3k]
  ------------------
  997|      0|            assert(false);
  998|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  999|      0|            return NULL;
 1000|  68.3k|    }
 1001|  68.3k|}
roaring.c:container_or:
 1010|  6.40k|                                        uint8_t *result_type) {
 1011|  6.40k|    c1 = container_unwrap_shared(c1, &type1);
 1012|  6.40k|    c2 = container_unwrap_shared(c2, &type2);
 1013|  6.40k|    container_t *result = NULL;
 1014|  6.40k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  6.40k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
 1015|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1015:9): [True: 0, False: 6.40k]
  ------------------
 1016|      0|            result = bitset_container_create();
 1017|      0|            bitset_container_or(const_CAST_bitset(c1), const_CAST_bitset(c2),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          bitset_container_or(const_CAST_bitset(c1), const_CAST_bitset(c2),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1018|      0|                                CAST_bitset(result));
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1019|      0|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1020|      0|            return result;
 1021|       |
 1022|  2.40k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  2.40k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1022:9): [True: 2.40k, False: 4.00k]
  ------------------
 1023|  2.40k|            *result_type =
 1024|  2.40k|                array_array_container_union(const_CAST_array(c1),
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1024:17): [True: 44, False: 2.35k]
  ------------------
 1025|  2.40k|                                            const_CAST_array(c2), &result)
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1026|  2.40k|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|     44|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1027|  2.40k|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  4.75k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1028|  2.40k|            return result;
 1029|       |
 1030|    725|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    725|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1030:9): [True: 725, False: 5.67k]
  ------------------
 1031|    725|            result = run_container_create();
 1032|    725|            run_container_union(const_CAST_run(c1), const_CAST_run(c2),
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          run_container_union(const_CAST_run(c1), const_CAST_run(c2),
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1033|    725|                                CAST_run(result));
  ------------------
  |  |   77|    725|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1034|    725|            *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  ------------------
 1035|       |            // todo: could be optimized since will never convert to array
 1036|    725|            result = convert_run_to_efficient_container_and_free(
 1037|    725|                CAST_run(result), result_type);
  ------------------
  |  |   77|    725|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1038|    725|            return result;
 1039|       |
 1040|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1040:9): [True: 0, False: 6.40k]
  ------------------
 1041|      0|            result = bitset_container_create();
 1042|      0|            array_bitset_container_union(const_CAST_array(c2),
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1043|      0|                                         const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1044|      0|                                         CAST_bitset(result));
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1045|      0|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1046|      0|            return result;
 1047|       |
 1048|    608|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|    608|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    608|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    608|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1048:9): [True: 608, False: 5.79k]
  ------------------
 1049|    608|            result = bitset_container_create();
 1050|    608|            array_bitset_container_union(const_CAST_array(c1),
  ------------------
  |  |   55|    608|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1051|    608|                                         const_CAST_bitset(c2),
  ------------------
  |  |   53|    608|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1052|    608|                                         CAST_bitset(result));
  ------------------
  |  |   52|    608|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1053|    608|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|    608|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1054|    608|            return result;
 1055|       |
 1056|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1056:9): [True: 0, False: 6.40k]
  ------------------
 1057|      0|            if (run_container_is_full(const_CAST_run(c2))) {
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1057:17): [True: 0, False: 0]
  ------------------
 1058|      0|                result = run_container_create();
 1059|      0|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
 1060|      0|                run_container_copy(const_CAST_run(c2), CAST_run(result));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              run_container_copy(const_CAST_run(c2), CAST_run(result));
  ------------------
  |  |   77|      0|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1061|      0|                return result;
 1062|      0|            }
 1063|      0|            result = bitset_container_create();
 1064|      0|            run_bitset_container_union(
 1065|      0|                const_CAST_run(c2), const_CAST_bitset(c1), CAST_bitset(result));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c2), const_CAST_bitset(c1), CAST_bitset(result));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c2), const_CAST_bitset(c1), CAST_bitset(result));
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1066|      0|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1067|      0|            return result;
 1068|       |
 1069|    650|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|    650|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    650|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1069:9): [True: 650, False: 5.75k]
  ------------------
 1070|    650|            if (run_container_is_full(const_CAST_run(c1))) {
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1070:17): [True: 0, False: 650]
  ------------------
 1071|      0|                result = run_container_create();
 1072|      0|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
 1073|      0|                run_container_copy(const_CAST_run(c1), CAST_run(result));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              run_container_copy(const_CAST_run(c1), CAST_run(result));
  ------------------
  |  |   77|      0|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1074|      0|                return result;
 1075|      0|            }
 1076|    650|            result = bitset_container_create();
 1077|    650|            run_bitset_container_union(
 1078|    650|                const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
  ------------------
  |  |   53|    650|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
  ------------------
  |  |   52|    650|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1079|    650|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1080|    650|            return result;
 1081|       |
 1082|    784|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    784|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    784|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    784|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1082:9): [True: 784, False: 5.61k]
  ------------------
 1083|    784|            result = run_container_create();
 1084|    784|            array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
  ------------------
  |  |   55|    784|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
  ------------------
  |  |   78|    784|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1085|    784|                                      CAST_run(result));
  ------------------
  |  |   77|    784|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1086|    784|            result = convert_run_to_efficient_container_and_free(
 1087|    784|                CAST_run(result), result_type);
  ------------------
  |  |   77|    784|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1088|    784|            return result;
 1089|       |
 1090|  1.23k|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|  1.23k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.23k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  1.23k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1090:9): [True: 1.23k, False: 5.16k]
  ------------------
 1091|  1.23k|            result = run_container_create();
 1092|  1.23k|            array_run_container_union(const_CAST_array(c2), const_CAST_run(c1),
  ------------------
  |  |   55|  1.23k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          array_run_container_union(const_CAST_array(c2), const_CAST_run(c1),
  ------------------
  |  |   78|  1.23k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1093|  1.23k|                                      CAST_run(result));
  ------------------
  |  |   77|  1.23k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1094|  1.23k|            result = convert_run_to_efficient_container_and_free(
 1095|  1.23k|                CAST_run(result), result_type);
  ------------------
  |  |   77|  1.23k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1096|  1.23k|            return result;
 1097|       |
 1098|      0|        default:
  ------------------
  |  Branch (1098:9): [True: 0, False: 6.40k]
  ------------------
 1099|      0|            assert(false);
 1100|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1101|      0|            return NULL;  // unreached
 1102|  6.40k|    }
 1103|  6.40k|}
roaring.c:container_is_full:
  262|  7.74k|static inline bool container_is_full(const container_t *c, uint8_t typecode) {
  263|  7.74k|    c = container_unwrap_shared(c, &typecode);
  264|  7.74k|    switch (typecode) {
  ------------------
  |  Branch (264:13): [True: 7.74k, False: 0]
  ------------------
  265|      0|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (265:9): [True: 0, False: 7.74k]
  ------------------
  266|      0|            return bitset_container_cardinality(const_CAST_bitset(c)) ==
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  267|      0|                   (1 << 16);
  268|  3.87k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  3.87k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (268:9): [True: 3.87k, False: 3.87k]
  ------------------
  269|  3.87k|            return array_container_cardinality(const_CAST_array(c)) ==
  ------------------
  |  |   55|  3.87k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.87k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  270|  3.87k|                   (1 << 16);
  271|  3.87k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  3.87k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (271:9): [True: 3.87k, False: 3.87k]
  ------------------
  272|  3.87k|            return run_container_is_full(const_CAST_run(c));
  ------------------
  |  |   78|  3.87k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.87k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  273|  7.74k|    }
  274|  7.74k|    assert(false);
  275|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  276|      0|    return 0;  // unreached
  277|  7.74k|}
roaring.c:container_ior:
 1228|  6.51k|                                         uint8_t *result_type) {
 1229|  6.51k|    c1 = get_writable_copy_if_shared(c1, &type1);
 1230|  6.51k|    c2 = container_unwrap_shared(c2, &type2);
 1231|  6.51k|    container_t *result = NULL;
 1232|  6.51k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  6.51k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
 1233|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1233:9): [True: 0, False: 6.51k]
  ------------------
 1234|      0|            bitset_container_or(const_CAST_bitset(c1), const_CAST_bitset(c2),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          bitset_container_or(const_CAST_bitset(c1), const_CAST_bitset(c2),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1235|      0|                                CAST_bitset(c1));
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1236|      0|#ifdef OR_BITSET_CONVERSION_TO_FULL
 1237|      0|            if (CAST_bitset(c1)->cardinality == (1 << 16)) {  // we convert
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1237:17): [True: 0, False: 0]
  ------------------
 1238|      0|                result = run_container_create_range(0, (1 << 16));
 1239|      0|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
 1240|      0|                return result;
 1241|      0|            }
 1242|      0|#endif
 1243|      0|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1244|      0|            return c1;
 1245|       |
 1246|  2.58k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  2.58k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.58k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.58k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1246:9): [True: 2.58k, False: 3.93k]
  ------------------
 1247|  2.58k|            *result_type = array_array_container_inplace_union(
  ------------------
  |  Branch (1247:28): [True: 0, False: 2.58k]
  ------------------
 1248|  2.58k|                               CAST_array(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   54|  2.58k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.58k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_array(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   55|  2.58k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.58k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1249|  2.58k|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1250|  2.58k|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  5.17k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1251|  2.58k|            if ((result == NULL) && (*result_type == ARRAY_CONTAINER_TYPE)) {
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (1251:17): [True: 0, False: 2.58k]
  |  Branch (1251:37): [True: 0, False: 0]
  ------------------
 1252|      0|                return c1;  // the computation was done in-place!
 1253|      0|            }
 1254|  2.58k|            return result;
 1255|       |
 1256|  1.74k|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|  1.74k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.74k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.74k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1256:9): [True: 1.74k, False: 4.76k]
  ------------------
 1257|  1.74k|            run_container_union_inplace(CAST_run(c1), const_CAST_run(c2));
  ------------------
  |  |   77|  1.74k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.74k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          run_container_union_inplace(CAST_run(c1), const_CAST_run(c2));
  ------------------
  |  |   78|  1.74k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.74k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1258|  1.74k|            return convert_run_to_efficient_container(CAST_run(c1),
  ------------------
  |  |   77|  1.74k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.74k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1259|  1.74k|                                                      result_type);
 1260|       |
 1261|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1261:9): [True: 0, False: 6.51k]
  ------------------
 1262|      0|            array_bitset_container_union(
 1263|      0|                const_CAST_array(c2), const_CAST_bitset(c1), CAST_bitset(c1));
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_bitset(c1), CAST_bitset(c1));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_bitset(c1), CAST_bitset(c1));
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1264|      0|            *result_type = BITSET_CONTAINER_TYPE;  // never array
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1265|      0|            return c1;
 1266|       |
 1267|    652|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|    652|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    652|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    652|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1267:9): [True: 652, False: 5.86k]
  ------------------
 1268|       |            // c1 is an array, so no in-place possible
 1269|    652|            result = bitset_container_create();
 1270|    652|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|    652|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1271|    652|            array_bitset_container_union(const_CAST_array(c1),
  ------------------
  |  |   55|    652|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    652|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1272|    652|                                         const_CAST_bitset(c2),
  ------------------
  |  |   53|    652|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    652|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1273|    652|                                         CAST_bitset(result));
  ------------------
  |  |   52|    652|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    652|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1274|    652|            return result;
 1275|       |
 1276|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1276:9): [True: 0, False: 6.51k]
  ------------------
 1277|      0|            if (run_container_is_full(const_CAST_run(c2))) {
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1277:17): [True: 0, False: 0]
  ------------------
 1278|      0|                result = run_container_create();
 1279|      0|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
 1280|      0|                run_container_copy(const_CAST_run(c2), CAST_run(result));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              run_container_copy(const_CAST_run(c2), CAST_run(result));
  ------------------
  |  |   77|      0|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1281|      0|                return result;
 1282|      0|            }
 1283|      0|            run_bitset_container_union(const_CAST_run(c2),
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1284|      0|                                       const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1285|      0|                                       CAST_bitset(c1));  // allowed
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1286|      0|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1287|      0|            return c1;
 1288|       |
 1289|    650|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|    650|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    650|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1289:9): [True: 650, False: 5.86k]
  ------------------
 1290|    650|            if (run_container_is_full(const_CAST_run(c1))) {
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1290:17): [True: 0, False: 650]
  ------------------
 1291|      0|                *result_type = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
 1292|      0|                return c1;
 1293|      0|            }
 1294|    650|            result = bitset_container_create();
 1295|    650|            run_bitset_container_union(
 1296|    650|                const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
  ------------------
  |  |   53|    650|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_bitset(c2), CAST_bitset(result));
  ------------------
  |  |   52|    650|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1297|    650|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1298|    650|            return result;
 1299|       |
 1300|    635|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    635|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    635|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    635|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1300:9): [True: 635, False: 5.88k]
  ------------------
 1301|    635|            result = run_container_create();
 1302|    635|            array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
  ------------------
  |  |   55|    635|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    635|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          array_run_container_union(const_CAST_array(c1), const_CAST_run(c2),
  ------------------
  |  |   78|    635|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    635|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1303|    635|                                      CAST_run(result));
  ------------------
  |  |   77|    635|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    635|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1304|    635|            result = convert_run_to_efficient_container_and_free(
 1305|    635|                CAST_run(result), result_type);
  ------------------
  |  |   77|    635|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    635|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1306|    635|            return result;
 1307|       |
 1308|    244|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|    244|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    244|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    244|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1308:9): [True: 244, False: 6.27k]
  ------------------
 1309|    244|            array_run_container_inplace_union(const_CAST_array(c2),
  ------------------
  |  |   55|    244|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    244|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1310|    244|                                              CAST_run(c1));
  ------------------
  |  |   77|    244|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    244|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1311|    244|            c1 = convert_run_to_efficient_container(CAST_run(c1), result_type);
  ------------------
  |  |   77|    244|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    244|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1312|    244|            return c1;
 1313|       |
 1314|      0|        default:
  ------------------
  |  Branch (1314:9): [True: 0, False: 6.51k]
  ------------------
 1315|      0|            assert(false);
 1316|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1317|      0|            return NULL;
 1318|  6.51k|    }
 1319|  6.51k|}
roaring.c:container_xor:
 1451|  6.40k|                                         uint8_t *result_type) {
 1452|  6.40k|    c1 = container_unwrap_shared(c1, &type1);
 1453|  6.40k|    c2 = container_unwrap_shared(c2, &type2);
 1454|  6.40k|    container_t *result = NULL;
 1455|  6.40k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  6.40k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
 1456|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1456:9): [True: 0, False: 6.40k]
  ------------------
 1457|      0|            *result_type =
 1458|      0|                bitset_bitset_container_xor(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1458:17): [True: 0, False: 0]
  ------------------
 1459|      0|                                            const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1460|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1461|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1462|      0|            return result;
 1463|       |
 1464|  2.40k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  2.40k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1464:9): [True: 2.40k, False: 4.00k]
  ------------------
 1465|  2.40k|            *result_type =
 1466|  2.40k|                array_array_container_xor(const_CAST_array(c1),
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1466:17): [True: 12, False: 2.38k]
  ------------------
 1467|  2.40k|                                          const_CAST_array(c2), &result)
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1468|  2.40k|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|     12|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1469|  2.40k|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  4.78k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1470|  2.40k|            return result;
 1471|       |
 1472|    725|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    725|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1472:9): [True: 725, False: 5.67k]
  ------------------
 1473|    725|            *result_type = (uint8_t)run_run_container_xor(
 1474|    725|                const_CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1475|    725|            return result;
 1476|       |
 1477|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1477:9): [True: 0, False: 6.40k]
  ------------------
 1478|      0|            *result_type =
 1479|      0|                array_bitset_container_xor(const_CAST_array(c2),
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1479:17): [True: 0, False: 0]
  ------------------
 1480|      0|                                           const_CAST_bitset(c1), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1481|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1482|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1483|      0|            return result;
 1484|       |
 1485|    608|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|    608|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    608|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    608|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1485:9): [True: 608, False: 5.79k]
  ------------------
 1486|    608|            *result_type =
 1487|    608|                array_bitset_container_xor(const_CAST_array(c1),
  ------------------
  |  |   55|    608|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1487:17): [True: 560, False: 48]
  ------------------
 1488|    608|                                           const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|    608|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1489|    608|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|    560|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1490|    608|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    656|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1491|    608|            return result;
 1492|       |
 1493|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1493:9): [True: 0, False: 6.40k]
  ------------------
 1494|      0|            *result_type =
 1495|      0|                run_bitset_container_xor(const_CAST_run(c2),
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1495:17): [True: 0, False: 0]
  ------------------
 1496|      0|                                         const_CAST_bitset(c1), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1497|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1498|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1499|      0|            return result;
 1500|       |
 1501|    650|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|    650|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    650|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1501:9): [True: 650, False: 5.75k]
  ------------------
 1502|    650|            *result_type =
 1503|    650|                run_bitset_container_xor(const_CAST_run(c1),
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1503:17): [True: 592, False: 58]
  ------------------
 1504|    650|                                         const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|    650|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1505|    650|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|    592|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1506|    650|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    708|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1507|    650|            return result;
 1508|       |
 1509|    784|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    784|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    784|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    784|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1509:9): [True: 784, False: 5.61k]
  ------------------
 1510|    784|            *result_type = (uint8_t)array_run_container_xor(
 1511|    784|                const_CAST_array(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   55|    784|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|    784|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1512|    784|            return result;
 1513|       |
 1514|  1.23k|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|  1.23k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.23k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  1.23k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1514:9): [True: 1.23k, False: 5.16k]
  ------------------
 1515|  1.23k|            *result_type = (uint8_t)array_run_container_xor(
 1516|  1.23k|                const_CAST_array(c2), const_CAST_run(c1), &result);
  ------------------
  |  |   55|  1.23k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_run(c1), &result);
  ------------------
  |  |   78|  1.23k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1517|  1.23k|            return result;
 1518|       |
 1519|      0|        default:
  ------------------
  |  Branch (1519:9): [True: 0, False: 6.40k]
  ------------------
 1520|      0|            assert(false);
 1521|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1522|      0|            return NULL;  // unreached
 1523|  6.40k|    }
 1524|  6.40k|}
roaring.c:container_ixor:
 1667|  66.7k|                                          uint8_t *result_type) {
 1668|  66.7k|    c1 = get_writable_copy_if_shared(c1, &type1);
 1669|  66.7k|    c2 = container_unwrap_shared(c2, &type2);
 1670|  66.7k|    container_t *result = NULL;
 1671|  66.7k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  66.7k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
 1672|  1.53k|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|  1.53k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  1.53k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  1.53k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1672:9): [True: 1.53k, False: 65.2k]
  ------------------
 1673|  1.53k|            *result_type = bitset_bitset_container_ixor(
  ------------------
  |  Branch (1673:28): [True: 0, False: 1.53k]
  ------------------
 1674|  1.53k|                               CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   52|  1.53k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.53k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|  1.53k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.53k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1675|  1.53k|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1676|  1.53k|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  3.07k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1677|  1.53k|            return result;
 1678|       |
 1679|  5.20k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  5.20k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  5.20k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  5.20k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1679:9): [True: 5.20k, False: 61.5k]
  ------------------
 1680|  5.20k|            *result_type = array_array_container_ixor(
  ------------------
  |  Branch (1680:28): [True: 0, False: 5.20k]
  ------------------
 1681|  5.20k|                               CAST_array(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   54|  5.20k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  5.20k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_array(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   55|  5.20k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  5.20k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1682|  5.20k|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1683|  5.20k|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  10.4k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1684|  5.20k|            return result;
 1685|       |
 1686|  59.5k|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|  59.5k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  59.5k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  59.5k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1686:9): [True: 59.5k, False: 7.23k]
  ------------------
 1687|  59.5k|            *result_type = (uint8_t)run_run_container_ixor(
 1688|  59.5k|                CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   77|  59.5k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  59.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|  59.5k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  59.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1689|  59.5k|            return result;
 1690|       |
 1691|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1691:9): [True: 0, False: 66.7k]
  ------------------
 1692|      0|            *result_type = bitset_array_container_ixor(
  ------------------
  |  Branch (1692:28): [True: 0, False: 0]
  ------------------
 1693|      0|                               CAST_bitset(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_bitset(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1694|      0|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1695|      0|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1696|      0|            return result;
 1697|       |
 1698|     92|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|     92|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|     92|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|     92|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1698:9): [True: 92, False: 66.6k]
  ------------------
 1699|     92|            *result_type = array_bitset_container_ixor(
  ------------------
  |  Branch (1699:28): [True: 0, False: 92]
  ------------------
 1700|     92|                               CAST_array(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   54|     92|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     92|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_array(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|     92|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     92|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1701|     92|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1702|     92|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    184|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1703|     92|            return result;
 1704|       |
 1705|     69|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|     69|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|     69|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|     69|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1705:9): [True: 69, False: 66.7k]
  ------------------
 1706|     69|            *result_type = bitset_run_container_ixor(
  ------------------
  |  Branch (1706:28): [True: 0, False: 69]
  ------------------
 1707|     69|                               CAST_bitset(c1), const_CAST_run(c2), &result)
  ------------------
  |  |   52|     69|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     69|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_bitset(c1), const_CAST_run(c2), &result)
  ------------------
  |  |   78|     69|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     69|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1708|     69|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1709|     69|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    138|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1710|       |
 1711|     69|            return result;
 1712|       |
 1713|     12|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|     12|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|     12|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|     12|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1713:9): [True: 12, False: 66.7k]
  ------------------
 1714|     12|            *result_type = run_bitset_container_ixor(
  ------------------
  |  Branch (1714:28): [True: 0, False: 12]
  ------------------
 1715|     12|                               CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   77|     12|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     12|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|     12|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     12|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1716|     12|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1717|     12|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|     24|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1718|     12|            return result;
 1719|       |
 1720|    266|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    266|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    266|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    266|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1720:9): [True: 266, False: 66.5k]
  ------------------
 1721|    266|            *result_type = (uint8_t)array_run_container_ixor(
 1722|    266|                CAST_array(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   54|    266|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    266|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              CAST_array(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|    266|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    266|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1723|    266|            return result;
 1724|       |
 1725|     57|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|     57|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|     57|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|     57|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1725:9): [True: 57, False: 66.7k]
  ------------------
 1726|     57|            *result_type = (uint8_t)run_array_container_ixor(
 1727|     57|                CAST_run(c1), const_CAST_array(c2), &result);
  ------------------
  |  |   77|     57|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     57|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              CAST_run(c1), const_CAST_array(c2), &result);
  ------------------
  |  |   55|     57|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     57|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1728|     57|            return result;
 1729|       |
 1730|      0|        default:
  ------------------
  |  Branch (1730:9): [True: 0, False: 66.7k]
  ------------------
 1731|      0|            assert(false);
 1732|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1733|      0|            return NULL;
 1734|  66.7k|    }
 1735|  66.7k|}
roaring.c:container_andnot:
 1787|  6.40k|                                            uint8_t *result_type) {
 1788|  6.40k|    c1 = container_unwrap_shared(c1, &type1);
 1789|  6.40k|    c2 = container_unwrap_shared(c2, &type2);
 1790|  6.40k|    container_t *result = NULL;
 1791|  6.40k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  6.40k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
 1792|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1792:9): [True: 0, False: 6.40k]
  ------------------
 1793|      0|            *result_type =
 1794|      0|                bitset_bitset_container_andnot(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1794:17): [True: 0, False: 0]
  ------------------
 1795|      0|                                               const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1796|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1797|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1798|      0|            return result;
 1799|       |
 1800|  2.40k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  2.40k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1800:9): [True: 2.40k, False: 4.00k]
  ------------------
 1801|  2.40k|            result = array_container_create();
 1802|  2.40k|            array_array_container_andnot(
 1803|  2.40k|                const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_array(c2), CAST_array(result));
  ------------------
  |  |   54|  2.40k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1804|  2.40k|            *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1805|  2.40k|            return result;
 1806|       |
 1807|    725|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    725|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1807:9): [True: 725, False: 5.67k]
  ------------------
 1808|    725|            if (run_container_is_full(const_CAST_run(c2))) {
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1808:17): [True: 15, False: 710]
  ------------------
 1809|     15|                result = array_container_create();
 1810|     15|                *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|     15|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1811|     15|                return result;
 1812|     15|            }
 1813|    710|            *result_type = (uint8_t)run_run_container_andnot(
 1814|    710|                const_CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|    710|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    710|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|    710|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    710|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1815|    710|            return result;
 1816|       |
 1817|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1817:9): [True: 0, False: 6.40k]
  ------------------
 1818|      0|            *result_type =
 1819|      0|                bitset_array_container_andnot(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1819:17): [True: 0, False: 0]
  ------------------
 1820|      0|                                              const_CAST_array(c2), &result)
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1821|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1822|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1823|      0|            return result;
 1824|       |
 1825|    608|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|    608|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    608|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    608|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1825:9): [True: 608, False: 5.79k]
  ------------------
 1826|    608|            result = array_container_create();
 1827|    608|            array_bitset_container_andnot(const_CAST_array(c1),
  ------------------
  |  |   55|    608|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1828|    608|                                          const_CAST_bitset(c2),
  ------------------
  |  |   53|    608|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1829|    608|                                          CAST_array(result));
  ------------------
  |  |   54|    608|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1830|    608|            *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    608|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1831|    608|            return result;
 1832|       |
 1833|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1833:9): [True: 0, False: 6.40k]
  ------------------
 1834|      0|            if (run_container_is_full(const_CAST_run(c2))) {
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1834:17): [True: 0, False: 0]
  ------------------
 1835|      0|                result = array_container_create();
 1836|      0|                *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1837|      0|                return result;
 1838|      0|            }
 1839|      0|            *result_type =
 1840|      0|                bitset_run_container_andnot(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1840:17): [True: 0, False: 0]
  ------------------
 1841|      0|                                            const_CAST_run(c2), &result)
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1842|      0|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1843|      0|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1844|      0|            return result;
 1845|       |
 1846|    650|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|    650|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    650|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1846:9): [True: 650, False: 5.75k]
  ------------------
 1847|    650|            *result_type =
 1848|    650|                run_bitset_container_andnot(const_CAST_run(c1),
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1848:17): [True: 0, False: 650]
  ------------------
 1849|    650|                                            const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|    650|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1850|    650|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1851|    650|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.30k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1852|    650|            return result;
 1853|       |
 1854|    784|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    784|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    784|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    784|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1854:9): [True: 784, False: 5.61k]
  ------------------
 1855|    784|            if (run_container_is_full(const_CAST_run(c2))) {
  ------------------
  |  |   78|    784|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (1855:17): [True: 30, False: 754]
  ------------------
 1856|     30|                result = array_container_create();
 1857|     30|                *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|     30|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1858|     30|                return result;
 1859|     30|            }
 1860|    754|            result = array_container_create();
 1861|    754|            array_run_container_andnot(const_CAST_array(c1), const_CAST_run(c2),
  ------------------
  |  |   55|    754|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    754|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          array_run_container_andnot(const_CAST_array(c1), const_CAST_run(c2),
  ------------------
  |  |   78|    754|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    754|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1862|    754|                                       CAST_array(result));
  ------------------
  |  |   54|    754|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    754|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1863|    754|            *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    754|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1864|    754|            return result;
 1865|       |
 1866|  1.23k|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|  1.23k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.23k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  1.23k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1866:9): [True: 1.23k, False: 5.16k]
  ------------------
 1867|  1.23k|            *result_type = (uint8_t)run_array_container_andnot(
 1868|  1.23k|                const_CAST_run(c1), const_CAST_array(c2), &result);
  ------------------
  |  |   78|  1.23k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_array(c2), &result);
  ------------------
  |  |   55|  1.23k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1869|  1.23k|            return result;
 1870|       |
 1871|      0|        default:
  ------------------
  |  Branch (1871:9): [True: 0, False: 6.40k]
  ------------------
 1872|      0|            assert(false);
 1873|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1874|      0|            return NULL;  // unreached
 1875|  6.40k|    }
 1876|  6.40k|}
roaring.c:container_iandnot:
 1891|  6.06k|                                             uint8_t *result_type) {
 1892|  6.06k|    c1 = get_writable_copy_if_shared(c1, &type1);
 1893|  6.06k|    c2 = container_unwrap_shared(c2, &type2);
 1894|  6.06k|    container_t *result = NULL;
 1895|  6.06k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  6.06k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
 1896|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1896:9): [True: 0, False: 6.06k]
  ------------------
 1897|      0|            *result_type = bitset_bitset_container_iandnot(
  ------------------
  |  Branch (1897:28): [True: 0, False: 0]
  ------------------
 1898|      0|                               CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_bitset(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1899|      0|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1900|      0|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1901|      0|            return result;
 1902|       |
 1903|  3.57k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  3.57k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  3.57k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  3.57k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1903:9): [True: 3.57k, False: 2.49k]
  ------------------
 1904|  3.57k|            array_array_container_iandnot(CAST_array(c1), const_CAST_array(c2));
  ------------------
  |  |   54|  3.57k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  3.57k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          array_array_container_iandnot(CAST_array(c1), const_CAST_array(c2));
  ------------------
  |  |   55|  3.57k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.57k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1905|  3.57k|            *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  3.57k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1906|  3.57k|            return c1;
 1907|       |
 1908|    470|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    470|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    470|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    470|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1908:9): [True: 470, False: 5.59k]
  ------------------
 1909|    470|            *result_type = (uint8_t)run_run_container_iandnot(
 1910|    470|                CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   77|    470|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    470|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              CAST_run(c1), const_CAST_run(c2), &result);
  ------------------
  |  |   78|    470|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    470|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1911|    470|            return result;
 1912|       |
 1913|  1.21k|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|  1.21k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  1.21k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  1.21k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1913:9): [True: 1.21k, False: 4.85k]
  ------------------
 1914|  1.21k|            *result_type = bitset_array_container_iandnot(
  ------------------
  |  Branch (1914:28): [True: 1.03k, False: 180]
  ------------------
 1915|  1.21k|                               CAST_bitset(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   52|  1.21k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.21k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_bitset(c1), const_CAST_array(c2), &result)
  ------------------
  |  |   55|  1.21k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.21k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1916|  1.21k|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|  1.03k|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1917|  1.21k|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.39k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1918|  1.21k|            return result;
 1919|       |
 1920|      0|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1920:9): [True: 0, False: 6.06k]
  ------------------
 1921|      0|            *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1922|      0|            array_bitset_container_iandnot(CAST_array(c1),
  ------------------
  |  |   54|      0|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1923|      0|                                           const_CAST_bitset(c2));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1924|      0|            return c1;
 1925|       |
 1926|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1926:9): [True: 0, False: 6.06k]
  ------------------
 1927|      0|            *result_type = bitset_run_container_iandnot(
  ------------------
  |  Branch (1927:28): [True: 0, False: 0]
  ------------------
 1928|      0|                               CAST_bitset(c1), const_CAST_run(c2), &result)
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_bitset(c1), const_CAST_run(c2), &result)
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1929|      0|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1930|      0|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1931|      0|            return result;
 1932|       |
 1933|      0|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (1933:9): [True: 0, False: 6.06k]
  ------------------
 1934|      0|            *result_type = run_bitset_container_iandnot(
  ------------------
  |  Branch (1934:28): [True: 0, False: 0]
  ------------------
 1935|      0|                               CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   77|      0|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                                             CAST_run(c1), const_CAST_bitset(c2), &result)
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1936|      0|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
 1937|      0|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1938|      0|            return result;
 1939|       |
 1940|     47|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|     47|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|     47|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|     47|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (1940:9): [True: 47, False: 6.02k]
  ------------------
 1941|     47|            *result_type = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|     47|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 1942|     47|            array_run_container_iandnot(CAST_array(c1), const_CAST_run(c2));
  ------------------
  |  |   54|     47|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     47|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          array_run_container_iandnot(CAST_array(c1), const_CAST_run(c2));
  ------------------
  |  |   78|     47|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     47|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1943|     47|            return c1;
 1944|       |
 1945|    765|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|    765|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    765|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    765|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (1945:9): [True: 765, False: 5.30k]
  ------------------
 1946|    765|            *result_type = (uint8_t)run_array_container_iandnot(
 1947|    765|                CAST_run(c1), const_CAST_array(c2), &result);
  ------------------
  |  |   77|    765|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    765|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              CAST_run(c1), const_CAST_array(c2), &result);
  ------------------
  |  |   55|    765|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    765|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1948|    765|            return result;
 1949|       |
 1950|      0|        default:
  ------------------
  |  Branch (1950:9): [True: 0, False: 6.06k]
  ------------------
 1951|      0|            assert(false);
 1952|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1953|      0|            return NULL;
 1954|  6.06k|    }
 1955|  6.06k|}
roaring.c:container_rank:
 2209|  6.51k|                                 uint16_t x) {
 2210|  6.51k|    c = container_unwrap_shared(c, &type);
 2211|  6.51k|    switch (type) {
 2212|      0|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2212:9): [True: 0, False: 6.51k]
  ------------------
 2213|      0|            return bitset_container_rank(const_CAST_bitset(c), x);
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2214|  3.87k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  3.87k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2214:9): [True: 3.87k, False: 2.64k]
  ------------------
 2215|  3.87k|            return array_container_rank(const_CAST_array(c), x);
  ------------------
  |  |   55|  3.87k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.87k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2216|  2.64k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  2.64k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2216:9): [True: 2.64k, False: 3.87k]
  ------------------
 2217|  2.64k|            return run_container_rank(const_CAST_run(c), x);
  ------------------
  |  |   78|  2.64k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.64k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2218|      0|        default:
  ------------------
  |  Branch (2218:9): [True: 0, False: 6.51k]
  ------------------
 2219|      0|            assert(false);
 2220|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2221|  6.51k|    }
 2222|  6.51k|    assert(false);
 2223|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2224|      0|    return false;
 2225|  6.51k|}
roaring.c:container_shrink_to_fit:
  279|  6.51k|static inline int container_shrink_to_fit(container_t *c, uint8_t type) {
  280|  6.51k|    c = container_mutable_unwrap_shared(c, &type);
  281|  6.51k|    switch (type) {
  ------------------
  |  Branch (281:13): [True: 6.51k, False: 0]
  ------------------
  282|      0|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (282:9): [True: 0, False: 6.51k]
  ------------------
  283|      0|            return 0;  // no shrinking possible
  284|  3.87k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  3.87k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (284:9): [True: 3.87k, False: 2.64k]
  ------------------
  285|  3.87k|            return array_container_shrink_to_fit(CAST_array(c));
  ------------------
  |  |   54|  3.87k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  3.87k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  286|  2.64k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  2.64k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (286:9): [True: 2.64k, False: 3.87k]
  ------------------
  287|  2.64k|            return run_container_shrink_to_fit(CAST_run(c));
  ------------------
  |  |   77|  2.64k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.64k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  288|  6.51k|    }
  289|  6.51k|    assert(false);
  290|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  291|      0|    return 0;  // unreached
  292|  6.51k|}
roaring.c:container_iterate:
 1964|  68.4k|                                     void *ptr) {
 1965|  68.4k|    c = container_unwrap_shared(c, &type);
 1966|  68.4k|    switch (type) {
 1967|  1.64k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  1.64k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (1967:9): [True: 1.64k, False: 66.7k]
  ------------------
 1968|  1.64k|            return bitset_container_iterate(const_CAST_bitset(c), base,
  ------------------
  |  |   53|  1.64k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.64k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1969|  1.64k|                                            iterator, ptr);
 1970|  6.65k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  6.65k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (1970:9): [True: 6.65k, False: 61.7k]
  ------------------
 1971|  6.65k|            return array_container_iterate(const_CAST_array(c), base, iterator,
  ------------------
  |  |   55|  6.65k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  6.65k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1972|  6.65k|                                           ptr);
 1973|  60.1k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  60.1k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (1973:9): [True: 60.1k, False: 8.30k]
  ------------------
 1974|  60.1k|            return run_container_iterate(const_CAST_run(c), base, iterator,
  ------------------
  |  |   78|  60.1k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  60.1k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1975|  60.1k|                                         ptr);
 1976|      0|        default:
  ------------------
  |  Branch (1976:9): [True: 0, False: 68.4k]
  ------------------
 1977|      0|            assert(false);
 1978|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1979|  68.4k|    }
 1980|  68.4k|    assert(false);
 1981|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 1982|      0|    return false;
 1983|  68.4k|}
roaring.c:container_maximum:
 2171|  12.9k|static inline uint16_t container_maximum(const container_t *c, uint8_t type) {
 2172|  12.9k|    c = container_unwrap_shared(c, &type);
 2173|  12.9k|    switch (type) {
 2174|  2.48k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  2.48k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2174:9): [True: 2.48k, False: 10.4k]
  ------------------
 2175|  2.48k|            return bitset_container_maximum(const_CAST_bitset(c));
  ------------------
  |  |   53|  2.48k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.48k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2176|  8.97k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  8.97k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2176:9): [True: 8.97k, False: 3.93k]
  ------------------
 2177|  8.97k|            return array_container_maximum(const_CAST_array(c));
  ------------------
  |  |   55|  8.97k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  8.97k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2178|  1.45k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  1.45k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2178:9): [True: 1.45k, False: 11.4k]
  ------------------
 2179|  1.45k|            return run_container_maximum(const_CAST_run(c));
  ------------------
  |  |   78|  1.45k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.45k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2180|      0|        default:
  ------------------
  |  Branch (2180:9): [True: 0, False: 12.9k]
  ------------------
 2181|      0|            assert(false);
 2182|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2183|  12.9k|    }
 2184|  12.9k|    assert(false);
 2185|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2186|      0|    return false;
 2187|  12.9k|}
roaring.c:container_equals:
  620|  83.7k|                                    const container_t *c2, uint8_t type2) {
  621|  83.7k|    c1 = container_unwrap_shared(c1, &type1);
  622|  83.7k|    c2 = container_unwrap_shared(c2, &type2);
  623|  83.7k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  83.7k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
  624|  72.5k|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|  72.5k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  72.5k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  72.5k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (624:9): [True: 72.5k, False: 11.2k]
  ------------------
  625|  72.5k|            return bitset_container_equals(const_CAST_bitset(c1),
  ------------------
  |  |   53|  72.5k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  72.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  626|  72.5k|                                           const_CAST_bitset(c2));
  ------------------
  |  |   53|  72.5k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  72.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  627|       |
  628|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (628:9): [True: 0, False: 83.7k]
  ------------------
  629|      0|            return run_container_equals_bitset(const_CAST_run(c2),
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  630|      0|                                               const_CAST_bitset(c1));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  631|       |
  632|      0|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (632:9): [True: 0, False: 83.7k]
  ------------------
  633|      0|            return run_container_equals_bitset(const_CAST_run(c1),
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  634|      0|                                               const_CAST_bitset(c2));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  635|       |
  636|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (636:9): [True: 0, False: 83.7k]
  ------------------
  637|       |            // java would always return false?
  638|      0|            return array_container_equal_bitset(const_CAST_array(c2),
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  639|      0|                                                const_CAST_bitset(c1));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  640|       |
  641|    433|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|    433|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    433|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    433|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (641:9): [True: 433, False: 83.3k]
  ------------------
  642|       |            // java would always return false?
  643|    433|            return array_container_equal_bitset(const_CAST_array(c1),
  ------------------
  |  |   55|    433|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    433|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  644|    433|                                                const_CAST_bitset(c2));
  ------------------
  |  |   53|    433|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    433|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  645|       |
  646|    634|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    634|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    634|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    634|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (646:9): [True: 634, False: 83.1k]
  ------------------
  647|    634|            return run_container_equals_array(const_CAST_run(c2),
  ------------------
  |  |   78|    634|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    634|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  648|    634|                                              const_CAST_array(c1));
  ------------------
  |  |   55|    634|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    634|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  649|       |
  650|    257|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|    257|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    257|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    257|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (650:9): [True: 257, False: 83.5k]
  ------------------
  651|    257|            return run_container_equals_array(const_CAST_run(c1),
  ------------------
  |  |   78|    257|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    257|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  652|    257|                                              const_CAST_array(c2));
  ------------------
  |  |   55|    257|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    257|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  653|       |
  654|  9.78k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  9.78k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  9.78k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  9.78k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (654:9): [True: 9.78k, False: 74.0k]
  ------------------
  655|  9.78k|            return array_container_equals(const_CAST_array(c1),
  ------------------
  |  |   55|  9.78k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  9.78k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  656|  9.78k|                                          const_CAST_array(c2));
  ------------------
  |  |   55|  9.78k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  9.78k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  657|       |
  658|    138|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    138|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    138|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    138|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (658:9): [True: 138, False: 83.6k]
  ------------------
  659|    138|            return run_container_equals(const_CAST_run(c1), const_CAST_run(c2));
  ------------------
  |  |   78|    138|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    138|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          return run_container_equals(const_CAST_run(c1), const_CAST_run(c2));
  ------------------
  |  |   78|    138|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    138|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  660|       |
  661|      0|        default:
  ------------------
  |  Branch (661:9): [True: 0, False: 83.7k]
  ------------------
  662|      0|            assert(false);
  663|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  664|      0|            return false;
  665|  83.7k|    }
  666|  83.7k|}
roaring.c:container_is_subset:
  673|  11.2k|                                       const container_t *c2, uint8_t type2) {
  674|  11.2k|    c1 = container_unwrap_shared(c1, &type1);
  675|  11.2k|    c2 = container_unwrap_shared(c2, &type2);
  676|  11.2k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  11.2k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
  677|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (677:9): [True: 0, False: 11.2k]
  ------------------
  678|      0|            return bitset_container_is_subset(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  679|      0|                                              const_CAST_bitset(c2));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  680|       |
  681|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (681:9): [True: 0, False: 11.2k]
  ------------------
  682|      0|            return bitset_container_is_subset_run(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  683|      0|                                                  const_CAST_run(c2));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  684|       |
  685|      0|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (685:9): [True: 0, False: 11.2k]
  ------------------
  686|      0|            return run_container_is_subset_bitset(const_CAST_run(c1),
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  687|      0|                                                  const_CAST_bitset(c2));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  688|       |
  689|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (689:9): [True: 0, False: 11.2k]
  ------------------
  690|      0|            return false;  // by construction, size(c1) > size(c2)
  691|       |
  692|  2.43k|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|  2.43k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.43k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  2.43k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (692:9): [True: 2.43k, False: 8.78k]
  ------------------
  693|  2.43k|            return array_container_is_subset_bitset(const_CAST_array(c1),
  ------------------
  |  |   55|  2.43k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.43k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  694|  2.43k|                                                    const_CAST_bitset(c2));
  ------------------
  |  |   53|  2.43k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.43k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  695|       |
  696|  2.31k|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|  2.31k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.31k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  2.31k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (696:9): [True: 2.31k, False: 8.89k]
  ------------------
  697|  2.31k|            return array_container_is_subset_run(const_CAST_array(c1),
  ------------------
  |  |   55|  2.31k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.31k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  698|  2.31k|                                                 const_CAST_run(c2));
  ------------------
  |  |   78|  2.31k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.31k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  699|       |
  700|    380|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|    380|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    380|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    380|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (700:9): [True: 380, False: 10.8k]
  ------------------
  701|    380|            return run_container_is_subset_array(const_CAST_run(c1),
  ------------------
  |  |   78|    380|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    380|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  702|    380|                                                 const_CAST_array(c2));
  ------------------
  |  |   55|    380|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    380|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  703|       |
  704|  5.85k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  5.85k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  5.85k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  5.85k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (704:9): [True: 5.85k, False: 5.36k]
  ------------------
  705|  5.85k|            return array_container_is_subset(const_CAST_array(c1),
  ------------------
  |  |   55|  5.85k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  5.85k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  706|  5.85k|                                             const_CAST_array(c2));
  ------------------
  |  |   55|  5.85k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  5.85k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  707|       |
  708|    234|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    234|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    234|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    234|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (708:9): [True: 234, False: 10.9k]
  ------------------
  709|    234|            return run_container_is_subset(const_CAST_run(c1),
  ------------------
  |  |   78|    234|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    234|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  710|    234|                                           const_CAST_run(c2));
  ------------------
  |  |   78|    234|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    234|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  711|       |
  712|      0|        default:
  ------------------
  |  Branch (712:9): [True: 0, False: 11.2k]
  ------------------
  713|      0|            assert(false);
  714|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  715|      0|            return false;
  716|  11.2k|    }
  717|  11.2k|}
roaring.c:container_inot_range:
 2110|  6.67k|                                                uint8_t *result_type) {
 2111|  6.67k|    c = get_writable_copy_if_shared(c, &type);
 2112|  6.67k|    container_t *result = NULL;
 2113|  6.67k|    switch (type) {
 2114|  1.35k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  1.35k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2114:9): [True: 1.35k, False: 5.32k]
  ------------------
 2115|  1.35k|            *result_type = bitset_container_negation_range_inplace(
  ------------------
  |  Branch (2115:28): [True: 1.32k, False: 28]
  ------------------
 2116|  1.35k|                               CAST_bitset(c), range_start, range_end, &result)
  ------------------
  |  |   52|  1.35k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.35k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2117|  1.35k|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|  1.32k|#define BITSET_CONTAINER_TYPE 1
  ------------------
 2118|  1.35k|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.37k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 2119|  1.35k|            return result;
 2120|  3.58k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  3.58k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2120:9): [True: 3.58k, False: 3.09k]
  ------------------
 2121|  3.58k|            *result_type = array_container_negation_range_inplace(
  ------------------
  |  Branch (2121:28): [True: 132, False: 3.44k]
  ------------------
 2122|  3.58k|                               CAST_array(c), range_start, range_end, &result)
  ------------------
  |  |   54|  3.58k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  3.58k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2123|  3.58k|                               ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|    132|#define BITSET_CONTAINER_TYPE 1
  ------------------
 2124|  3.58k|                               : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  7.02k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 2125|  3.58k|            return result;
 2126|  1.74k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  1.74k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2126:9): [True: 1.74k, False: 4.93k]
  ------------------
 2127|  1.74k|            *result_type = (uint8_t)run_container_negation_range_inplace(
 2128|  1.74k|                CAST_run(c), range_start, range_end, &result);
  ------------------
  |  |   77|  1.74k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.74k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2129|  1.74k|            return result;
 2130|       |
 2131|      0|        default:
  ------------------
  |  Branch (2131:9): [True: 0, False: 6.67k]
  ------------------
 2132|      0|            assert(false);
 2133|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2134|  6.67k|    }
 2135|  6.67k|    assert(false);
 2136|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2137|      0|    return NULL;
 2138|  6.67k|}
roaring.c:container_inot:
 2076|  5.61k|                                          uint8_t *result_type) {
 2077|  5.61k|    c = get_writable_copy_if_shared(c, &type);
 2078|  5.61k|    container_t *result = NULL;
 2079|  5.61k|    switch (type) {
 2080|     66|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|     66|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2080:9): [True: 66, False: 5.54k]
  ------------------
 2081|     66|            *result_type =
 2082|     66|                bitset_container_negation_inplace(CAST_bitset(c), &result)
  ------------------
  |  |   52|     66|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     66|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  |  Branch (2082:17): [True: 31, False: 35]
  ------------------
 2083|     66|                    ? BITSET_CONTAINER_TYPE
  ------------------
  |  |   48|     31|#define BITSET_CONTAINER_TYPE 1
  ------------------
 2084|     66|                    : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|    101|#define ARRAY_CONTAINER_TYPE 2
  ------------------
 2085|     66|            return result;
 2086|    165|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|    165|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2086:9): [True: 165, False: 5.44k]
  ------------------
 2087|       |            // will never be inplace
 2088|    165|            result = bitset_container_create();
 2089|    165|            *result_type = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|    165|#define BITSET_CONTAINER_TYPE 1
  ------------------
 2090|    165|            array_container_negation(CAST_array(c), CAST_bitset(result));
  ------------------
  |  |   54|    165|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    165|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                          array_container_negation(CAST_array(c), CAST_bitset(result));
  ------------------
  |  |   52|    165|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    165|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2091|    165|            array_container_free(CAST_array(c));
  ------------------
  |  |   54|    165|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    165|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2092|    165|            return result;
 2093|  5.38k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  5.38k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2093:9): [True: 5.38k, False: 231]
  ------------------
 2094|  5.38k|            *result_type =
 2095|  5.38k|                (uint8_t)run_container_negation_inplace(CAST_run(c), &result);
  ------------------
  |  |   77|  5.38k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  5.38k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2096|  5.38k|            return result;
 2097|       |
 2098|      0|        default:
  ------------------
  |  Branch (2098:9): [True: 0, False: 5.61k]
  ------------------
 2099|      0|            assert(false);
 2100|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2101|  5.61k|    }
 2102|  5.61k|    assert(false);
 2103|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2104|      0|    return NULL;
 2105|  5.61k|}
roaring.c:container_unwrap_shared:
  106|  1.15M|    const container_t *candidate_shared_container, uint8_t *type) {
  107|  1.15M|    if (*type == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|  1.15M|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (107:9): [True: 0, False: 1.15M]
  ------------------
  108|      0|        *type = const_CAST_shared(candidate_shared_container)->typecode;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  109|      0|        assert(*type != SHARED_CONTAINER_TYPE);
  110|      0|        return const_CAST_shared(candidate_shared_container)->container;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  111|  1.15M|    } else {
  112|  1.15M|        return candidate_shared_container;
  113|  1.15M|    }
  114|  1.15M|}
roaring.c:container_mutable_unwrap_shared:
  118|  6.51k|                                                           uint8_t *type) {
  119|  6.51k|    if (*type == SHARED_CONTAINER_TYPE) {  // the passed in container is shared
  ------------------
  |  |   51|  6.51k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (119:9): [True: 0, False: 6.51k]
  ------------------
  120|      0|        *type = CAST_shared(c)->typecode;
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  121|      0|        assert(*type != SHARED_CONTAINER_TYPE);
  122|      0|        return CAST_shared(c)->container;  // return the enclosed container
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  123|  6.51k|    } else {
  124|  6.51k|        return c;  // wasn't shared, so return as-is
  125|  6.51k|    }
  126|  6.51k|}
roaring.c:container_minimum:
 2189|  6.47k|static inline uint16_t container_minimum(const container_t *c, uint8_t type) {
 2190|  6.47k|    c = container_unwrap_shared(c, &type);
 2191|  6.47k|    switch (type) {
 2192|  1.26k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  1.26k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2192:9): [True: 1.26k, False: 5.20k]
  ------------------
 2193|  1.26k|            return bitset_container_minimum(const_CAST_bitset(c));
  ------------------
  |  |   53|  1.26k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.26k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2194|  3.67k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  3.67k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2194:9): [True: 3.67k, False: 2.79k]
  ------------------
 2195|  3.67k|            return array_container_minimum(const_CAST_array(c));
  ------------------
  |  |   55|  3.67k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.67k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2196|  1.53k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  1.53k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2196:9): [True: 1.53k, False: 4.94k]
  ------------------
 2197|  1.53k|            return run_container_minimum(const_CAST_run(c));
  ------------------
  |  |   78|  1.53k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.53k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2198|      0|        default:
  ------------------
  |  Branch (2198:9): [True: 0, False: 6.47k]
  ------------------
 2199|      0|            assert(false);
 2200|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2201|  6.47k|    }
 2202|  6.47k|    assert(false);
 2203|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2204|      0|    return false;
 2205|  6.47k|}
roaring.c:container_select:
 2150|  6.51k|                                    uint32_t *element) {
 2151|  6.51k|    c = container_unwrap_shared(c, &type);
 2152|  6.51k|    switch (type) {
 2153|      0|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2153:9): [True: 0, False: 6.51k]
  ------------------
 2154|      0|            return bitset_container_select(const_CAST_bitset(c), start_rank,
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2155|      0|                                           rank, element);
 2156|  3.87k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  3.87k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2156:9): [True: 3.87k, False: 2.64k]
  ------------------
 2157|  3.87k|            return array_container_select(const_CAST_array(c), start_rank, rank,
  ------------------
  |  |   55|  3.87k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.87k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2158|  3.87k|                                          element);
 2159|  2.64k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  2.64k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2159:9): [True: 2.64k, False: 3.87k]
  ------------------
 2160|  2.64k|            return run_container_select(const_CAST_run(c), start_rank, rank,
  ------------------
  |  |   78|  2.64k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.64k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2161|  2.64k|                                        element);
 2162|      0|        default:
  ------------------
  |  Branch (2162:9): [True: 0, False: 6.51k]
  ------------------
 2163|      0|            assert(false);
 2164|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2165|  6.51k|    }
 2166|  6.51k|    assert(false);
 2167|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2168|      0|    return false;
 2169|  6.51k|}
roaring.c:container_intersect:
  865|  6.40k|                                       const container_t *c2, uint8_t type2) {
  866|  6.40k|    c1 = container_unwrap_shared(c1, &type1);
  867|  6.40k|    c2 = container_unwrap_shared(c2, &type2);
  868|  6.40k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  6.40k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
  869|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (869:9): [True: 0, False: 6.40k]
  ------------------
  870|      0|            return bitset_container_intersect(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  871|      0|                                              const_CAST_bitset(c2));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  872|       |
  873|  2.40k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  2.40k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.40k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (873:9): [True: 2.40k, False: 4.00k]
  ------------------
  874|  2.40k|            return array_container_intersect(const_CAST_array(c1),
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  875|  2.40k|                                             const_CAST_array(c2));
  ------------------
  |  |   55|  2.40k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.40k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  876|       |
  877|    725|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|    725|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    725|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (877:9): [True: 725, False: 5.67k]
  ------------------
  878|    725|            return run_container_intersect(const_CAST_run(c1),
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  879|    725|                                           const_CAST_run(c2));
  ------------------
  |  |   78|    725|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    725|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  880|       |
  881|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (881:9): [True: 0, False: 6.40k]
  ------------------
  882|      0|            return array_bitset_container_intersect(const_CAST_array(c2),
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  883|      0|                                                    const_CAST_bitset(c1));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  884|       |
  885|    608|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|    608|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    608|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    608|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (885:9): [True: 608, False: 5.79k]
  ------------------
  886|    608|            return array_bitset_container_intersect(const_CAST_array(c1),
  ------------------
  |  |   55|    608|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  887|    608|                                                    const_CAST_bitset(c2));
  ------------------
  |  |   53|    608|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    608|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  888|       |
  889|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (889:9): [True: 0, False: 6.40k]
  ------------------
  890|      0|            return run_bitset_container_intersect(const_CAST_run(c2),
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  891|      0|                                                  const_CAST_bitset(c1));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  892|       |
  893|    650|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|    650|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    650|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|    650|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (893:9): [True: 650, False: 5.75k]
  ------------------
  894|    650|            return run_bitset_container_intersect(const_CAST_run(c1),
  ------------------
  |  |   78|    650|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  895|    650|                                                  const_CAST_bitset(c2));
  ------------------
  |  |   53|    650|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    650|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  896|       |
  897|    784|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|    784|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|    784|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|    784|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (897:9): [True: 784, False: 5.61k]
  ------------------
  898|    784|            return array_run_container_intersect(const_CAST_array(c1),
  ------------------
  |  |   55|    784|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  899|    784|                                                 const_CAST_run(c2));
  ------------------
  |  |   78|    784|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    784|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  900|       |
  901|  1.23k|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|  1.23k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  1.23k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  1.23k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (901:9): [True: 1.23k, False: 5.16k]
  ------------------
  902|  1.23k|            return array_run_container_intersect(const_CAST_array(c2),
  ------------------
  |  |   55|  1.23k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  903|  1.23k|                                                 const_CAST_run(c1));
  ------------------
  |  |   78|  1.23k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.23k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  904|       |
  905|      0|        default:
  ------------------
  |  Branch (905:9): [True: 0, False: 6.40k]
  ------------------
  906|      0|            assert(false);
  907|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  908|      0|            return 0;
  909|  6.40k|    }
  910|  6.40k|}
roaring.c:container_and_cardinality:
  814|  25.6k|                                            uint8_t type2) {
  815|  25.6k|    c1 = container_unwrap_shared(c1, &type1);
  816|  25.6k|    c2 = container_unwrap_shared(c2, &type2);
  817|  25.6k|    switch (PAIR_CONTAINER_TYPES(type1, type2)) {
  ------------------
  |  |   62|  25.6k|#define PAIR_CONTAINER_TYPES(type1, type2) (4 * (type1) + (type2))
  ------------------
  818|      0|        case CONTAINER_PAIR(BITSET, BITSET):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (818:9): [True: 0, False: 25.6k]
  ------------------
  819|      0|            return bitset_container_and_justcard(const_CAST_bitset(c1),
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  820|      0|                                                 const_CAST_bitset(c2));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  821|       |
  822|  9.60k|        case CONTAINER_PAIR(ARRAY, ARRAY):
  ------------------
  |  |   65|  9.60k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  9.60k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  9.60k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (822:9): [True: 9.60k, False: 16.0k]
  ------------------
  823|  9.60k|            return array_container_intersection_cardinality(
  824|  9.60k|                const_CAST_array(c1), const_CAST_array(c2));
  ------------------
  |  |   55|  9.60k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  9.60k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_array(c2));
  ------------------
  |  |   55|  9.60k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  9.60k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  825|       |
  826|  2.90k|        case CONTAINER_PAIR(RUN, RUN):
  ------------------
  |  |   65|  2.90k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  2.90k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  2.90k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (826:9): [True: 2.90k, False: 22.7k]
  ------------------
  827|  2.90k|            return run_container_intersection_cardinality(const_CAST_run(c1),
  ------------------
  |  |   78|  2.90k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.90k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  828|  2.90k|                                                          const_CAST_run(c2));
  ------------------
  |  |   78|  2.90k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.90k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  829|       |
  830|      0|        case CONTAINER_PAIR(BITSET, ARRAY):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|      0|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (830:9): [True: 0, False: 25.6k]
  ------------------
  831|      0|            return array_bitset_container_intersection_cardinality(
  832|      0|                const_CAST_array(c2), const_CAST_bitset(c1));
  ------------------
  |  |   55|      0|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_bitset(c1));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  833|       |
  834|  2.43k|        case CONTAINER_PAIR(ARRAY, BITSET):
  ------------------
  |  |   65|  2.43k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  2.43k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  2.43k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (834:9): [True: 2.43k, False: 23.1k]
  ------------------
  835|  2.43k|            return array_bitset_container_intersection_cardinality(
  836|  2.43k|                const_CAST_array(c1), const_CAST_bitset(c2));
  ------------------
  |  |   55|  2.43k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.43k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_bitset(c2));
  ------------------
  |  |   53|  2.43k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.43k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  837|       |
  838|      0|        case CONTAINER_PAIR(BITSET, RUN):
  ------------------
  |  |   65|      0|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (838:9): [True: 0, False: 25.6k]
  ------------------
  839|      0|            return run_bitset_container_intersection_cardinality(
  840|      0|                const_CAST_run(c2), const_CAST_bitset(c1));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c2), const_CAST_bitset(c1));
  ------------------
  |  |   53|      0|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  841|       |
  842|  2.60k|        case CONTAINER_PAIR(RUN, BITSET):
  ------------------
  |  |   65|  2.60k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  2.60k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   48|  2.60k|#define BITSET_CONTAINER_TYPE 1
  |  |  ------------------
  ------------------
  |  Branch (842:9): [True: 2.60k, False: 23.0k]
  ------------------
  843|  2.60k|            return run_bitset_container_intersection_cardinality(
  844|  2.60k|                const_CAST_run(c1), const_CAST_bitset(c2));
  ------------------
  |  |   78|  2.60k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.60k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_run(c1), const_CAST_bitset(c2));
  ------------------
  |  |   53|  2.60k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.60k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  845|       |
  846|  3.13k|        case CONTAINER_PAIR(ARRAY, RUN):
  ------------------
  |  |   65|  3.13k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  3.13k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  3.13k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  ------------------
  |  Branch (846:9): [True: 3.13k, False: 22.4k]
  ------------------
  847|  3.13k|            return array_run_container_intersection_cardinality(
  848|  3.13k|                const_CAST_array(c1), const_CAST_run(c2));
  ------------------
  |  |   55|  3.13k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.13k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c1), const_CAST_run(c2));
  ------------------
  |  |   78|  3.13k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.13k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  849|       |
  850|  4.94k|        case CONTAINER_PAIR(RUN, ARRAY):
  ------------------
  |  |   65|  4.94k|    (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   50|  4.94k|#define RUN_CONTAINER_TYPE 3
  |  |  ------------------
  |  |                   (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE))
  |  |  ------------------
  |  |  |  |   49|  4.94k|#define ARRAY_CONTAINER_TYPE 2
  |  |  ------------------
  ------------------
  |  Branch (850:9): [True: 4.94k, False: 20.6k]
  ------------------
  851|  4.94k|            return array_run_container_intersection_cardinality(
  852|  4.94k|                const_CAST_array(c2), const_CAST_run(c1));
  ------------------
  |  |   55|  4.94k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  4.94k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
                              const_CAST_array(c2), const_CAST_run(c1));
  ------------------
  |  |   78|  4.94k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  4.94k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  853|       |
  854|      0|        default:
  ------------------
  |  Branch (854:9): [True: 0, False: 25.6k]
  ------------------
  855|      0|            assert(false);
  856|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  857|      0|            return 0;
  858|  25.6k|    }
  859|  25.6k|}
roaring.c:container_contains_range:
  596|    510|) {
  597|    510|    c = container_unwrap_shared(c, &typecode);
  598|    510|    switch (typecode) {
  599|    239|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|    239|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (599:9): [True: 239, False: 271]
  ------------------
  600|    239|            return bitset_container_get_range(const_CAST_bitset(c), range_start,
  ------------------
  |  |   53|    239|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    239|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  601|    239|                                              range_end);
  602|     94|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|     94|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (602:9): [True: 94, False: 416]
  ------------------
  603|     94|            return array_container_contains_range(const_CAST_array(c),
  ------------------
  |  |   55|     94|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|     94|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  604|     94|                                                  range_start, range_end);
  605|    177|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|    177|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (605:9): [True: 177, False: 333]
  ------------------
  606|    177|            return run_container_contains_range(const_CAST_run(c), range_start,
  ------------------
  |  |   78|    177|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    177|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  607|    177|                                                range_end);
  608|      0|        default:
  ------------------
  |  Branch (608:9): [True: 0, False: 510]
  ------------------
  609|      0|            assert(false);
  610|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  611|      0|            return false;
  612|    510|    }
  613|    510|}
roaring_array.c:container_to_uint32_array:
  465|  68.3k|                                            uint8_t typecode, uint32_t base) {
  466|  68.3k|    c = container_unwrap_shared(c, &typecode);
  467|  68.3k|    switch (typecode) {
  ------------------
  |  Branch (467:13): [True: 68.3k, False: 0]
  ------------------
  468|  1.50k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  1.50k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (468:9): [True: 1.50k, False: 66.8k]
  ------------------
  469|  1.50k|            return bitset_container_to_uint32_array(output,
  470|  1.50k|                                                    const_CAST_bitset(c), base);
  ------------------
  |  |   53|  1.50k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.50k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  471|  6.03k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  6.03k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (471:9): [True: 6.03k, False: 62.2k]
  ------------------
  472|  6.03k|            return array_container_to_uint32_array(output, const_CAST_array(c),
  ------------------
  |  |   55|  6.03k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  6.03k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  473|  6.03k|                                                   base);
  474|  60.7k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  60.7k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (474:9): [True: 60.7k, False: 7.54k]
  ------------------
  475|  60.7k|            return run_container_to_uint32_array(output, const_CAST_run(c),
  ------------------
  |  |   78|  60.7k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  60.7k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  476|  60.7k|                                                 base);
  477|  68.3k|    }
  478|  68.3k|    assert(false);
  479|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  480|      0|    return 0;  // unreached
  481|  68.3k|}
roaring_array.c:container_unwrap_shared:
  106|   394k|    const container_t *candidate_shared_container, uint8_t *type) {
  107|   394k|    if (*type == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|   394k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (107:9): [True: 0, False: 394k]
  ------------------
  108|      0|        *type = const_CAST_shared(candidate_shared_container)->typecode;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  109|      0|        assert(*type != SHARED_CONTAINER_TYPE);
  110|      0|        return const_CAST_shared(candidate_shared_container)->container;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  111|   394k|    } else {
  112|   394k|        return candidate_shared_container;
  113|   394k|    }
  114|   394k|}
roaring_array.c:get_container_type:
  129|   166k|static inline uint8_t get_container_type(const container_t *c, uint8_t type) {
  130|   166k|    if (type == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|   166k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (130:9): [True: 0, False: 166k]
  ------------------
  131|      0|        return const_CAST_shared(c)->typecode;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  132|   166k|    } else {
  133|   166k|        return type;
  134|   166k|    }
  135|   166k|}
roaring_array.c:container_size_in_bytes:
  403|   166k|                                              uint8_t typecode) {
  404|   166k|    c = container_unwrap_shared(c, &typecode);
  405|   166k|    switch (typecode) {
  ------------------
  |  Branch (405:13): [True: 166k, False: 0]
  ------------------
  406|   145k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|   145k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (406:9): [True: 145k, False: 21.1k]
  ------------------
  407|   145k|            return bitset_container_size_in_bytes(const_CAST_bitset(c));
  ------------------
  |  |   53|   145k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|   145k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  408|  18.5k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  18.5k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (408:9): [True: 18.5k, False: 147k]
  ------------------
  409|  18.5k|            return array_container_size_in_bytes(const_CAST_array(c));
  ------------------
  |  |   55|  18.5k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  18.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  410|  2.64k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|  2.64k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (410:9): [True: 2.64k, False: 163k]
  ------------------
  411|  2.64k|            return run_container_size_in_bytes(const_CAST_run(c));
  ------------------
  |  |   78|  2.64k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  2.64k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  412|   166k|    }
  413|   166k|    assert(false);
  414|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  415|      0|    return 0;  // unreached
  416|   166k|}
roaring_array.c:container_get_cardinality:
  244|  79.8k|                                            uint8_t typecode) {
  245|  79.8k|    c = container_unwrap_shared(c, &typecode);
  246|  79.8k|    switch (typecode) {
  ------------------
  |  Branch (246:13): [True: 79.8k, False: 0]
  ------------------
  247|  72.5k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  72.5k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (247:9): [True: 72.5k, False: 7.31k]
  ------------------
  248|  72.5k|            return bitset_container_cardinality(const_CAST_bitset(c));
  ------------------
  |  |   53|  72.5k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  72.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  249|  7.31k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  7.31k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (249:9): [True: 7.31k, False: 72.5k]
  ------------------
  250|  7.31k|            return array_container_cardinality(const_CAST_array(c));
  ------------------
  |  |   55|  7.31k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  7.31k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  251|      0|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (251:9): [True: 0, False: 79.8k]
  ------------------
  252|      0|            return run_container_cardinality(const_CAST_run(c));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  253|  79.8k|    }
  254|  79.8k|    assert(false);
  255|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  256|      0|    return 0;  // unreached
  257|  79.8k|}
roaring_array.c:container_write:
  382|  79.8k|                                      char *buf) {
  383|  79.8k|    c = container_unwrap_shared(c, &typecode);
  384|  79.8k|    switch (typecode) {
  ------------------
  |  Branch (384:13): [True: 79.8k, False: 0]
  ------------------
  385|  72.5k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  72.5k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (385:9): [True: 72.5k, False: 7.31k]
  ------------------
  386|  72.5k|            return bitset_container_write(const_CAST_bitset(c), buf);
  ------------------
  |  |   53|  72.5k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  72.5k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  387|  7.31k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  7.31k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (387:9): [True: 7.31k, False: 72.5k]
  ------------------
  388|  7.31k|            return array_container_write(const_CAST_array(c), buf);
  ------------------
  |  |   55|  7.31k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  7.31k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  389|      0|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (389:9): [True: 0, False: 79.8k]
  ------------------
  390|      0|            return run_container_write(const_CAST_run(c), buf);
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  391|  79.8k|    }
  392|  79.8k|    assert(false);
  393|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  394|      0|    return 0;  // unreached
  395|  79.8k|}
containers.c:container_unwrap_shared:
  106|  6.66k|    const container_t *candidate_shared_container, uint8_t *type) {
  107|  6.66k|    if (*type == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|  6.66k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (107:9): [True: 0, False: 6.66k]
  ------------------
  108|      0|        *type = const_CAST_shared(candidate_shared_container)->typecode;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  109|      0|        assert(*type != SHARED_CONTAINER_TYPE);
  110|      0|        return const_CAST_shared(candidate_shared_container)->container;
  ------------------
  |  |   80|      0|#define const_CAST_shared(c) CAST(const shared_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  111|  6.66k|    } else {
  112|  6.66k|        return candidate_shared_container;
  113|  6.66k|    }
  114|  6.66k|}
containers.c:container_maximum:
 2171|  6.42k|static inline uint16_t container_maximum(const container_t *c, uint8_t type) {
 2172|  6.42k|    c = container_unwrap_shared(c, &type);
 2173|  6.42k|    switch (type) {
 2174|  1.87k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|  1.87k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (2174:9): [True: 1.87k, False: 4.54k]
  ------------------
 2175|  1.87k|            return bitset_container_maximum(const_CAST_bitset(c));
  ------------------
  |  |   53|  1.87k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.87k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2176|  4.54k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  4.54k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (2176:9): [True: 4.54k, False: 1.87k]
  ------------------
 2177|  4.54k|            return array_container_maximum(const_CAST_array(c));
  ------------------
  |  |   55|  4.54k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  4.54k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2178|      0|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (2178:9): [True: 0, False: 6.42k]
  ------------------
 2179|      0|            return run_container_maximum(const_CAST_run(c));
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 2180|      0|        default:
  ------------------
  |  Branch (2180:9): [True: 0, False: 6.42k]
  ------------------
 2181|      0|            assert(false);
 2182|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2183|  6.42k|    }
 2184|  6.42k|    assert(false);
 2185|      0|    roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
 2186|      0|    return false;
 2187|  6.42k|}

run_container_contains:
  260|  8.45k|inline bool run_container_contains(const run_container_t *run, uint16_t pos) {
  261|  8.45k|    int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
  262|  8.45k|    if (index >= 0) return true;
  ------------------
  |  Branch (262:9): [True: 5.77k, False: 2.68k]
  ------------------
  263|  2.68k|    index = -index - 2;  // points to preceding value, possibly -1
  264|  2.68k|    if (index != -1) {   // possible match
  ------------------
  |  Branch (264:9): [True: 927, False: 1.75k]
  ------------------
  265|    927|        int32_t offset = pos - run->runs[index].value;
  266|    927|        int32_t le = run->runs[index].length;
  267|    927|        if (offset <= le) return true;
  ------------------
  |  Branch (267:13): [True: 580, False: 347]
  ------------------
  268|    927|    }
  269|  2.10k|    return false;
  270|  2.68k|}
interleavedBinarySearch:
  112|   432k|                                       uint16_t ikey) {
  113|   432k|    int32_t low = 0;
  114|   432k|    int32_t high = lenarray - 1;
  115|  2.01M|    while (low <= high) {
  ------------------
  |  Branch (115:12): [True: 1.77M, False: 234k]
  ------------------
  116|  1.77M|        int32_t middleIndex = (low + high) >> 1;
  117|  1.77M|        uint16_t middleValue = array[middleIndex].value;
  118|  1.77M|        if (middleValue < ikey) {
  ------------------
  |  Branch (118:13): [True: 681k, False: 1.09M]
  ------------------
  119|   681k|            low = middleIndex + 1;
  120|  1.09M|        } else if (middleValue > ikey) {
  ------------------
  |  Branch (120:20): [True: 898k, False: 198k]
  ------------------
  121|   898k|            high = middleIndex - 1;
  122|   898k|        } else {
  123|   198k|            return middleIndex;
  124|   198k|        }
  125|  1.77M|    }
  126|   234k|    return -(low + 1);
  127|   432k|}
run_container_minimum:
  563|  19.1k|inline uint16_t run_container_minimum(const run_container_t *run) {
  564|  19.1k|    if (run->n_runs == 0) return 0;
  ------------------
  |  Branch (564:9): [True: 0, False: 19.1k]
  ------------------
  565|  19.1k|    return run->runs[0].value;
  566|  19.1k|}
run_container_maximum:
  569|  18.6k|inline uint16_t run_container_maximum(const run_container_t *run) {
  570|  18.6k|    if (run->n_runs == 0) return 0;
  ------------------
  |  Branch (570:9): [True: 0, False: 18.6k]
  ------------------
  571|  18.6k|    return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length;
  572|  18.6k|}
roaring.c:run_container_create_range:
  532|  91.7k|                                                          uint32_t stop) {
  533|  91.7k|    run_container_t *rc = run_container_create_given_capacity(1);
  534|  91.7k|    if (rc) {
  ------------------
  |  Branch (534:9): [True: 91.7k, False: 0]
  ------------------
  535|  91.7k|        rle16_t r;
  536|  91.7k|        r.value = (uint16_t)start;
  537|  91.7k|        r.length = (uint16_t)(stop - start - 1);
  538|  91.7k|        run_container_append_first(rc, r);
  539|  91.7k|    }
  540|  91.7k|    return rc;
  541|  91.7k|}
roaring.c:run_container_append_first:
  351|  91.7k|                                                 rle16_t vl) {
  352|  91.7k|    run->runs[run->n_runs] = vl;
  353|  91.7k|    run->n_runs++;
  354|  91.7k|    return vl;
  355|  91.7k|}
roaring.c:rle16_count_greater:
  178|    435|                                          int32_t lenarray, uint16_t key) {
  179|    435|    if (lenarray == 0) return 0;
  ------------------
  |  Branch (179:9): [True: 0, False: 435]
  ------------------
  180|    435|    int32_t low = 0;
  181|    435|    int32_t high = lenarray - 1;
  182|  3.29k|    while (low <= high) {
  ------------------
  |  Branch (182:12): [True: 2.90k, False: 395]
  ------------------
  183|  2.90k|        int32_t middleIndex = (low + high) >> 1;
  184|  2.90k|        uint16_t min_value = array[middleIndex].value;
  185|  2.90k|        uint16_t max_value =
  186|  2.90k|            array[middleIndex].value + array[middleIndex].length;
  187|  2.90k|        if (max_value < key) {
  ------------------
  |  Branch (187:13): [True: 2.64k, False: 263]
  ------------------
  188|  2.64k|            low = middleIndex + 1;
  189|  2.64k|        } else if (key + UINT32_C(1) < min_value) {  // uint32 arithmetic
  ------------------
  |  Branch (189:20): [True: 223, False: 40]
  ------------------
  190|    223|            high = middleIndex - 1;
  191|    223|        } else {
  192|     40|            return lenarray - (middleIndex + 1);
  193|     40|        }
  194|  2.90k|    }
  195|    395|    return lenarray - low;
  196|    435|}
roaring.c:rle16_count_less:
  157|    435|                                       uint16_t key) {
  158|    435|    if (lenarray == 0) return 0;
  ------------------
  |  Branch (158:9): [True: 2, False: 433]
  ------------------
  159|    433|    int32_t low = 0;
  160|    433|    int32_t high = lenarray - 1;
  161|  2.76k|    while (low <= high) {
  ------------------
  |  Branch (161:12): [True: 2.56k, False: 198]
  ------------------
  162|  2.56k|        int32_t middleIndex = (low + high) >> 1;
  163|  2.56k|        uint16_t min_value = array[middleIndex].value;
  164|  2.56k|        uint16_t max_value =
  165|  2.56k|            array[middleIndex].value + array[middleIndex].length;
  166|  2.56k|        if (max_value + UINT32_C(1) < key) {  // uint32 arithmetic
  ------------------
  |  Branch (166:13): [True: 1.16k, False: 1.39k]
  ------------------
  167|  1.16k|            low = middleIndex + 1;
  168|  1.39k|        } else if (key < min_value) {
  ------------------
  |  Branch (168:20): [True: 1.16k, False: 235]
  ------------------
  169|  1.16k|            high = middleIndex - 1;
  170|  1.16k|        } else {
  171|    235|            return middleIndex;
  172|    235|        }
  173|  2.56k|    }
  174|    198|    return low;
  175|    433|}
roaring.c:run_container_add_range_nruns:
  610|    435|                                                 int32_t nruns_greater) {
  611|    435|    int32_t nruns_common = run->n_runs - nruns_less - nruns_greater;
  612|    435|    if (nruns_common == 0) {
  ------------------
  |  Branch (612:9): [True: 123, False: 312]
  ------------------
  613|    123|        makeRoomAtIndex(run, (uint16_t)nruns_less);
  614|    123|        run->runs[nruns_less].value = (uint16_t)min;
  615|    123|        run->runs[nruns_less].length = (uint16_t)(max - min);
  616|    312|    } else {
  617|    312|        uint32_t common_min = run->runs[nruns_less].value;
  618|    312|        uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value +
  619|    312|                              run->runs[nruns_less + nruns_common - 1].length;
  620|    312|        uint32_t result_min = (common_min < min) ? common_min : min;
  ------------------
  |  Branch (620:31): [True: 73, False: 239]
  ------------------
  621|    312|        uint32_t result_max = (common_max > max) ? common_max : max;
  ------------------
  |  Branch (621:31): [True: 30, False: 282]
  ------------------
  622|       |
  623|    312|        run->runs[nruns_less].value = (uint16_t)result_min;
  624|    312|        run->runs[nruns_less].length = (uint16_t)(result_max - result_min);
  625|       |
  626|    312|        memmove(&(run->runs[nruns_less + 1]),
  627|    312|                &(run->runs[run->n_runs - nruns_greater]),
  628|    312|                nruns_greater * sizeof(rle16_t));
  629|    312|        run->n_runs = nruns_less + 1 + nruns_greater;
  630|    312|    }
  631|    435|}
roaring.c:makeRoomAtIndex:
  208|    843|static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) {
  209|       |    /* This function calls realloc + memmove sequentially to move by one index.
  210|       |     * Potentially copying twice the array.
  211|       |     */
  212|    843|    if (run->n_runs + 1 > run->capacity)
  ------------------
  |  Branch (212:9): [True: 703, False: 140]
  ------------------
  213|    703|        run_container_grow(run, run->n_runs + 1, true);
  214|    843|    memmove(run->runs + 1 + index, run->runs + index,
  215|    843|            (run->n_runs - index) * sizeof(rle16_t));
  216|    843|    run->n_runs++;
  217|    843|}
roaring.c:run_container_remove_range:
  667|  1.90k|                                              uint32_t min, uint32_t max) {
  668|  1.90k|    int32_t first = rle16_find_run(run->runs, run->n_runs, (uint16_t)min);
  669|  1.90k|    int32_t last = rle16_find_run(run->runs, run->n_runs, (uint16_t)max);
  670|       |
  671|  1.90k|    if (first >= 0 && min > run->runs[first].value &&
  ------------------
  |  Branch (671:9): [True: 704, False: 1.20k]
  |  Branch (671:23): [True: 272, False: 432]
  ------------------
  672|    272|        max < ((uint32_t)run->runs[first].value +
  ------------------
  |  Branch (672:9): [True: 37, False: 235]
  ------------------
  673|    272|               (uint32_t)run->runs[first].length)) {
  674|       |        // split this run into two adjacent runs
  675|       |
  676|       |        // right subinterval
  677|     37|        makeRoomAtIndex(run, (uint16_t)(first + 1));
  678|     37|        run->runs[first + 1].value = (uint16_t)(max + 1);
  679|     37|        run->runs[first + 1].length =
  680|     37|            (uint16_t)((run->runs[first].value + run->runs[first].length) -
  681|     37|                       (max + 1));
  682|       |
  683|       |        // left subinterval
  684|     37|        run->runs[first].length =
  685|     37|            (uint16_t)((min - 1) - run->runs[first].value);
  686|       |
  687|     37|        return;
  688|     37|    }
  689|       |
  690|       |    // update left-most partial run
  691|  1.86k|    if (first >= 0) {
  ------------------
  |  Branch (691:9): [True: 667, False: 1.20k]
  ------------------
  692|    667|        if (min > run->runs[first].value) {
  ------------------
  |  Branch (692:13): [True: 235, False: 432]
  ------------------
  693|    235|            run->runs[first].length =
  694|    235|                (uint16_t)((min - 1) - run->runs[first].value);
  695|    235|            first++;
  696|    235|        }
  697|  1.20k|    } else {
  698|  1.20k|        first = -first - 1;
  699|  1.20k|    }
  700|       |
  701|       |    // update right-most run
  702|  1.86k|    if (last >= 0) {
  ------------------
  |  Branch (702:9): [True: 658, False: 1.21k]
  ------------------
  703|    658|        uint16_t run_max = run->runs[last].value + run->runs[last].length;
  704|    658|        if (run_max > max) {
  ------------------
  |  Branch (704:13): [True: 433, False: 225]
  ------------------
  705|    433|            run->runs[last].value = (uint16_t)(max + 1);
  706|    433|            run->runs[last].length = (uint16_t)(run_max - (max + 1));
  707|    433|            last--;
  708|    433|        }
  709|  1.21k|    } else {
  710|  1.21k|        last = (-last - 1) - 1;
  711|  1.21k|    }
  712|       |
  713|       |    // remove intermediate runs
  714|  1.86k|    if (first <= last) {
  ------------------
  |  Branch (714:9): [True: 155, False: 1.71k]
  ------------------
  715|    155|        run_container_shift_tail(run, run->n_runs - (last + 1),
  716|    155|                                 -(last - first + 1));
  717|    155|    }
  718|  1.86k|}
roaring.c:rle16_find_run:
  133|  3.81k|                                     uint16_t ikey) {
  134|  3.81k|    int32_t low = 0;
  135|  3.81k|    int32_t high = lenarray - 1;
  136|  15.6k|    while (low <= high) {
  ------------------
  |  Branch (136:12): [True: 13.2k, False: 2.41k]
  ------------------
  137|  13.2k|        int32_t middleIndex = (low + high) >> 1;
  138|  13.2k|        uint16_t min = array[middleIndex].value;
  139|  13.2k|        uint16_t max = array[middleIndex].value + array[middleIndex].length;
  140|  13.2k|        if (ikey > max) {
  ------------------
  |  Branch (140:13): [True: 1.42k, False: 11.8k]
  ------------------
  141|  1.42k|            low = middleIndex + 1;
  142|  11.8k|        } else if (ikey < min) {
  ------------------
  |  Branch (142:20): [True: 10.4k, False: 1.39k]
  ------------------
  143|  10.4k|            high = middleIndex - 1;
  144|  10.4k|        } else {
  145|  1.39k|            return middleIndex;
  146|  1.39k|        }
  147|  13.2k|    }
  148|  2.41k|    return -(low + 1);
  149|  3.81k|}
roaring.c:run_container_shift_tail:
  650|    155|                                            int32_t distance) {
  651|    155|    if (distance > 0) {
  ------------------
  |  Branch (651:9): [True: 0, False: 155]
  ------------------
  652|      0|        if (run->capacity < count + distance) {
  ------------------
  |  Branch (652:13): [True: 0, False: 0]
  ------------------
  653|       |            run_container_grow(run, count + distance, true);
  654|      0|        }
  655|      0|    }
  656|    155|    int32_t srcpos = run->n_runs - count;
  657|    155|    int32_t dstpos = srcpos + distance;
  658|    155|    memmove(&(run->runs[dstpos]), &(run->runs[srcpos]),
  659|    155|            sizeof(rle16_t) * count);
  660|    155|    run->n_runs += distance;
  661|    155|}
roaring.c:run_container_remove:
  223|  3.19k|static inline bool run_container_remove(run_container_t *run, uint16_t pos) {
  224|  3.19k|    int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
  225|  3.19k|    if (index >= 0) {
  ------------------
  |  Branch (225:9): [True: 1.24k, False: 1.94k]
  ------------------
  226|  1.24k|        int32_t le = run->runs[index].length;
  227|  1.24k|        if (le == 0) {
  ------------------
  |  Branch (227:13): [True: 267, False: 975]
  ------------------
  228|    267|            recoverRoomAtIndex(run, (uint16_t)index);
  229|    975|        } else {
  230|    975|            run->runs[index].value++;
  231|    975|            run->runs[index].length--;
  232|    975|        }
  233|  1.24k|        return true;
  234|  1.24k|    }
  235|  1.94k|    index = -index - 2;  // points to preceding value, possibly -1
  236|  1.94k|    if (index >= 0) {    // possible match
  ------------------
  |  Branch (236:9): [True: 902, False: 1.04k]
  ------------------
  237|    902|        int32_t offset = pos - run->runs[index].value;
  238|    902|        int32_t le = run->runs[index].length;
  239|    902|        if (offset < le) {
  ------------------
  |  Branch (239:13): [True: 683, False: 219]
  ------------------
  240|       |            // need to break in two
  241|    683|            run->runs[index].length = (uint16_t)(offset - 1);
  242|       |            // need to insert
  243|    683|            uint16_t newvalue = pos + 1;
  244|    683|            int32_t newlength = le - offset - 1;
  245|    683|            makeRoomAtIndex(run, (uint16_t)(index + 1));
  246|    683|            run->runs[index + 1].value = newvalue;
  247|    683|            run->runs[index + 1].length = (uint16_t)newlength;
  248|    683|            return true;
  249|       |
  250|    683|        } else if (offset == le) {
  ------------------
  |  Branch (250:20): [True: 24, False: 195]
  ------------------
  251|     24|            run->runs[index].length--;
  252|     24|            return true;
  253|     24|        }
  254|    902|    }
  255|       |    // no match
  256|  1.24k|    return false;
  257|  1.94k|}
roaring.c:recoverRoomAtIndex:
  102|    267|static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) {
  103|    267|    memmove(run->runs + index, run->runs + (1 + index),
  104|    267|            (run->n_runs - index - 1) * sizeof(rle16_t));
  105|    267|    run->n_runs--;
  106|    267|}
roaring.c:run_container_nonzero_cardinality:
  309|  66.1k|    const run_container_t *run) {
  310|  66.1k|    return run->n_runs > 0;  // runs never empty
  311|  66.1k|}
roaring.c:run_container_is_full:
  394|  6.68k|static inline bool run_container_is_full(const run_container_t *run) {
  395|  6.68k|    rle16_t vl = run->runs[0];
  396|  6.68k|    return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF);
  ------------------
  |  Branch (396:12): [True: 1.61k, False: 5.06k]
  |  Branch (396:34): [True: 1.39k, False: 224]
  |  Branch (396:53): [True: 1.25k, False: 139]
  ------------------
  397|  6.68k|}
roaring.c:run_container_equals:
  502|    138|                                        const run_container_t *container2) {
  503|    138|    if (container1->n_runs != container2->n_runs) {
  ------------------
  |  Branch (503:9): [True: 101, False: 37]
  ------------------
  504|    101|        return false;
  505|    101|    }
  506|     37|    return memequals(container1->runs, container2->runs,
  507|     37|                     container1->n_runs * sizeof(rle16_t));
  508|    138|}
roaring.c:run_container_contains_range:
  278|    177|                                                uint32_t pos_end) {
  279|    177|    uint32_t count = 0;
  280|    177|    int32_t index =
  281|    177|        interleavedBinarySearch(run->runs, run->n_runs, (uint16_t)pos_start);
  282|    177|    if (index < 0) {
  ------------------
  |  Branch (282:9): [True: 95, False: 82]
  ------------------
  283|     95|        index = -index - 2;
  284|     95|        if ((index == -1) ||
  ------------------
  |  Branch (284:13): [True: 20, False: 75]
  ------------------
  285|     75|            ((pos_start - run->runs[index].value) > run->runs[index].length)) {
  ------------------
  |  Branch (285:13): [True: 13, False: 62]
  ------------------
  286|     33|            return false;
  287|     33|        }
  288|     95|    }
  289|  2.31k|    for (int32_t i = index; i < run->n_runs; ++i) {
  ------------------
  |  Branch (289:29): [True: 2.24k, False: 72]
  ------------------
  290|  2.24k|        const uint32_t stop = run->runs[i].value + run->runs[i].length;
  291|  2.24k|        if (run->runs[i].value >= pos_end) break;
  ------------------
  |  Branch (291:13): [True: 11, False: 2.23k]
  ------------------
  292|  2.23k|        if (stop >= pos_end) {
  ------------------
  |  Branch (292:13): [True: 61, False: 2.17k]
  ------------------
  293|     61|            count += (((pos_end - run->runs[i].value) > 0)
  ------------------
  |  Branch (293:23): [True: 61, False: 0]
  ------------------
  294|     61|                          ? (pos_end - run->runs[i].value)
  295|     61|                          : 0);
  296|     61|            break;
  297|     61|        }
  298|  2.17k|        const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0;
  ------------------
  |  Branch (298:30): [True: 2.16k, False: 12]
  ------------------
  299|  2.17k|        count += (min < run->runs[i].length) ? min : run->runs[i].length;
  ------------------
  |  Branch (299:18): [True: 51, False: 2.12k]
  ------------------
  300|  2.17k|    }
  301|    144|    return count >= (pos_end - pos_start - 1);
  302|    177|}
roaring_array.c:run_container_size_in_bytes:
  493|  2.64k|    const run_container_t *container) {
  494|  2.64k|    return run_container_serialized_size_in_bytes(container->n_runs);
  495|  2.64k|}
roaring_array.c:run_container_serialized_size_in_bytes:
  456|  2.64k|static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) {
  457|  2.64k|    return sizeof(uint16_t) +
  458|  2.64k|           sizeof(rle16_t) * num_runs;  // each run requires 2 2-byte entries.
  459|  2.64k|}
convert.c:run_container_serialized_size_in_bytes:
  456|   151k|static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) {
  457|   151k|    return sizeof(uint16_t) +
  458|   151k|           sizeof(rle16_t) * num_runs;  // each run requires 2 2-byte entries.
  459|   151k|}
mixed_intersection.c:run_container_is_full:
  394|  17.2k|static inline bool run_container_is_full(const run_container_t *run) {
  395|  17.2k|    rle16_t vl = run->runs[0];
  396|  17.2k|    return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF);
  ------------------
  |  Branch (396:12): [True: 1.38k, False: 15.8k]
  |  Branch (396:34): [True: 676, False: 709]
  |  Branch (396:53): [True: 180, False: 496]
  ------------------
  397|  17.2k|}
mixed_union.c:run_container_is_full:
  394|  2.89k|static inline bool run_container_is_full(const run_container_t *run) {
  395|  2.89k|    rle16_t vl = run->runs[0];
  396|  2.89k|    return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF);
  ------------------
  |  Branch (396:12): [True: 341, False: 2.55k]
  |  Branch (396:34): [True: 224, False: 117]
  |  Branch (396:53): [True: 97, False: 127]
  ------------------
  397|  2.89k|}
mixed_union.c:run_container_append_first:
  351|  2.15k|                                                 rle16_t vl) {
  352|  2.15k|    run->runs[run->n_runs] = vl;
  353|  2.15k|    run->n_runs++;
  354|  2.15k|    return vl;
  355|  2.15k|}
mixed_union.c:run_container_append_value_first:
  385|    646|                                                       uint16_t val) {
  386|    646|    rle16_t newrle = CROARING_MAKE_RLE16(val, 0);
  ------------------
  |  |   60|    646|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  387|    646|    run->runs[run->n_runs] = newrle;
  388|    646|    run->n_runs++;
  389|    646|    return newrle;
  390|    646|}
mixed_union.c:run_container_append:
  332|   168k|                                        rle16_t *previousrl) {
  333|   168k|    const uint32_t previousend = previousrl->value + previousrl->length;
  334|   168k|    if (vl.value > previousend + 1) {  // we add a new one
  ------------------
  |  Branch (334:9): [True: 156k, False: 11.9k]
  ------------------
  335|   156k|        run->runs[run->n_runs] = vl;
  336|   156k|        run->n_runs++;
  337|   156k|        *previousrl = vl;
  338|   156k|    } else {
  339|  11.9k|        uint32_t newend = vl.value + vl.length + UINT32_C(1);
  340|  11.9k|        if (newend > previousend) {  // we merge
  ------------------
  |  Branch (340:13): [True: 11.9k, False: 0]
  ------------------
  341|  11.9k|            previousrl->length = (uint16_t)(newend - 1 - previousrl->value);
  342|  11.9k|            run->runs[run->n_runs - 1] = *previousrl;
  343|  11.9k|        }
  344|  11.9k|    }
  345|   168k|}
mixed_union.c:run_container_append_value:
  368|   520k|                                              rle16_t *previousrl) {
  369|   520k|    const uint32_t previousend = previousrl->value + previousrl->length;
  370|   520k|    if (val > previousend + 1) {  // we add a new one
  ------------------
  |  Branch (370:9): [True: 54.3k, False: 465k]
  ------------------
  371|  54.3k|        *previousrl = CROARING_MAKE_RLE16(val, 0);
  ------------------
  |  |   60|  54.3k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  372|  54.3k|        run->runs[run->n_runs] = *previousrl;
  373|  54.3k|        run->n_runs++;
  374|   465k|    } else if (val == previousend + 1) {  // we merge
  ------------------
  |  Branch (374:16): [True: 168k, False: 297k]
  ------------------
  375|   168k|        previousrl->length++;
  376|   168k|        run->runs[run->n_runs - 1] = *previousrl;
  377|   168k|    }
  378|   520k|}
run.c:run_container_is_full:
  394|   134k|static inline bool run_container_is_full(const run_container_t *run) {
  395|   134k|    rle16_t vl = run->runs[0];
  396|   134k|    return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF);
  ------------------
  |  Branch (396:12): [True: 119k, False: 15.3k]
  |  Branch (396:34): [True: 117k, False: 1.87k]
  |  Branch (396:53): [True: 115k, False: 2.29k]
  ------------------
  397|   134k|}
run.c:run_container_serialized_size_in_bytes:
  456|     56|static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) {
  457|     56|    return sizeof(uint16_t) +
  458|     56|           sizeof(rle16_t) * num_runs;  // each run requires 2 2-byte entries.
  459|     56|}
run.c:recoverRoomAtIndex:
  102|  6.71k|static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) {
  103|  6.71k|    memmove(run->runs + index, run->runs + (1 + index),
  104|  6.71k|            (run->n_runs - index - 1) * sizeof(rle16_t));
  105|  6.71k|    run->n_runs--;
  106|  6.71k|}
run.c:makeRoomAtIndex:
  208|  45.4k|static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) {
  209|       |    /* This function calls realloc + memmove sequentially to move by one index.
  210|       |     * Potentially copying twice the array.
  211|       |     */
  212|  45.4k|    if (run->n_runs + 1 > run->capacity)
  ------------------
  |  Branch (212:9): [True: 2.59k, False: 42.8k]
  ------------------
  213|  2.59k|        run_container_grow(run, run->n_runs + 1, true);
  214|  45.4k|    memmove(run->runs + 1 + index, run->runs + index,
  215|  45.4k|            (run->n_runs - index) * sizeof(rle16_t));
  216|  45.4k|    run->n_runs++;
  217|  45.4k|}
run.c:run_container_append_first:
  351|  2.41k|                                                 rle16_t vl) {
  352|  2.41k|    run->runs[run->n_runs] = vl;
  353|  2.41k|    run->n_runs++;
  354|  2.41k|    return vl;
  355|  2.41k|}
run.c:run_container_append:
  332|   306k|                                        rle16_t *previousrl) {
  333|   306k|    const uint32_t previousend = previousrl->value + previousrl->length;
  334|   306k|    if (vl.value > previousend + 1) {  // we add a new one
  ------------------
  |  Branch (334:9): [True: 158k, False: 148k]
  ------------------
  335|   158k|        run->runs[run->n_runs] = vl;
  336|   158k|        run->n_runs++;
  337|   158k|        *previousrl = vl;
  338|   158k|    } else {
  339|   148k|        uint32_t newend = vl.value + vl.length + UINT32_C(1);
  340|   148k|        if (newend > previousend) {  // we merge
  ------------------
  |  Branch (340:13): [True: 114k, False: 34.4k]
  ------------------
  341|   114k|            previousrl->length = (uint16_t)(newend - 1 - previousrl->value);
  342|   114k|            run->runs[run->n_runs - 1] = *previousrl;
  343|   114k|        }
  344|   148k|    }
  345|   306k|}
run.c:run_container_empty:
  314|     15|static inline bool run_container_empty(const run_container_t *run) {
  315|     15|    return run->n_runs == 0;  // runs never empty
  316|     15|}
run.c:run_container_size_in_bytes:
  493|     56|    const run_container_t *container) {
  494|     56|    return run_container_serialized_size_in_bytes(container->n_runs);
  495|     56|}

roaring_trailing_zeroes:
  235|  69.0M|inline int roaring_trailing_zeroes(unsigned long long input_num) {
  236|  69.0M|    return __builtin_ctzll(input_num);
  237|  69.0M|}
roaring_leading_zeroes:
  239|  4.36k|inline int roaring_leading_zeroes(unsigned long long input_num) {
  240|  4.36k|    return __builtin_clzll(input_num);
  241|  4.36k|}
roaring.c:roaring_hamming:
  280|   199k|static inline int roaring_hamming(uint64_t x) {
  281|       |#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
  282|       |    CROARING_REGULAR_VISUAL_STUDIO
  283|       |#ifdef CROARING_USENEON
  284|       |    return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
  285|       |#elif defined(_M_ARM64)
  286|       |    return roaring_hamming_backup(x);
  287|       |    // (int) _CountOneBits64(x); is unavailable
  288|       |#else   // _M_ARM64
  289|       |    return (int)__popcnt64(x);
  290|       |#endif  // _M_ARM64
  291|       |#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
  292|       |    CROARING_REGULAR_VISUAL_STUDIO
  293|       |#ifdef _M_ARM
  294|       |    return roaring_hamming_backup(x);
  295|       |    // _CountOneBits is unavailable
  296|       |#else   // _M_ARM
  297|       |    return (int)__popcnt((unsigned int)x) +
  298|       |           (int)__popcnt((unsigned int)(x >> 32));
  299|       |#endif  // _M_ARM
  300|       |#else
  301|   199k|    return __builtin_popcountll(x);
  302|   199k|#endif
  303|   199k|}
mixed_intersection.c:roaring_hamming:
  280|   223k|static inline int roaring_hamming(uint64_t x) {
  281|       |#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
  282|       |    CROARING_REGULAR_VISUAL_STUDIO
  283|       |#ifdef CROARING_USENEON
  284|       |    return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
  285|       |#elif defined(_M_ARM64)
  286|       |    return roaring_hamming_backup(x);
  287|       |    // (int) _CountOneBits64(x); is unavailable
  288|       |#else   // _M_ARM64
  289|       |    return (int)__popcnt64(x);
  290|       |#endif  // _M_ARM64
  291|       |#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \
  292|       |    CROARING_REGULAR_VISUAL_STUDIO
  293|       |#ifdef _M_ARM
  294|       |    return roaring_hamming_backup(x);
  295|       |    // _CountOneBits is unavailable
  296|       |#else   // _M_ARM
  297|       |    return (int)__popcnt((unsigned int)x) +
  298|       |           (int)__popcnt((unsigned int)(x >> 32));
  299|       |#endif  // _M_ARM
  300|       |#else
  301|   223k|    return __builtin_popcountll(x);
  302|   223k|#endif
  303|   223k|}

roaring_bitmap_init_cleared:
   72|  32.5k|inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) {
   73|  32.5k|    roaring_bitmap_init_with_capacity(r, 0);
   74|  32.5k|}
roaring_bitmap_add_range:
  434|  6.51k|                                     uint64_t max) {
  435|  6.51k|    if (max <= min || min > (uint64_t)UINT32_MAX + 1) {
  ------------------
  |  Branch (435:9): [True: 3.65k, False: 2.86k]
  |  Branch (435:23): [True: 0, False: 2.86k]
  ------------------
  436|  3.65k|        return;
  437|  3.65k|    }
  438|  2.86k|    roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
  439|  2.86k|}
roaring_bitmap_remove_range:
  456|  6.51k|                                        uint64_t max) {
  457|  6.51k|    if (max <= min || min > (uint64_t)UINT32_MAX + 1) {
  ------------------
  |  Branch (457:9): [True: 5.50k, False: 1.00k]
  |  Branch (457:23): [True: 0, False: 1.00k]
  ------------------
  458|  5.50k|        return;
  459|  5.50k|    }
  460|  1.00k|    roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
  461|  1.00k|}
roaring_bitmap_contains:
  478|  6.58k|inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
  479|       |    // For performance reasons, this function is inline and uses internal
  480|       |    // functions directly.
  481|  6.58k|#ifdef __cplusplus
  482|  6.58k|    using namespace ::roaring::internal;
  483|  6.58k|#endif
  484|  6.58k|    const uint16_t hb = val >> 16;
  485|       |    /*
  486|       |     * the next function call involves a binary search and lots of branching.
  487|       |     */
  488|  6.58k|    int32_t i = ra_get_index(&r->high_low_container, hb);
  489|  6.58k|    if (i < 0) return false;
  ------------------
  |  Branch (489:9): [True: 678, False: 5.90k]
  ------------------
  490|       |
  491|  5.90k|    uint8_t typecode;
  492|       |    // next call ought to be cheap
  493|  5.90k|    container_t *container = ra_get_container_at_index(&r->high_low_container,
  494|  5.90k|                                                       (uint16_t)i, &typecode);
  495|       |    // rest might be a tad expensive, possibly involving another round of binary
  496|       |    // search
  497|  5.90k|    return container_contains(container, val & 0xFFFF, typecode);
  498|  6.58k|}
roaring_bitmap_set_copy_on_write:
  117|  60.1k|inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) {
  118|  60.1k|    if (cow) {
  ------------------
  |  Branch (118:9): [True: 0, False: 60.1k]
  ------------------
  119|      0|        r->high_low_container.flags |= ROARING_FLAG_COW;
  ------------------
  |  |   46|      0|#define ROARING_FLAG_COW UINT8_C(0x1)
  ------------------
  120|  60.1k|    } else {
  121|  60.1k|        if (roaring_bitmap_get_copy_on_write(r)) {
  ------------------
  |  Branch (121:13): [True: 0, False: 60.1k]
  ------------------
  122|      0|            roaring_unshare_all(r);
  123|      0|        }
  124|       |        r->high_low_container.flags &= ~ROARING_FLAG_COW;
  ------------------
  |  |   46|  60.1k|#define ROARING_FLAG_COW UINT8_C(0x1)
  ------------------
  125|  60.1k|    }
  126|  60.1k|}
roaring_bitmap_get_copy_on_write:
  114|  73.2k|inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r) {
  115|       |    return r->high_low_container.flags & ROARING_FLAG_COW;
  ------------------
  |  |   46|  73.2k|#define ROARING_FLAG_COW UINT8_C(0x1)
  ------------------
  116|  73.2k|}

ra_get_index:
   93|  88.7k|inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
   94|  88.7k|    if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1;
  ------------------
  |  Branch (94:9): [True: 13.1k, False: 75.6k]
  |  Branch (94:28): [True: 33.0k, False: 42.6k]
  ------------------
   95|  42.6k|    return binarySearch(ra->keys, (int32_t)ra->size, x);
   96|  88.7k|}
ra_get_container_at_index:
  102|   580k|                                              uint16_t i, uint8_t *typecode) {
  103|   580k|    *typecode = ra->typecodes[i];
  104|   580k|    return ra->containers[i];
  105|   580k|}
ra_get_key_at_index:
  110|   434k|inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
  111|   434k|    return ra->keys[i];
  112|   434k|}
ra_set_container_at_index:
  175|   115k|                                      container_t *c, uint8_t typecode) {
  176|       |    assert(i < ra->size);
  177|   115k|    ra->containers[i] = c;
  178|   115k|    ra->typecodes[i] = typecode;
  179|   115k|}
ra_get_size:
  191|  13.7k|inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; }
ra_replace_key_and_container_at_index:
  205|   157k|                                                  uint8_t typecode) {
  206|   157k|    assert(i < ra->size);
  207|       |
  208|   157k|    ra->keys[i] = key;
  209|   157k|    ra->containers[i] = c;
  210|   157k|    ra->typecodes[i] = typecode;
  211|   157k|}
roaring.c:ra_advance_until:
  194|    504|                                       int32_t pos) {
  195|    504|    return advanceUntil(ra->keys, pos, ra->size, x);
  196|    504|}
roaring.c:ra_unshare_container_at_index:
  268|  66.6k|                                                 uint16_t i) {
  269|       |    assert(i < ra->size);
  270|  66.6k|    ra->containers[i] =
  271|  66.6k|        get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]);
  272|  66.6k|}

intersect_vector16:
  386|  1.82k|                           size_t s_b, uint16_t *C) {
  387|  1.82k|    size_t count = 0;
  388|  1.82k|    size_t i_a = 0, i_b = 0;
  389|  1.82k|    const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
  390|  1.82k|    const size_t st_a = (s_a / vectorlength) * vectorlength;
  391|  1.82k|    const size_t st_b = (s_b / vectorlength) * vectorlength;
  392|  1.82k|    __m128i v_a, v_b;
  393|  1.82k|    if ((i_a < st_a) && (i_b < st_b)) {
  ------------------
  |  Branch (393:9): [True: 1.69k, False: 130]
  |  Branch (393:25): [True: 1.53k, False: 160]
  ------------------
  394|  1.53k|        v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  395|  1.53k|        v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  396|  4.31k|        while ((A[i_a] == 0) || (B[i_b] == 0)) {
  ------------------
  |  Branch (396:16): [True: 2.73k, False: 1.58k]
  |  Branch (396:33): [True: 183, False: 1.40k]
  ------------------
  397|  2.91k|            const __m128i res_v = _mm_cmpestrm(
  398|  2.91k|                v_b, vectorlength, v_a, vectorlength,
  399|  2.91k|                _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  400|  2.91k|            const int r = _mm_extract_epi32(res_v, 0);
  401|  2.91k|            __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
  402|  2.91k|            __m128i p = _mm_shuffle_epi8(v_a, sm16);
  403|  2.91k|            _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
  404|  2.91k|            count += _mm_popcnt_u32(r);
  405|  2.91k|            const uint16_t a_max = A[i_a + vectorlength - 1];
  406|  2.91k|            const uint16_t b_max = B[i_b + vectorlength - 1];
  407|  2.91k|            if (a_max <= b_max) {
  ------------------
  |  Branch (407:17): [True: 1.56k, False: 1.34k]
  ------------------
  408|  1.56k|                i_a += vectorlength;
  409|  1.56k|                if (i_a == st_a) break;
  ------------------
  |  Branch (409:21): [True: 102, False: 1.46k]
  ------------------
  410|  1.46k|                v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  411|  1.46k|            }
  412|  2.81k|            if (b_max <= a_max) {
  ------------------
  |  Branch (412:17): [True: 1.45k, False: 1.35k]
  ------------------
  413|  1.45k|                i_b += vectorlength;
  414|  1.45k|                if (i_b == st_b) break;
  ------------------
  |  Branch (414:21): [True: 34, False: 1.42k]
  ------------------
  415|  1.42k|                v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  416|  1.42k|            }
  417|  2.81k|        }
  418|  1.53k|        if ((i_a < st_a) && (i_b < st_b))
  ------------------
  |  Branch (418:13): [True: 1.43k, False: 102]
  |  Branch (418:29): [True: 1.40k, False: 34]
  ------------------
  419|  44.5k|            while (true) {
  ------------------
  |  Branch (419:20): [True: 44.5k, Folded]
  ------------------
  420|  44.5k|                const __m128i res_v = _mm_cmpistrm(
  421|  44.5k|                    v_b, v_a,
  422|  44.5k|                    _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  423|  44.5k|                const int r = _mm_extract_epi32(res_v, 0);
  424|  44.5k|                __m128i sm16 =
  425|  44.5k|                    _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
  426|  44.5k|                __m128i p = _mm_shuffle_epi8(v_a, sm16);
  427|  44.5k|                _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
  428|  44.5k|                count += _mm_popcnt_u32(r);
  429|  44.5k|                const uint16_t a_max = A[i_a + vectorlength - 1];
  430|  44.5k|                const uint16_t b_max = B[i_b + vectorlength - 1];
  431|  44.5k|                if (a_max <= b_max) {
  ------------------
  |  Branch (431:21): [True: 20.0k, False: 24.4k]
  ------------------
  432|  20.0k|                    i_a += vectorlength;
  433|  20.0k|                    if (i_a == st_a) break;
  ------------------
  |  Branch (433:25): [True: 835, False: 19.2k]
  ------------------
  434|  19.2k|                    v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  435|  19.2k|                }
  436|  43.7k|                if (b_max <= a_max) {
  ------------------
  |  Branch (436:21): [True: 26.0k, False: 17.6k]
  ------------------
  437|  26.0k|                    i_b += vectorlength;
  438|  26.0k|                    if (i_b == st_b) break;
  ------------------
  |  Branch (438:25): [True: 568, False: 25.5k]
  ------------------
  439|  25.5k|                    v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  440|  25.5k|                }
  441|  43.7k|            }
  442|  1.53k|    }
  443|       |    // intersect the tail using scalar intersection
  444|  28.7k|    while (i_a < s_a && i_b < s_b) {
  ------------------
  |  Branch (444:12): [True: 27.2k, False: 1.44k]
  |  Branch (444:25): [True: 26.8k, False: 385]
  ------------------
  445|  26.8k|        uint16_t a = A[i_a];
  446|  26.8k|        uint16_t b = B[i_b];
  447|  26.8k|        if (a < b) {
  ------------------
  |  Branch (447:13): [True: 10.7k, False: 16.1k]
  ------------------
  448|  10.7k|            i_a++;
  449|  16.1k|        } else if (b < a) {
  ------------------
  |  Branch (449:20): [True: 10.2k, False: 5.84k]
  ------------------
  450|  10.2k|            i_b++;
  451|  10.2k|        } else {
  452|  5.84k|            C[count] = a;  //==b;
  453|  5.84k|            count++;
  454|  5.84k|            i_a++;
  455|  5.84k|            i_b++;
  456|  5.84k|        }
  457|  26.8k|    }
  458|  1.82k|    return (int32_t)count;
  459|  1.82k|}
array_container_to_uint32_array_vector16:
  464|  6.03k|                                             uint32_t base) {
  465|  6.03k|    int outpos = 0;
  466|  6.03k|    uint32_t *out = (uint32_t *)vout;
  467|  6.03k|    size_t i = 0;
  468|   190k|    for (; i + sizeof(__m128i) / sizeof(uint16_t) <= cardinality;
  ------------------
  |  Branch (468:12): [True: 184k, False: 6.03k]
  ------------------
  469|   184k|         i += sizeof(__m128i) / sizeof(uint16_t)) {
  470|   184k|        __m128i vinput = _mm_loadu_si128((const __m128i *)(array + i));
  471|   184k|        __m256i voutput = _mm256_add_epi32(_mm256_cvtepu16_epi32(vinput),
  472|   184k|                                           _mm256_set1_epi32(base));
  473|   184k|        _mm256_storeu_si256((__m256i *)(out + outpos), voutput);
  474|   184k|        outpos += sizeof(__m256i) / sizeof(uint32_t);
  475|   184k|    }
  476|  20.6k|    for (; i < cardinality; ++i) {
  ------------------
  |  Branch (476:12): [True: 14.6k, False: 6.03k]
  ------------------
  477|  14.6k|        const uint32_t val = base + array[i];
  478|  14.6k|        memcpy(out + outpos, &val,
  479|  14.6k|               sizeof(uint32_t));  // should be compiled as a MOV on x64
  480|  14.6k|        outpos++;
  481|  14.6k|    }
  482|  6.03k|    return outpos;
  483|  6.03k|}
intersect_vector16_inplace:
  486|  4.73k|                                   size_t s_b) {
  487|  4.73k|    size_t count = 0;
  488|  4.73k|    size_t i_a = 0, i_b = 0;
  489|  4.73k|    const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
  490|  4.73k|    const size_t st_a = (s_a / vectorlength) * vectorlength;
  491|  4.73k|    const size_t st_b = (s_b / vectorlength) * vectorlength;
  492|  4.73k|    __m128i v_a, v_b;
  493|  4.73k|    if ((i_a < st_a) && (i_b < st_b)) {
  ------------------
  |  Branch (493:9): [True: 2.34k, False: 2.38k]
  |  Branch (493:25): [True: 2.15k, False: 193]
  ------------------
  494|  2.15k|        v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  495|  2.15k|        v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  496|  2.15k|        __m128i tmp[2] = {_mm_setzero_si128()};
  497|  2.15k|        size_t tmp_count = 0;
  498|  4.25k|        while ((A[i_a] == 0) || (B[i_b] == 0)) {
  ------------------
  |  Branch (498:16): [True: 1.90k, False: 2.35k]
  |  Branch (498:33): [True: 285, False: 2.07k]
  ------------------
  499|  2.18k|            const __m128i res_v = _mm_cmpestrm(
  500|  2.18k|                v_b, vectorlength, v_a, vectorlength,
  501|  2.18k|                _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  502|  2.18k|            const int r = _mm_extract_epi32(res_v, 0);
  503|  2.18k|            __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
  504|  2.18k|            __m128i p = _mm_shuffle_epi8(v_a, sm16);
  505|  2.18k|            _mm_storeu_si128((__m128i *)&((uint16_t *)tmp)[tmp_count], p);
  506|  2.18k|            tmp_count += _mm_popcnt_u32(r);
  507|  2.18k|            const uint16_t a_max = A[i_a + vectorlength - 1];
  508|  2.18k|            const uint16_t b_max = B[i_b + vectorlength - 1];
  509|  2.18k|            if (a_max <= b_max) {
  ------------------
  |  Branch (509:17): [True: 2.04k, False: 147]
  ------------------
  510|  2.04k|                _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
  511|  2.04k|                _mm_storeu_si128(tmp, _mm_setzero_si128());
  512|  2.04k|                count += tmp_count;
  513|  2.04k|                tmp_count = 0;
  514|  2.04k|                i_a += vectorlength;
  515|  2.04k|                if (i_a == st_a) break;
  ------------------
  |  Branch (515:21): [True: 74, False: 1.96k]
  ------------------
  516|  1.96k|                v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  517|  1.96k|            }
  518|  2.11k|            if (b_max <= a_max) {
  ------------------
  |  Branch (518:17): [True: 356, False: 1.75k]
  ------------------
  519|    356|                i_b += vectorlength;
  520|    356|                if (i_b == st_b) break;
  ------------------
  |  Branch (520:21): [True: 12, False: 344]
  ------------------
  521|    344|                v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  522|    344|            }
  523|  2.11k|        }
  524|  2.15k|        if ((i_a < st_a) && (i_b < st_b)) {
  ------------------
  |  Branch (524:13): [True: 2.08k, False: 74]
  |  Branch (524:29): [True: 2.07k, False: 12]
  ------------------
  525|   238k|            while (true) {
  ------------------
  |  Branch (525:20): [True: 238k, Folded]
  ------------------
  526|   238k|                const __m128i res_v = _mm_cmpistrm(
  527|   238k|                    v_b, v_a,
  528|   238k|                    _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  529|   238k|                const int r = _mm_extract_epi32(res_v, 0);
  530|   238k|                __m128i sm16 =
  531|   238k|                    _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
  532|   238k|                __m128i p = _mm_shuffle_epi8(v_a, sm16);
  533|   238k|                _mm_storeu_si128((__m128i *)&((uint16_t *)tmp)[tmp_count], p);
  534|   238k|                tmp_count += _mm_popcnt_u32(r);
  535|   238k|                const uint16_t a_max = A[i_a + vectorlength - 1];
  536|   238k|                const uint16_t b_max = B[i_b + vectorlength - 1];
  537|   238k|                if (a_max <= b_max) {
  ------------------
  |  Branch (537:21): [True: 138k, False: 100k]
  ------------------
  538|   138k|                    _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
  539|   138k|                    _mm_storeu_si128(tmp, _mm_setzero_si128());
  540|   138k|                    count += tmp_count;
  541|   138k|                    tmp_count = 0;
  542|   138k|                    i_a += vectorlength;
  543|   138k|                    if (i_a == st_a) break;
  ------------------
  |  Branch (543:25): [True: 1.15k, False: 137k]
  ------------------
  544|   137k|                    v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  545|   137k|                }
  546|   237k|                if (b_max <= a_max) {
  ------------------
  |  Branch (546:21): [True: 132k, False: 105k]
  ------------------
  547|   132k|                    i_b += vectorlength;
  548|   132k|                    if (i_b == st_b) break;
  ------------------
  |  Branch (548:25): [True: 915, False: 131k]
  ------------------
  549|   131k|                    v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  550|   131k|                }
  551|   237k|            }
  552|  2.07k|        }
  553|       |        // tmp_count <= 8, so this does not affect efficiency so much
  554|  4.74k|        for (size_t i = 0; i < tmp_count; i++) {
  ------------------
  |  Branch (554:28): [True: 2.58k, False: 2.15k]
  ------------------
  555|  2.58k|            A[count] = ((uint16_t *)tmp)[i];
  556|  2.58k|            count++;
  557|  2.58k|        }
  558|  2.15k|        i_a += tmp_count;  // We can at least jump pass $tmp_count elements in A
  559|  2.15k|    }
  560|       |    // intersect the tail using scalar intersection
  561|  33.6k|    while (i_a < s_a && i_b < s_b) {
  ------------------
  |  Branch (561:12): [True: 29.3k, False: 4.29k]
  |  Branch (561:25): [True: 28.8k, False: 443]
  ------------------
  562|  28.8k|        uint16_t a = A[i_a];
  563|  28.8k|        uint16_t b = B[i_b];
  564|  28.8k|        if (a < b) {
  ------------------
  |  Branch (564:13): [True: 9.37k, False: 19.5k]
  ------------------
  565|  9.37k|            i_a++;
  566|  19.5k|        } else if (b < a) {
  ------------------
  |  Branch (566:20): [True: 7.27k, False: 12.2k]
  ------------------
  567|  7.27k|            i_b++;
  568|  12.2k|        } else {
  569|  12.2k|            A[count] = a;  //==b;
  570|  12.2k|            count++;
  571|  12.2k|            i_a++;
  572|  12.2k|            i_b++;
  573|  12.2k|        }
  574|  28.8k|    }
  575|  4.73k|    return (int32_t)count;
  576|  4.73k|}
intersect_vector16_cardinality:
  581|  7.31k|                                       const uint16_t *B, size_t s_b) {
  582|  7.31k|    size_t count = 0;
  583|  7.31k|    size_t i_a = 0, i_b = 0;
  584|  7.31k|    const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
  585|  7.31k|    const size_t st_a = (s_a / vectorlength) * vectorlength;
  586|  7.31k|    const size_t st_b = (s_b / vectorlength) * vectorlength;
  587|  7.31k|    __m128i v_a, v_b;
  588|  7.31k|    if ((i_a < st_a) && (i_b < st_b)) {
  ------------------
  |  Branch (588:9): [True: 6.79k, False: 520]
  |  Branch (588:25): [True: 6.15k, False: 640]
  ------------------
  589|  6.15k|        v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  590|  6.15k|        v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  591|  17.2k|        while ((A[i_a] == 0) || (B[i_b] == 0)) {
  ------------------
  |  Branch (591:16): [True: 10.9k, False: 6.34k]
  |  Branch (591:33): [True: 732, False: 5.61k]
  ------------------
  592|  11.6k|            const __m128i res_v = _mm_cmpestrm(
  593|  11.6k|                v_b, vectorlength, v_a, vectorlength,
  594|  11.6k|                _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  595|  11.6k|            const int r = _mm_extract_epi32(res_v, 0);
  596|  11.6k|            count += _mm_popcnt_u32(r);
  597|  11.6k|            const uint16_t a_max = A[i_a + vectorlength - 1];
  598|  11.6k|            const uint16_t b_max = B[i_b + vectorlength - 1];
  599|  11.6k|            if (a_max <= b_max) {
  ------------------
  |  Branch (599:17): [True: 6.26k, False: 5.39k]
  ------------------
  600|  6.26k|                i_a += vectorlength;
  601|  6.26k|                if (i_a == st_a) break;
  ------------------
  |  Branch (601:21): [True: 408, False: 5.86k]
  ------------------
  602|  5.86k|                v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  603|  5.86k|            }
  604|  11.2k|            if (b_max <= a_max) {
  ------------------
  |  Branch (604:17): [True: 5.82k, False: 5.43k]
  ------------------
  605|  5.82k|                i_b += vectorlength;
  606|  5.82k|                if (i_b == st_b) break;
  ------------------
  |  Branch (606:21): [True: 136, False: 5.68k]
  ------------------
  607|  5.68k|                v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  608|  5.68k|            }
  609|  11.2k|        }
  610|  6.15k|        if ((i_a < st_a) && (i_b < st_b))
  ------------------
  |  Branch (610:13): [True: 5.74k, False: 408]
  |  Branch (610:29): [True: 5.61k, False: 136]
  ------------------
  611|   178k|            while (true) {
  ------------------
  |  Branch (611:20): [True: 178k, Folded]
  ------------------
  612|   178k|                const __m128i res_v = _mm_cmpistrm(
  613|   178k|                    v_b, v_a,
  614|   178k|                    _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  615|   178k|                const int r = _mm_extract_epi32(res_v, 0);
  616|   178k|                count += _mm_popcnt_u32(r);
  617|   178k|                const uint16_t a_max = A[i_a + vectorlength - 1];
  618|   178k|                const uint16_t b_max = B[i_b + vectorlength - 1];
  619|   178k|                if (a_max <= b_max) {
  ------------------
  |  Branch (619:21): [True: 80.1k, False: 97.9k]
  ------------------
  620|  80.1k|                    i_a += vectorlength;
  621|  80.1k|                    if (i_a == st_a) break;
  ------------------
  |  Branch (621:25): [True: 3.34k, False: 76.8k]
  ------------------
  622|  76.8k|                    v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  623|  76.8k|                }
  624|   174k|                if (b_max <= a_max) {
  ------------------
  |  Branch (624:21): [True: 104k, False: 70.5k]
  ------------------
  625|   104k|                    i_b += vectorlength;
  626|   104k|                    if (i_b == st_b) break;
  ------------------
  |  Branch (626:25): [True: 2.27k, False: 102k]
  ------------------
  627|   102k|                    v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  628|   102k|                }
  629|   174k|            }
  630|  6.15k|    }
  631|       |    // intersect the tail using scalar intersection
  632|   114k|    while (i_a < s_a && i_b < s_b) {
  ------------------
  |  Branch (632:12): [True: 109k, False: 5.77k]
  |  Branch (632:25): [True: 107k, False: 1.54k]
  ------------------
  633|   107k|        uint16_t a = A[i_a];
  634|   107k|        uint16_t b = B[i_b];
  635|   107k|        if (a < b) {
  ------------------
  |  Branch (635:13): [True: 43.0k, False: 64.5k]
  ------------------
  636|  43.0k|            i_a++;
  637|  64.5k|        } else if (b < a) {
  ------------------
  |  Branch (637:20): [True: 41.1k, False: 23.3k]
  ------------------
  638|  41.1k|            i_b++;
  639|  41.1k|        } else {
  640|  23.3k|            count++;
  641|  23.3k|            i_a++;
  642|  23.3k|            i_b++;
  643|  23.3k|        }
  644|   107k|    }
  645|  7.31k|    return (int32_t)count;
  646|  7.31k|}
difference_vector16:
  655|  2.40k|                            size_t s_b, uint16_t *C) {
  656|       |    // we handle the degenerate case
  657|  2.40k|    if (s_a == 0) return 0;
  ------------------
  |  Branch (657:9): [True: 0, False: 2.40k]
  ------------------
  658|  2.40k|    if (s_b == 0) {
  ------------------
  |  Branch (658:9): [True: 0, False: 2.40k]
  ------------------
  659|      0|        if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a);
  ------------------
  |  Branch (659:13): [True: 0, False: 0]
  ------------------
  660|      0|        return (int32_t)s_a;
  661|      0|    }
  662|       |    // handle the leading zeroes, it is messy but it allows us to use the fast
  663|       |    // _mm_cmpistrm instrinsic safely
  664|  2.40k|    int32_t count = 0;
  665|  2.40k|    if ((A[0] == 0) || (B[0] == 0)) {
  ------------------
  |  Branch (665:9): [True: 2.20k, False: 197]
  |  Branch (665:24): [True: 5, False: 192]
  ------------------
  666|  2.20k|        if ((A[0] == 0) && (B[0] == 0)) {
  ------------------
  |  Branch (666:13): [True: 2.20k, False: 5]
  |  Branch (666:28): [True: 330, False: 1.87k]
  ------------------
  667|    330|            A++;
  668|    330|            s_a--;
  669|    330|            B++;
  670|    330|            s_b--;
  671|  1.87k|        } else if (A[0] == 0) {
  ------------------
  |  Branch (671:20): [True: 1.87k, False: 5]
  ------------------
  672|  1.87k|            C[count++] = 0;
  673|  1.87k|            A++;
  674|  1.87k|            s_a--;
  675|  1.87k|        } else {
  676|      5|            B++;
  677|      5|            s_b--;
  678|      5|        }
  679|  2.20k|    }
  680|       |    // at this point, we have two non-empty arrays, made of non-zero
  681|       |    // increasing values.
  682|  2.40k|    size_t i_a = 0, i_b = 0;
  683|  2.40k|    const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t);
  684|  2.40k|    const size_t st_a = (s_a / vectorlength) * vectorlength;
  685|  2.40k|    const size_t st_b = (s_b / vectorlength) * vectorlength;
  686|  2.40k|    if ((i_a < st_a) && (i_b < st_b)) {  // this is the vectorized code path
  ------------------
  |  Branch (686:9): [True: 1.98k, False: 418]
  |  Branch (686:25): [True: 1.67k, False: 310]
  ------------------
  687|  1.67k|        __m128i v_a, v_b;                //, v_bmax;
  688|       |        // we load a vector from A and a vector from B
  689|  1.67k|        v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  690|  1.67k|        v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  691|       |        // we have a runningmask which indicates which values from A have been
  692|       |        // spotted in B, these don't get written out.
  693|  1.67k|        __m128i runningmask_a_found_in_b = _mm_setzero_si128();
  694|       |        /****
  695|       |         * start of the main vectorized loop
  696|       |         *****/
  697|  48.9k|        while (true) {
  ------------------
  |  Branch (697:16): [True: 48.9k, Folded]
  ------------------
  698|       |            // afoundinb will contain a mask indicate for each entry in A
  699|       |            // whether it is seen
  700|       |            // in B
  701|  48.9k|            const __m128i a_found_in_b = _mm_cmpistrm(
  702|  48.9k|                v_b, v_a,
  703|  48.9k|                _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  704|  48.9k|            runningmask_a_found_in_b =
  705|  48.9k|                _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
  706|       |            // we always compare the last values of A and B
  707|  48.9k|            const uint16_t a_max = A[i_a + vectorlength - 1];
  708|  48.9k|            const uint16_t b_max = B[i_b + vectorlength - 1];
  709|  48.9k|            if (a_max <= b_max) {
  ------------------
  |  Branch (709:17): [True: 21.9k, False: 26.9k]
  ------------------
  710|       |                // Ok. In this code path, we are ready to write our v_a
  711|       |                // because there is no need to read more from B, they will
  712|       |                // all be large values.
  713|  21.9k|                const int bitmask_belongs_to_difference =
  714|  21.9k|                    _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
  715|       |                /*** next few lines are probably expensive *****/
  716|  21.9k|                __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
  717|  21.9k|                                               bitmask_belongs_to_difference);
  718|  21.9k|                __m128i p = _mm_shuffle_epi8(v_a, sm16);
  719|  21.9k|                _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
  720|  21.9k|                count += _mm_popcnt_u32(bitmask_belongs_to_difference);
  721|       |                // we advance a
  722|  21.9k|                i_a += vectorlength;
  723|  21.9k|                if (i_a == st_a)  // no more
  ------------------
  |  Branch (723:21): [True: 1.21k, False: 20.7k]
  ------------------
  724|  1.21k|                    break;
  725|  20.7k|                runningmask_a_found_in_b = _mm_setzero_si128();
  726|  20.7k|                v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
  727|  20.7k|            }
  728|  47.7k|            if (b_max <= a_max) {
  ------------------
  |  Branch (728:17): [True: 30.1k, False: 17.5k]
  ------------------
  729|       |                // in this code path, the current v_b has become useless
  730|  30.1k|                i_b += vectorlength;
  731|  30.1k|                if (i_b == st_b) break;
  ------------------
  |  Branch (731:21): [True: 461, False: 29.7k]
  ------------------
  732|  29.7k|                v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
  733|  29.7k|            }
  734|  47.7k|        }
  735|       |        // at this point, either we have i_a == st_a, which is the end of the
  736|       |        // vectorized processing,
  737|       |        // or we have i_b == st_b,  and we are not done processing the vector...
  738|       |        // so we need to finish it off.
  739|  1.67k|        if (i_a < st_a) {        // we have unfinished business...
  ------------------
  |  Branch (739:13): [True: 461, False: 1.21k]
  ------------------
  740|    461|            uint16_t buffer[8];  // buffer to do a masked load
  741|    461|            memset(buffer, 0, 8 * sizeof(uint16_t));
  742|    461|            memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t));
  743|    461|            v_b = _mm_lddqu_si128((__m128i *)buffer);
  744|    461|            const __m128i a_found_in_b = _mm_cmpistrm(
  745|    461|                v_b, v_a,
  746|    461|                _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  747|    461|            runningmask_a_found_in_b =
  748|    461|                _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
  749|    461|            const int bitmask_belongs_to_difference =
  750|    461|                _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
  751|    461|            __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
  752|    461|                                           bitmask_belongs_to_difference);
  753|    461|            __m128i p = _mm_shuffle_epi8(v_a, sm16);
  754|    461|            _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
  755|    461|            count += _mm_popcnt_u32(bitmask_belongs_to_difference);
  756|    461|            i_a += vectorlength;
  757|    461|        }
  758|       |        // at this point we should have i_a == st_a and i_b == st_b
  759|  1.67k|    }
  760|       |    // do the tail using scalar code
  761|   105k|    while (i_a < s_a && i_b < s_b) {
  ------------------
  |  Branch (761:12): [True: 104k, False: 1.84k]
  |  Branch (761:25): [True: 103k, False: 553]
  ------------------
  762|   103k|        uint16_t a = A[i_a];
  763|   103k|        uint16_t b = B[i_b];
  764|   103k|        if (b < a) {
  ------------------
  |  Branch (764:13): [True: 71.4k, False: 32.0k]
  ------------------
  765|  71.4k|            i_b++;
  766|  71.4k|        } else if (a < b) {
  ------------------
  |  Branch (766:20): [True: 26.3k, False: 5.76k]
  ------------------
  767|  26.3k|            C[count] = a;
  768|  26.3k|            count++;
  769|  26.3k|            i_a++;
  770|  26.3k|        } else {  //==
  771|  5.76k|            i_a++;
  772|  5.76k|            i_b++;
  773|  5.76k|        }
  774|   103k|    }
  775|  2.40k|    if (i_a < s_a) {
  ------------------
  |  Branch (775:9): [True: 553, False: 1.84k]
  ------------------
  776|    553|        if (C == A) {
  ------------------
  |  Branch (776:13): [True: 0, False: 553]
  ------------------
  777|      0|            assert((size_t)count <= i_a);
  778|      0|            if ((size_t)count < i_a) {
  ------------------
  |  Branch (778:17): [True: 0, False: 0]
  ------------------
  779|      0|                memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a));
  780|      0|            }
  781|    553|        } else {
  782|  27.9k|            for (size_t i = 0; i < (s_a - i_a); i++) {
  ------------------
  |  Branch (782:32): [True: 27.3k, False: 553]
  ------------------
  783|  27.3k|                C[count + i] = A[i + i_a];
  784|  27.3k|            }
  785|    553|        }
  786|    553|        count += (int32_t)(s_a - i_a);
  787|    553|    }
  788|  2.40k|    return count;
  789|  2.40k|}
intersect_skewed_uint16:
  855|    792|                                uint16_t *buffer) {
  856|    792|    size_t pos = 0, idx_l = 0, idx_s = 0;
  857|       |
  858|    792|    if (0 == size_s) {
  ------------------
  |  Branch (858:9): [True: 0, False: 792]
  ------------------
  859|      0|        return 0;
  860|      0|    }
  861|    792|    int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
  862|  2.16k|    while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
  ------------------
  |  Branch (862:12): [True: 1.37k, False: 791]
  |  Branch (862:37): [True: 1.37k, False: 1]
  ------------------
  863|  1.37k|        uint16_t target1 = small[idx_s];
  864|  1.37k|        uint16_t target2 = small[idx_s + 1];
  865|  1.37k|        uint16_t target3 = small[idx_s + 2];
  866|  1.37k|        uint16_t target4 = small[idx_s + 3];
  867|  1.37k|        binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1,
  868|  1.37k|                      target2, target3, target4, &index1, &index2, &index3,
  869|  1.37k|                      &index4);
  870|  1.37k|        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
  ------------------
  |  Branch (870:13): [True: 1.37k, False: 1]
  |  Branch (870:42): [True: 651, False: 723]
  ------------------
  871|    651|            buffer[pos++] = target1;
  872|    651|        }
  873|  1.37k|        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
  ------------------
  |  Branch (873:13): [True: 1.37k, False: 3]
  |  Branch (873:42): [True: 818, False: 554]
  ------------------
  874|    818|            buffer[pos++] = target2;
  875|    818|        }
  876|  1.37k|        if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) {
  ------------------
  |  Branch (876:13): [True: 1.37k, False: 4]
  |  Branch (876:42): [True: 829, False: 542]
  ------------------
  877|    829|            buffer[pos++] = target3;
  878|    829|        }
  879|  1.37k|        if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) {
  ------------------
  |  Branch (879:13): [True: 1.36k, False: 8]
  |  Branch (879:42): [True: 777, False: 590]
  ------------------
  880|    777|            buffer[pos++] = target4;
  881|    777|        }
  882|  1.37k|        idx_s += 4;
  883|  1.37k|        idx_l += index4;
  884|  1.37k|    }
  885|    792|    if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
  ------------------
  |  Branch (885:9): [True: 410, False: 382]
  |  Branch (885:34): [True: 408, False: 2]
  ------------------
  886|    408|        uint16_t target1 = small[idx_s];
  887|    408|        uint16_t target2 = small[idx_s + 1];
  888|    408|        binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1,
  889|    408|                      target2, &index1, &index2);
  890|    408|        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
  ------------------
  |  Branch (890:13): [True: 403, False: 5]
  |  Branch (890:42): [True: 265, False: 138]
  ------------------
  891|    265|            buffer[pos++] = target1;
  892|    265|        }
  893|    408|        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
  ------------------
  |  Branch (893:13): [True: 386, False: 22]
  |  Branch (893:42): [True: 273, False: 113]
  ------------------
  894|    273|            buffer[pos++] = target2;
  895|    273|        }
  896|    408|        idx_s += 2;
  897|    408|        idx_l += index2;
  898|    408|    }
  899|    792|    if ((idx_s < size_s) && (idx_l < size_l)) {
  ------------------
  |  Branch (899:9): [True: 362, False: 430]
  |  Branch (899:29): [True: 357, False: 5]
  ------------------
  900|    357|        uint16_t val_s = small[idx_s];
  901|    357|        int32_t index =
  902|    357|            binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
  903|    357|        if (index >= 0) buffer[pos++] = val_s;
  ------------------
  |  Branch (903:13): [True: 253, False: 104]
  ------------------
  904|    357|    }
  905|    792|    return (int32_t)pos;
  906|    792|}
intersect_skewed_uint16_cardinality:
  912|  2.28k|                                            size_t size_l) {
  913|  2.28k|    size_t pos = 0, idx_l = 0, idx_s = 0;
  914|       |
  915|  2.28k|    if (0 == size_s) {
  ------------------
  |  Branch (915:9): [True: 0, False: 2.28k]
  ------------------
  916|      0|        return 0;
  917|      0|    }
  918|       |
  919|  2.28k|    uint16_t val_l = large[idx_l], val_s = small[idx_s];
  920|       |
  921|  31.0k|    while (true) {
  ------------------
  |  Branch (921:12): [True: 31.0k, Folded]
  ------------------
  922|  31.0k|        if (val_l < val_s) {
  ------------------
  |  Branch (922:13): [True: 6.34k, False: 24.7k]
  ------------------
  923|  6.34k|            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
  924|  6.34k|            if (idx_l == size_l) break;
  ------------------
  |  Branch (924:17): [True: 140, False: 6.20k]
  ------------------
  925|  6.20k|            val_l = large[idx_l];
  926|  24.7k|        } else if (val_s < val_l) {
  ------------------
  |  Branch (926:20): [True: 10.9k, False: 13.7k]
  ------------------
  927|  10.9k|            idx_s++;
  928|  10.9k|            if (idx_s == size_s) break;
  ------------------
  |  Branch (928:17): [True: 932, False: 10.0k]
  ------------------
  929|  10.0k|            val_s = small[idx_s];
  930|  13.7k|        } else {
  931|  13.7k|            pos++;
  932|  13.7k|            idx_s++;
  933|  13.7k|            if (idx_s == size_s) break;
  ------------------
  |  Branch (933:17): [True: 1.13k, False: 12.6k]
  ------------------
  934|  12.6k|            val_s = small[idx_s];
  935|  12.6k|            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
  936|  12.6k|            if (idx_l == size_l) break;
  ------------------
  |  Branch (936:17): [True: 80, False: 12.5k]
  ------------------
  937|  12.5k|            val_l = large[idx_l];
  938|  12.5k|        }
  939|  31.0k|    }
  940|       |
  941|  2.28k|    return (int32_t)pos;
  942|  2.28k|}
intersect_skewed_uint16_nonempty:
  945|    571|                                      const uint16_t *large, size_t size_l) {
  946|    571|    size_t idx_l = 0, idx_s = 0;
  947|       |
  948|    571|    if (0 == size_s) {
  ------------------
  |  Branch (948:9): [True: 0, False: 571]
  ------------------
  949|      0|        return false;
  950|      0|    }
  951|       |
  952|    571|    uint16_t val_l = large[idx_l], val_s = small[idx_s];
  953|       |
  954|  2.14k|    while (true) {
  ------------------
  |  Branch (954:12): [True: 2.14k, Folded]
  ------------------
  955|  2.14k|        if (val_l < val_s) {
  ------------------
  |  Branch (955:13): [True: 813, False: 1.33k]
  ------------------
  956|    813|            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
  957|    813|            if (idx_l == size_l) break;
  ------------------
  |  Branch (957:17): [True: 33, False: 780]
  ------------------
  958|    780|            val_l = large[idx_l];
  959|  1.33k|        } else if (val_s < val_l) {
  ------------------
  |  Branch (959:20): [True: 875, False: 455]
  ------------------
  960|    875|            idx_s++;
  961|    875|            if (idx_s == size_s) break;
  ------------------
  |  Branch (961:17): [True: 83, False: 792]
  ------------------
  962|    792|            val_s = small[idx_s];
  963|    792|        } else {
  964|    455|            return true;
  965|    455|        }
  966|  2.14k|    }
  967|       |
  968|    116|    return false;
  969|    571|}
intersect_uint16_nonempty:
 1025|  1.82k|                               const uint16_t *B, const size_t lenB) {
 1026|  1.82k|    if (lenA == 0 || lenB == 0) return 0;
  ------------------
  |  Branch (1026:9): [True: 0, False: 1.82k]
  |  Branch (1026:22): [True: 0, False: 1.82k]
  ------------------
 1027|  1.82k|    const uint16_t *endA = A + lenA;
 1028|  1.82k|    const uint16_t *endB = B + lenB;
 1029|       |
 1030|  1.82k|    while (1) {
  ------------------
  |  Branch (1030:12): [True: 1.82k, Folded]
  ------------------
 1031|  10.5k|        while (*A < *B) {
  ------------------
  |  Branch (1031:16): [True: 8.18k, False: 2.36k]
  ------------------
 1032|  8.76k|        SKIP_FIRST_COMPARE:
 1033|  8.76k|            if (++A == endA) return false;
  ------------------
  |  Branch (1033:17): [True: 44, False: 8.72k]
  ------------------
 1034|  8.76k|        }
 1035|  5.36k|        while (*A > *B) {
  ------------------
  |  Branch (1035:16): [True: 3.01k, False: 2.35k]
  ------------------
 1036|  3.01k|            if (++B == endB) return false;
  ------------------
  |  Branch (1036:17): [True: 11, False: 2.99k]
  ------------------
 1037|  3.01k|        }
 1038|  2.35k|        if (*A == *B) {
  ------------------
  |  Branch (1038:13): [True: 1.77k, False: 581]
  ------------------
 1039|  1.77k|            return true;
 1040|  1.77k|        } else {
 1041|    581|            goto SKIP_FIRST_COMPARE;
 1042|    581|        }
 1043|  2.35k|    }
 1044|      0|    return false;  // NOTREACHED
 1045|  1.82k|}
union_uint16:
 1105|  4.79k|                    size_t size_2, uint16_t *buffer) {
 1106|  4.79k|    size_t pos = 0, idx_1 = 0, idx_2 = 0;
 1107|       |
 1108|  4.79k|    if (0 == size_2) {
  ------------------
  |  Branch (1108:9): [True: 69, False: 4.72k]
  ------------------
 1109|     69|        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
 1110|     69|        return size_1;
 1111|     69|    }
 1112|  4.72k|    if (0 == size_1) {
  ------------------
  |  Branch (1112:9): [True: 0, False: 4.72k]
  ------------------
 1113|      0|        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
 1114|      0|        return size_2;
 1115|      0|    }
 1116|       |
 1117|  4.72k|    uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];
 1118|       |
 1119|   211k|    while (true) {
  ------------------
  |  Branch (1119:12): [True: 211k, Folded]
  ------------------
 1120|   211k|        if (val_1 < val_2) {
  ------------------
  |  Branch (1120:13): [True: 8.07k, False: 203k]
  ------------------
 1121|  8.07k|            buffer[pos++] = val_1;
 1122|  8.07k|            ++idx_1;
 1123|  8.07k|            if (idx_1 >= size_1) break;
  ------------------
  |  Branch (1123:17): [True: 403, False: 7.67k]
  ------------------
 1124|  7.67k|            val_1 = set_1[idx_1];
 1125|   203k|        } else if (val_2 < val_1) {
  ------------------
  |  Branch (1125:20): [True: 170k, False: 33.3k]
  ------------------
 1126|   170k|            buffer[pos++] = val_2;
 1127|   170k|            ++idx_2;
 1128|   170k|            if (idx_2 >= size_2) break;
  ------------------
  |  Branch (1128:17): [True: 200, False: 169k]
  ------------------
 1129|   169k|            val_2 = set_2[idx_2];
 1130|   169k|        } else {
 1131|  33.3k|            buffer[pos++] = val_1;
 1132|  33.3k|            ++idx_1;
 1133|  33.3k|            ++idx_2;
 1134|  33.3k|            if (idx_1 >= size_1 || idx_2 >= size_2) break;
  ------------------
  |  Branch (1134:17): [True: 3.98k, False: 29.4k]
  |  Branch (1134:36): [True: 136, False: 29.2k]
  ------------------
 1135|  29.2k|            val_1 = set_1[idx_1];
 1136|  29.2k|            val_2 = set_2[idx_2];
 1137|  29.2k|        }
 1138|   211k|    }
 1139|       |
 1140|  4.72k|    if (idx_1 < size_1) {
  ------------------
  |  Branch (1140:9): [True: 336, False: 4.38k]
  ------------------
 1141|    336|        const size_t n_elems = size_1 - idx_1;
 1142|    336|        memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t));
 1143|    336|        pos += n_elems;
 1144|  4.38k|    } else if (idx_2 < size_2) {
  ------------------
  |  Branch (1144:16): [True: 1.87k, False: 2.50k]
  ------------------
 1145|  1.87k|        const size_t n_elems = size_2 - idx_2;
 1146|  1.87k|        memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t));
 1147|  1.87k|        pos += n_elems;
 1148|  1.87k|    }
 1149|       |
 1150|  4.72k|    return pos;
 1151|  4.72k|}
difference_uint16:
 1154|  3.57k|                      int length2, uint16_t *a_out) {
 1155|  3.57k|    int out_card = 0;
 1156|  3.57k|    int k1 = 0, k2 = 0;
 1157|  3.57k|    if (length1 == 0) return 0;
  ------------------
  |  Branch (1157:9): [True: 0, False: 3.57k]
  ------------------
 1158|  3.57k|    if (length2 == 0) {
  ------------------
  |  Branch (1158:9): [True: 0, False: 3.57k]
  ------------------
 1159|      0|        if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1);
  ------------------
  |  Branch (1159:13): [True: 0, False: 0]
  ------------------
 1160|      0|        return length1;
 1161|      0|    }
 1162|  3.57k|    uint16_t s1 = a1[k1];
 1163|  3.57k|    uint16_t s2 = a2[k2];
 1164|   446k|    while (true) {
  ------------------
  |  Branch (1164:12): [True: 446k, Folded]
  ------------------
 1165|   446k|        if (s1 < s2) {
  ------------------
  |  Branch (1165:13): [True: 220k, False: 226k]
  ------------------
 1166|   220k|            a_out[out_card++] = s1;
 1167|   220k|            ++k1;
 1168|   220k|            if (k1 >= length1) {
  ------------------
  |  Branch (1168:17): [True: 0, False: 220k]
  ------------------
 1169|      0|                break;
 1170|      0|            }
 1171|   220k|            s1 = a1[k1];
 1172|   226k|        } else if (s1 == s2) {
  ------------------
  |  Branch (1172:20): [True: 226k, False: 0]
  ------------------
 1173|   226k|            ++k1;
 1174|   226k|            ++k2;
 1175|   226k|            if (k1 >= length1) {
  ------------------
  |  Branch (1175:17): [True: 2.31k, False: 223k]
  ------------------
 1176|  2.31k|                break;
 1177|  2.31k|            }
 1178|   223k|            if (k2 >= length2) {
  ------------------
  |  Branch (1178:17): [True: 1.25k, False: 222k]
  ------------------
 1179|  1.25k|                memmove(a_out + out_card, a1 + k1,
 1180|  1.25k|                        sizeof(uint16_t) * (length1 - k1));
 1181|  1.25k|                return out_card + length1 - k1;
 1182|  1.25k|            }
 1183|   222k|            s1 = a1[k1];
 1184|   222k|            s2 = a2[k2];
 1185|   222k|        } else {  // if (val1>val2)
 1186|      0|            ++k2;
 1187|      0|            if (k2 >= length2) {
  ------------------
  |  Branch (1187:17): [True: 0, False: 0]
  ------------------
 1188|      0|                memmove(a_out + out_card, a1 + k1,
 1189|      0|                        sizeof(uint16_t) * (length1 - k1));
 1190|      0|                return out_card + length1 - k1;
 1191|      0|            }
 1192|      0|            s2 = a2[k2];
 1193|      0|        }
 1194|   446k|    }
 1195|  2.31k|    return out_card;
 1196|  3.57k|}
xor_uint16:
 1199|  8.23k|                   const uint16_t *array_2, int32_t card_2, uint16_t *out) {
 1200|  8.23k|    int32_t pos1 = 0, pos2 = 0, pos_out = 0;
 1201|   226k|    while (pos1 < card_1 && pos2 < card_2) {
  ------------------
  |  Branch (1201:12): [True: 218k, False: 7.61k]
  |  Branch (1201:29): [True: 217k, False: 619]
  ------------------
 1202|   217k|        const uint16_t v1 = array_1[pos1];
 1203|   217k|        const uint16_t v2 = array_2[pos2];
 1204|   217k|        if (v1 == v2) {
  ------------------
  |  Branch (1204:13): [True: 41.2k, False: 176k]
  ------------------
 1205|  41.2k|            ++pos1;
 1206|  41.2k|            ++pos2;
 1207|  41.2k|            continue;
 1208|  41.2k|        }
 1209|   176k|        if (v1 < v2) {
  ------------------
  |  Branch (1209:13): [True: 31.8k, False: 144k]
  ------------------
 1210|  31.8k|            out[pos_out++] = v1;
 1211|  31.8k|            ++pos1;
 1212|   144k|        } else {
 1213|   144k|            out[pos_out++] = v2;
 1214|   144k|            ++pos2;
 1215|   144k|        }
 1216|   176k|    }
 1217|  8.23k|    if (pos1 < card_1) {
  ------------------
  |  Branch (1217:9): [True: 619, False: 7.61k]
  ------------------
 1218|    619|        const size_t n_elems = card_1 - pos1;
 1219|    619|        memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t));
 1220|    619|        pos_out += (int32_t)n_elems;
 1221|  7.61k|    } else if (pos2 < card_2) {
  ------------------
  |  Branch (1221:16): [True: 2.39k, False: 5.22k]
  ------------------
 1222|  2.39k|        const size_t n_elems = card_2 - pos2;
 1223|  2.39k|        memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t));
 1224|  2.39k|        pos_out += (int32_t)n_elems;
 1225|  2.39k|    }
 1226|  8.23k|    return pos_out;
 1227|  8.23k|}
union_vector16:
 1654|  4.79k|                        uint16_t *output) {
 1655|  4.79k|    if ((length1 < 8) || (length2 < 8)) {
  ------------------
  |  Branch (1655:9): [True: 1.05k, False: 3.73k]
  |  Branch (1655:26): [True: 0, False: 3.73k]
  ------------------
 1656|  1.05k|        return (uint32_t)union_uint16(array1, length1, array2, length2, output);
 1657|  1.05k|    }
 1658|  3.73k|    __m128i vA, vB, V, vecMin, vecMax;
 1659|  3.73k|    __m128i laststore;
 1660|  3.73k|    uint16_t *initoutput = output;
 1661|  3.73k|    uint32_t len1 = length1 / 8;
 1662|  3.73k|    uint32_t len2 = length2 / 8;
 1663|  3.73k|    uint32_t pos1 = 0;
 1664|  3.73k|    uint32_t pos2 = 0;
 1665|       |    // we start the machine
 1666|  3.73k|    vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
 1667|  3.73k|    pos1++;
 1668|  3.73k|    vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
 1669|  3.73k|    pos2++;
 1670|  3.73k|    sse_merge(&vA, &vB, &vecMin, &vecMax);
 1671|  3.73k|    laststore = _mm_set1_epi16(-1);
 1672|  3.73k|    output += store_unique(laststore, vecMin, output);
 1673|  3.73k|    laststore = vecMin;
 1674|  3.73k|    if ((pos1 < len1) && (pos2 < len2)) {
  ------------------
  |  Branch (1674:9): [True: 3.20k, False: 532]
  |  Branch (1674:26): [True: 3.20k, False: 0]
  ------------------
 1675|  3.20k|        uint16_t curA, curB;
 1676|  3.20k|        curA = array1[8 * pos1];
 1677|  3.20k|        curB = array2[8 * pos2];
 1678|   110k|        while (true) {
  ------------------
  |  Branch (1678:16): [True: 110k, Folded]
  ------------------
 1679|   110k|            if (curA <= curB) {
  ------------------
  |  Branch (1679:17): [True: 46.7k, False: 63.7k]
  ------------------
 1680|  46.7k|                V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
 1681|  46.7k|                pos1++;
 1682|  46.7k|                if (pos1 < len1) {
  ------------------
  |  Branch (1682:21): [True: 44.3k, False: 2.44k]
  ------------------
 1683|  44.3k|                    curA = array1[8 * pos1];
 1684|  44.3k|                } else {
 1685|  2.44k|                    break;
 1686|  2.44k|                }
 1687|  63.7k|            } else {
 1688|  63.7k|                V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
 1689|  63.7k|                pos2++;
 1690|  63.7k|                if (pos2 < len2) {
  ------------------
  |  Branch (1690:21): [True: 63.0k, False: 755]
  ------------------
 1691|  63.0k|                    curB = array2[8 * pos2];
 1692|  63.0k|                } else {
 1693|    755|                    break;
 1694|    755|                }
 1695|  63.7k|            }
 1696|   107k|            sse_merge(&V, &vecMax, &vecMin, &vecMax);
 1697|   107k|            output += store_unique(laststore, vecMin, output);
 1698|   107k|            laststore = vecMin;
 1699|   107k|        }
 1700|  3.20k|        sse_merge(&V, &vecMax, &vecMin, &vecMax);
 1701|  3.20k|        output += store_unique(laststore, vecMin, output);
 1702|  3.20k|        laststore = vecMin;
 1703|  3.20k|    }
 1704|       |    // we finish the rest off using a scalar algorithm
 1705|       |    // could be improved?
 1706|       |    //
 1707|       |    // copy the small end on a tmp buffer
 1708|  3.73k|    uint32_t len = (uint32_t)(output - initoutput);
 1709|  3.73k|    uint16_t buffer[16];
 1710|  3.73k|    uint32_t leftoversize = store_unique(laststore, vecMax, buffer);
 1711|  3.73k|    if (pos1 == len1) {
  ------------------
  |  Branch (1711:9): [True: 2.98k, False: 755]
  ------------------
 1712|  2.98k|        memcpy(buffer + leftoversize, array1 + 8 * pos1,
 1713|  2.98k|               (length1 - 8 * len1) * sizeof(uint16_t));
 1714|  2.98k|        leftoversize += length1 - 8 * len1;
 1715|  2.98k|        qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
 1716|       |
 1717|  2.98k|        leftoversize = unique(buffer, leftoversize);
 1718|  2.98k|        len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2,
 1719|  2.98k|                                      length2 - 8 * pos2, output);
 1720|  2.98k|    } else {
 1721|    755|        memcpy(buffer + leftoversize, array2 + 8 * pos2,
 1722|    755|               (length2 - 8 * len2) * sizeof(uint16_t));
 1723|    755|        leftoversize += length2 - 8 * len2;
 1724|    755|        qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
 1725|    755|        leftoversize = unique(buffer, leftoversize);
 1726|    755|        len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1,
 1727|    755|                                      length1 - 8 * pos1, output);
 1728|    755|    }
 1729|  3.73k|    return len;
 1730|  4.79k|}
xor_vector16:
 1778|  8.25k|                      uint16_t *output) {
 1779|  8.25k|    if ((length1 < 8) || (length2 < 8)) {
  ------------------
  |  Branch (1779:9): [True: 3.32k, False: 4.92k]
  |  Branch (1779:26): [True: 375, False: 4.55k]
  ------------------
 1780|  3.69k|        return xor_uint16(array1, length1, array2, length2, output);
 1781|  3.69k|    }
 1782|  4.55k|    __m128i vA, vB, V, vecMin, vecMax;
 1783|  4.55k|    __m128i laststore;
 1784|  4.55k|    uint16_t *initoutput = output;
 1785|  4.55k|    uint32_t len1 = length1 / 8;
 1786|  4.55k|    uint32_t len2 = length2 / 8;
 1787|  4.55k|    uint32_t pos1 = 0;
 1788|  4.55k|    uint32_t pos2 = 0;
 1789|       |    // we start the machine
 1790|  4.55k|    vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
 1791|  4.55k|    pos1++;
 1792|  4.55k|    vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
 1793|  4.55k|    pos2++;
 1794|  4.55k|    sse_merge(&vA, &vB, &vecMin, &vecMax);
 1795|  4.55k|    laststore = _mm_set1_epi16(-1);
 1796|  4.55k|    uint16_t buffer[17];
 1797|  4.55k|    output += store_unique_xor(laststore, vecMin, output);
 1798|       |
 1799|  4.55k|    laststore = vecMin;
 1800|  4.55k|    if ((pos1 < len1) && (pos2 < len2)) {
  ------------------
  |  Branch (1800:9): [True: 4.17k, False: 374]
  |  Branch (1800:26): [True: 4.06k, False: 117]
  ------------------
 1801|  4.06k|        uint16_t curA, curB;
 1802|  4.06k|        curA = array1[8 * pos1];
 1803|  4.06k|        curB = array2[8 * pos2];
 1804|   193k|        while (true) {
  ------------------
  |  Branch (1804:16): [True: 193k, Folded]
  ------------------
 1805|   193k|            if (curA <= curB) {
  ------------------
  |  Branch (1805:17): [True: 92.1k, False: 100k]
  ------------------
 1806|  92.1k|                V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
 1807|  92.1k|                pos1++;
 1808|  92.1k|                if (pos1 < len1) {
  ------------------
  |  Branch (1808:21): [True: 89.4k, False: 2.71k]
  ------------------
 1809|  89.4k|                    curA = array1[8 * pos1];
 1810|  89.4k|                } else {
 1811|  2.71k|                    break;
 1812|  2.71k|                }
 1813|   100k|            } else {
 1814|   100k|                V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
 1815|   100k|                pos2++;
 1816|   100k|                if (pos2 < len2) {
  ------------------
  |  Branch (1816:21): [True: 99.5k, False: 1.35k]
  ------------------
 1817|  99.5k|                    curB = array2[8 * pos2];
 1818|  99.5k|                } else {
 1819|  1.35k|                    break;
 1820|  1.35k|                }
 1821|   100k|            }
 1822|   189k|            sse_merge(&V, &vecMax, &vecMin, &vecMax);
 1823|       |            // conditionally stores the last value of laststore as well as all
 1824|       |            // but the
 1825|       |            // last value of vecMin
 1826|   189k|            output += store_unique_xor(laststore, vecMin, output);
 1827|   189k|            laststore = vecMin;
 1828|   189k|        }
 1829|  4.06k|        sse_merge(&V, &vecMax, &vecMin, &vecMax);
 1830|       |        // conditionally stores the last value of laststore as well as all but
 1831|       |        // the
 1832|       |        // last value of vecMin
 1833|  4.06k|        output += store_unique_xor(laststore, vecMin, output);
 1834|  4.06k|        laststore = vecMin;
 1835|  4.06k|    }
 1836|  4.55k|    uint32_t len = (uint32_t)(output - initoutput);
 1837|       |
 1838|       |    // we finish the rest off using a scalar algorithm
 1839|       |    // could be improved?
 1840|       |    // conditionally stores the last value of laststore as well as all but the
 1841|       |    // last value of vecMax,
 1842|       |    // we store to "buffer"
 1843|  4.55k|    int leftoversize = store_unique_xor(laststore, vecMax, buffer);
 1844|  4.55k|    uint16_t vec7 = (uint16_t)_mm_extract_epi16(vecMax, 7);
 1845|  4.55k|    uint16_t vec6 = (uint16_t)_mm_extract_epi16(vecMax, 6);
 1846|  4.55k|    if (vec7 != vec6) buffer[leftoversize++] = vec7;
  ------------------
  |  Branch (1846:9): [True: 4.49k, False: 54]
  ------------------
 1847|  4.55k|    if (pos1 == len1) {
  ------------------
  |  Branch (1847:9): [True: 3.08k, False: 1.46k]
  ------------------
 1848|  3.08k|        memcpy(buffer + leftoversize, array1 + 8 * pos1,
 1849|  3.08k|               (length1 - 8 * len1) * sizeof(uint16_t));
 1850|  3.08k|        leftoversize += length1 - 8 * len1;
 1851|  3.08k|        if (leftoversize == 0) {  // trivial case
  ------------------
  |  Branch (1851:13): [True: 9, False: 3.07k]
  ------------------
 1852|      9|            memcpy(output, array2 + 8 * pos2,
 1853|      9|                   (length2 - 8 * pos2) * sizeof(uint16_t));
 1854|      9|            len += (length2 - 8 * pos2);
 1855|  3.07k|        } else {
 1856|  3.07k|            qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
 1857|  3.07k|            leftoversize = unique_xor(buffer, leftoversize);
 1858|  3.07k|            len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2,
 1859|  3.07k|                              length2 - 8 * pos2, output);
 1860|  3.07k|        }
 1861|  3.08k|    } else {
 1862|  1.46k|        memcpy(buffer + leftoversize, array2 + 8 * pos2,
 1863|  1.46k|               (length2 - 8 * len2) * sizeof(uint16_t));
 1864|  1.46k|        leftoversize += length2 - 8 * len2;
 1865|  1.46k|        if (leftoversize == 0) {  // trivial case
  ------------------
  |  Branch (1865:13): [True: 3, False: 1.46k]
  ------------------
 1866|      3|            memcpy(output, array1 + 8 * pos1,
 1867|      3|                   (length1 - 8 * pos1) * sizeof(uint16_t));
 1868|      3|            len += (length1 - 8 * pos1);
 1869|  1.46k|        } else {
 1870|  1.46k|            qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
 1871|  1.46k|            leftoversize = unique_xor(buffer, leftoversize);
 1872|  1.46k|            len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1,
 1873|  1.46k|                              length1 - 8 * pos1, output);
 1874|  1.46k|        }
 1875|  1.46k|    }
 1876|  4.55k|    return len;
 1877|  8.25k|}
fast_union_uint16:
 1980|  4.79k|                         uint16_t *buffer) {
 1981|  4.79k|#if CROARING_IS_X64
 1982|  4.79k|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (1982:9): [True: 4.79k, False: 0]
  ------------------
 1983|       |        // compute union with smallest array first
 1984|  4.79k|        if (size_1 < size_2) {
  ------------------
  |  Branch (1984:13): [True: 2.47k, False: 2.31k]
  ------------------
 1985|  2.47k|            return union_vector16(set_1, (uint32_t)size_1, set_2,
 1986|  2.47k|                                  (uint32_t)size_2, buffer);
 1987|  2.47k|        } else {
 1988|  2.31k|            return union_vector16(set_2, (uint32_t)size_2, set_1,
 1989|  2.31k|                                  (uint32_t)size_1, buffer);
 1990|  2.31k|        }
 1991|  4.79k|    } else {
 1992|       |        // compute union with smallest array first
 1993|      0|        if (size_1 < size_2) {
  ------------------
  |  Branch (1993:13): [True: 0, False: 0]
  ------------------
 1994|      0|            return union_uint16(set_1, size_1, set_2, size_2, buffer);
 1995|      0|        } else {
 1996|      0|            return union_uint16(set_2, size_2, set_1, size_1, buffer);
 1997|      0|        }
 1998|      0|    }
 1999|       |#else
 2000|       |    // compute union with smallest array first
 2001|       |    if (size_1 < size_2) {
 2002|       |        return union_uint16(set_1, size_1, set_2, size_2, buffer);
 2003|       |    } else {
 2004|       |        return union_uint16(set_2, size_2, set_1, size_1, buffer);
 2005|       |    }
 2006|       |#endif
 2007|  4.79k|}
memequals:
 2111|  7.50k|bool memequals(const void *s1, const void *s2, size_t n) {
 2112|  7.50k|    if (n == 0) {
  ------------------
  |  Branch (2112:9): [True: 0, False: 7.50k]
  ------------------
 2113|      0|        return true;
 2114|      0|    }
 2115|  7.50k|#if CROARING_IS_X64
 2116|  7.50k|    int support = croaring_hardware_support();
 2117|  7.50k|#if CROARING_COMPILER_SUPPORTS_AVX512
 2118|  7.50k|    if (support & ROARING_SUPPORTS_AVX512) {
  ------------------
  |  Branch (2118:9): [True: 0, False: 7.50k]
  ------------------
 2119|      0|        return _avx512_memequals(s1, s2, n);
 2120|      0|    } else
 2121|  7.50k|#endif  // CROARING_COMPILER_SUPPORTS_AVX512
 2122|  7.50k|        if (support & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (2122:13): [True: 7.50k, False: 0]
  ------------------
 2123|  7.50k|            return _avx2_memequals(s1, s2, n);
 2124|  7.50k|        } else {
 2125|      0|            return memcmp(s1, s2, n) == 0;
 2126|      0|        }
 2127|       |#else
 2128|       |    return memcmp(s1, s2, n) == 0;
 2129|       |#endif
 2130|  7.50k|}
array_util.c:binarySearch4:
  804|  1.37k|                          int32_t *index4) {
  805|  1.37k|    const uint16_t *base1 = array;
  806|  1.37k|    const uint16_t *base2 = array;
  807|  1.37k|    const uint16_t *base3 = array;
  808|  1.37k|    const uint16_t *base4 = array;
  809|  1.37k|    if (n == 0) return;
  ------------------
  |  Branch (809:9): [True: 0, False: 1.37k]
  ------------------
  810|  17.2k|    while (n > 1) {
  ------------------
  |  Branch (810:12): [True: 15.8k, False: 1.37k]
  ------------------
  811|  15.8k|        int32_t half = n >> 1;
  812|  15.8k|        base1 = (base1[half] < target1) ? &base1[half] : base1;
  ------------------
  |  Branch (812:17): [True: 1.27k, False: 14.5k]
  ------------------
  813|  15.8k|        base2 = (base2[half] < target2) ? &base2[half] : base2;
  ------------------
  |  Branch (813:17): [True: 2.27k, False: 13.5k]
  ------------------
  814|  15.8k|        base3 = (base3[half] < target3) ? &base3[half] : base3;
  ------------------
  |  Branch (814:17): [True: 2.92k, False: 12.9k]
  ------------------
  815|  15.8k|        base4 = (base4[half] < target4) ? &base4[half] : base4;
  ------------------
  |  Branch (815:17): [True: 3.44k, False: 12.4k]
  ------------------
  816|  15.8k|        n -= half;
  817|  15.8k|    }
  818|  1.37k|    *index1 = (int32_t)((*base1 < target1) + base1 - array);
  819|  1.37k|    *index2 = (int32_t)((*base2 < target2) + base2 - array);
  820|  1.37k|    *index3 = (int32_t)((*base3 < target3) + base3 - array);
  821|  1.37k|    *index4 = (int32_t)((*base4 < target4) + base4 - array);
  822|  1.37k|}
array_util.c:binarySearch2:
  833|    408|                          uint16_t target2, int32_t *index1, int32_t *index2) {
  834|    408|    const uint16_t *base1 = array;
  835|    408|    const uint16_t *base2 = array;
  836|    408|    if (n == 0) return;
  ------------------
  |  Branch (836:9): [True: 0, False: 408]
  ------------------
  837|  3.90k|    while (n > 1) {
  ------------------
  |  Branch (837:12): [True: 3.49k, False: 408]
  ------------------
  838|  3.49k|        int32_t half = n >> 1;
  839|  3.49k|        base1 = (base1[half] < target1) ? &base1[half] : base1;
  ------------------
  |  Branch (839:17): [True: 855, False: 2.64k]
  ------------------
  840|  3.49k|        base2 = (base2[half] < target2) ? &base2[half] : base2;
  ------------------
  |  Branch (840:17): [True: 1.44k, False: 2.04k]
  ------------------
  841|  3.49k|        n -= half;
  842|  3.49k|    }
  843|    408|    *index1 = (int32_t)((*base1 < target1) + base1 - array);
  844|    408|    *index2 = (int32_t)((*base2 < target2) + base2 - array);
  845|    408|}
array_util.c:sse_merge:
 1244|   311k|                             __m128i *vecMin, __m128i *vecMax) {  // output
 1245|   311k|    __m128i vecTmp;
 1246|   311k|    vecTmp = _mm_min_epu16(*vInput1, *vInput2);
 1247|   311k|    *vecMax = _mm_max_epu16(*vInput1, *vInput2);
 1248|   311k|    vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2);
 1249|   311k|    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
 1250|   311k|    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
 1251|   311k|    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 1252|   311k|    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
 1253|   311k|    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
 1254|   311k|    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 1255|   311k|    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
 1256|   311k|    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
 1257|   311k|    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 1258|   311k|    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
 1259|   311k|    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
 1260|   311k|    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 1261|   311k|    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
 1262|   311k|    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
 1263|   311k|    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 1264|   311k|    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
 1265|   311k|    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
 1266|   311k|    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 1267|   311k|    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
 1268|   311k|    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
 1269|       |    *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 1270|   311k|}
array_util.c:store_unique:
 1619|   118k|static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) {
 1620|   118k|    __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2);
 1621|       |    // lots of high latency instructions follow (optimize?)
 1622|   118k|    int M = _mm_movemask_epi8(
 1623|   118k|        _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128()));
 1624|   118k|    int numberofnewvalues = 8 - _mm_popcnt_u32(M);
 1625|   118k|    __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
 1626|   118k|    __m128i val = _mm_shuffle_epi8(newval, key);
 1627|   118k|    _mm_storeu_si128((__m128i *)output, val);
 1628|   118k|    return numberofnewvalues;
 1629|   118k|}
array_util.c:uint16_compare:
 1645|   133k|static int uint16_compare(const void *a, const void *b) {
 1646|   133k|    return (*(uint16_t *)a - *(uint16_t *)b);
 1647|   133k|}
array_util.c:unique:
 1634|  3.73k|static inline uint32_t unique(uint16_t *out, uint32_t len) {
 1635|  3.73k|    uint32_t pos = 1;
 1636|  38.6k|    for (uint32_t i = 1; i < len; ++i) {
  ------------------
  |  Branch (1636:26): [True: 34.9k, False: 3.73k]
  ------------------
 1637|  34.9k|        if (out[i] != out[i - 1]) {
  ------------------
  |  Branch (1637:13): [True: 34.7k, False: 108]
  ------------------
 1638|  34.7k|            out[pos++] = out[i];
 1639|  34.7k|        }
 1640|  34.9k|    }
 1641|  3.73k|    return pos;
 1642|  3.73k|}
array_util.c:store_unique_xor:
 1746|   202k|                                   uint16_t *output) {
 1747|   202k|    __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4);
 1748|       |    __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2);
 1749|   202k|    __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1);
 1750|   202k|    __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval);
 1751|   202k|    __m128i equalleftoright = _mm_or_si128(equalleft, equalright);
 1752|   202k|    int M = _mm_movemask_epi8(
 1753|   202k|        _mm_packs_epi16(equalleftoright, _mm_setzero_si128()));
 1754|   202k|    int numberofnewvalues = 8 - _mm_popcnt_u32(M);
 1755|   202k|    __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
 1756|   202k|    __m128i val = _mm_shuffle_epi8(vecTmp2, key);
 1757|   202k|    _mm_storeu_si128((__m128i *)output, val);
 1758|   202k|    return numberofnewvalues;
 1759|   202k|}
array_util.c:unique_xor:
 1764|  4.54k|static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
 1765|  4.54k|    uint32_t pos = 1;
 1766|  45.2k|    for (uint32_t i = 1; i < len; ++i) {
  ------------------
  |  Branch (1766:26): [True: 40.7k, False: 4.54k]
  ------------------
 1767|  40.7k|        if (out[i] != out[i - 1]) {
  ------------------
  |  Branch (1767:13): [True: 40.4k, False: 231]
  ------------------
 1768|  40.4k|            out[pos++] = out[i];
 1769|  40.4k|        } else
 1770|    231|            pos--;  // if it is identical to previous, delete it
 1771|  40.7k|    }
 1772|  4.54k|    return pos;
 1773|  4.54k|}
array_util.c:_avx2_memequals:
 2069|  7.50k|static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
 2070|  7.50k|    const uint8_t *ptr1 = (const uint8_t *)s1;
 2071|  7.50k|    const uint8_t *ptr2 = (const uint8_t *)s2;
 2072|  7.50k|    const uint8_t *end1 = ptr1 + n;
 2073|  7.50k|    const uint8_t *end8 = ptr1 + n / 8 * 8;
 2074|  7.50k|    const uint8_t *end32 = ptr1 + n / 32 * 32;
 2075|       |
 2076|   137k|    while (ptr1 < end32) {
  ------------------
  |  Branch (2076:12): [True: 130k, False: 7.44k]
  ------------------
 2077|   130k|        __m256i r1 = _mm256_loadu_si256((const __m256i *)ptr1);
 2078|   130k|        __m256i r2 = _mm256_loadu_si256((const __m256i *)ptr2);
 2079|   130k|        int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
 2080|   130k|        if ((uint32_t)mask != UINT32_MAX) {
  ------------------
  |  Branch (2080:13): [True: 60, False: 130k]
  ------------------
 2081|     60|            return false;
 2082|     60|        }
 2083|   130k|        ptr1 += 32;
 2084|   130k|        ptr2 += 32;
 2085|   130k|    }
 2086|       |
 2087|  13.3k|    while (ptr1 < end8) {
  ------------------
  |  Branch (2087:12): [True: 6.03k, False: 7.35k]
  ------------------
 2088|  6.03k|        uint64_t v1, v2;
 2089|  6.03k|        memcpy(&v1, ptr1, sizeof(uint64_t));
 2090|  6.03k|        memcpy(&v2, ptr2, sizeof(uint64_t));
 2091|  6.03k|        if (v1 != v2) {
  ------------------
  |  Branch (2091:13): [True: 89, False: 5.94k]
  ------------------
 2092|     89|            return false;
 2093|     89|        }
 2094|  5.94k|        ptr1 += 8;
 2095|  5.94k|        ptr2 += 8;
 2096|  5.94k|    }
 2097|       |
 2098|  26.5k|    while (ptr1 < end1) {
  ------------------
  |  Branch (2098:12): [True: 19.2k, False: 7.31k]
  ------------------
 2099|  19.2k|        if (*ptr1 != *ptr2) {
  ------------------
  |  Branch (2099:13): [True: 33, False: 19.1k]
  ------------------
 2100|     33|            return false;
 2101|     33|        }
 2102|  19.1k|        ptr1++;
 2103|  19.1k|        ptr2++;
 2104|  19.1k|    }
 2105|       |
 2106|  7.31k|    return true;
 2107|  7.35k|}

bitset_extract_setbits_avx2:
  685|  1.00k|                                   uint32_t base) {
  686|  1.00k|    uint32_t *initout = out;
  687|  1.00k|    __m256i baseVec = _mm256_set1_epi32(base - 1);
  688|  1.00k|    __m256i incVec = _mm256_set1_epi32(64);
  689|  1.00k|    __m256i add8 = _mm256_set1_epi32(8);
  690|  1.00k|    uint32_t *safeout = out + outcapacity;
  691|  1.00k|    size_t i = 0;
  692|   838k|    for (; (i < length) && (out + 64 <= safeout); ++i) {
  ------------------
  |  Branch (692:12): [True: 837k, False: 286]
  |  Branch (692:28): [True: 837k, False: 714]
  ------------------
  693|   837k|        uint64_t w = words[i];
  694|   837k|        if (w == 0) {
  ------------------
  |  Branch (694:13): [True: 117k, False: 719k]
  ------------------
  695|   117k|            baseVec = _mm256_add_epi32(baseVec, incVec);
  696|   719k|        } else {
  697|  3.59M|            for (int k = 0; k < 4; ++k) {
  ------------------
  |  Branch (697:29): [True: 2.87M, False: 719k]
  ------------------
  698|  2.87M|                uint8_t byteA = (uint8_t)w;
  699|  2.87M|                uint8_t byteB = (uint8_t)(w >> 8);
  700|  2.87M|                w >>= 16;
  701|  2.87M|                __m256i vecA =
  702|  2.87M|                    _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]);
  703|  2.87M|                __m256i vecB =
  704|  2.87M|                    _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]);
  705|  2.87M|                uint8_t advanceA = lengthTable[byteA];
  706|  2.87M|                uint8_t advanceB = lengthTable[byteB];
  707|  2.87M|                vecA = _mm256_add_epi32(baseVec, vecA);
  708|  2.87M|                baseVec = _mm256_add_epi32(baseVec, add8);
  709|  2.87M|                vecB = _mm256_add_epi32(baseVec, vecB);
  710|  2.87M|                baseVec = _mm256_add_epi32(baseVec, add8);
  711|  2.87M|                _mm256_storeu_si256((__m256i *)out, vecA);
  712|  2.87M|                out += advanceA;
  713|  2.87M|                _mm256_storeu_si256((__m256i *)out, vecB);
  714|  2.87M|                out += advanceB;
  715|  2.87M|            }
  716|   719k|        }
  717|   837k|    }
  718|  1.00k|    base += i * 64;
  719|  15.4k|    for (; (i < length) && (out < safeout); ++i) {
  ------------------
  |  Branch (719:12): [True: 15.0k, False: 390]
  |  Branch (719:28): [True: 14.4k, False: 610]
  ------------------
  720|  14.4k|        uint64_t w = words[i];
  721|  33.8k|        while ((w != 0) && (out < safeout)) {
  ------------------
  |  Branch (721:16): [True: 19.4k, False: 14.4k]
  |  Branch (721:28): [True: 19.4k, False: 0]
  ------------------
  722|  19.4k|            int r =
  723|  19.4k|                roaring_trailing_zeroes(w);  // on x64, should compile to TZCNT
  724|  19.4k|            uint32_t val = r + base;
  725|  19.4k|            memcpy(out, &val,
  726|  19.4k|                   sizeof(uint32_t));  // should be compiled as a MOV on x64
  727|  19.4k|            out++;
  728|  19.4k|            w &= (w - 1);
  729|  19.4k|        }
  730|  14.4k|        base += 64;
  731|  14.4k|    }
  732|  1.00k|    return out - initout;
  733|  1.00k|}
bitset_extract_setbits:
  738|    508|                              uint32_t *out, uint32_t base) {
  739|    508|    int outpos = 0;
  740|   520k|    for (size_t i = 0; i < length; ++i) {
  ------------------
  |  Branch (740:24): [True: 520k, False: 508]
  ------------------
  741|   520k|        uint64_t w = words[i];
  742|  3.01M|        while (w != 0) {
  ------------------
  |  Branch (742:16): [True: 2.49M, False: 520k]
  ------------------
  743|  2.49M|            int r =
  744|  2.49M|                roaring_trailing_zeroes(w);  // on x64, should compile to TZCNT
  745|  2.49M|            uint32_t val = r + base;
  746|  2.49M|            memcpy(out + outpos, &val,
  747|  2.49M|                   sizeof(uint32_t));  // should be compiled as a MOV on x64
  748|  2.49M|            outpos++;
  749|  2.49M|            w &= (w - 1);
  750|  2.49M|        }
  751|   520k|        base += 64;
  752|   520k|    }
  753|    508|    return outpos;
  754|    508|}
bitset_extract_setbits_uint16:
  848|  2.88k|                                     uint16_t *out, uint16_t base) {
  849|  2.88k|    int outpos = 0;
  850|  2.95M|    for (size_t i = 0; i < length; ++i) {
  ------------------
  |  Branch (850:24): [True: 2.94M, False: 2.88k]
  ------------------
  851|  2.94M|        uint64_t w = words[i];
  852|  5.66M|        while (w != 0) {
  ------------------
  |  Branch (852:16): [True: 2.71M, False: 2.94M]
  ------------------
  853|  2.71M|            int r = roaring_trailing_zeroes(w);
  854|  2.71M|            out[outpos++] = (uint16_t)(r + base);
  855|  2.71M|            w &= (w - 1);
  856|  2.71M|        }
  857|  2.94M|        base += 64;
  858|  2.94M|    }
  859|  2.88k|    return outpos;
  860|  2.88k|}
bitset_clear_list:
 1028|  1.67k|                           uint64_t length) {
 1029|  1.67k|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (1029:9): [True: 1.67k, False: 0]
  ------------------
 1030|  1.67k|        return _asm_bitset_clear_list(words, card, list, length);
 1031|  1.67k|    } else {
 1032|      0|        return _scalar_bitset_clear_list(words, card, list, length);
 1033|      0|    }
 1034|  1.67k|}
bitset_set_list_withcard:
 1037|  1.45k|                                  const uint16_t *list, uint64_t length) {
 1038|  1.45k|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (1038:9): [True: 1.45k, False: 0]
  ------------------
 1039|  1.45k|        return _asm_bitset_set_list_withcard(words, card, list, length);
 1040|  1.45k|    } else {
 1041|      0|        return _scalar_bitset_set_list_withcard(words, card, list, length);
 1042|      0|    }
 1043|  1.45k|}
bitset_set_list:
 1045|    196|void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
 1046|    196|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (1046:9): [True: 196, False: 0]
  ------------------
 1047|    196|        _asm_bitset_set_list(words, list, length);
 1048|    196|    } else {
 1049|      0|        _scalar_bitset_set_list(words, list, length);
 1050|      0|    }
 1051|    196|}
bitset_flip_list_withcard:
 1107|  1.46k|                                   const uint16_t *list, uint64_t length) {
 1108|  1.46k|    uint64_t offset, load, newload, pos, index;
 1109|  1.46k|    const uint16_t *end = list + length;
 1110|  2.52M|    while (list != end) {
  ------------------
  |  Branch (1110:12): [True: 2.52M, False: 1.46k]
  ------------------
 1111|  2.52M|        pos = *list;
 1112|  2.52M|        offset = pos >> 6;
 1113|  2.52M|        index = pos % 64;
 1114|  2.52M|        load = words[offset];
 1115|  2.52M|        newload = load ^ (UINT64_C(1) << index);
 1116|       |        // todo: is a branch here all that bad?
 1117|  2.52M|        card +=
 1118|       |            (1 - 2 * (((UINT64_C(1) << index) & load) >> index));  // +1 or -1
 1119|  2.52M|        words[offset] = newload;
 1120|  2.52M|        list++;
 1121|  2.52M|    }
 1122|  1.46k|    return card;
 1123|  1.46k|}
bitset_util.c:_asm_bitset_clear_list:
  949|  1.67k|                                              uint64_t length) {
  950|  1.67k|    uint64_t offset, load, pos;
  951|  1.67k|    uint64_t shift = 6;
  952|  1.67k|    const uint16_t *end = list + length;
  953|  1.67k|    if (!length) return card;
  ------------------
  |  Branch (953:9): [True: 0, False: 1.67k]
  ------------------
  954|       |    // btr is not available as an intrinsic in GCC
  955|  1.67k|    __asm volatile(
  956|  1.67k|        "1:\n"
  957|  1.67k|        "movzwq (%[list]), %[pos]\n"
  958|  1.67k|        "shrx %[shift], %[pos], %[offset]\n"
  959|  1.67k|        "mov (%[words],%[offset],8), %[load]\n"
  960|  1.67k|        "btr %[pos], %[load]\n"
  961|  1.67k|        "mov %[load], (%[words],%[offset],8)\n"
  962|  1.67k|        "sbb $0, %[card]\n"
  963|  1.67k|        "add $2, %[list]\n"
  964|  1.67k|        "cmp %[list], %[end]\n"
  965|  1.67k|        "jnz 1b"
  966|  1.67k|        : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
  967|  1.67k|          [pos] "=&r"(pos), [offset] "=&r"(offset)
  968|  1.67k|        : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)
  969|  1.67k|        :
  970|  1.67k|        /* clobbers */ "memory");
  971|  1.67k|    return card;
  972|  1.67k|}
bitset_util.c:_asm_bitset_set_list_withcard:
  867|  1.45k|                                                     uint64_t length) {
  868|  1.45k|    uint64_t offset, load, pos;
  869|  1.45k|    uint64_t shift = 6;
  870|  1.45k|    const uint16_t *end = list + length;
  871|  1.45k|    if (!length) return card;
  ------------------
  |  Branch (871:9): [True: 0, False: 1.45k]
  ------------------
  872|       |    // TODO: could unroll for performance, see bitset_set_list
  873|       |    // bts is not available as an intrinsic in GCC
  874|  1.45k|    __asm volatile(
  875|  1.45k|        "1:\n"
  876|  1.45k|        "movzwq (%[list]), %[pos]\n"
  877|  1.45k|        "shrx %[shift], %[pos], %[offset]\n"
  878|  1.45k|        "mov (%[words],%[offset],8), %[load]\n"
  879|  1.45k|        "bts %[pos], %[load]\n"
  880|  1.45k|        "mov %[load], (%[words],%[offset],8)\n"
  881|  1.45k|        "sbb $-1, %[card]\n"
  882|  1.45k|        "add $2, %[list]\n"
  883|  1.45k|        "cmp %[list], %[end]\n"
  884|  1.45k|        "jnz 1b"
  885|  1.45k|        : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
  886|  1.45k|          [pos] "=&r"(pos), [offset] "=&r"(offset)
  887|  1.45k|        : [end] "r"(end), [words] "r"(words), [shift] "r"(shift));
  888|  1.45k|    return card;
  889|  1.45k|}
bitset_util.c:_asm_bitset_set_list:
  892|    196|                                        uint64_t length) {
  893|    196|    uint64_t pos;
  894|    196|    const uint16_t *end = list + length;
  895|       |
  896|    196|    uint64_t shift = 6;
  897|    196|    uint64_t offset;
  898|    196|    uint64_t load;
  899|  4.86k|    for (; list + 3 < end; list += 4) {
  ------------------
  |  Branch (899:12): [True: 4.67k, False: 196]
  ------------------
  900|  4.67k|        pos = list[0];
  901|  4.67k|        __asm volatile(
  902|  4.67k|            "shrx %[shift], %[pos], %[offset]\n"
  903|  4.67k|            "mov (%[words],%[offset],8), %[load]\n"
  904|  4.67k|            "bts %[pos], %[load]\n"
  905|  4.67k|            "mov %[load], (%[words],%[offset],8)"
  906|  4.67k|            : [load] "=&r"(load), [offset] "=&r"(offset)
  907|  4.67k|            : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
  908|  4.67k|        pos = list[1];
  909|  4.67k|        __asm volatile(
  910|  4.67k|            "shrx %[shift], %[pos], %[offset]\n"
  911|  4.67k|            "mov (%[words],%[offset],8), %[load]\n"
  912|  4.67k|            "bts %[pos], %[load]\n"
  913|  4.67k|            "mov %[load], (%[words],%[offset],8)"
  914|  4.67k|            : [load] "=&r"(load), [offset] "=&r"(offset)
  915|  4.67k|            : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
  916|  4.67k|        pos = list[2];
  917|  4.67k|        __asm volatile(
  918|  4.67k|            "shrx %[shift], %[pos], %[offset]\n"
  919|  4.67k|            "mov (%[words],%[offset],8), %[load]\n"
  920|  4.67k|            "bts %[pos], %[load]\n"
  921|  4.67k|            "mov %[load], (%[words],%[offset],8)"
  922|  4.67k|            : [load] "=&r"(load), [offset] "=&r"(offset)
  923|  4.67k|            : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
  924|  4.67k|        pos = list[3];
  925|  4.67k|        __asm volatile(
  926|  4.67k|            "shrx %[shift], %[pos], %[offset]\n"
  927|  4.67k|            "mov (%[words],%[offset],8), %[load]\n"
  928|  4.67k|            "bts %[pos], %[load]\n"
  929|  4.67k|            "mov %[load], (%[words],%[offset],8)"
  930|  4.67k|            : [load] "=&r"(load), [offset] "=&r"(offset)
  931|  4.67k|            : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
  932|  4.67k|    }
  933|       |
  934|    525|    while (list != end) {
  ------------------
  |  Branch (934:12): [True: 329, False: 196]
  ------------------
  935|    329|        pos = list[0];
  936|    329|        __asm volatile(
  937|    329|            "shrx %[shift], %[pos], %[offset]\n"
  938|    329|            "mov (%[words],%[offset],8), %[load]\n"
  939|    329|            "bts %[pos], %[load]\n"
  940|    329|            "mov %[load], (%[words],%[offset],8)"
  941|    329|            : [load] "=&r"(load), [offset] "=&r"(offset)
  942|    329|            : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos));
  943|    329|        list++;
  944|    329|    }
  945|    196|}

array_container_create_given_capacity:
   49|   149k|array_container_t *array_container_create_given_capacity(int32_t size) {
   50|   149k|    array_container_t *container;
   51|       |
   52|   149k|    if ((container = (array_container_t *)roaring_malloc(
  ------------------
  |  Branch (52:9): [True: 0, False: 149k]
  ------------------
   53|   149k|             sizeof(array_container_t))) == NULL) {
   54|      0|        return NULL;
   55|      0|    }
   56|       |
   57|   149k|    if (size <= 0) {  // we don't want to rely on malloc(0)
  ------------------
  |  Branch (57:9): [True: 91.5k, False: 57.8k]
  ------------------
   58|  91.5k|        container->array = NULL;
   59|  91.5k|    } else if ((container->array = (uint16_t *)roaring_malloc(sizeof(uint16_t) *
  ------------------
  |  Branch (59:16): [True: 0, False: 57.8k]
  ------------------
   60|  57.8k|                                                              size)) == NULL) {
   61|      0|        roaring_free(container);
   62|      0|        return NULL;
   63|      0|    }
   64|       |
   65|   149k|    container->capacity = size;
   66|   149k|    container->cardinality = 0;
   67|       |
   68|   149k|    return container;
   69|   149k|}
array_container_create:
   72|  26.5k|array_container_t *array_container_create(void) {
   73|  26.5k|    return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE);
   74|  26.5k|}
array_container_create_range:
   77|    149|array_container_t *array_container_create_range(uint32_t min, uint32_t max) {
   78|    149|    array_container_t *answer =
   79|    149|        array_container_create_given_capacity(max - min + 1);
   80|    149|    if (answer == NULL) return answer;
  ------------------
  |  Branch (80:9): [True: 0, False: 149]
  ------------------
   81|    149|    answer->cardinality = 0;
   82|    298|    for (uint32_t k = min; k < max; k++) {
  ------------------
  |  Branch (82:28): [True: 149, False: 149]
  ------------------
   83|    149|        answer->array[answer->cardinality++] = k;
   84|    149|    }
   85|    149|    return answer;
   86|    149|}
array_container_clone:
   90|  23.4k|array_container_t *array_container_clone(const array_container_t *src) {
   91|  23.4k|    array_container_t *newcontainer =
   92|  23.4k|        array_container_create_given_capacity(src->capacity);
   93|  23.4k|    if (newcontainer == NULL) return NULL;
  ------------------
  |  Branch (93:9): [True: 0, False: 23.4k]
  ------------------
   94|       |
   95|  23.4k|    newcontainer->cardinality = src->cardinality;
   96|       |
   97|  23.4k|    memcpy(newcontainer->array, src->array,
   98|  23.4k|           src->cardinality * sizeof(uint16_t));
   99|       |
  100|  23.4k|    return newcontainer;
  101|  23.4k|}
array_container_shrink_to_fit:
  131|  3.87k|int array_container_shrink_to_fit(array_container_t *src) {
  132|  3.87k|    if (src->cardinality == src->capacity) return 0;  // nothing to do
  ------------------
  |  Branch (132:9): [True: 511, False: 3.36k]
  ------------------
  133|  3.36k|    int savings = src->capacity - src->cardinality;
  134|  3.36k|    src->capacity = src->cardinality;
  135|  3.36k|    if (src->capacity ==
  ------------------
  |  Branch (135:9): [True: 0, False: 3.36k]
  ------------------
  136|  3.36k|        0) {  // we do not want to rely on realloc for zero allocs
  137|      0|        roaring_free(src->array);
  138|      0|        src->array = NULL;
  139|  3.36k|    } else {
  140|  3.36k|        uint16_t *oldarray = src->array;
  141|  3.36k|        src->array = (uint16_t *)roaring_realloc(
  142|  3.36k|            oldarray, src->capacity * sizeof(uint16_t));
  143|  3.36k|        if (src->array == NULL) roaring_free(oldarray);  // should never happen?
  ------------------
  |  Branch (143:13): [True: 0, False: 3.36k]
  ------------------
  144|  3.36k|    }
  145|  3.36k|    return savings;
  146|  3.87k|}
array_container_free:
  149|   149k|void array_container_free(array_container_t *arr) {
  150|   149k|    if (arr == NULL) return;
  ------------------
  |  Branch (150:9): [True: 0, False: 149k]
  ------------------
  151|   149k|    roaring_free(arr->array);
  152|   149k|    roaring_free(arr);
  153|   149k|}
array_container_grow:
  167|   121k|                          bool preserve) {
  168|   121k|    int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536);
  ------------------
  |  Branch (168:20): [True: 121k, False: 0]
  ------------------
  169|   121k|    int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max);
  170|       |
  171|   121k|    container->capacity = new_capacity;
  172|   121k|    uint16_t *array = container->array;
  173|       |
  174|   121k|    if (preserve) {
  ------------------
  |  Branch (174:9): [True: 111k, False: 10.1k]
  ------------------
  175|   111k|        container->array =
  176|   111k|            (uint16_t *)roaring_realloc(array, new_capacity * sizeof(uint16_t));
  177|   111k|        if (container->array == NULL) roaring_free(array);
  ------------------
  |  Branch (177:13): [True: 0, False: 111k]
  ------------------
  178|   111k|    } else {
  179|  10.1k|        roaring_free(array);
  180|  10.1k|        container->array =
  181|  10.1k|            (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
  182|  10.1k|    }
  183|       |
  184|       |    // if realloc fails, we have container->array == NULL.
  185|   121k|}
array_container_copy:
  189|     30|                          array_container_t *dst) {
  190|     30|    const int32_t cardinality = src->cardinality;
  191|     30|    if (cardinality > dst->capacity) {
  ------------------
  |  Branch (191:9): [True: 30, False: 0]
  ------------------
  192|     30|        array_container_grow(dst, cardinality, false);
  193|     30|    }
  194|       |
  195|     30|    dst->cardinality = cardinality;
  196|     30|    memcpy(dst->array, src->array, cardinality * sizeof(uint16_t));
  197|     30|}
array_container_union:
  211|  4.79k|                           array_container_t *out) {
  212|  4.79k|    const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
  213|  4.79k|    const int32_t max_cardinality = card_1 + card_2;
  214|       |
  215|  4.79k|    if (out->capacity < max_cardinality) {
  ------------------
  |  Branch (215:9): [True: 0, False: 4.79k]
  ------------------
  216|       |        array_container_grow(out, max_cardinality, false);
  217|      0|    }
  218|  4.79k|    out->cardinality = (int32_t)fast_union_uint16(
  219|  4.79k|        array_1->array, card_1, array_2->array, card_2, out->array);
  220|  4.79k|}
array_container_andnot:
  228|  5.97k|                            array_container_t *out) {
  229|  5.97k|    if (out->capacity < array_1->cardinality)
  ------------------
  |  Branch (229:9): [True: 2.40k, False: 3.57k]
  ------------------
  230|  2.40k|        array_container_grow(out, array_1->cardinality, false);
  231|  5.97k|#if CROARING_IS_X64
  232|  5.97k|    if ((croaring_hardware_support() & ROARING_SUPPORTS_AVX2) &&
  ------------------
  |  Branch (232:9): [True: 5.97k, False: 0]
  ------------------
  233|  5.97k|        (out != array_1) && (out != array_2)) {
  ------------------
  |  Branch (233:9): [True: 2.40k, False: 3.57k]
  |  Branch (233:29): [True: 2.40k, False: 0]
  ------------------
  234|  2.40k|        out->cardinality = difference_vector16(
  235|  2.40k|            array_1->array, array_1->cardinality, array_2->array,
  236|  2.40k|            array_2->cardinality, out->array);
  237|  3.57k|    } else {
  238|  3.57k|        out->cardinality =
  239|  3.57k|            difference_uint16(array_1->array, array_1->cardinality,
  240|  3.57k|                              array_2->array, array_2->cardinality, out->array);
  241|  3.57k|    }
  242|       |#else
  243|       |    out->cardinality =
  244|       |        difference_uint16(array_1->array, array_1->cardinality, array_2->array,
  245|       |                          array_2->cardinality, out->array);
  246|       |#endif
  247|  5.97k|}
array_container_xor:
  256|  8.25k|                         array_container_t *out) {
  257|  8.25k|    const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
  258|  8.25k|    const int32_t max_cardinality = card_1 + card_2;
  259|  8.25k|    if (out->capacity < max_cardinality) {
  ------------------
  |  Branch (259:9): [True: 0, False: 8.25k]
  ------------------
  260|      0|        array_container_grow(out, max_cardinality, false);
  261|      0|    }
  262|       |
  263|  8.25k|#if CROARING_IS_X64
  264|  8.25k|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (264:9): [True: 8.25k, False: 0]
  ------------------
  265|  8.25k|        out->cardinality =
  266|  8.25k|            xor_vector16(array_1->array, array_1->cardinality, array_2->array,
  267|  8.25k|                         array_2->cardinality, out->array);
  268|  8.25k|    } else {
  269|      0|        out->cardinality =
  270|      0|            xor_uint16(array_1->array, array_1->cardinality, array_2->array,
  271|      0|                       array_2->cardinality, out->array);
  272|      0|    }
  273|       |#else
  274|       |    out->cardinality =
  275|       |        xor_uint16(array_1->array, array_1->cardinality, array_2->array,
  276|       |                   array_2->cardinality, out->array);
  277|       |#endif
  278|  8.25k|}
array_container_intersection:
  290|  2.40k|                                  array_container_t *out) {
  291|  2.40k|    int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
  292|  2.40k|            min_card = minimum_int32(card_1, card_2);
  293|  2.40k|    const int threshold = 64;  // subject to tuning
  294|  2.40k|#if CROARING_IS_X64
  295|  2.40k|    if (out->capacity < min_card) {
  ------------------
  |  Branch (295:9): [True: 2.40k, False: 0]
  ------------------
  296|  2.40k|        array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
  297|  2.40k|                             false);
  298|  2.40k|    }
  299|       |#else
  300|       |    if (out->capacity < min_card) {
  301|       |        array_container_grow(out, min_card, false);
  302|       |    }
  303|       |#endif
  304|       |
  305|  2.40k|    if (card_1 * threshold < card_2) {
  ------------------
  |  Branch (305:9): [True: 356, False: 2.04k]
  ------------------
  306|    356|        out->cardinality = intersect_skewed_uint16(
  307|    356|            array1->array, card_1, array2->array, card_2, out->array);
  308|  2.04k|    } else if (card_2 * threshold < card_1) {
  ------------------
  |  Branch (308:16): [True: 215, False: 1.82k]
  ------------------
  309|    215|        out->cardinality = intersect_skewed_uint16(
  310|    215|            array2->array, card_2, array1->array, card_1, out->array);
  311|  1.82k|    } else {
  312|  1.82k|#if CROARING_IS_X64
  313|  1.82k|        if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (313:13): [True: 1.82k, False: 0]
  ------------------
  314|  1.82k|            out->cardinality = intersect_vector16(
  315|  1.82k|                array1->array, card_1, array2->array, card_2, out->array);
  316|  1.82k|        } else {
  317|      0|            out->cardinality = intersect_uint16(
  318|      0|                array1->array, card_1, array2->array, card_2, out->array);
  319|      0|        }
  320|       |#else
  321|       |        out->cardinality = intersect_uint16(array1->array, card_1,
  322|       |                                            array2->array, card_2, out->array);
  323|       |#endif
  324|  1.82k|    }
  325|  2.40k|}
array_container_intersection_cardinality:
  330|  9.60k|                                             const array_container_t *array2) {
  331|  9.60k|    int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
  332|  9.60k|    const int threshold = 64;  // subject to tuning
  333|  9.60k|    if (card_1 * threshold < card_2) {
  ------------------
  |  Branch (333:9): [True: 1.42k, False: 8.17k]
  ------------------
  334|  1.42k|        return intersect_skewed_uint16_cardinality(array1->array, card_1,
  335|  1.42k|                                                   array2->array, card_2);
  336|  8.17k|    } else if (card_2 * threshold < card_1) {
  ------------------
  |  Branch (336:16): [True: 860, False: 7.31k]
  ------------------
  337|    860|        return intersect_skewed_uint16_cardinality(array2->array, card_2,
  338|    860|                                                   array1->array, card_1);
  339|  7.31k|    } else {
  340|  7.31k|#if CROARING_IS_X64
  341|  7.31k|        if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (341:13): [True: 7.31k, False: 0]
  ------------------
  342|  7.31k|            return intersect_vector16_cardinality(array1->array, card_1,
  343|  7.31k|                                                  array2->array, card_2);
  344|  7.31k|        } else {
  345|      0|            return intersect_uint16_cardinality(array1->array, card_1,
  346|      0|                                                array2->array, card_2);
  347|      0|        }
  348|       |#else
  349|       |        return intersect_uint16_cardinality(array1->array, card_1,
  350|       |                                            array2->array, card_2);
  351|       |#endif
  352|  7.31k|    }
  353|  9.60k|}
array_container_intersect:
  356|  2.40k|                               const array_container_t *array2) {
  357|  2.40k|    int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
  358|  2.40k|    const int threshold = 64;  // subject to tuning
  359|  2.40k|    if (card_1 * threshold < card_2) {
  ------------------
  |  Branch (359:9): [True: 356, False: 2.04k]
  ------------------
  360|    356|        return intersect_skewed_uint16_nonempty(array1->array, card_1,
  361|    356|                                                array2->array, card_2);
  362|  2.04k|    } else if (card_2 * threshold < card_1) {
  ------------------
  |  Branch (362:16): [True: 215, False: 1.82k]
  ------------------
  363|    215|        return intersect_skewed_uint16_nonempty(array2->array, card_2,
  364|    215|                                                array1->array, card_1);
  365|  1.82k|    } else {
  366|       |        // we do not bother vectorizing
  367|  1.82k|        return intersect_uint16_nonempty(array1->array, card_1, array2->array,
  368|  1.82k|                                         card_2);
  369|  1.82k|    }
  370|  2.40k|}
array_container_intersection_inplace:
  376|  4.95k|                                          const array_container_t *src_2) {
  377|  4.95k|    int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
  378|  4.95k|    const int threshold = 64;  // subject to tuning
  379|  4.95k|    if (card_1 * threshold < card_2) {
  ------------------
  |  Branch (379:9): [True: 0, False: 4.95k]
  ------------------
  380|      0|        src_1->cardinality = intersect_skewed_uint16(
  381|      0|            src_1->array, card_1, src_2->array, card_2, src_1->array);
  382|  4.95k|    } else if (card_2 * threshold < card_1) {
  ------------------
  |  Branch (382:16): [True: 221, False: 4.73k]
  ------------------
  383|    221|        src_1->cardinality = intersect_skewed_uint16(
  384|    221|            src_2->array, card_2, src_1->array, card_1, src_1->array);
  385|  4.73k|    } else {
  386|  4.73k|#if CROARING_IS_X64
  387|  4.73k|        if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (387:13): [True: 4.73k, False: 0]
  ------------------
  388|  4.73k|            src_1->cardinality = intersect_vector16_inplace(
  389|  4.73k|                src_1->array, card_1, src_2->array, card_2);
  390|  4.73k|        } else {
  391|      0|            src_1->cardinality = intersect_uint16(
  392|      0|                src_1->array, card_1, src_2->array, card_2, src_1->array);
  393|      0|        }
  394|       |#else
  395|       |        src_1->cardinality = intersect_uint16(
  396|       |            src_1->array, card_1, src_2->array, card_2, src_1->array);
  397|       |#endif
  398|  4.73k|    }
  399|  4.95k|}
array_container_to_uint32_array:
  403|  6.03k|                                    uint32_t base) {
  404|  6.03k|#if CROARING_IS_X64
  405|  6.03k|    int support = croaring_hardware_support();
  406|  6.03k|#if CROARING_COMPILER_SUPPORTS_AVX512
  407|  6.03k|    if (support & ROARING_SUPPORTS_AVX512) {
  ------------------
  |  Branch (407:9): [True: 0, False: 6.03k]
  ------------------
  408|      0|        return avx512_array_container_to_uint32_array(vout, cont->array,
  409|      0|                                                      cont->cardinality, base);
  410|      0|    }
  411|  6.03k|#endif
  412|  6.03k|    if (support & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (412:9): [True: 6.03k, False: 0]
  ------------------
  413|  6.03k|        return array_container_to_uint32_array_vector16(
  414|  6.03k|            vout, cont->array, cont->cardinality, base);
  415|  6.03k|    }
  416|      0|#endif  // CROARING_IS_X64
  417|      0|    int outpos = 0;
  418|      0|    uint32_t *out = (uint32_t *)vout;
  419|      0|    size_t i = 0;
  420|      0|    for (; i < (size_t)cont->cardinality; ++i) {
  ------------------
  |  Branch (420:12): [True: 0, False: 0]
  ------------------
  421|      0|        const uint32_t val = base + cont->array[i];
  422|      0|        memcpy(out + outpos, &val,
  423|      0|               sizeof(uint32_t));  // should be compiled as a MOV on x64
  424|      0|        outpos++;
  425|      0|    }
  426|      0|    return outpos;
  427|  6.03k|}
array_container_number_of_runs:
  495|  13.0k|int32_t array_container_number_of_runs(const array_container_t *ac) {
  496|       |    // Can SIMD work here?
  497|  13.0k|    int32_t nr_runs = 0;
  498|  13.0k|    int32_t prev = -2;
  499|  1.65M|    for (const uint16_t *p = ac->array; p != ac->array + ac->cardinality; ++p) {
  ------------------
  |  Branch (499:41): [True: 1.64M, False: 13.0k]
  ------------------
  500|  1.64M|        if (*p != prev + 1) nr_runs++;
  ------------------
  |  Branch (500:13): [True: 898k, False: 747k]
  ------------------
  501|  1.64M|        prev = *p;
  502|  1.64M|    }
  503|  13.0k|    return nr_runs;
  504|  13.0k|}
array_container_write:
  512|  7.31k|int32_t array_container_write(const array_container_t *container, char *buf) {
  513|       |#if CROARING_IS_BIG_ENDIAN
  514|       |    for (int32_t i = 0; i < container->cardinality; ++i) {
  515|       |        uint16_t v_le = croaring_htole16(container->array[i]);
  516|       |        memcpy(buf + i * sizeof(uint16_t), &v_le, sizeof(uint16_t));
  517|       |    }
  518|       |#else
  519|  7.31k|    memcpy(buf, container->array, container->cardinality * sizeof(uint16_t));
  520|  7.31k|#endif
  521|  7.31k|    return array_container_size_in_bytes(container);
  522|  7.31k|}
array_container_is_subset:
  525|  5.85k|                               const array_container_t *container2) {
  526|  5.85k|    if (container1->cardinality > container2->cardinality) {
  ------------------
  |  Branch (526:9): [True: 670, False: 5.18k]
  ------------------
  527|    670|        return false;
  528|    670|    }
  529|  5.18k|    int i1 = 0, i2 = 0;
  530|  49.0k|    while (i1 < container1->cardinality && i2 < container2->cardinality) {
  ------------------
  |  Branch (530:12): [True: 49.0k, False: 0]
  |  Branch (530:44): [True: 49.0k, False: 27]
  ------------------
  531|  49.0k|        if (container1->array[i1] == container2->array[i2]) {
  ------------------
  |  Branch (531:13): [True: 0, False: 49.0k]
  ------------------
  532|      0|            i1++;
  533|      0|            i2++;
  534|  49.0k|        } else if (container1->array[i1] > container2->array[i2]) {
  ------------------
  |  Branch (534:20): [True: 43.9k, False: 5.15k]
  ------------------
  535|  43.9k|            i2++;
  536|  43.9k|        } else {  // container1->array[i1] < container2->array[i2]
  537|  5.15k|            return false;
  538|  5.15k|        }
  539|  49.0k|    }
  540|     27|    if (i1 == container1->cardinality) {
  ------------------
  |  Branch (540:9): [True: 0, False: 27]
  ------------------
  541|      0|        return true;
  542|     27|    } else {
  543|       |        return false;
  544|     27|    }
  545|     27|}
array_container_read:
  548|  7.36k|                             const char *buf) {
  549|  7.36k|    if (container->capacity < cardinality) {
  ------------------
  |  Branch (549:9): [True: 0, False: 7.36k]
  ------------------
  550|      0|        array_container_grow(container, cardinality, false);
  551|      0|    }
  552|  7.36k|    container->cardinality = cardinality;
  553|       |#if CROARING_IS_BIG_ENDIAN
  554|       |    for (int32_t i = 0; i < cardinality; ++i) {
  555|       |        uint16_t v_le;
  556|       |        memcpy(&v_le, buf + i * sizeof(uint16_t), sizeof(uint16_t));
  557|       |        container->array[i] = croaring_letoh16(v_le);
  558|       |    }
  559|       |#else
  560|  7.36k|    memcpy(container->array, buf, container->cardinality * sizeof(uint16_t));
  561|  7.36k|#endif
  562|       |
  563|  7.36k|    return array_container_size_in_bytes(container);
  564|  7.36k|}
array_container_iterate:
  567|  6.65k|                             roaring_iterator iterator, void *ptr) {
  568|  2.05M|    for (int i = 0; i < cont->cardinality; i++)
  ------------------
  |  Branch (568:21): [True: 2.04M, False: 6.65k]
  ------------------
  569|  2.04M|        if (!iterator(cont->array[i] + base, ptr)) return false;
  ------------------
  |  Branch (569:13): [True: 0, False: 2.04M]
  ------------------
  570|  6.65k|    return true;
  571|  6.65k|}
array.c:clamp:
  162|   121k|static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
  163|   121k|    return ((val < min) ? min : (val > max) ? max : val);
  ------------------
  |  Branch (163:13): [True: 27.1k, False: 94.6k]
  |  Branch (163:33): [True: 248, False: 94.4k]
  ------------------
  164|   121k|}
array.c:grow_capacity:
  155|   121k|static inline int32_t grow_capacity(int32_t capacity) {
  156|   121k|    return (capacity <= 0)   ? ARRAY_DEFAULT_INIT_SIZE
  ------------------
  |  Branch (156:12): [True: 26.5k, False: 95.2k]
  ------------------
  157|   121k|           : capacity < 64   ? capacity * 2
  ------------------
  |  Branch (157:14): [True: 69.3k, False: 25.9k]
  ------------------
  158|  95.2k|           : capacity < 1024 ? capacity * 3 / 2
  ------------------
  |  Branch (158:14): [True: 25.6k, False: 288]
  ------------------
  159|  25.9k|                             : capacity * 5 / 4;
  160|   121k|}
array.c:minimum_int32:
  280|  2.40k|static inline int32_t minimum_int32(int32_t a, int32_t b) {
  281|  2.40k|    return (a < b) ? a : b;
  ------------------
  |  Branch (281:12): [True: 1.02k, False: 1.37k]
  ------------------
  282|  2.40k|}

bitset_container_clear:
   51|   150k|void bitset_container_clear(bitset_container_t *bitset) {
   52|   150k|    memset(bitset->words, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
   53|   150k|    bitset->cardinality = 0;
   54|   150k|}
bitset_container_set_all:
   56|    165|void bitset_container_set_all(bitset_container_t *bitset) {
   57|       |    memset(bitset->words, INT64_C(-1),
   58|    165|           sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
   59|    165|    bitset->cardinality = (1 << 16);
   60|    165|}
bitset_container_create:
   63|   150k|bitset_container_t *bitset_container_create(void) {
   64|   150k|    bitset_container_t *bitset =
   65|   150k|        (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));
   66|       |
   67|   150k|    if (!bitset) {
  ------------------
  |  Branch (67:9): [True: 0, False: 150k]
  ------------------
   68|      0|        return NULL;
   69|      0|    }
   70|       |
   71|   150k|    size_t align_size = 32;
   72|   150k|#if CROARING_IS_X64
   73|   150k|    int support = croaring_hardware_support();
   74|   150k|    if (support & ROARING_SUPPORTS_AVX512) {
  ------------------
  |  Branch (74:9): [True: 0, False: 150k]
  ------------------
   75|       |        // sizeof(__m512i) == 64
   76|      0|        align_size = 64;
   77|   150k|    } else {
   78|       |        // sizeof(__m256i) == 32
   79|   150k|        align_size = 32;
   80|   150k|    }
   81|   150k|#endif
   82|   150k|    bitset->words = (uint64_t *)roaring_aligned_malloc(
   83|   150k|        align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
   84|   150k|    if (!bitset->words) {
  ------------------
  |  Branch (84:9): [True: 0, False: 150k]
  ------------------
   85|      0|        roaring_free(bitset);
   86|      0|        return NULL;
   87|      0|    }
   88|   150k|    bitset_container_clear(bitset);
   89|   150k|    return bitset;
   90|   150k|}
bitset_container_copy:
   94|  3.99k|                           bitset_container_t *dest) {
   95|  3.99k|    dest->cardinality = source->cardinality;
   96|  3.99k|    memcpy(dest->words, source->words,
   97|  3.99k|           sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
   98|  3.99k|}
bitset_container_free:
  129|   296k|void bitset_container_free(bitset_container_t *bitset) {
  130|   296k|    if (bitset == NULL) return;
  ------------------
  |  Branch (130:9): [True: 0, False: 296k]
  ------------------
  131|   296k|    roaring_aligned_free(bitset->words);
  132|   296k|    roaring_free(bitset);
  133|   296k|}
bitset_container_clone:
  137|   145k|bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
  138|   145k|    bitset_container_t *bitset =
  139|   145k|        (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));
  140|       |
  141|   145k|    if (!bitset) {
  ------------------
  |  Branch (141:9): [True: 0, False: 145k]
  ------------------
  142|      0|        return NULL;
  143|      0|    }
  144|       |
  145|   145k|    size_t align_size = 32;
  146|   145k|#if CROARING_IS_X64
  147|   145k|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
  ------------------
  |  Branch (147:9): [True: 0, False: 145k]
  ------------------
  148|       |        // sizeof(__m512i) == 64
  149|      0|        align_size = 64;
  150|   145k|    } else {
  151|       |        // sizeof(__m256i) == 32
  152|   145k|        align_size = 32;
  153|   145k|    }
  154|   145k|#endif
  155|   145k|    bitset->words = (uint64_t *)roaring_aligned_malloc(
  156|   145k|        align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
  157|   145k|    if (!bitset->words) {
  ------------------
  |  Branch (157:9): [True: 0, False: 145k]
  ------------------
  158|      0|        roaring_free(bitset);
  159|      0|        return NULL;
  160|      0|    }
  161|   145k|    bitset->cardinality = src->cardinality;
  162|   145k|    memcpy(bitset->words, src->words,
  163|   145k|           sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
  164|   145k|    return bitset;
  165|   145k|}
bitset_container_compute_cardinality:
  269|  3.44k|int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
  270|  3.44k|    int support = croaring_hardware_support();
  271|  3.44k|#if CROARING_COMPILER_SUPPORTS_AVX512
  272|  3.44k|    if (support & ROARING_SUPPORTS_AVX512) {
  ------------------
  |  Branch (272:9): [True: 0, False: 3.44k]
  ------------------
  273|      0|        return (int)avx512_vpopcount(
  274|      0|            (const __m512i *)bitset->words,
  275|      0|            BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG));
  ------------------
  |  |  253|      0|#define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t)
  ------------------
  276|      0|    } else
  277|  3.44k|#endif  // CROARING_COMPILER_SUPPORTS_AVX512
  278|  3.44k|        if (support & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (278:13): [True: 3.44k, False: 0]
  ------------------
  279|  3.44k|            return (int)avx2_harley_seal_popcount256(
  280|  3.44k|                (const __m256i *)bitset->words,
  281|  3.44k|                BITSET_CONTAINER_SIZE_IN_WORDS / (CROARING_WORDS_IN_AVX2_REG));
  ------------------
  |  |  250|  3.44k|#define CROARING_WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
  ------------------
  282|  3.44k|        } else {
  283|      0|            return _scalar_bitset_container_compute_cardinality(bitset);
  284|      0|        }
  285|  3.44k|}
bitset_container_and_nocard:
  724|  1.50k|                                         bitset_container_t *dst) {            \
  725|  1.50k|    int support = croaring_hardware_support();                                 \
  726|  1.50k|    if ( support & ROARING_SUPPORTS_AVX512 ) {                                 \
  ------------------
  |  Branch (726:10): [True: 0, False: 1.50k]
  ------------------
  727|      0|      return _avx512_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
  728|      0|    }                                                                          \
  729|  1.50k|    else if ( support & ROARING_SUPPORTS_AVX2 ) {                              \
  ------------------
  |  Branch (729:15): [True: 1.50k, False: 0]
  ------------------
  730|  1.50k|      return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst);      \
  731|  1.50k|    } else {                                                                   \
  732|      0|      return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
  733|      0|    }                                                                          \
  734|  1.50k|  }                                                                            \
bitset_container_and_justcard:
  736|  1.50k|                                           const bitset_container_t *src_2) {  \
  737|  1.50k|     int support = croaring_hardware_support();                                \
  738|  1.50k|    if ( support & ROARING_SUPPORTS_AVX512 ) {                                 \
  ------------------
  |  Branch (738:10): [True: 0, False: 1.50k]
  ------------------
  739|      0|      return _avx512_bitset_container_##opname##_justcard(src_1, src_2);       \
  740|      0|    }                                                                          \
  741|  1.50k|    else if ( support & ROARING_SUPPORTS_AVX2 ) {                              \
  ------------------
  |  Branch (741:15): [True: 1.50k, False: 0]
  ------------------
  742|  1.50k|      return _avx2_bitset_container_##opname##_justcard(src_1, src_2);         \
  743|  1.50k|    } else {                                                                   \
  744|      0|      return _scalar_bitset_container_##opname##_justcard(src_1, src_2);       \
  745|      0|    }                                                                          \
  746|  1.50k|  }
bitset_container_xor:
  711|  1.53k|                                bitset_container_t *dst) {                     \
  712|  1.53k|    int support = croaring_hardware_support();                                 \
  713|  1.53k|    if ( support & ROARING_SUPPORTS_AVX512 ) {                                 \
  ------------------
  |  Branch (713:10): [True: 0, False: 1.53k]
  ------------------
  714|      0|      return _avx512_bitset_container_##opname(src_1, src_2, dst);             \
  715|      0|    }                                                                          \
  716|  1.53k|    else if ( support & ROARING_SUPPORTS_AVX2 ) {                              \
  ------------------
  |  Branch (716:15): [True: 1.53k, False: 0]
  ------------------
  717|  1.53k|      return _avx2_bitset_container_##opname(src_1, src_2, dst);               \
  718|  1.53k|    } else {                                                                   \
  719|      0|      return _scalar_bitset_container_##opname(src_1, src_2, dst);             \
  720|      0|    }                                                                          \
  721|  1.53k|  }                                                                            \
bitset_container_to_uint32_array:
  936|  1.50k|){
  937|  1.50k|#if CROARING_IS_X64
  938|  1.50k|   int support = croaring_hardware_support();
  939|  1.50k|#if CROARING_COMPILER_SUPPORTS_AVX512
  940|  1.50k|   if(( support & ROARING_SUPPORTS_AVX512 ) &&  (bc->cardinality >= 8192))  // heuristic
  ------------------
  |  Branch (940:7): [True: 0, False: 1.50k]
  |  Branch (940:49): [True: 0, False: 0]
  ------------------
  941|      0|		return (int) bitset_extract_setbits_avx512(bc->words,
  942|      0|                BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
  943|  1.50k|   else
  944|  1.50k|#endif
  945|  1.50k|   if(( support & ROARING_SUPPORTS_AVX2 ) &&  (bc->cardinality >= 8192))  // heuristic
  ------------------
  |  Branch (945:7): [True: 1.50k, False: 0]
  |  Branch (945:47): [True: 1.00k, False: 508]
  ------------------
  946|  1.00k|		return (int) bitset_extract_setbits_avx2(bc->words,
  947|  1.00k|                BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
  948|    508|	else
  949|    508|		return (int) bitset_extract_setbits(bc->words,
  950|    508|                BITSET_CONTAINER_SIZE_IN_WORDS, out, base);
  951|       |#else
  952|       |	return (int) bitset_extract_setbits(bc->words,
  953|       |                BITSET_CONTAINER_SIZE_IN_WORDS, out, base);
  954|       |#endif
  955|  1.50k|}
bitset_container_write:
 1050|  72.5k|                                  char *buf) {
 1051|       |#if CROARING_IS_BIG_ENDIAN
 1052|       |	for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
 1053|       |		uint64_t w_le = croaring_htole64(container->words[i]);
 1054|       |		memcpy(buf + i * sizeof(uint64_t), &w_le, sizeof(uint64_t));
 1055|       |	}
 1056|       |#else
 1057|  72.5k|	memcpy(buf, container->words, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
 1058|  72.5k|#endif
 1059|  72.5k|	return bitset_container_size_in_bytes(container);
 1060|  72.5k|}
bitset_container_read:
 1064|  72.5k|		const char *buf)  {
 1065|  72.5k|	container->cardinality = cardinality;
 1066|       |#if CROARING_IS_BIG_ENDIAN
 1067|       |	for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
 1068|       |		uint64_t w_le;
 1069|       |		memcpy(&w_le, buf + i * sizeof(uint64_t), sizeof(uint64_t));
 1070|       |		container->words[i] = croaring_letoh64(w_le);
 1071|       |	}
 1072|       |#else
 1073|  72.5k|	memcpy(container->words, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
 1074|  72.5k|#endif
 1075|  72.5k|	return bitset_container_size_in_bytes(container);
 1076|  72.5k|}
bitset_container_iterate:
 1078|  1.64k|bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) {
 1079|  1.68M|  for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
  ------------------
  |  Branch (1079:23): [True: 1.68M, False: 1.64k]
  ------------------
 1080|  1.68M|    uint64_t w = cont->words[i];
 1081|  61.2M|    while (w != 0) {
  ------------------
  |  Branch (1081:12): [True: 59.5M, False: 1.68M]
  ------------------
 1082|  59.5M|      uint64_t t = w & (~w + 1);
 1083|  59.5M|      int r = roaring_trailing_zeroes(w);
 1084|  59.5M|      if(!iterator(r + base, ptr)) return false;
  ------------------
  |  Branch (1084:10): [True: 0, False: 59.5M]
  ------------------
 1085|  59.5M|      w ^= t;
 1086|  59.5M|    }
 1087|  1.68M|    base += 64;
 1088|  1.68M|  }
 1089|  1.64k|  return true;
 1090|  1.64k|}
bitset_container_equals:
 1144|  72.5k|bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
 1145|  72.5k|  if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
  ------------------
  |  Branch (1145:6): [True: 72.5k, False: 0]
  |  Branch (1145:65): [True: 72.5k, False: 0]
  ------------------
 1146|  72.5k|    if(container1->cardinality != container2->cardinality) {
  ------------------
  |  Branch (1146:8): [True: 0, False: 72.5k]
  ------------------
 1147|      0|      return false;
 1148|      0|    }
 1149|  72.5k|    if (container1->cardinality == INT32_C(0x10000)) {
  ------------------
  |  Branch (1149:9): [True: 68.5k, False: 4.04k]
  ------------------
 1150|  68.5k|      return true;
 1151|  68.5k|    }
 1152|  72.5k|  }
 1153|  4.04k|#if CROARING_IS_X64
 1154|  4.04k|  int support = croaring_hardware_support();
 1155|  4.04k|#if CROARING_COMPILER_SUPPORTS_AVX512
 1156|  4.04k|  if( support & ROARING_SUPPORTS_AVX512 ) {
  ------------------
  |  Branch (1156:7): [True: 0, False: 4.04k]
  ------------------
 1157|      0|    return _avx512_bitset_container_equals(container1, container2);
 1158|      0|  }
 1159|  4.04k|  else
 1160|  4.04k|#endif
 1161|  4.04k|  if( support & ROARING_SUPPORTS_AVX2 ) {
  ------------------
  |  Branch (1161:7): [True: 4.04k, False: 0]
  ------------------
 1162|  4.04k|    return _avx2_bitset_container_equals(container1, container2);
 1163|  4.04k|  }
 1164|      0|#endif
 1165|      0|  return memcmp(container1->words,
 1166|      0|                container2->words,
 1167|      0|                BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0;
 1168|  4.04k|}
bitset_container_minimum:
 1218|  1.26k|uint16_t bitset_container_minimum(const bitset_container_t *container) {
 1219|  18.1k|  for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
  ------------------
  |  Branch (1219:23): [True: 18.1k, False: 0]
  ------------------
 1220|  18.1k|    uint64_t w = container->words[i];
 1221|  18.1k|    if (w != 0) {
  ------------------
  |  Branch (1221:9): [True: 1.26k, False: 16.9k]
  ------------------
 1222|  1.26k|      int r = roaring_trailing_zeroes(w);
 1223|  1.26k|      return r + i * 64;
 1224|  1.26k|    }
 1225|  18.1k|  }
 1226|      0|  return UINT16_MAX;
 1227|  1.26k|}
bitset_container_maximum:
 1230|  4.36k|uint16_t bitset_container_maximum(const bitset_container_t *container) {
 1231|  1.23M|  for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) {
  ------------------
  |  Branch (1231:56): [True: 1.23M, False: 0]
  ------------------
 1232|  1.23M|    uint64_t w = container->words[i];
 1233|  1.23M|    if (w != 0) {
  ------------------
  |  Branch (1233:9): [True: 4.36k, False: 1.22M]
  ------------------
 1234|  4.36k|      int r = roaring_leading_zeroes(w);
 1235|  4.36k|      return i * 64 + 63  - r;
 1236|  4.36k|    }
 1237|  1.23M|  }
 1238|      0|  return 0;
 1239|  4.36k|}
bitset_container_index_equalorlarger:
 1299|  1.87k|int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
 1300|  1.87k|  uint32_t x32 = x;
 1301|  1.87k|  uint32_t k = x32 / 64;
 1302|  1.87k|  uint64_t word = container->words[k];
 1303|  1.87k|  const int diff = x32 - k * 64; // in [0,64)
 1304|  1.87k|  word = (word >> diff) << diff; // a mask is faster, but we don't care
 1305|  6.70k|  while(word == 0) {
  ------------------
  |  Branch (1305:9): [True: 4.83k, False: 1.87k]
  ------------------
 1306|  4.83k|    k++;
 1307|  4.83k|    if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
  ------------------
  |  Branch (1307:8): [True: 0, False: 4.83k]
  ------------------
 1308|  4.83k|    word = container->words[k];
 1309|  4.83k|  }
 1310|  1.87k|  return k * 64 + roaring_trailing_zeroes(word);
 1311|  1.87k|}
bitset.c:_avx2_bitset_container_and_nocard:
  503|  1.50k|      bitset_container_t *dst) {                                               \
  504|  1.50k|    const uint8_t *__restrict__ words_1 = (const uint8_t *)src_1->words;       \
  505|  1.50k|    const uint8_t *__restrict__ words_2 = (const uint8_t *)src_2->words;       \
  506|  1.50k|    /* not using the blocking optimization for some reason*/                   \
  507|  1.50k|    uint8_t *out = (uint8_t *)dst->words;                                      \
  508|  1.50k|    const int innerloop = 8;                                                   \
  509|  1.50k|    for (size_t i = 0;                                                         \
  510|  49.7k|         i < BITSET_CONTAINER_SIZE_IN_WORDS / (CROARING_WORDS_IN_AVX2_REG);             \
  ------------------
  |  |  250|  49.7k|#define CROARING_WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
  ------------------
  |  Branch (510:10): [True: 48.2k, False: 1.50k]
  ------------------
  511|  48.2k|         i += innerloop) {                                                     \
  512|  48.2k|      __m256i A1, A2, AO;                                                      \
  513|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1));                     \
  514|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2));                     \
  515|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  516|  48.2k|      _mm256_storeu_si256((__m256i *)out, AO);                                 \
  517|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 32));                \
  518|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 32));                \
  519|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  520|  48.2k|      _mm256_storeu_si256((__m256i *)(out + 32), AO);                          \
  521|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 64));                \
  522|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 64));                \
  523|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  524|  48.2k|      _mm256_storeu_si256((__m256i *)(out + 64), AO);                          \
  525|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 96));                \
  526|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 96));                \
  527|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  528|  48.2k|      _mm256_storeu_si256((__m256i *)(out + 96), AO);                          \
  529|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 128));               \
  530|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 128));               \
  531|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  532|  48.2k|      _mm256_storeu_si256((__m256i *)(out + 128), AO);                         \
  533|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 160));               \
  534|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 160));               \
  535|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  536|  48.2k|      _mm256_storeu_si256((__m256i *)(out + 160), AO);                         \
  537|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 192));               \
  538|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 192));               \
  539|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  540|  48.2k|      _mm256_storeu_si256((__m256i *)(out + 192), AO);                         \
  541|  48.2k|      A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 224));               \
  542|  48.2k|      A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 224));               \
  543|  48.2k|      AO = avx_intrinsic(A2, A1);                                              \
  544|  48.2k|      _mm256_storeu_si256((__m256i *)(out + 224), AO);                         \
  545|  48.2k|      out += 256;                                                              \
  546|  48.2k|      words_1 += 256;                                                          \
  547|  48.2k|      words_2 += 256;                                                          \
  548|  48.2k|    }                                                                          \
  549|  1.50k|    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                             \
  550|  1.50k|    return dst->cardinality;                                                   \
  551|  1.50k|  }
bitset.c:_avx2_bitset_container_and_justcard:
  572|  1.50k|      const bitset_container_t *src_1, const bitset_container_t *src_2) {      \
  573|  1.50k|    const __m256i *__restrict__ data1 = (const __m256i *)src_1->words;         \
  574|  1.50k|    const __m256i *__restrict__ data2 = (const __m256i *)src_2->words;         \
  575|  1.50k|    return (int)avx2_harley_seal_popcount256_##opname(                         \
  576|  1.50k|        data2, data1, BITSET_CONTAINER_SIZE_IN_WORDS / (CROARING_WORDS_IN_AVX2_REG));   \
  ------------------
  |  |  250|  1.50k|#define CROARING_WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
  ------------------
  577|  1.50k|  }
bitset.c:_avx2_bitset_container_xor:
  558|  1.53k|                                      bitset_container_t *dst) {               \
  559|  1.53k|    const __m256i *__restrict__ words_1 = (const __m256i *)src_1->words;       \
  560|  1.53k|    const __m256i *__restrict__ words_2 = (const __m256i *)src_2->words;       \
  561|  1.53k|    __m256i *out = (__m256i *)dst->words;                                      \
  562|  1.53k|    dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname( \
  563|  1.53k|        words_2, words_1, out,                                                 \
  564|  1.53k|        BITSET_CONTAINER_SIZE_IN_WORDS / (CROARING_WORDS_IN_AVX2_REG));                 \
  ------------------
  |  |  250|  1.53k|#define CROARING_WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
  ------------------
  565|  1.53k|    return dst->cardinality;                                                   \
  566|  1.53k|  }                                                                            \
bitset.c:_avx2_bitset_container_equals:
 1127|  4.04k|static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
 1128|  4.04k|    const __m256i *ptr1 = (const __m256i*)container1->words;
 1129|  4.04k|    const __m256i *ptr2 = (const __m256i*)container2->words;
 1130|  1.03M|    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) {
  ------------------
  |  Branch (1130:24): [True: 1.03M, False: 4.04k]
  ------------------
 1131|  1.03M|      __m256i r1 = _mm256_loadu_si256(ptr1+i);
 1132|  1.03M|      __m256i r2 = _mm256_loadu_si256(ptr2+i);
 1133|  1.03M|      int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
 1134|  1.03M|      if ((uint32_t)mask != UINT32_MAX) {
  ------------------
  |  Branch (1134:11): [True: 0, False: 1.03M]
  ------------------
 1135|      0|          return false;
 1136|      0|      }
 1137|  1.03M|  }
 1138|  4.04k|	return true;
 1139|  4.04k|}

container_free:
   58|   629k|void container_free(container_t *c, uint8_t type) {
   59|   629k|    switch (type) {
   60|   293k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|   293k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (60:9): [True: 293k, False: 336k]
  ------------------
   61|   293k|            bitset_container_free(CAST_bitset(c));
  ------------------
  |  |   52|   293k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|   293k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
   62|   293k|            break;
   63|   134k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|   134k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (63:9): [True: 134k, False: 495k]
  ------------------
   64|   134k|            array_container_free(CAST_array(c));
  ------------------
  |  |   54|   134k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|   134k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
   65|   134k|            break;
   66|   201k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|   201k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (66:9): [True: 201k, False: 428k]
  ------------------
   67|   201k|            run_container_free(CAST_run(c));
  ------------------
  |  |   77|   201k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|   201k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
   68|   201k|            break;
   69|      0|        case SHARED_CONTAINER_TYPE:
  ------------------
  |  |   51|      0|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (69:9): [True: 0, False: 629k]
  ------------------
   70|      0|            shared_container_free(CAST_shared(c));
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
   71|      0|            break;
   72|      0|        default:
  ------------------
  |  Branch (72:9): [True: 0, False: 629k]
  ------------------
   73|      0|            assert(false);
   74|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
   75|   629k|    }
   76|   629k|}
get_copy_of_container:
  181|    246|                                   bool copy_on_write) {
  182|    246|    if (copy_on_write) {
  ------------------
  |  Branch (182:9): [True: 0, False: 246]
  ------------------
  183|      0|        shared_container_t *shared_container;
  184|      0|        if (*typecode == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|      0|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (184:13): [True: 0, False: 0]
  ------------------
  185|      0|            shared_container = CAST_shared(c);
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  186|      0|            croaring_refcount_inc(&shared_container->counter);
  187|      0|            return shared_container;
  188|      0|        }
  189|      0|        assert(*typecode != SHARED_CONTAINER_TYPE);
  190|       |
  191|      0|        if ((shared_container = (shared_container_t *)roaring_malloc(
  ------------------
  |  Branch (191:13): [True: 0, False: 0]
  ------------------
  192|      0|                 sizeof(shared_container_t))) == NULL) {
  193|      0|            return NULL;
  194|      0|        }
  195|       |
  196|      0|        shared_container->container = c;
  197|      0|        shared_container->typecode = *typecode;
  198|       |        // At this point, we are creating new shared container
  199|       |        // so there should be no other references, and setting
  200|       |        // the counter to 2 - even non-atomically - is safe as
  201|       |        // long as the value is set before the return statement.
  202|      0|        shared_container->counter = 2;
  203|      0|        *typecode = SHARED_CONTAINER_TYPE;
  ------------------
  |  |   51|      0|#define SHARED_CONTAINER_TYPE 4
  ------------------
  204|       |
  205|      0|        return shared_container;
  206|      0|    }  // copy_on_write
  207|       |    // otherwise, no copy on write...
  208|    246|    const container_t *actual_container = container_unwrap_shared(c, typecode);
  209|       |    assert(*typecode != SHARED_CONTAINER_TYPE);
  210|    246|    return container_clone(actual_container, *typecode);
  211|    246|}
container_clone:
  217|   347k|container_t *container_clone(const container_t *c, uint8_t typecode) {
  218|       |    // We do not want to allow cloning of shared containers.
  219|       |    // c = container_unwrap_shared(c, &typecode);
  220|   347k|    switch (typecode) {
  221|   145k|        case BITSET_CONTAINER_TYPE:
  ------------------
  |  |   48|   145k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (221:9): [True: 145k, False: 201k]
  ------------------
  222|   145k|            return bitset_container_clone(const_CAST_bitset(c));
  ------------------
  |  |   53|   145k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|   145k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  223|  23.4k|        case ARRAY_CONTAINER_TYPE:
  ------------------
  |  |   49|  23.4k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (223:9): [True: 23.4k, False: 323k]
  ------------------
  224|  23.4k|            return array_container_clone(const_CAST_array(c));
  ------------------
  |  |   55|  23.4k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  23.4k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  225|   178k|        case RUN_CONTAINER_TYPE:
  ------------------
  |  |   50|   178k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (225:9): [True: 178k, False: 169k]
  ------------------
  226|   178k|            return run_container_clone(const_CAST_run(c));
  ------------------
  |  |   78|   178k|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|   178k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  227|      0|        case SHARED_CONTAINER_TYPE:
  ------------------
  |  |   51|      0|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (227:9): [True: 0, False: 347k]
  ------------------
  228|       |            // Shared containers are not cloneable. Are you mixing COW and
  229|       |            // non-COW bitmaps?
  230|      0|            return NULL;
  231|      0|        default:
  ------------------
  |  Branch (231:9): [True: 0, False: 347k]
  ------------------
  232|      0|            assert(false);
  233|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  234|      0|            return NULL;
  235|   347k|    }
  236|   347k|}
container_init_iterator:
  304|  19.2k|                                                     uint16_t *value) {
  305|  19.2k|    switch (typecode) {
  306|  3.80k|        case BITSET_CONTAINER_TYPE: {
  ------------------
  |  |   48|  3.80k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (306:9): [True: 3.80k, False: 15.4k]
  ------------------
  307|  3.80k|            const bitset_container_t *bc = const_CAST_bitset(c);
  ------------------
  |  |   53|  3.80k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  3.80k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  308|  3.80k|            uint32_t wordindex = 0;
  309|  3.80k|            uint64_t word;
  310|  28.0k|            while ((word = bc->words[wordindex]) == 0) {
  ------------------
  |  Branch (310:20): [True: 24.1k, False: 3.80k]
  ------------------
  311|  24.1k|                wordindex++;
  312|  24.1k|            }
  313|       |            // word is non-zero
  314|  3.80k|            int32_t index = wordindex * 64 + roaring_trailing_zeroes(word);
  315|  3.80k|            *value = index;
  316|  3.80k|            return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
  ------------------
  |  |   13|  3.80k|#define ROARING_INIT_ROARING_CONTAINER_ITERATOR_T (roaring_container_iterator_t)
  ------------------
  317|  3.80k|                .index = index,
  318|  3.80k|            };
  319|      0|        }
  320|  14.9k|        case ARRAY_CONTAINER_TYPE: {
  ------------------
  |  |   49|  14.9k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (320:9): [True: 14.9k, False: 4.29k]
  ------------------
  321|  14.9k|            const array_container_t *ac = const_CAST_array(c);
  ------------------
  |  |   55|  14.9k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  14.9k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  322|  14.9k|            *value = ac->array[0];
  323|  14.9k|            return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
  ------------------
  |  |   13|  14.9k|#define ROARING_INIT_ROARING_CONTAINER_ITERATOR_T (roaring_container_iterator_t)
  ------------------
  324|  14.9k|                .index = 0,
  325|  14.9k|            };
  326|      0|        }
  327|    485|        case RUN_CONTAINER_TYPE: {
  ------------------
  |  |   50|    485|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (327:9): [True: 485, False: 18.7k]
  ------------------
  328|    485|            const run_container_t *rc = const_CAST_run(c);
  ------------------
  |  |   78|    485|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|    485|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  329|    485|            *value = rc->runs[0].value;
  330|    485|            return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{
  ------------------
  |  |   13|    485|#define ROARING_INIT_ROARING_CONTAINER_ITERATOR_T (roaring_container_iterator_t)
  ------------------
  331|    485|                .index = 0,
  332|    485|            };
  333|      0|        }
  334|      0|        default:
  ------------------
  |  Branch (334:9): [True: 0, False: 19.2k]
  ------------------
  335|      0|            assert(false);
  336|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  337|      0|            return ROARING_INIT_ROARING_CONTAINER_ITERATOR_T{0};
  ------------------
  |  |   13|      0|#define ROARING_INIT_ROARING_CONTAINER_ITERATOR_T (roaring_container_iterator_t)
  ------------------
  338|  19.2k|    }
  339|  19.2k|}
container_iterator_lower_bound:
  386|  6.42k|                                    uint16_t *value_out, uint16_t val) {
  387|  6.42k|    if (val > container_maximum(c, typecode)) {
  ------------------
  |  Branch (387:9): [True: 0, False: 6.42k]
  ------------------
  388|      0|        return false;
  389|      0|    }
  390|  6.42k|    switch (typecode) {
  391|  1.87k|        case BITSET_CONTAINER_TYPE: {
  ------------------
  |  |   48|  1.87k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  |  Branch (391:9): [True: 1.87k, False: 4.54k]
  ------------------
  392|  1.87k|            const bitset_container_t *bc = const_CAST_bitset(c);
  ------------------
  |  |   53|  1.87k|#define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  1.87k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  393|  1.87k|            it->index = bitset_container_index_equalorlarger(bc, val);
  394|  1.87k|            *value_out = it->index;
  395|  1.87k|            return true;
  396|      0|        }
  397|  4.54k|        case ARRAY_CONTAINER_TYPE: {
  ------------------
  |  |   49|  4.54k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (397:9): [True: 4.54k, False: 1.87k]
  ------------------
  398|  4.54k|            const array_container_t *ac = const_CAST_array(c);
  ------------------
  |  |   55|  4.54k|#define const_CAST_array(c) CAST(const array_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|  4.54k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  399|  4.54k|            it->index = array_container_index_equalorlarger(ac, val);
  400|  4.54k|            *value_out = ac->array[it->index];
  401|  4.54k|            return true;
  402|      0|        }
  403|      0|        case RUN_CONTAINER_TYPE: {
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (403:9): [True: 0, False: 6.42k]
  ------------------
  404|      0|            const run_container_t *rc = const_CAST_run(c);
  ------------------
  |  |   78|      0|#define const_CAST_run(c) CAST(const run_container_t *, c)
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  405|      0|            it->index = run_container_index_equalorlarger(rc, val);
  406|      0|            if (rc->runs[it->index].value <= val) {
  ------------------
  |  Branch (406:17): [True: 0, False: 0]
  ------------------
  407|      0|                *value_out = val;
  408|      0|            } else {
  409|      0|                *value_out = rc->runs[it->index].value;
  410|      0|            }
  411|      0|            return true;
  412|      0|        }
  413|      0|        default:
  ------------------
  |  Branch (413:9): [True: 0, False: 6.42k]
  ------------------
  414|      0|            assert(false);
  415|      0|            roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  416|      0|            return false;
  417|  6.42k|    }
  418|  6.42k|}

bitset_container_from_array:
   22|  2.32k|bitset_container_t *bitset_container_from_array(const array_container_t *ac) {
   23|  2.32k|    bitset_container_t *ans = bitset_container_create();
   24|  2.32k|    int limit = array_container_cardinality(ac);
   25|  2.33M|    for (int i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
  ------------------
  |  Branch (25:21): [True: 2.33M, False: 2.32k]
  ------------------
   26|  2.32k|    return ans;
   27|  2.32k|}
bitset_container_from_run:
   29|    538|bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
   30|    538|    int card = run_container_cardinality(arr);
   31|    538|    bitset_container_t *answer = bitset_container_create();
   32|  18.7k|    for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
  ------------------
  |  Branch (32:26): [True: 18.2k, False: 538]
  ------------------
   33|  18.2k|        rle16_t vl = arr->runs[rlepos];
   34|  18.2k|        bitset_set_lenrange(answer->words, vl.value, vl.length);
   35|  18.2k|    }
   36|    538|    answer->cardinality = card;
   37|    538|    return answer;
   38|    538|}
array_container_from_run:
   40|  1.17k|array_container_t *array_container_from_run(const run_container_t *arr) {
   41|  1.17k|    array_container_t *answer =
   42|  1.17k|        array_container_create_given_capacity(run_container_cardinality(arr));
   43|  1.17k|    answer->cardinality = 0;
   44|  86.3k|    for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
  ------------------
  |  Branch (44:26): [True: 85.1k, False: 1.17k]
  ------------------
   45|  85.1k|        int run_start = arr->runs[rlepos].value;
   46|  85.1k|        int run_end = run_start + arr->runs[rlepos].length;
   47|       |
   48|   414k|        for (int run_value = run_start; run_value <= run_end; ++run_value) {
  ------------------
  |  Branch (48:41): [True: 329k, False: 85.1k]
  ------------------
   49|   329k|            answer->array[answer->cardinality++] = (uint16_t)run_value;
   50|   329k|        }
   51|  85.1k|    }
   52|  1.17k|    return answer;
   53|  1.17k|}
array_container_from_bitset:
   55|  2.79k|array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
   56|  2.79k|    array_container_t *result =
   57|  2.79k|        array_container_create_given_capacity(bits->cardinality);
   58|  2.79k|    result->cardinality = bits->cardinality;
   59|  2.79k|#if CROARING_IS_X64
   60|  2.79k|#if CROARING_COMPILER_SUPPORTS_AVX512
   61|  2.79k|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
  ------------------
  |  Branch (61:9): [True: 0, False: 2.79k]
  ------------------
   62|      0|        bitset_extract_setbits_avx512_uint16(
   63|      0|            bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, result->array,
   64|      0|            bits->cardinality, 0);
   65|      0|    } else
   66|  2.79k|#endif
   67|  2.79k|    {
   68|       |        //  sse version ends up being slower here
   69|       |        // (bitset_extract_setbits_sse_uint16)
   70|       |        // because of the sparsity of the data
   71|  2.79k|        bitset_extract_setbits_uint16(
   72|  2.79k|            bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, result->array, 0);
   73|  2.79k|    }
   74|       |#else
   75|       |    // If the system is not x64, then we have no accelerated function.
   76|       |    bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
   77|       |                                  result->array, 0);
   78|       |#endif
   79|       |
   80|  2.79k|    return result;
   81|  2.79k|}
convert_to_bitset_or_array_container:
  120|  72.0k|                                                  uint8_t *resulttype) {
  121|  72.0k|    if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (121:9): [True: 1.28k, False: 70.8k]
  ------------------
  122|  1.28k|        array_container_t *answer = array_container_create_given_capacity(card);
  123|  1.28k|        answer->cardinality = 0;
  124|  69.6k|        for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
  ------------------
  |  Branch (124:30): [True: 68.3k, False: 1.28k]
  ------------------
  125|  68.3k|            uint16_t run_start = rc->runs[rlepos].value;
  126|  68.3k|            uint16_t run_end = run_start + rc->runs[rlepos].length;
  127|   685k|            for (uint16_t run_value = run_start; run_value < run_end;
  ------------------
  |  Branch (127:50): [True: 616k, False: 68.3k]
  ------------------
  128|   616k|                 ++run_value) {
  129|   616k|                answer->array[answer->cardinality++] = run_value;
  130|   616k|            }
  131|  68.3k|            answer->array[answer->cardinality++] = run_end;
  132|  68.3k|        }
  133|  1.28k|        assert(card == answer->cardinality);
  134|  1.28k|        *resulttype = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.28k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  135|       |        // run_container_free(r);
  136|  1.28k|        return answer;
  137|  1.28k|    }
  138|  70.8k|    bitset_container_t *answer = bitset_container_create();
  139|   160k|    for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
  ------------------
  |  Branch (139:26): [True: 89.6k, False: 70.8k]
  ------------------
  140|  89.6k|        uint16_t run_start = rc->runs[rlepos].value;
  141|  89.6k|        bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length);
  142|  89.6k|    }
  143|  70.8k|    answer->cardinality = card;
  144|  70.8k|    *resulttype = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|  70.8k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  145|       |    // run_container_free(r);
  146|  70.8k|    return answer;
  147|  72.0k|}
convert_run_to_efficient_container:
  155|   138k|                                                uint8_t *typecode_after) {
  156|   138k|    int32_t size_as_run_container =
  157|   138k|        run_container_serialized_size_in_bytes(c->n_runs);
  158|       |
  159|   138k|    int32_t size_as_bitset_container =
  160|   138k|        bitset_container_serialized_size_in_bytes();
  161|   138k|    int32_t card = run_container_cardinality(c);
  162|   138k|    int32_t size_as_array_container =
  163|   138k|        array_container_serialized_size_in_bytes(card);
  164|       |
  165|   138k|    int32_t min_size_non_run =
  166|   138k|        size_as_bitset_container < size_as_array_container
  ------------------
  |  Branch (166:9): [True: 62.3k, False: 76.1k]
  ------------------
  167|   138k|            ? size_as_bitset_container
  168|   138k|            : size_as_array_container;
  169|   138k|    if (size_as_run_container <= min_size_non_run) {  // no conversion
  ------------------
  |  Branch (169:9): [True: 71.0k, False: 67.4k]
  ------------------
  170|  71.0k|        *typecode_after = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|  71.0k|#define RUN_CONTAINER_TYPE 3
  ------------------
  171|  71.0k|        return c;
  172|  71.0k|    }
  173|  67.4k|    if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (173:9): [True: 67.4k, False: 0]
  ------------------
  174|       |        // to array
  175|  67.4k|        array_container_t *answer = array_container_create_given_capacity(card);
  176|  67.4k|        answer->cardinality = 0;
  177|   217k|        for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
  ------------------
  |  Branch (177:30): [True: 149k, False: 67.4k]
  ------------------
  178|   149k|            int run_start = c->runs[rlepos].value;
  179|   149k|            int run_end = run_start + c->runs[rlepos].length;
  180|       |
  181|   399k|            for (int run_value = run_start; run_value <= run_end; ++run_value) {
  ------------------
  |  Branch (181:45): [True: 249k, False: 149k]
  ------------------
  182|   249k|                answer->array[answer->cardinality++] = (uint16_t)run_value;
  183|   249k|            }
  184|   149k|        }
  185|  67.4k|        *typecode_after = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  67.4k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  186|  67.4k|        return answer;
  187|  67.4k|    }
  188|       |
  189|       |    // else to bitset
  190|      0|    bitset_container_t *answer = bitset_container_create();
  191|       |
  192|      0|    for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
  ------------------
  |  Branch (192:26): [True: 0, False: 0]
  ------------------
  193|      0|        int start = c->runs[rlepos].value;
  194|      0|        int end = start + c->runs[rlepos].length;
  195|      0|        bitset_set_range(answer->words, start, end + 1);
  196|      0|    }
  197|      0|    answer->cardinality = card;
  198|      0|    *typecode_after = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  199|      0|    return answer;
  200|  67.4k|}
convert_run_to_efficient_container_and_free:
  204|   127k|    run_container_t *c, uint8_t *typecode_after) {
  205|   127k|    container_t *answer = convert_run_to_efficient_container(c, typecode_after);
  206|   127k|    if (answer != c) run_container_free(c);
  ------------------
  |  Branch (206:9): [True: 61.4k, False: 65.5k]
  ------------------
  207|   127k|    return answer;
  208|   127k|}
convert_run_optimize:
  218|  13.0k|                                  uint8_t *typecode_after) {
  219|  13.0k|    if (typecode_original == RUN_CONTAINER_TYPE) {
  ------------------
  |  |   50|  13.0k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (219:9): [True: 0, False: 13.0k]
  ------------------
  220|      0|        container_t *newc =
  221|      0|            convert_run_to_efficient_container(CAST_run(c), typecode_after);
  ------------------
  |  |   77|      0|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  222|      0|        if (newc != c) {
  ------------------
  |  Branch (222:13): [True: 0, False: 0]
  ------------------
  223|      0|            container_free(c, typecode_original);
  224|      0|        }
  225|      0|        return newc;
  226|  13.0k|    } else if (typecode_original == ARRAY_CONTAINER_TYPE) {
  ------------------
  |  |   49|  13.0k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (226:16): [True: 13.0k, False: 0]
  ------------------
  227|       |        // it might need to be converted to a run container.
  228|  13.0k|        array_container_t *c_qua_array = CAST_array(c);
  ------------------
  |  |   54|  13.0k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  13.0k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  229|  13.0k|        int32_t n_runs = array_container_number_of_runs(c_qua_array);
  230|  13.0k|        int32_t size_as_run_container =
  231|  13.0k|            run_container_serialized_size_in_bytes(n_runs);
  232|  13.0k|        int32_t card = array_container_cardinality(c_qua_array);
  233|  13.0k|        int32_t size_as_array_container =
  234|  13.0k|            array_container_serialized_size_in_bytes(card);
  235|       |
  236|  13.0k|        if (size_as_run_container >= size_as_array_container) {
  ------------------
  |  Branch (236:13): [True: 8.93k, False: 4.09k]
  ------------------
  237|  8.93k|            *typecode_after = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  8.93k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  238|  8.93k|            return c;
  239|  8.93k|        }
  240|       |        // else convert array to run container
  241|  4.09k|        run_container_t *answer = run_container_create_given_capacity(n_runs);
  242|  4.09k|        int prev = -2;
  243|  4.09k|        int run_start = -1;
  244|       |
  245|  4.09k|        assert(card > 0);
  246|   661k|        for (int i = 0; i < card; ++i) {
  ------------------
  |  Branch (246:25): [True: 657k, False: 4.09k]
  ------------------
  247|   657k|            uint16_t cur_val = c_qua_array->array[i];
  248|   657k|            if (cur_val != prev + 1) {
  ------------------
  |  Branch (248:17): [True: 267k, False: 389k]
  ------------------
  249|       |                // new run starts; flush old one, if any
  250|   267k|                if (run_start != -1) add_run(answer, run_start, prev);
  ------------------
  |  Branch (250:21): [True: 263k, False: 4.09k]
  ------------------
  251|   267k|                run_start = cur_val;
  252|   267k|            }
  253|   657k|            prev = c_qua_array->array[i];
  254|   657k|        }
  255|  4.09k|        assert(run_start >= 0);
  256|       |        // now prev is the last seen value
  257|  4.09k|        add_run(answer, run_start, prev);
  258|  4.09k|        *typecode_after = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|  4.09k|#define RUN_CONTAINER_TYPE 3
  ------------------
  259|  4.09k|        array_container_free(c_qua_array);
  260|  4.09k|        return answer;
  261|  13.0k|    } else if (typecode_original ==
  ------------------
  |  Branch (261:16): [True: 0, False: 0]
  ------------------
  262|      0|               BITSET_CONTAINER_TYPE) {  // run conversions on bitset
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  263|       |        // does bitset need conversion to run?
  264|      0|        bitset_container_t *c_qua_bitset = CAST_bitset(c);
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  265|      0|        int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
  266|      0|        int32_t size_as_run_container =
  267|      0|            run_container_serialized_size_in_bytes(n_runs);
  268|      0|        int32_t size_as_bitset_container =
  269|      0|            bitset_container_serialized_size_in_bytes();
  270|       |
  271|      0|        if (size_as_bitset_container <= size_as_run_container) {
  ------------------
  |  Branch (271:13): [True: 0, False: 0]
  ------------------
  272|       |            // no conversion needed.
  273|      0|            *typecode_after = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|      0|#define BITSET_CONTAINER_TYPE 1
  ------------------
  274|      0|            return c;
  275|      0|        }
  276|       |        // bitset to runcontainer (ported from Java  RunContainer(
  277|       |        // BitmapContainer bc, int nbrRuns))
  278|      0|        assert(n_runs > 0);  // no empty bitmaps
  279|      0|        run_container_t *answer = run_container_create_given_capacity(n_runs);
  280|       |
  281|      0|        int long_ctr = 0;
  282|      0|        uint64_t cur_word = c_qua_bitset->words[0];
  283|      0|        while (true) {
  ------------------
  |  Branch (283:16): [True: 0, Folded]
  ------------------
  284|      0|            while (cur_word == UINT64_C(0) &&
  ------------------
  |  Branch (284:20): [True: 0, False: 0]
  ------------------
  285|      0|                   long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
  ------------------
  |  Branch (285:20): [True: 0, False: 0]
  ------------------
  286|      0|                cur_word = c_qua_bitset->words[++long_ctr];
  287|       |
  288|      0|            if (cur_word == UINT64_C(0)) {
  ------------------
  |  Branch (288:17): [True: 0, False: 0]
  ------------------
  289|      0|                bitset_container_free(c_qua_bitset);
  290|      0|                *typecode_after = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  291|      0|                return answer;
  292|      0|            }
  293|       |
  294|      0|            int local_run_start = roaring_trailing_zeroes(cur_word);
  295|      0|            int run_start = local_run_start + 64 * long_ctr;
  296|      0|            uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
  297|       |
  298|      0|            int run_end = 0;
  299|      0|            while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
  ------------------
  |  Branch (299:20): [True: 0, False: 0]
  ------------------
  300|      0|                   long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
  ------------------
  |  Branch (300:20): [True: 0, False: 0]
  ------------------
  301|      0|                cur_word_with_1s = c_qua_bitset->words[++long_ctr];
  302|       |
  303|      0|            if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
  ------------------
  |  Branch (303:17): [True: 0, False: 0]
  ------------------
  304|      0|                run_end = 64 + long_ctr * 64;  // exclusive, I guess
  305|      0|                add_run(answer, run_start, run_end - 1);
  306|      0|                bitset_container_free(c_qua_bitset);
  307|      0|                *typecode_after = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  308|      0|                return answer;
  309|      0|            }
  310|      0|            int local_run_end = roaring_trailing_zeroes(~cur_word_with_1s);
  311|      0|            run_end = local_run_end + long_ctr * 64;
  312|      0|            add_run(answer, run_start, run_end - 1);
  313|      0|            cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
  314|      0|        }
  315|      0|        return answer;
  316|      0|    } else {
  317|      0|        assert(false);
  318|      0|        roaring_unreachable;
  ------------------
  |  |  233|      0|#define roaring_unreachable __builtin_unreachable()
  ------------------
  319|      0|        return NULL;
  320|      0|    }
  321|  13.0k|}
convert.c:add_run:
   84|   267k|static void add_run(run_container_t *rc, int s, int e) {
   85|   267k|    rc->runs[rc->n_runs].value = s;
   86|   267k|    rc->runs[rc->n_runs].length = e - s;
   87|   267k|    rc->n_runs++;
   88|   267k|}

array_bitset_container_andnot:
   26|    608|                                   array_container_t *dst) {
   27|       |    // follows Java implementation as of June 2016
   28|    608|    if (dst->capacity < src_1->cardinality) {
  ------------------
  |  Branch (28:9): [True: 608, False: 0]
  ------------------
   29|    608|        array_container_grow(dst, src_1->cardinality, false);
   30|    608|    }
   31|    608|    int32_t newcard = 0;
   32|    608|    const int32_t origcard = src_1->cardinality;
   33|   103k|    for (int i = 0; i < origcard; ++i) {
  ------------------
  |  Branch (33:21): [True: 102k, False: 608]
  ------------------
   34|   102k|        uint16_t key = src_1->array[i];
   35|   102k|        dst->array[newcard] = key;
   36|   102k|        newcard += 1 - bitset_container_contains(src_2, key);
   37|   102k|    }
   38|    608|    dst->cardinality = newcard;
   39|    608|}
bitset_array_container_iandnot:
   83|  1.50k|                                    container_t **dst) {
   84|  1.50k|    *dst = src_1;
   85|  1.50k|    src_1->cardinality =
   86|  1.50k|        (int32_t)bitset_clear_list(src_1->words, (uint64_t)src_1->cardinality,
   87|  1.50k|                                   src_2->array, (uint64_t)src_2->cardinality);
   88|       |
   89|  1.50k|    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (89:9): [True: 198, False: 1.31k]
  ------------------
   90|    198|        *dst = array_container_from_bitset(src_1);
   91|    198|        bitset_container_free(src_1);
   92|    198|        return false;  // not bitset
   93|    198|    } else
   94|  1.31k|        return true;
   95|  1.50k|}
run_bitset_container_andnot:
  106|    650|                                 container_t **dst) {
  107|       |    // follows the Java implementation as of June 2016
  108|    650|    int card = run_container_cardinality(src_1);
  109|    650|    if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (109:9): [True: 650, False: 0]
  ------------------
  110|       |        // must be an array
  111|    650|        array_container_t *answer = array_container_create_given_capacity(card);
  112|    650|        answer->cardinality = 0;
  113|  54.9k|        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (113:34): [True: 54.2k, False: 650]
  ------------------
  114|  54.2k|            rle16_t rle = src_1->runs[rlepos];
  115|   194k|            for (int run_value = rle.value; run_value <= rle.value + rle.length;
  ------------------
  |  Branch (115:45): [True: 140k, False: 54.2k]
  ------------------
  116|   140k|                 ++run_value) {
  117|   140k|                if (!bitset_container_get(src_2, (uint16_t)run_value)) {
  ------------------
  |  Branch (117:21): [True: 39.5k, False: 101k]
  ------------------
  118|  39.5k|                    answer->array[answer->cardinality++] = (uint16_t)run_value;
  119|  39.5k|                }
  120|   140k|            }
  121|  54.2k|        }
  122|    650|        *dst = answer;
  123|    650|        return false;
  124|    650|    } else {  // we guess it will be a bitset, though have to check guess when
  125|       |              // done
  126|      0|        bitset_container_t *answer = bitset_container_clone(src_2);
  127|       |
  128|      0|        uint32_t last_pos = 0;
  129|      0|        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (129:34): [True: 0, False: 0]
  ------------------
  130|      0|            rle16_t rle = src_1->runs[rlepos];
  131|       |
  132|      0|            uint32_t start = rle.value;
  133|      0|            uint32_t end = start + rle.length + 1;
  134|      0|            bitset_reset_range(answer->words, last_pos, start);
  135|      0|            bitset_flip_range(answer->words, start, end);
  136|      0|            last_pos = end;
  137|      0|        }
  138|      0|        bitset_reset_range(answer->words, last_pos, (uint32_t)(1 << 16));
  139|       |
  140|      0|        answer->cardinality = bitset_container_compute_cardinality(answer);
  141|       |
  142|      0|        if (answer->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (142:13): [True: 0, False: 0]
  ------------------
  143|      0|            *dst = array_container_from_bitset(answer);
  144|      0|            bitset_container_free(answer);
  145|      0|            return false;  // not bitset
  146|      0|        }
  147|      0|        *dst = answer;
  148|       |        return true;  // bitset
  149|      0|    }
  150|    650|}
run_array_container_andnot:
  279|  2.00k|                               container_t **dst) {
  280|       |    // follows the Java impl as of June 2016
  281|       |
  282|  2.00k|    int card = run_container_cardinality(src_1);
  283|  2.00k|    const int arbitrary_threshold = 32;
  284|       |
  285|  2.00k|    if (card <= arbitrary_threshold) {
  ------------------
  |  Branch (285:9): [True: 378, False: 1.62k]
  ------------------
  286|    378|        if (src_2->cardinality == 0) {
  ------------------
  |  Branch (286:13): [True: 0, False: 378]
  ------------------
  287|      0|            *dst = run_container_clone(src_1);
  288|      0|            return RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  289|      0|        }
  290|       |        // Java's "lazyandNot.toEfficientContainer" thing
  291|    378|        run_container_t *answer = run_container_create_given_capacity(
  292|    378|            card + array_container_cardinality(src_2));
  293|       |
  294|    378|        int rlepos = 0;
  295|    378|        int xrlepos = 0;  // "x" is src_2
  296|    378|        rle16_t rle = src_1->runs[rlepos];
  297|    378|        int32_t start = rle.value;
  298|    378|        int32_t end = start + rle.length + 1;
  299|    378|        int32_t xstart = src_2->array[xrlepos];
  300|       |
  301|  12.5k|        while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
  ------------------
  |  Branch (301:16): [True: 12.2k, False: 299]
  |  Branch (301:44): [True: 12.1k, False: 79]
  ------------------
  302|  12.1k|            if (end <= xstart) {
  ------------------
  |  Branch (302:17): [True: 664, False: 11.4k]
  ------------------
  303|       |                // output the first run
  304|    664|                answer->runs[answer->n_runs++] =
  305|    664|                    CROARING_MAKE_RLE16(start, end - start - 1);
  ------------------
  |  |   60|    664|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  306|    664|                rlepos++;
  307|    664|                if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (307:21): [True: 624, False: 40]
  ------------------
  308|    624|                    start = src_1->runs[rlepos].value;
  309|    624|                    end = start + src_1->runs[rlepos].length + 1;
  310|    624|                }
  311|  11.4k|            } else if (xstart + 1 <= start) {
  ------------------
  |  Branch (311:24): [True: 7.19k, False: 4.29k]
  ------------------
  312|       |                // exit the second run
  313|  7.19k|                xrlepos++;
  314|  7.19k|                if (xrlepos < src_2->cardinality) {
  ------------------
  |  Branch (314:21): [True: 7.11k, False: 79]
  ------------------
  315|  7.11k|                    xstart = src_2->array[xrlepos];
  316|  7.11k|                }
  317|  7.19k|            } else {
  318|  4.29k|                if (start < xstart) {
  ------------------
  |  Branch (318:21): [True: 352, False: 3.94k]
  ------------------
  319|    352|                    answer->runs[answer->n_runs++] =
  320|    352|                        CROARING_MAKE_RLE16(start, xstart - start - 1);
  ------------------
  |  |   60|    352|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  321|    352|                }
  322|  4.29k|                if (xstart + 1 < end) {
  ------------------
  |  Branch (322:21): [True: 2.60k, False: 1.69k]
  ------------------
  323|  2.60k|                    start = xstart + 1;
  324|  2.60k|                } else {
  325|  1.69k|                    rlepos++;
  326|  1.69k|                    if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (326:25): [True: 1.43k, False: 259]
  ------------------
  327|  1.43k|                        start = src_1->runs[rlepos].value;
  328|  1.43k|                        end = start + src_1->runs[rlepos].length + 1;
  329|  1.43k|                    }
  330|  1.69k|                }
  331|  4.29k|            }
  332|  12.1k|        }
  333|    378|        if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (333:13): [True: 79, False: 299]
  ------------------
  334|     79|            answer->runs[answer->n_runs++] =
  335|     79|                CROARING_MAKE_RLE16(start, end - start - 1);
  ------------------
  |  |   60|     79|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  336|     79|            rlepos++;
  337|     79|            if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (337:17): [True: 27, False: 52]
  ------------------
  338|     27|                memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos,
  339|     27|                       (src_1->n_runs - rlepos) * sizeof(rle16_t));
  340|     27|                answer->n_runs += (src_1->n_runs - rlepos);
  341|     27|            }
  342|     79|        }
  343|    378|        uint8_t return_type;
  344|    378|        *dst = convert_run_to_efficient_container(answer, &return_type);
  345|    378|        if (answer != *dst) run_container_free(answer);
  ------------------
  |  Branch (345:13): [True: 244, False: 134]
  ------------------
  346|    378|        return return_type;
  347|    378|    }
  348|       |    // else it's a bitmap or array
  349|       |
  350|  1.62k|    if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (350:9): [True: 1.32k, False: 297]
  ------------------
  351|  1.32k|        array_container_t *ac = array_container_create_given_capacity(card);
  352|       |        // nb Java code used a generic iterator-based merge to compute
  353|       |        // difference
  354|  1.32k|        ac->cardinality = run_array_array_subtract(src_1, src_2, ac);
  355|  1.32k|        *dst = ac;
  356|  1.32k|        return ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.32k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  357|  1.32k|    }
  358|    297|    bitset_container_t *ans = bitset_container_from_run(src_1);
  359|    297|    bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst);
  360|    297|    return (result_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
  ------------------
  |  |   48|    279|#define BITSET_CONTAINER_TYPE 1
  ------------------
                  return (result_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
  ------------------
  |  |   49|     18|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (360:13): [True: 279, False: 18]
  ------------------
  361|  1.62k|}
run_array_container_iandnot:
  372|    765|                                container_t **dst) {
  373|       |    // dummy implementation same as June 2016 Java
  374|    765|    int ans = run_array_container_andnot(src_1, src_2, dst);
  375|    765|    run_container_free(src_1);
  376|    765|    return ans;
  377|    765|}
array_run_container_andnot:
  383|    801|                                array_container_t *dst) {
  384|       |    // basically following Java impl as of June 2016
  385|    801|    if (src_1->cardinality > dst->capacity) {
  ------------------
  |  Branch (385:9): [True: 754, False: 47]
  ------------------
  386|    754|        array_container_grow(dst, src_1->cardinality, false);
  387|    754|    }
  388|       |
  389|    801|    if (src_2->n_runs == 0) {
  ------------------
  |  Branch (389:9): [True: 0, False: 801]
  ------------------
  390|      0|        memmove(dst->array, src_1->array,
  391|      0|                sizeof(uint16_t) * src_1->cardinality);
  392|      0|        dst->cardinality = src_1->cardinality;
  393|      0|        return;
  394|      0|    }
  395|    801|    int32_t run_start = src_2->runs[0].value;
  396|    801|    int32_t run_end = run_start + src_2->runs[0].length;
  397|    801|    int which_run = 0;
  398|       |
  399|    801|    uint16_t val = 0;
  400|    801|    int dest_card = 0;
  401|   174k|    for (int i = 0; i < src_1->cardinality; ++i) {
  ------------------
  |  Branch (401:21): [True: 173k, False: 801]
  ------------------
  402|   173k|        val = src_1->array[i];
  403|   173k|        if (val < run_start)
  ------------------
  |  Branch (403:13): [True: 43.7k, False: 130k]
  ------------------
  404|  43.7k|            dst->array[dest_card++] = val;
  405|   130k|        else if (val <= run_end) {
  ------------------
  |  Branch (405:18): [True: 93.6k, False: 36.3k]
  ------------------
  406|  93.6k|            ;  // omitted item
  407|  93.6k|        } else {
  408|  50.1k|            do {
  409|  50.1k|                if (which_run + 1 < src_2->n_runs) {
  ------------------
  |  Branch (409:21): [True: 49.9k, False: 188]
  ------------------
  410|  49.9k|                    ++which_run;
  411|  49.9k|                    run_start = src_2->runs[which_run].value;
  412|  49.9k|                    run_end = run_start + src_2->runs[which_run].length;
  413|       |
  414|  49.9k|                } else
  415|    188|                    run_start = run_end = (1 << 16) + 1;
  416|  50.1k|            } while (val > run_end);
  ------------------
  |  Branch (416:22): [True: 13.7k, False: 36.3k]
  ------------------
  417|  36.3k|            --i;
  418|  36.3k|        }
  419|   173k|    }
  420|    801|    dst->cardinality = dest_card;
  421|    801|}
array_run_container_iandnot:
  428|     47|                                 const run_container_t *src_2) {
  429|     47|    array_run_container_andnot(src_1, src_2, src_1);
  430|     47|}
run_run_container_andnot:
  437|  1.18k|                             const run_container_t *src_2, container_t **dst) {
  438|  1.18k|    run_container_t *ans = run_container_create();
  439|  1.18k|    run_container_andnot(src_1, src_2, ans);
  440|  1.18k|    uint8_t typecode_after;
  441|  1.18k|    *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
  442|  1.18k|    return typecode_after;
  443|  1.18k|}
run_run_container_iandnot:
  453|    470|                              const run_container_t *src_2, container_t **dst) {
  454|       |    // following Java impl as of June 2016 (dummy)
  455|    470|    int ans = run_run_container_andnot(src_1, src_2, dst);
  456|    470|    run_container_free(src_1);
  457|    470|    return ans;
  458|    470|}
array_array_container_andnot:
  466|  2.40k|                                  array_container_t *dst) {
  467|  2.40k|    array_container_andnot(src_1, src_2, dst);
  468|  2.40k|}
array_array_container_iandnot:
  473|  3.57k|                                   const array_container_t *src_2) {
  474|  3.57k|    array_container_andnot(src_1, src_2, src_1);
  475|  3.57k|}
mixed_andnot.c:run_array_array_subtract:
  235|  1.32k|                                    array_container_t *a_out) {
  236|  1.32k|    int out_card = 0;
  237|  1.32k|    int32_t in_array_pos =
  238|  1.32k|        -1;  // since advanceUntil always assumes we start the search AFTER this
  239|       |
  240|  98.1k|    for (int rlepos = 0; rlepos < rc->n_runs; rlepos++) {
  ------------------
  |  Branch (240:26): [True: 96.7k, False: 1.32k]
  ------------------
  241|  96.7k|        int32_t start = rc->runs[rlepos].value;
  242|  96.7k|        int32_t end = start + rc->runs[rlepos].length + 1;
  243|       |
  244|  96.7k|        in_array_pos = advanceUntil(a_in->array, in_array_pos,
  245|  96.7k|                                    a_in->cardinality, (uint16_t)start);
  246|       |
  247|  96.7k|        if (in_array_pos >= a_in->cardinality) {  // run has no items subtracted
  ------------------
  |  Branch (247:13): [True: 5.67k, False: 91.1k]
  ------------------
  248|  66.7k|            for (int32_t i = start; i < end; ++i)
  ------------------
  |  Branch (248:37): [True: 61.1k, False: 5.67k]
  ------------------
  249|  61.1k|                a_out->array[out_card++] = (uint16_t)i;
  250|  91.1k|        } else {
  251|  91.1k|            uint16_t next_nonincluded = a_in->array[in_array_pos];
  252|  91.1k|            if (next_nonincluded >= end) {
  ------------------
  |  Branch (252:17): [True: 31.1k, False: 59.9k]
  ------------------
  253|       |                // another case when run goes unaltered
  254|  83.7k|                for (int32_t i = start; i < end; ++i)
  ------------------
  |  Branch (254:41): [True: 52.6k, False: 31.1k]
  ------------------
  255|  52.6k|                    a_out->array[out_card++] = (uint16_t)i;
  256|  31.1k|                in_array_pos--;  // ensure we see this item again if necessary
  257|  59.9k|            } else {
  258|   298k|                for (int32_t i = start; i < end; ++i)
  ------------------
  |  Branch (258:41): [True: 238k, False: 59.9k]
  ------------------
  259|   238k|                    if (i != next_nonincluded)
  ------------------
  |  Branch (259:25): [True: 104k, False: 134k]
  ------------------
  260|   104k|                        a_out->array[out_card++] = (uint16_t)i;
  261|   134k|                    else  // 0 should ensure  we don't match
  262|   134k|                        next_nonincluded =
  263|   134k|                            (in_array_pos + 1 >= a_in->cardinality)
  ------------------
  |  Branch (263:29): [True: 1.04k, False: 133k]
  ------------------
  264|   134k|                                ? 0
  265|   134k|                                : a_in->array[++in_array_pos];
  266|  59.9k|                in_array_pos--;  // see again
  267|  59.9k|            }
  268|  91.1k|        }
  269|  96.7k|    }
  270|  1.32k|    return out_card;
  271|  1.32k|}

array_container_equal_bitset:
   10|    433|                                  const bitset_container_t* container2) {
   11|    433|    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
  ------------------
  |  Branch (11:9): [True: 433, False: 0]
  ------------------
   12|    433|        if (container2->cardinality != container1->cardinality) {
  ------------------
  |  Branch (12:13): [True: 433, False: 0]
  ------------------
   13|    433|            return false;
   14|    433|        }
   15|    433|    }
   16|      0|    int32_t pos = 0;
   17|      0|    for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
  ------------------
  |  Branch (17:25): [True: 0, False: 0]
  ------------------
   18|      0|        uint64_t w = container2->words[i];
   19|      0|        while (w != 0) {
  ------------------
  |  Branch (19:16): [True: 0, False: 0]
  ------------------
   20|      0|            uint64_t t = w & (~w + 1);
   21|      0|            uint16_t r = i * 64 + roaring_trailing_zeroes(w);
   22|      0|            if (pos >= container1->cardinality) {
  ------------------
  |  Branch (22:17): [True: 0, False: 0]
  ------------------
   23|      0|                return false;
   24|      0|            }
   25|      0|            if (container1->array[pos] != r) {
  ------------------
  |  Branch (25:17): [True: 0, False: 0]
  ------------------
   26|      0|                return false;
   27|      0|            }
   28|      0|            ++pos;
   29|      0|            w ^= t;
   30|      0|        }
   31|      0|    }
   32|      0|    return (pos == container1->cardinality);
   33|      0|}
run_container_equals_array:
   36|    891|                                const array_container_t* container2) {
   37|    891|    if (run_container_cardinality(container1) != container2->cardinality)
  ------------------
  |  Branch (37:9): [True: 846, False: 45]
  ------------------
   38|    846|        return false;
   39|     45|    int32_t pos = 0;
   40|     45|    for (int i = 0; i < container1->n_runs; ++i) {
  ------------------
  |  Branch (40:21): [True: 45, False: 0]
  ------------------
   41|     45|        const uint32_t run_start = container1->runs[i].value;
   42|     45|        const uint32_t le = container1->runs[i].length;
   43|       |
   44|     45|        if (container2->array[pos] != run_start) {
  ------------------
  |  Branch (44:13): [True: 45, False: 0]
  ------------------
   45|     45|            return false;
   46|     45|        }
   47|       |
   48|      0|        if (container2->array[pos + le] != run_start + le) {
  ------------------
  |  Branch (48:13): [True: 0, False: 0]
  ------------------
   49|      0|            return false;
   50|      0|        }
   51|       |
   52|      0|        pos += le + 1;
   53|      0|    }
   54|      0|    return true;
   55|     45|}

array_bitset_container_intersection:
   21|    652|                                         array_container_t *dst) {
   22|    652|    if (dst->capacity < src_1->cardinality) {
  ------------------
  |  Branch (22:9): [True: 652, False: 0]
  ------------------
   23|    652|        array_container_grow(dst, src_1->cardinality, false);
   24|    652|    }
   25|    652|    int32_t newcard = 0;  // dst could be src_1
   26|    652|    const int32_t origcard = src_1->cardinality;
   27|   282k|    for (int i = 0; i < origcard; ++i) {
  ------------------
  |  Branch (27:21): [True: 281k, False: 652]
  ------------------
   28|   281k|        uint16_t key = src_1->array[i];
   29|       |        // this branchless approach is much faster...
   30|   281k|        dst->array[newcard] = key;
   31|   281k|        newcard += bitset_container_contains(src_2, key);
   32|       |        /**
   33|       |         * we could do it this way instead...
   34|       |         * if (bitset_container_contains(src_2, key)) {
   35|       |         * dst->array[newcard++] = key;
   36|       |         * }
   37|       |         * but if the result is unpredictible, the processor generates
   38|       |         * many mispredicted branches.
   39|       |         * Difference can be huge (from 3 cycles when predictible all the way
   40|       |         * to 16 cycles when unpredictible.
   41|       |         * See
   42|       |         * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c
   43|       |         */
   44|   281k|    }
   45|    652|    dst->cardinality = newcard;
   46|    652|}
array_bitset_container_intersection_cardinality:
   50|  2.43k|    const array_container_t *src_1, const bitset_container_t *src_2) {
   51|  2.43k|    int32_t newcard = 0;
   52|  2.43k|    const int32_t origcard = src_1->cardinality;
   53|   412k|    for (int i = 0; i < origcard; ++i) {
  ------------------
  |  Branch (53:21): [True: 409k, False: 2.43k]
  ------------------
   54|   409k|        uint16_t key = src_1->array[i];
   55|   409k|        newcard += bitset_container_contains(src_2, key);
   56|   409k|    }
   57|  2.43k|    return newcard;
   58|  2.43k|}
array_bitset_container_intersect:
   61|    608|                                      const bitset_container_t *src_2) {
   62|    608|    const int32_t origcard = src_1->cardinality;
   63|  6.67k|    for (int i = 0; i < origcard; ++i) {
  ------------------
  |  Branch (63:21): [True: 6.64k, False: 31]
  ------------------
   64|  6.64k|        uint16_t key = src_1->array[i];
   65|  6.64k|        if (bitset_container_contains(src_2, key)) return true;
  ------------------
  |  Branch (65:13): [True: 577, False: 6.06k]
  ------------------
   66|  6.64k|    }
   67|     31|    return false;
   68|    608|}
array_run_container_intersection:
   75|  3.25k|                                      array_container_t *dst) {
   76|  3.25k|    if (run_container_is_full(src_2)) {
  ------------------
  |  Branch (76:9): [True: 30, False: 3.22k]
  ------------------
   77|     30|        if (dst != src_1) array_container_copy(src_1, dst);
  ------------------
  |  Branch (77:13): [True: 30, False: 0]
  ------------------
   78|     30|        return;
   79|     30|    }
   80|  3.22k|    if (dst->capacity < src_1->cardinality) {
  ------------------
  |  Branch (80:9): [True: 3.22k, False: 0]
  ------------------
   81|  3.22k|        array_container_grow(dst, src_1->cardinality, false);
   82|  3.22k|    }
   83|  3.22k|    if (src_2->n_runs == 0) {
  ------------------
  |  Branch (83:9): [True: 0, False: 3.22k]
  ------------------
   84|      0|        return;
   85|      0|    }
   86|  3.22k|    int32_t rlepos = 0;
   87|  3.22k|    int32_t arraypos = 0;
   88|  3.22k|    rle16_t rle = src_2->runs[rlepos];
   89|  3.22k|    int32_t newcard = 0;
   90|   449k|    while (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (90:12): [True: 446k, False: 2.65k]
  ------------------
   91|   446k|        const uint16_t arrayval = src_1->array[arraypos];
   92|   595k|        while (rle.value + rle.length <
  ------------------
  |  Branch (92:16): [True: 149k, False: 446k]
  ------------------
   93|   595k|               arrayval) {  // this will frequently be false
   94|   149k|            ++rlepos;
   95|   149k|            if (rlepos == src_2->n_runs) {
  ------------------
  |  Branch (95:17): [True: 564, False: 148k]
  ------------------
   96|    564|                dst->cardinality = newcard;
   97|    564|                return;  // we are done
   98|    564|            }
   99|   148k|            rle = src_2->runs[rlepos];
  100|   148k|        }
  101|   446k|        if (rle.value > arrayval) {
  ------------------
  |  Branch (101:13): [True: 30.2k, False: 415k]
  ------------------
  102|  30.2k|            arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
  103|  30.2k|                                    rle.value);
  104|   415k|        } else {
  105|   415k|            dst->array[newcard] = arrayval;
  106|   415k|            newcard++;
  107|   415k|            arraypos++;
  108|   415k|        }
  109|   446k|    }
  110|  2.65k|    dst->cardinality = newcard;
  111|  2.65k|}
run_bitset_container_intersection:
  119|    650|                                       container_t **dst) {
  120|    650|    if (run_container_is_full(src_1)) {
  ------------------
  |  Branch (120:9): [True: 0, False: 650]
  ------------------
  121|      0|        if (*dst != src_2) *dst = bitset_container_clone(src_2);
  ------------------
  |  Branch (121:13): [True: 0, False: 0]
  ------------------
  122|      0|        return true;
  123|      0|    }
  124|    650|    int32_t card = run_container_cardinality(src_1);
  125|    650|    if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (125:9): [True: 650, False: 0]
  ------------------
  126|       |        // result can only be an array (assuming that we never make a
  127|       |        // RunContainer)
  128|    650|        if (card > src_2->cardinality) {
  ------------------
  |  Branch (128:13): [True: 0, False: 650]
  ------------------
  129|      0|            card = src_2->cardinality;
  130|      0|        }
  131|    650|        array_container_t *answer = array_container_create_given_capacity(card);
  132|    650|        *dst = answer;
  133|    650|        if (*dst == NULL) {
  ------------------
  |  Branch (133:13): [True: 0, False: 650]
  ------------------
  134|      0|            return false;
  135|      0|        }
  136|  54.9k|        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (136:34): [True: 54.2k, False: 650]
  ------------------
  137|  54.2k|            rle16_t rle = src_1->runs[rlepos];
  138|  54.2k|            uint32_t endofrun = (uint32_t)rle.value + rle.length;
  139|   194k|            for (uint32_t runValue = rle.value; runValue <= endofrun;
  ------------------
  |  Branch (139:49): [True: 140k, False: 54.2k]
  ------------------
  140|   140k|                 ++runValue) {
  141|   140k|                answer->array[answer->cardinality] = (uint16_t)runValue;
  142|   140k|                answer->cardinality +=
  143|   140k|                    bitset_container_contains(src_2, runValue);
  144|   140k|            }
  145|  54.2k|        }
  146|    650|        return false;
  147|    650|    }
  148|      0|    if (*dst == src_2) {  // we attempt in-place
  ------------------
  |  Branch (148:9): [True: 0, False: 0]
  ------------------
  149|      0|        bitset_container_t *answer = CAST_bitset(*dst);
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  150|      0|        uint32_t start = 0;
  151|      0|        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (151:34): [True: 0, False: 0]
  ------------------
  152|      0|            const rle16_t rle = src_1->runs[rlepos];
  153|      0|            uint32_t end = rle.value;
  154|      0|            bitset_reset_range(src_2->words, start, end);
  155|       |
  156|      0|            start = end + rle.length + 1;
  157|      0|        }
  158|      0|        bitset_reset_range(src_2->words, start, UINT32_C(1) << 16);
  159|      0|        answer->cardinality = bitset_container_compute_cardinality(answer);
  160|      0|        if (src_2->cardinality > DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (160:13): [True: 0, False: 0]
  ------------------
  161|      0|            return true;
  162|      0|        } else {
  163|      0|            array_container_t *newanswer = array_container_from_bitset(src_2);
  164|      0|            if (newanswer == NULL) {
  ------------------
  |  Branch (164:17): [True: 0, False: 0]
  ------------------
  165|      0|                *dst = NULL;
  166|      0|                return false;
  167|      0|            }
  168|      0|            *dst = newanswer;
  169|      0|            return false;
  170|      0|        }
  171|      0|    } else {  // no inplace
  172|       |        // we expect the answer to be a bitmap (if we are lucky)
  173|      0|        bitset_container_t *answer = bitset_container_clone(src_2);
  174|       |
  175|      0|        *dst = answer;
  176|      0|        if (answer == NULL) {
  ------------------
  |  Branch (176:13): [True: 0, False: 0]
  ------------------
  177|      0|            return true;
  178|      0|        }
  179|      0|        uint32_t start = 0;
  180|      0|        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (180:34): [True: 0, False: 0]
  ------------------
  181|      0|            const rle16_t rle = src_1->runs[rlepos];
  182|      0|            uint32_t end = rle.value;
  183|      0|            bitset_reset_range(answer->words, start, end);
  184|      0|            start = end + rle.length + 1;
  185|      0|        }
  186|      0|        bitset_reset_range(answer->words, start, UINT32_C(1) << 16);
  187|      0|        answer->cardinality = bitset_container_compute_cardinality(answer);
  188|       |
  189|      0|        if (answer->cardinality > DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (189:13): [True: 0, False: 0]
  ------------------
  190|      0|            return true;
  191|      0|        } else {
  192|      0|            array_container_t *newanswer = array_container_from_bitset(answer);
  193|      0|            bitset_container_free(CAST_bitset(*dst));
  ------------------
  |  |   52|      0|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  194|      0|            if (newanswer == NULL) {
  ------------------
  |  Branch (194:17): [True: 0, False: 0]
  ------------------
  195|      0|                *dst = NULL;
  196|      0|                return false;
  197|      0|            }
  198|      0|            *dst = newanswer;
  199|       |            return false;
  200|      0|        }
  201|      0|    }
  202|      0|}
array_run_container_intersection_cardinality:
  206|  8.07k|                                                 const run_container_t *src_2) {
  207|  8.07k|    if (run_container_is_full(src_2)) {
  ------------------
  |  Branch (207:9): [True: 120, False: 7.95k]
  ------------------
  208|    120|        return src_1->cardinality;
  209|    120|    }
  210|  7.95k|    if (src_2->n_runs == 0) {
  ------------------
  |  Branch (210:9): [True: 0, False: 7.95k]
  ------------------
  211|      0|        return 0;
  212|      0|    }
  213|  7.95k|    int32_t rlepos = 0;
  214|  7.95k|    int32_t arraypos = 0;
  215|  7.95k|    rle16_t rle = src_2->runs[rlepos];
  216|  7.95k|    int32_t newcard = 0;
  217|   786k|    while (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (217:12): [True: 780k, False: 6.10k]
  ------------------
  218|   780k|        const uint16_t arrayval = src_1->array[arraypos];
  219|  1.16M|        while (rle.value + rle.length <
  ------------------
  |  Branch (219:16): [True: 385k, False: 778k]
  ------------------
  220|  1.16M|               arrayval) {  // this will frequently be false
  221|   385k|            ++rlepos;
  222|   385k|            if (rlepos == src_2->n_runs) {
  ------------------
  |  Branch (222:17): [True: 1.84k, False: 383k]
  ------------------
  223|  1.84k|                return newcard;  // we are done
  224|  1.84k|            }
  225|   383k|            rle = src_2->runs[rlepos];
  226|   383k|        }
  227|   778k|        if (rle.value > arrayval) {
  ------------------
  |  Branch (227:13): [True: 105k, False: 673k]
  ------------------
  228|   105k|            arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
  229|   105k|                                    rle.value);
  230|   673k|        } else {
  231|   673k|            newcard++;
  232|   673k|            arraypos++;
  233|   673k|        }
  234|   778k|    }
  235|  6.10k|    return newcard;
  236|  7.95k|}
run_bitset_container_intersection_cardinality:
  241|  2.60k|    const run_container_t *src_1, const bitset_container_t *src_2) {
  242|  2.60k|    if (run_container_is_full(src_1)) {
  ------------------
  |  Branch (242:9): [True: 0, False: 2.60k]
  ------------------
  243|      0|        return bitset_container_cardinality(src_2);
  244|      0|    }
  245|  2.60k|    int answer = 0;
  246|   219k|    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (246:30): [True: 217k, False: 2.60k]
  ------------------
  247|   217k|        rle16_t rle = src_1->runs[rlepos];
  248|   217k|        answer +=
  249|   217k|            bitset_lenrange_cardinality(src_2->words, rle.value, rle.length);
  250|   217k|    }
  251|  2.60k|    return answer;
  252|  2.60k|}
array_run_container_intersect:
  255|  2.01k|                                   const run_container_t *src_2) {
  256|  2.01k|    if (run_container_is_full(src_2)) {
  ------------------
  |  Branch (256:9): [True: 30, False: 1.98k]
  ------------------
  257|     30|        return !array_container_empty(src_1);
  258|     30|    }
  259|  1.98k|    if (src_2->n_runs == 0) {
  ------------------
  |  Branch (259:9): [True: 0, False: 1.98k]
  ------------------
  260|      0|        return false;
  261|      0|    }
  262|  1.98k|    int32_t rlepos = 0;
  263|  1.98k|    int32_t arraypos = 0;
  264|  1.98k|    rle16_t rle = src_2->runs[rlepos];
  265|  3.43k|    while (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (265:12): [True: 3.38k, False: 49]
  ------------------
  266|  3.38k|        const uint16_t arrayval = src_1->array[arraypos];
  267|  8.29k|        while (rle.value + rle.length <
  ------------------
  |  Branch (267:16): [True: 4.95k, False: 3.33k]
  ------------------
  268|  8.29k|               arrayval) {  // this will frequently be false
  269|  4.95k|            ++rlepos;
  270|  4.95k|            if (rlepos == src_2->n_runs) {
  ------------------
  |  Branch (270:17): [True: 45, False: 4.91k]
  ------------------
  271|     45|                return false;  // we are done
  272|     45|            }
  273|  4.91k|            rle = src_2->runs[rlepos];
  274|  4.91k|        }
  275|  3.33k|        if (rle.value > arrayval) {
  ------------------
  |  Branch (275:13): [True: 1.44k, False: 1.89k]
  ------------------
  276|  1.44k|            arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
  277|  1.44k|                                    rle.value);
  278|  1.89k|        } else {
  279|  1.89k|            return true;
  280|  1.89k|        }
  281|  3.33k|    }
  282|     49|    return false;
  283|  1.98k|}
run_bitset_container_intersect:
  288|    650|                                    const bitset_container_t *src_2) {
  289|    650|    if (run_container_is_full(src_1)) {
  ------------------
  |  Branch (289:9): [True: 0, False: 650]
  ------------------
  290|      0|        return !bitset_container_empty(src_2);
  291|      0|    }
  292|  4.54k|    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (292:30): [True: 4.53k, False: 16]
  ------------------
  293|  4.53k|        rle16_t rle = src_1->runs[rlepos];
  294|  4.53k|        if (!bitset_lenrange_empty(src_2->words, rle.value, rle.length))
  ------------------
  |  Branch (294:13): [True: 634, False: 3.89k]
  ------------------
  295|    634|            return true;
  296|  4.53k|    }
  297|     16|    return false;
  298|    650|}
bitset_bitset_container_intersection_inplace:
  329|  1.50k|    container_t **dst) {
  330|  1.50k|    const int newCardinality = bitset_container_and_justcard(src_1, src_2);
  331|  1.50k|    if (newCardinality > DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (331:9): [True: 1.50k, False: 0]
  ------------------
  332|  1.50k|        *dst = src_1;
  333|  1.50k|        bitset_container_and_nocard(src_1, src_2, src_1);
  334|  1.50k|        CAST_bitset(*dst)->cardinality = newCardinality;
  ------------------
  |  |   52|  1.50k|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  1.50k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  335|  1.50k|        return true;  // it is a bitset
  336|  1.50k|    }
  337|      0|    *dst = array_container_create_given_capacity(newCardinality);
  338|      0|    if (*dst != NULL) {
  ------------------
  |  Branch (338:9): [True: 0, False: 0]
  ------------------
  339|      0|        CAST_array(*dst)->cardinality = newCardinality;
  ------------------
  |  |   54|      0|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  340|      0|        bitset_extract_intersection_setbits_uint16(
  341|      0|            src_1->words, src_2->words, BITSET_CONTAINER_SIZE_IN_WORDS,
  342|      0|            CAST_array(*dst)->array, 0);
  ------------------
  |  |   54|      0|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  343|      0|    }
  344|       |    return false;  // not a bitset
  345|  1.50k|}

array_container_negation:
   33|    165|                              bitset_container_t *dst) {
   34|    165|    uint64_t card = UINT64_C(1 << 16);
   35|    165|    bitset_container_set_all(dst);
   36|       |
   37|    165|    if (src->cardinality == 0) {
  ------------------
  |  Branch (37:9): [True: 0, False: 165]
  ------------------
   38|      0|        return;
   39|      0|    }
   40|       |
   41|    165|    dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array,
   42|    165|                                                  (uint64_t)src->cardinality);
   43|    165|}
bitset_container_negation_inplace:
   67|     66|                                       container_t **dst) {
   68|     66|    return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
   69|     66|}
run_container_negation_inplace:
   88|  5.38k|int run_container_negation_inplace(run_container_t *src, container_t **dst) {
   89|  5.38k|    return run_container_negation_range_inplace(src, 0, (1 << 16), dst);
   90|  5.38k|}
array_container_negation_range:
   99|  3.58k|                                    container_t **dst) {
  100|       |    /* close port of the Java implementation */
  101|  3.58k|    if (range_start >= range_end) {
  ------------------
  |  Branch (101:9): [True: 0, False: 3.58k]
  ------------------
  102|      0|        *dst = array_container_clone(src);
  103|      0|        return false;
  104|      0|    }
  105|       |
  106|  3.58k|    int32_t start_index =
  107|  3.58k|        binarySearch(src->array, src->cardinality, (uint16_t)range_start);
  108|  3.58k|    if (start_index < 0) start_index = -start_index - 1;
  ------------------
  |  Branch (108:9): [True: 3.26k, False: 318]
  ------------------
  109|       |
  110|  3.58k|    int32_t last_index =
  111|  3.58k|        binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
  112|  3.58k|    if (last_index < 0) last_index = -last_index - 2;
  ------------------
  |  Branch (112:9): [True: 3.32k, False: 254]
  ------------------
  113|       |
  114|  3.58k|    const int32_t current_values_in_range = last_index - start_index + 1;
  115|  3.58k|    const int32_t span_to_be_flipped = range_end - range_start;
  116|  3.58k|    const int32_t new_values_in_range =
  117|  3.58k|        span_to_be_flipped - current_values_in_range;
  118|  3.58k|    const int32_t cardinality_change =
  119|  3.58k|        new_values_in_range - current_values_in_range;
  120|  3.58k|    const int32_t new_cardinality = src->cardinality + cardinality_change;
  121|       |
  122|  3.58k|    if (new_cardinality > DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (122:9): [True: 132, False: 3.44k]
  ------------------
  123|    132|        bitset_container_t *temp = bitset_container_from_array(src);
  124|    132|        bitset_flip_range(temp->words, (uint32_t)range_start,
  125|    132|                          (uint32_t)range_end);
  126|    132|        temp->cardinality = new_cardinality;
  127|    132|        *dst = temp;
  128|    132|        return true;
  129|    132|    }
  130|       |
  131|  3.44k|    array_container_t *arr =
  132|  3.44k|        array_container_create_given_capacity(new_cardinality);
  133|  3.44k|    *dst = (container_t *)arr;
  134|  3.44k|    if (new_cardinality == 0) {
  ------------------
  |  Branch (134:9): [True: 13, False: 3.43k]
  ------------------
  135|     13|        arr->cardinality = new_cardinality;
  136|     13|        return false;  // we are done.
  137|     13|    }
  138|       |    // copy stuff before the active area
  139|  3.43k|    memcpy(arr->array, src->array, start_index * sizeof(uint16_t));
  140|       |
  141|       |    // work on the range
  142|  3.43k|    int32_t out_pos = start_index, in_pos = start_index;
  143|  3.43k|    int32_t val_in_range = range_start;
  144|  39.5k|    for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
  ------------------
  |  Branch (144:12): [True: 39.3k, False: 240]
  |  Branch (144:40): [True: 36.1k, False: 3.19k]
  ------------------
  145|  36.1k|        if ((uint16_t)val_in_range != src->array[in_pos]) {
  ------------------
  |  Branch (145:13): [True: 24.4k, False: 11.6k]
  ------------------
  146|  24.4k|            arr->array[out_pos++] = (uint16_t)val_in_range;
  147|  24.4k|        } else {
  148|  11.6k|            ++in_pos;
  149|  11.6k|        }
  150|  36.1k|    }
  151|  41.3k|    for (; val_in_range < range_end; ++val_in_range)
  ------------------
  |  Branch (151:12): [True: 37.9k, False: 3.43k]
  ------------------
  152|  37.9k|        arr->array[out_pos++] = (uint16_t)val_in_range;
  153|       |
  154|       |    // content after the active range
  155|  3.43k|    memcpy(arr->array + out_pos, src->array + (last_index + 1),
  156|  3.43k|           (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
  157|  3.43k|    arr->cardinality = new_cardinality;
  158|       |    return false;
  159|  3.44k|}
array_container_negation_range_inplace:
  168|  3.58k|                                            container_t **dst) {
  169|  3.58k|    bool ans = array_container_negation_range(src, range_start, range_end, dst);
  170|       |    // TODO : try a real inplace version
  171|  3.58k|    array_container_free(src);
  172|  3.58k|    return ans;
  173|  3.58k|}
bitset_container_negation_range_inplace:
  217|  1.41k|                                             container_t **dst) {
  218|  1.41k|    bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end);
  219|  1.41k|    src->cardinality = bitset_container_compute_cardinality(src);
  220|  1.41k|    if (src->cardinality > DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (220:9): [True: 1.35k, False: 63]
  ------------------
  221|  1.35k|        *dst = src;
  222|  1.35k|        return true;
  223|  1.35k|    }
  224|     63|    *dst = array_container_from_bitset(src);
  225|     63|    bitset_container_free(src);
  226|       |    return false;
  227|  1.41k|}
run_container_negation_range:
  237|    177|                                 container_t **dst) {
  238|    177|    uint8_t return_typecode;
  239|       |
  240|       |    // follows the Java implementation
  241|    177|    if (range_end <= range_start) {
  ------------------
  |  Branch (241:9): [True: 0, False: 177]
  ------------------
  242|      0|        *dst = run_container_clone(src);
  243|      0|        return RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  244|      0|    }
  245|       |
  246|    177|    run_container_t *ans = run_container_create_given_capacity(
  247|    177|        src->n_runs + 1);  // src->n_runs + 1);
  248|    177|    int k = 0;
  249|  3.04k|    for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
  ------------------
  |  Branch (249:12): [True: 2.99k, False: 52]
  |  Branch (249:31): [True: 2.86k, False: 125]
  ------------------
  250|  2.86k|        ans->runs[k] = src->runs[k];
  251|  2.86k|        ans->n_runs++;
  252|  2.86k|    }
  253|       |
  254|    177|    run_container_smart_append_exclusive(
  255|    177|        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
  256|       |
  257|  4.21k|    for (; k < src->n_runs; ++k) {
  ------------------
  |  Branch (257:12): [True: 4.04k, False: 177]
  ------------------
  258|  4.04k|        run_container_smart_append_exclusive(ans, src->runs[k].value,
  259|  4.04k|                                             src->runs[k].length);
  260|  4.04k|    }
  261|       |
  262|    177|    *dst = convert_run_to_efficient_container(ans, &return_typecode);
  263|    177|    if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);
  ------------------
  |  |   50|    177|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (263:9): [True: 6, False: 171]
  ------------------
  264|       |
  265|    177|    return return_typecode;
  266|    177|}
run_container_negation_range_inplace:
  278|  7.12k|                                         container_t **dst) {
  279|  7.12k|    uint8_t return_typecode;
  280|       |
  281|  7.12k|    if (range_end <= range_start) {
  ------------------
  |  Branch (281:9): [True: 0, False: 7.12k]
  ------------------
  282|      0|        *dst = src;
  283|      0|        return RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  284|      0|    }
  285|       |
  286|       |    // TODO: efficient special case when range is 0 to 65535 inclusive
  287|       |
  288|  7.12k|    if (src->capacity == src->n_runs) {
  ------------------
  |  Branch (288:9): [True: 5.95k, False: 1.16k]
  ------------------
  289|       |        // no excess room.  More checking to see if result can fit
  290|  5.95k|        bool last_val_before_range = false;
  291|  5.95k|        bool first_val_in_range = false;
  292|  5.95k|        bool last_val_in_range = false;
  293|  5.95k|        bool first_val_past_range = false;
  294|       |
  295|  5.95k|        if (range_start > 0)
  ------------------
  |  Branch (295:13): [True: 186, False: 5.77k]
  ------------------
  296|    186|            last_val_before_range =
  297|    186|                run_container_contains(src, (uint16_t)(range_start - 1));
  298|  5.95k|        first_val_in_range = run_container_contains(src, (uint16_t)range_start);
  299|       |
  300|  5.95k|        if (last_val_before_range == first_val_in_range) {
  ------------------
  |  Branch (300:13): [True: 506, False: 5.45k]
  ------------------
  301|    506|            last_val_in_range =
  302|    506|                run_container_contains(src, (uint16_t)(range_end - 1));
  303|    506|            if (range_end != 0x10000)
  ------------------
  |  Branch (303:17): [True: 332, False: 174]
  ------------------
  304|    332|                first_val_past_range =
  305|    332|                    run_container_contains(src, (uint16_t)range_end);
  306|       |
  307|    506|            if (last_val_in_range ==
  ------------------
  |  Branch (307:17): [True: 177, False: 329]
  ------------------
  308|    506|                first_val_past_range) {  // no space for inplace
  309|    177|                int ans = run_container_negation_range(src, range_start,
  310|    177|                                                       range_end, dst);
  311|    177|                run_container_free(src);
  312|    177|                return ans;
  313|    177|            }
  314|    506|        }
  315|  5.95k|    }
  316|       |    // all other cases: result will fit
  317|       |
  318|  6.95k|    run_container_t *ans = src;
  319|  6.95k|    int my_nbr_runs = src->n_runs;
  320|       |
  321|  6.95k|    ans->n_runs = 0;
  322|  6.95k|    int k = 0;
  323|  8.96k|    for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
  ------------------
  |  Branch (323:12): [True: 8.88k, False: 82]
  |  Branch (323:33): [True: 2.01k, False: 6.86k]
  ------------------
  324|       |        // ans->runs[k] = src->runs[k]; (would be self-copy)
  325|  2.01k|        ans->n_runs++;
  326|  2.01k|    }
  327|       |
  328|       |    // as with Java implementation, use locals to give self a buffer of depth 1
  329|  6.95k|    rle16_t buffered = CROARING_MAKE_RLE16(0, 0);
  ------------------
  |  |   60|  6.95k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  330|  6.95k|    rle16_t next = buffered;
  331|  6.95k|    if (k < my_nbr_runs) buffered = src->runs[k];
  ------------------
  |  Branch (331:9): [True: 6.86k, False: 82]
  ------------------
  332|       |
  333|  6.95k|    run_container_smart_append_exclusive(
  334|  6.95k|        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
  335|       |
  336|  98.4k|    for (; k < my_nbr_runs; ++k) {
  ------------------
  |  Branch (336:12): [True: 91.4k, False: 6.95k]
  ------------------
  337|  91.4k|        if (k + 1 < my_nbr_runs) next = src->runs[k + 1];
  ------------------
  |  Branch (337:13): [True: 84.6k, False: 6.86k]
  ------------------
  338|       |
  339|  91.4k|        run_container_smart_append_exclusive(ans, buffered.value,
  340|  91.4k|                                             buffered.length);
  341|  91.4k|        buffered = next;
  342|  91.4k|    }
  343|       |
  344|  6.95k|    *dst = convert_run_to_efficient_container(ans, &return_typecode);
  345|  6.95k|    if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);
  ------------------
  |  |   50|  6.95k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (345:9): [True: 5.26k, False: 1.68k]
  ------------------
  346|       |
  347|  6.95k|    return return_typecode;
  348|  7.12k|}

array_container_is_subset_bitset:
   11|  2.43k|                                      const bitset_container_t* container2) {
   12|  2.43k|    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
  ------------------
  |  Branch (12:9): [True: 2.43k, False: 0]
  ------------------
   13|  2.43k|        if (container2->cardinality < container1->cardinality) {
  ------------------
  |  Branch (13:13): [True: 0, False: 2.43k]
  ------------------
   14|      0|            return false;
   15|      0|        }
   16|  2.43k|    }
   17|  2.43k|    for (int i = 0; i < container1->cardinality; ++i) {
  ------------------
  |  Branch (17:21): [True: 2.43k, False: 0]
  ------------------
   18|  2.43k|        if (!bitset_container_contains(container2, container1->array[i])) {
  ------------------
  |  Branch (18:13): [True: 2.43k, False: 0]
  ------------------
   19|  2.43k|            return false;
   20|  2.43k|        }
   21|  2.43k|    }
   22|      0|    return true;
   23|  2.43k|}
run_container_is_subset_array:
   26|    380|                                   const array_container_t* container2) {
   27|    380|    if (run_container_cardinality(container1) > container2->cardinality)
  ------------------
  |  Branch (27:9): [True: 181, False: 199]
  ------------------
   28|    181|        return false;
   29|    199|    int32_t start_pos = -1, stop_pos = -1;
   30|    199|    for (int i = 0; i < container1->n_runs; ++i) {
  ------------------
  |  Branch (30:21): [True: 199, False: 0]
  ------------------
   31|    199|        int32_t start = container1->runs[i].value;
   32|    199|        int32_t stop = start + container1->runs[i].length;
   33|    199|        start_pos = advanceUntil(container2->array, stop_pos,
   34|    199|                                 container2->cardinality, start);
   35|    199|        stop_pos = advanceUntil(container2->array, stop_pos,
   36|    199|                                container2->cardinality, stop);
   37|    199|        if (stop_pos == container2->cardinality) {
  ------------------
  |  Branch (37:13): [True: 5, False: 194]
  ------------------
   38|      5|            return false;
   39|    194|        } else if (stop_pos - start_pos != stop - start ||
  ------------------
  |  Branch (39:20): [True: 72, False: 122]
  ------------------
   40|    122|                   container2->array[start_pos] != start ||
  ------------------
  |  Branch (40:20): [True: 122, False: 0]
  ------------------
   41|    194|                   container2->array[stop_pos] != stop) {
  ------------------
  |  Branch (41:20): [True: 0, False: 0]
  ------------------
   42|    194|            return false;
   43|    194|        }
   44|    199|    }
   45|      0|    return true;
   46|    199|}
array_container_is_subset_run:
   49|  2.31k|                                   const run_container_t* container2) {
   50|  2.31k|    if (container1->cardinality > run_container_cardinality(container2))
  ------------------
  |  Branch (50:9): [True: 161, False: 2.15k]
  ------------------
   51|    161|        return false;
   52|  2.15k|    int i_array = 0, i_run = 0;
   53|  6.93k|    while (i_array < container1->cardinality && i_run < container2->n_runs) {
  ------------------
  |  Branch (53:12): [True: 6.93k, False: 0]
  |  Branch (53:49): [True: 6.91k, False: 19]
  ------------------
   54|  6.91k|        uint32_t start = container2->runs[i_run].value;
   55|  6.91k|        uint32_t stop = start + container2->runs[i_run].length;
   56|  6.91k|        if (container1->array[i_array] < start) {
  ------------------
  |  Branch (56:13): [True: 2.13k, False: 4.77k]
  ------------------
   57|  2.13k|            return false;
   58|  4.77k|        } else if (container1->array[i_array] > stop) {
  ------------------
  |  Branch (58:20): [True: 4.77k, False: 0]
  ------------------
   59|  4.77k|            i_run++;
   60|  4.77k|        } else {  // the value of the array is in the run
   61|      0|            i_array++;
   62|      0|        }
   63|  6.91k|    }
   64|     19|    if (i_array == container1->cardinality) {
  ------------------
  |  Branch (64:9): [True: 0, False: 19]
  ------------------
   65|      0|        return true;
   66|     19|    } else {
   67|       |        return false;
   68|     19|    }
   69|     19|}

array_bitset_container_union:
   24|  1.26k|                                  bitset_container_t *dst) {
   25|  1.26k|    if (src_2 != dst) bitset_container_copy(src_2, dst);
  ------------------
  |  Branch (25:9): [True: 1.26k, False: 0]
  ------------------
   26|  1.26k|    dst->cardinality = (int32_t)bitset_set_list_withcard(
   27|  1.26k|        dst->words, dst->cardinality, src_1->array, src_1->cardinality);
   28|  1.26k|}
run_bitset_container_union:
   43|  1.30k|                                bitset_container_t *dst) {
   44|  1.30k|    assert(!run_container_is_full(src_1));  // catch this case upstream
   45|  1.30k|    if (src_2 != dst) bitset_container_copy(src_2, dst);
  ------------------
  |  Branch (45:9): [True: 1.30k, False: 0]
  ------------------
   46|   109k|    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (46:30): [True: 108k, False: 1.30k]
  ------------------
   47|   108k|        rle16_t rle = src_1->runs[rlepos];
   48|   108k|        bitset_set_lenrange(dst->words, rle.value, rle.length);
   49|   108k|    }
   50|  1.30k|    dst->cardinality = bitset_container_compute_cardinality(dst);
   51|  1.30k|}
array_run_container_union:
   68|  2.65k|                               run_container_t *dst) {
   69|  2.65k|    if (run_container_is_full(src_2)) {
  ------------------
  |  Branch (69:9): [True: 97, False: 2.55k]
  ------------------
   70|     97|        run_container_copy(src_2, dst);
   71|     97|        return;
   72|     97|    }
   73|       |    // TODO: see whether the "2*" is spurious
   74|  2.55k|    run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false);
   75|  2.55k|    int32_t rlepos = 0;
   76|  2.55k|    int32_t arraypos = 0;
   77|  2.55k|    rle16_t previousrle;
   78|  2.55k|    if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
  ------------------
  |  Branch (78:9): [True: 1.94k, False: 609]
  ------------------
   79|  1.94k|        previousrle = run_container_append_first(dst, src_2->runs[rlepos]);
   80|  1.94k|        rlepos++;
   81|  1.94k|    } else {
   82|    609|        previousrle =
   83|    609|            run_container_append_value_first(dst, src_1->array[arraypos]);
   84|    609|        arraypos++;
   85|    609|    }
   86|   460k|    while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
  ------------------
  |  Branch (86:12): [True: 459k, False: 1.62k]
  |  Branch (86:40): [True: 458k, False: 933]
  ------------------
   87|   458k|        if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
  ------------------
  |  Branch (87:13): [True: 149k, False: 309k]
  ------------------
   88|   149k|            run_container_append(dst, src_2->runs[rlepos], &previousrle);
   89|   149k|            rlepos++;
   90|   309k|        } else {
   91|   309k|            run_container_append_value(dst, src_1->array[arraypos],
   92|   309k|                                       &previousrle);
   93|   309k|            arraypos++;
   94|   309k|        }
   95|   458k|    }
   96|  2.55k|    if (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (96:9): [True: 1.62k, False: 933]
  ------------------
   97|   172k|        while (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (97:16): [True: 170k, False: 1.62k]
  ------------------
   98|   170k|            run_container_append_value(dst, src_1->array[arraypos],
   99|   170k|                                       &previousrle);
  100|   170k|            arraypos++;
  101|   170k|        }
  102|  1.62k|    } else {
  103|  6.86k|        while (rlepos < src_2->n_runs) {
  ------------------
  |  Branch (103:16): [True: 5.93k, False: 933]
  ------------------
  104|  5.93k|            run_container_append(dst, src_2->runs[rlepos], &previousrle);
  105|  5.93k|            rlepos++;
  106|  5.93k|        }
  107|    933|    }
  108|  2.55k|}
array_run_container_inplace_union:
  111|    244|                                       run_container_t *src_2) {
  112|    244|    if (run_container_is_full(src_2)) {
  ------------------
  |  Branch (112:9): [True: 0, False: 244]
  ------------------
  113|      0|        return;
  114|      0|    }
  115|    244|    const int32_t maxoutput = src_1->cardinality + src_2->n_runs;
  116|    244|    const int32_t neededcapacity = maxoutput + src_2->n_runs;
  117|    244|    if (src_2->capacity < neededcapacity)
  ------------------
  |  Branch (117:9): [True: 244, False: 0]
  ------------------
  118|    244|        run_container_grow(src_2, neededcapacity, true);
  119|    244|    memmove(src_2->runs + maxoutput, src_2->runs,
  120|    244|            src_2->n_runs * sizeof(rle16_t));
  121|    244|    rle16_t *inputsrc2 = src_2->runs + maxoutput;
  122|    244|    int32_t rlepos = 0;
  123|    244|    int32_t arraypos = 0;
  124|    244|    int src2nruns = src_2->n_runs;
  125|    244|    src_2->n_runs = 0;
  126|       |
  127|    244|    rle16_t previousrle;
  128|       |
  129|    244|    if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
  ------------------
  |  Branch (129:9): [True: 207, False: 37]
  ------------------
  130|    207|        previousrle = run_container_append_first(src_2, inputsrc2[rlepos]);
  131|    207|        rlepos++;
  132|    207|    } else {
  133|     37|        previousrle =
  134|     37|            run_container_append_value_first(src_2, src_1->array[arraypos]);
  135|     37|        arraypos++;
  136|     37|    }
  137|       |
  138|  52.0k|    while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) {
  ------------------
  |  Branch (138:12): [True: 51.8k, False: 244]
  |  Branch (138:36): [True: 51.8k, False: 0]
  ------------------
  139|  51.8k|        if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
  ------------------
  |  Branch (139:13): [True: 13.2k, False: 38.5k]
  ------------------
  140|  13.2k|            run_container_append(src_2, inputsrc2[rlepos], &previousrle);
  141|  13.2k|            rlepos++;
  142|  38.5k|        } else {
  143|  38.5k|            run_container_append_value(src_2, src_1->array[arraypos],
  144|  38.5k|                                       &previousrle);
  145|  38.5k|            arraypos++;
  146|  38.5k|        }
  147|  51.8k|    }
  148|    244|    if (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (148:9): [True: 244, False: 0]
  ------------------
  149|  1.98k|        while (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (149:16): [True: 1.73k, False: 244]
  ------------------
  150|  1.73k|            run_container_append_value(src_2, src_1->array[arraypos],
  151|  1.73k|                                       &previousrle);
  152|  1.73k|            arraypos++;
  153|  1.73k|        }
  154|    244|    } else {
  155|      0|        while (rlepos < src2nruns) {
  ------------------
  |  Branch (155:16): [True: 0, False: 0]
  ------------------
  156|      0|            run_container_append(src_2, inputsrc2[rlepos], &previousrle);
  157|      0|            rlepos++;
  158|      0|        }
  159|      0|    }
  160|    244|}
array_array_container_union:
  164|  2.40k|                                 container_t **dst) {
  165|  2.40k|    int totalCardinality = src_1->cardinality + src_2->cardinality;
  166|  2.40k|    if (totalCardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (166:9): [True: 2.28k, False: 112]
  ------------------
  167|  2.28k|        *dst = array_container_create_given_capacity(totalCardinality);
  168|  2.28k|        if (*dst != NULL) {
  ------------------
  |  Branch (168:13): [True: 2.28k, False: 0]
  ------------------
  169|  2.28k|            array_container_union(src_1, src_2, CAST_array(*dst));
  ------------------
  |  |   54|  2.28k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.28k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  170|  2.28k|        } else {
  171|      0|            return true;  // otherwise failure won't be caught
  172|      0|        }
  173|  2.28k|        return false;  // not a bitset
  174|  2.28k|    }
  175|    112|    *dst = bitset_container_create();
  176|    112|    bool returnval = true;  // expect a bitset
  177|    112|    if (*dst != NULL) {
  ------------------
  |  Branch (177:9): [True: 112, False: 0]
  ------------------
  178|    112|        bitset_container_t *ourbitset = CAST_bitset(*dst);
  ------------------
  |  |   52|    112|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    112|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  179|    112|        bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
  180|    112|        ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
  181|    112|            ourbitset->words, src_1->cardinality, src_2->array,
  182|    112|            src_2->cardinality);
  183|    112|        if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (183:13): [True: 68, False: 44]
  ------------------
  184|       |            // need to convert!
  185|     68|            *dst = array_container_from_bitset(ourbitset);
  186|     68|            bitset_container_free(ourbitset);
  187|       |            returnval = false;  // not going to be a bitset
  188|     68|        }
  189|    112|    }
  190|    112|    return returnval;
  191|  2.40k|}
array_array_container_inplace_union:
  195|  2.58k|                                         container_t **dst) {
  196|  2.58k|    int totalCardinality = src_1->cardinality + src_2->cardinality;
  197|  2.58k|    *dst = NULL;
  198|  2.58k|    if (totalCardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (198:9): [True: 2.50k, False: 84]
  ------------------
  199|  2.50k|        if (src_1->capacity < totalCardinality) {
  ------------------
  |  Branch (199:13): [True: 2.50k, False: 0]
  ------------------
  200|  2.50k|            *dst = array_container_create_given_capacity(
  201|  2.50k|                2 * totalCardinality);  // be purposefully generous
  202|  2.50k|            if (*dst != NULL) {
  ------------------
  |  Branch (202:17): [True: 2.50k, False: 0]
  ------------------
  203|  2.50k|                array_container_union(src_1, src_2, CAST_array(*dst));
  ------------------
  |  |   54|  2.50k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  2.50k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  204|  2.50k|            } else {
  205|      0|                return true;  // otherwise failure won't be caught
  206|      0|            }
  207|  2.50k|            return false;  // not a bitset
  208|  2.50k|        } else {
  209|      0|            memmove(src_1->array + src_2->cardinality, src_1->array,
  210|      0|                    src_1->cardinality * sizeof(uint16_t));
  211|       |            // In theory, we could use fast_union_uint16, but it is unsafe. It
  212|       |            // fails with Intel compilers in particular.
  213|       |            // https://github.com/RoaringBitmap/CRoaring/pull/452
  214|       |            // See report https://github.com/RoaringBitmap/CRoaring/issues/476
  215|      0|            src_1->cardinality = (int32_t)union_uint16(
  216|      0|                src_1->array + src_2->cardinality, src_1->cardinality,
  217|      0|                src_2->array, src_2->cardinality, src_1->array);
  218|      0|            return false;  // not a bitset
  219|      0|        }
  220|  2.50k|    }
  221|     84|    *dst = bitset_container_create();
  222|     84|    bool returnval = true;  // expect a bitset
  223|     84|    if (*dst != NULL) {
  ------------------
  |  Branch (223:9): [True: 84, False: 0]
  ------------------
  224|     84|        bitset_container_t *ourbitset = CAST_bitset(*dst);
  ------------------
  |  |   52|     84|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|     84|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  225|     84|        bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality);
  226|     84|        ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
  227|     84|            ourbitset->words, src_1->cardinality, src_2->array,
  228|     84|            src_2->cardinality);
  229|     84|        if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (229:13): [True: 84, False: 0]
  ------------------
  230|       |            // need to convert!
  231|     84|            if (src_1->capacity < ourbitset->cardinality) {
  ------------------
  |  Branch (231:17): [True: 84, False: 0]
  ------------------
  232|     84|                array_container_grow(src_1, ourbitset->cardinality, false);
  233|     84|            }
  234|       |
  235|     84|            bitset_extract_setbits_uint16(ourbitset->words,
  236|     84|                                          BITSET_CONTAINER_SIZE_IN_WORDS,
  237|     84|                                          src_1->array, 0);
  238|     84|            src_1->cardinality = ourbitset->cardinality;
  239|     84|            *dst = src_1;
  240|     84|            bitset_container_free(ourbitset);
  241|       |            returnval = false;  // not going to be a bitset
  242|     84|        }
  243|     84|    }
  244|     84|    return returnval;
  245|  2.58k|}

array_bitset_container_xor:
   25|    700|                                container_t **dst) {
   26|    700|    bitset_container_t *result = bitset_container_create();
   27|    700|    bitset_container_copy(src_2, result);
   28|    700|    result->cardinality = (int32_t)bitset_flip_list_withcard(
   29|    700|        result->words, result->cardinality, src_1->array, src_1->cardinality);
   30|       |
   31|       |    // do required type conversions.
   32|    700|    if (result->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (32:9): [True: 140, False: 560]
  ------------------
   33|    140|        *dst = array_container_from_bitset(result);
   34|    140|        bitset_container_free(result);
   35|    140|        return false;  // not bitset
   36|    140|    }
   37|    560|    *dst = result;
   38|       |    return true;  // bitset
   39|    700|}
run_bitset_container_xor:
   63|    731|                              container_t **dst) {
   64|    731|    bitset_container_t *result = bitset_container_create();
   65|       |
   66|    731|    bitset_container_copy(src_2, result);
   67|  58.7k|    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
  ------------------
  |  Branch (67:30): [True: 58.0k, False: 731]
  ------------------
   68|  58.0k|        rle16_t rle = src_1->runs[rlepos];
   69|  58.0k|        bitset_flip_range(result->words, rle.value,
   70|  58.0k|                          rle.value + rle.length + UINT32_C(1));
   71|  58.0k|    }
   72|    731|    result->cardinality = bitset_container_compute_cardinality(result);
   73|       |
   74|    731|    if (result->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (74:9): [True: 139, False: 592]
  ------------------
   75|    139|        *dst = array_container_from_bitset(result);
   76|    139|        bitset_container_free(result);
   77|    139|        return false;  // not bitset
   78|    139|    }
   79|    592|    *dst = result;
   80|       |    return true;  // bitset
   81|    731|}
array_run_container_xor:
  105|  2.34k|                            const run_container_t *src_2, container_t **dst) {
  106|       |    // semi following Java XOR implementation as of May 2016
  107|       |    // the C OR implementation works quite differently and can return a run
  108|       |    // container
  109|       |    // TODO could optimize for full run containers.
  110|       |
  111|       |    // use of lazy following Java impl.
  112|  2.34k|    const int arbitrary_threshold = 32;
  113|  2.34k|    if (src_1->cardinality < arbitrary_threshold) {
  ------------------
  |  Branch (113:9): [True: 931, False: 1.41k]
  ------------------
  114|    931|        run_container_t *ans = run_container_create();
  115|    931|        array_run_container_lazy_xor(src_1, src_2, ans);  // keeps runs.
  116|    931|        uint8_t typecode_after;
  117|    931|        *dst =
  118|    931|            convert_run_to_efficient_container_and_free(ans, &typecode_after);
  119|    931|        return typecode_after;
  120|    931|    }
  121|       |
  122|  1.41k|    int card = run_container_cardinality(src_2);
  123|  1.41k|    if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (123:9): [True: 1.17k, False: 241]
  ------------------
  124|       |        // Java implementation works with the array, xoring the run elements via
  125|       |        // iterator
  126|  1.17k|        array_container_t *temp = array_container_from_run(src_2);
  127|  1.17k|        bool ret_is_bitset = array_array_container_xor(temp, src_1, dst);
  128|  1.17k|        array_container_free(temp);
  129|  1.17k|        return ret_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   48|      5|#define BITSET_CONTAINER_TYPE 1
  ------------------
                      return ret_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  1.16k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (129:16): [True: 5, False: 1.16k]
  ------------------
  130|       |
  131|  1.17k|    } else {  // guess that it will end up as a bitset
  132|    241|        bitset_container_t *result = bitset_container_from_run(src_2);
  133|    241|        bool is_bitset = bitset_array_container_ixor(result, src_1, dst);
  134|       |        // any necessary type conversion has been done by the ixor
  135|    241|        int retval = (is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
  ------------------
  |  |   48|    223|#define BITSET_CONTAINER_TYPE 1
  ------------------
                      int retval = (is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
  ------------------
  |  |   49|     18|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  |  Branch (135:23): [True: 223, False: 18]
  ------------------
  136|    241|        return retval;
  137|    241|    }
  138|  1.41k|}
array_run_container_lazy_xor:
  147|    931|                                  run_container_t *dst) {
  148|    931|    run_container_grow(dst, src_1->cardinality + src_2->n_runs, false);
  149|    931|    int32_t rlepos = 0;
  150|    931|    int32_t arraypos = 0;
  151|    931|    dst->n_runs = 0;
  152|       |
  153|  28.9k|    while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
  ------------------
  |  Branch (153:12): [True: 28.3k, False: 548]
  |  Branch (153:40): [True: 28.0k, False: 383]
  ------------------
  154|  28.0k|        if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
  ------------------
  |  Branch (154:13): [True: 16.9k, False: 11.0k]
  ------------------
  155|  16.9k|            run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
  156|  16.9k|                                                 src_2->runs[rlepos].length);
  157|  16.9k|            rlepos++;
  158|  16.9k|        } else {
  159|  11.0k|            run_container_smart_append_exclusive(dst, src_1->array[arraypos],
  160|  11.0k|                                                 0);
  161|  11.0k|            arraypos++;
  162|  11.0k|        }
  163|  28.0k|    }
  164|  3.21k|    while (arraypos < src_1->cardinality) {
  ------------------
  |  Branch (164:12): [True: 2.28k, False: 931]
  ------------------
  165|  2.28k|        run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0);
  166|  2.28k|        arraypos++;
  167|  2.28k|    }
  168|  7.94k|    while (rlepos < src_2->n_runs) {
  ------------------
  |  Branch (168:12): [True: 7.01k, False: 931]
  ------------------
  169|  7.01k|        run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
  170|  7.01k|                                             src_2->runs[rlepos].length);
  171|  7.01k|        rlepos++;
  172|  7.01k|    }
  173|    931|}
run_run_container_xor:
  180|  60.2k|                          const run_container_t *src_2, container_t **dst) {
  181|  60.2k|    run_container_t *ans = run_container_create();
  182|  60.2k|    run_container_xor(src_1, src_2, ans);
  183|  60.2k|    uint8_t typecode_after;
  184|  60.2k|    *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
  185|  60.2k|    return typecode_after;
  186|  60.2k|}
array_array_container_xor:
  198|  8.77k|                               container_t **dst) {
  199|  8.77k|    int totalCardinality =
  200|  8.77k|        src_1->cardinality + src_2->cardinality;  // upper bound
  201|  8.77k|    if (totalCardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (201:9): [True: 8.25k, False: 521]
  ------------------
  202|  8.25k|        *dst = array_container_create_given_capacity(totalCardinality);
  203|  8.25k|        array_container_xor(src_1, src_2, CAST_array(*dst));
  ------------------
  |  |   54|  8.25k|#define CAST_array(c) CAST(array_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  8.25k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  204|  8.25k|        return false;  // not a bitset
  205|  8.25k|    }
  206|    521|    *dst = bitset_container_from_array(src_1);
  207|    521|    bool returnval = true;  // expect a bitset
  208|    521|    bitset_container_t *ourbitset = CAST_bitset(*dst);
  ------------------
  |  |   52|    521|#define CAST_bitset(c) CAST(bitset_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|    521|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
  209|    521|    ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard(
  210|    521|        ourbitset->words, src_1->cardinality, src_2->array, src_2->cardinality);
  211|    521|    if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (211:9): [True: 504, False: 17]
  ------------------
  212|       |        // need to convert!
  213|    504|        *dst = array_container_from_bitset(ourbitset);
  214|    504|        bitset_container_free(ourbitset);
  215|    504|        returnval = false;  // not going to be a bitset
  216|    504|    }
  217|       |
  218|    521|    return returnval;
  219|  8.77k|}
bitset_array_container_ixor:
  284|    241|                                 container_t **dst) {
  285|    241|    *dst = src_1;
  286|    241|    src_1->cardinality = (uint32_t)bitset_flip_list_withcard(
  287|    241|        src_1->words, src_1->cardinality, src_2->array, src_2->cardinality);
  288|       |
  289|    241|    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (289:9): [True: 18, False: 223]
  ------------------
  290|     18|        *dst = array_container_from_bitset(src_1);
  291|     18|        bitset_container_free(src_1);
  292|     18|        return false;  // not bitset
  293|     18|    } else
  294|    223|        return true;
  295|    241|}
bitset_bitset_container_ixor:
  304|  1.53k|                                  container_t **dst) {
  305|  1.53k|    int card = bitset_container_xor(src_1, src_2, src_1);
  306|  1.53k|    if (card <= DEFAULT_MAX_SIZE) {
  ------------------
  |  Branch (306:9): [True: 1.53k, False: 0]
  ------------------
  307|  1.53k|        *dst = array_container_from_bitset(src_1);
  308|  1.53k|        bitset_container_free(src_1);
  309|  1.53k|        return false;  // not bitset
  310|  1.53k|    } else {
  311|      0|        *dst = src_1;
  312|       |        return true;
  313|      0|    }
  314|  1.53k|}
array_bitset_container_ixor:
  318|     92|                                 container_t **dst) {
  319|       |    bool ans = array_bitset_container_xor(src_1, src_2, dst);
  320|     92|    array_container_free(src_1);
  321|     92|    return ans;
  322|     92|}
run_bitset_container_ixor:
  333|     12|                               container_t **dst) {
  334|       |    bool ans = run_bitset_container_xor(src_1, src_2, dst);
  335|     12|    run_container_free(src_1);
  336|     12|    return ans;
  337|     12|}
bitset_run_container_ixor:
  341|     69|                               container_t **dst) {
  342|       |    bool ans = run_bitset_container_xor(src_2, src_1, dst);
  343|     69|    bitset_container_free(src_1);
  344|     69|    return ans;
  345|     69|}
array_run_container_ixor:
  352|    266|                             const run_container_t *src_2, container_t **dst) {
  353|    266|    int ans = array_run_container_xor(src_1, src_2, dst);
  354|    266|    array_container_free(src_1);
  355|    266|    return ans;
  356|    266|}
run_array_container_ixor:
  360|     57|                             container_t **dst) {
  361|     57|    int ans = array_run_container_xor(src_2, src_1, dst);
  362|     57|    run_container_free(src_1);
  363|     57|    return ans;
  364|     57|}
array_array_container_ixor:
  368|  5.20k|                                container_t **dst) {
  369|       |    bool ans = array_array_container_xor(src_1, src_2, dst);
  370|  5.20k|    array_container_free(src_1);
  371|  5.20k|    return ans;
  372|  5.20k|}
run_run_container_ixor:
  375|  59.5k|                           container_t **dst) {
  376|  59.5k|    int ans = run_run_container_xor(src_1, src_2, dst);
  377|  59.5k|    run_container_free(src_1);
  378|  59.5k|    return ans;
  379|  59.5k|}

run_container_add:
   39|   420k|bool run_container_add(run_container_t *run, uint16_t pos) {
   40|   420k|    int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
   41|   420k|    if (index >= 0) return false;  // already there
  ------------------
  |  Branch (41:9): [True: 190k, False: 229k]
  ------------------
   42|   229k|    index = -index - 2;            // points to preceding value, possibly -1
   43|   229k|    if (index >= 0) {              // possible match
  ------------------
  |  Branch (43:9): [True: 229k, False: 357]
  ------------------
   44|   229k|        int32_t offset = pos - run->runs[index].value;
   45|   229k|        int32_t le = run->runs[index].length;
   46|   229k|        if (offset <= le) return false;  // already there
  ------------------
  |  Branch (46:13): [True: 135k, False: 93.6k]
  ------------------
   47|  93.6k|        if (offset == le + 1) {
  ------------------
  |  Branch (47:13): [True: 14.6k, False: 78.9k]
  ------------------
   48|       |            // we may need to fuse
   49|  14.6k|            if (index + 1 < run->n_runs) {
  ------------------
  |  Branch (49:17): [True: 14.3k, False: 277]
  ------------------
   50|  14.3k|                if (run->runs[index + 1].value == pos + 1) {
  ------------------
  |  Branch (50:21): [True: 6.71k, False: 7.67k]
  ------------------
   51|       |                    // indeed fusion is needed
   52|  6.71k|                    run->runs[index].length = run->runs[index + 1].value +
   53|  6.71k|                                              run->runs[index + 1].length -
   54|  6.71k|                                              run->runs[index].value;
   55|  6.71k|                    recoverRoomAtIndex(run, (uint16_t)(index + 1));
   56|  6.71k|                    return true;
   57|  6.71k|                }
   58|  14.3k|            }
   59|  7.95k|            run->runs[index].length++;
   60|  7.95k|            return true;
   61|  14.6k|        }
   62|  78.9k|        if (index + 1 < run->n_runs) {
  ------------------
  |  Branch (62:13): [True: 77.0k, False: 1.91k]
  ------------------
   63|       |            // we may need to fuse
   64|  77.0k|            if (run->runs[index + 1].value == pos + 1) {
  ------------------
  |  Branch (64:17): [True: 33.6k, False: 43.4k]
  ------------------
   65|       |                // indeed fusion is needed
   66|  33.6k|                run->runs[index + 1].value = pos;
   67|  33.6k|                run->runs[index + 1].length = run->runs[index + 1].length + 1;
   68|  33.6k|                return true;
   69|  33.6k|            }
   70|  77.0k|        }
   71|  78.9k|    }
   72|  45.6k|    if (index == -1) {
  ------------------
  |  Branch (72:9): [True: 357, False: 45.3k]
  ------------------
   73|       |        // we may need to extend the first run
   74|    357|        if (0 < run->n_runs) {
  ------------------
  |  Branch (74:13): [True: 357, False: 0]
  ------------------
   75|    357|            if (run->runs[0].value == pos + 1) {
  ------------------
  |  Branch (75:17): [True: 232, False: 125]
  ------------------
   76|    232|                run->runs[0].length++;
   77|    232|                run->runs[0].value--;
   78|    232|                return true;
   79|    232|            }
   80|    357|        }
   81|    357|    }
   82|  45.4k|    makeRoomAtIndex(run, (uint16_t)(index + 1));
   83|  45.4k|    run->runs[index + 1].value = pos;
   84|  45.4k|    run->runs[index + 1].length = 0;
   85|       |    return true;
   86|  45.6k|}
run_container_create_given_capacity:
   89|   401k|run_container_t *run_container_create_given_capacity(int32_t size) {
   90|   401k|    run_container_t *run;
   91|       |    /* Allocate the run container itself. */
   92|   401k|    if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) ==
  ------------------
  |  Branch (92:9): [True: 0, False: 401k]
  ------------------
   93|   401k|        NULL) {
   94|      0|        return NULL;
   95|      0|    }
   96|   401k|    if (size <= 0) {  // we don't want to rely on malloc(0)
  ------------------
  |  Branch (96:9): [True: 127k, False: 274k]
  ------------------
   97|   127k|        run->runs = NULL;
   98|   274k|    } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) *
  ------------------
  |  Branch (98:16): [True: 0, False: 274k]
  ------------------
   99|   274k|                                                      size)) == NULL) {
  100|      0|        roaring_free(run);
  101|      0|        return NULL;
  102|      0|    }
  103|   401k|    run->capacity = size;
  104|   401k|    run->n_runs = 0;
  105|   401k|    return run;
  106|   401k|}
run_container_shrink_to_fit:
  108|  2.64k|int run_container_shrink_to_fit(run_container_t *src) {
  109|  2.64k|    if (src->n_runs == src->capacity) return 0;  // nothing to do
  ------------------
  |  Branch (109:9): [True: 2.64k, False: 0]
  ------------------
  110|      0|    int savings = src->capacity - src->n_runs;
  111|      0|    src->capacity = src->n_runs;
  112|      0|    rle16_t *oldruns = src->runs;
  113|      0|    src->runs =
  114|      0|        (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t));
  115|      0|    if (src->runs == NULL) roaring_free(oldruns);  // should never happen?
  ------------------
  |  Branch (115:9): [True: 0, False: 0]
  ------------------
  116|      0|    return savings;
  117|  2.64k|}
run_container_create:
  119|   127k|run_container_t *run_container_create(void) {
  120|   127k|    return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE);
  121|   127k|}
run_container_clone:
  124|   178k|run_container_t *run_container_clone(const run_container_t *src) {
  125|   178k|    run_container_t *run = run_container_create_given_capacity(src->capacity);
  126|   178k|    if (run == NULL) return NULL;
  ------------------
  |  Branch (126:9): [True: 0, False: 178k]
  ------------------
  127|   178k|    run->capacity = src->capacity;
  128|   178k|    run->n_runs = src->n_runs;
  129|   178k|    memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t));
  130|   178k|    return run;
  131|   178k|}
run_container_free:
  193|   401k|void run_container_free(run_container_t *run) {
  194|   401k|    if (run == NULL) return;
  ------------------
  |  Branch (194:9): [True: 0, False: 401k]
  ------------------
  195|   401k|    roaring_free(run->runs);
  196|   401k|    roaring_free(run);
  197|   401k|}
run_container_grow:
  199|   132k|void run_container_grow(run_container_t *run, int32_t min, bool copy) {
  200|   132k|    int32_t newCapacity = (run->capacity == 0)   ? RUN_DEFAULT_INIT_SIZE
  ------------------
  |  Branch (200:27): [True: 127k, False: 5.25k]
  ------------------
  201|   132k|                          : run->capacity < 64   ? run->capacity * 2
  ------------------
  |  Branch (201:29): [True: 3.65k, False: 1.59k]
  ------------------
  202|  5.25k|                          : run->capacity < 1024 ? run->capacity * 3 / 2
  ------------------
  |  Branch (202:29): [True: 1.59k, False: 0]
  ------------------
  203|  1.59k|                                                 : run->capacity * 5 / 4;
  204|   132k|    if (newCapacity < min) newCapacity = min;
  ------------------
  |  Branch (204:9): [True: 129k, False: 3.30k]
  ------------------
  205|   132k|    run->capacity = newCapacity;
  206|   132k|    assert(run->capacity >= min);
  207|   132k|    if (copy) {
  ------------------
  |  Branch (207:9): [True: 5.25k, False: 127k]
  ------------------
  208|  5.25k|        rle16_t *oldruns = run->runs;
  209|  5.25k|        run->runs = (rle16_t *)roaring_realloc(oldruns,
  210|  5.25k|                                               run->capacity * sizeof(rle16_t));
  211|  5.25k|        if (run->runs == NULL) roaring_free(oldruns);
  ------------------
  |  Branch (211:13): [True: 0, False: 5.25k]
  ------------------
  212|   127k|    } else {
  213|   127k|        roaring_free(run->runs);
  214|   127k|        run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
  215|   127k|    }
  216|       |    // We may have run->runs == NULL.
  217|   132k|}
run_container_copy:
  220|  57.7k|void run_container_copy(const run_container_t *src, run_container_t *dst) {
  221|  57.7k|    const int32_t n_runs = src->n_runs;
  222|  57.7k|    if (src->n_runs > dst->capacity) {
  ------------------
  |  Branch (222:9): [True: 57.7k, False: 40]
  ------------------
  223|       |        run_container_grow(dst, n_runs, false);
  224|  57.7k|    }
  225|  57.7k|    dst->n_runs = n_runs;
  226|  57.7k|    memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs);
  227|  57.7k|}
run_container_union:
  232|    725|                         const run_container_t *src_2, run_container_t *dst) {
  233|       |    // TODO: this could be a lot more efficient
  234|       |
  235|       |    // we start out with inexpensive checks
  236|    725|    const bool if1 = run_container_is_full(src_1);
  237|    725|    const bool if2 = run_container_is_full(src_2);
  238|    725|    if (if1 || if2) {
  ------------------
  |  Branch (238:9): [True: 0, False: 725]
  |  Branch (238:16): [True: 15, False: 710]
  ------------------
  239|     15|        if (if1) {
  ------------------
  |  Branch (239:13): [True: 0, False: 15]
  ------------------
  240|      0|            run_container_copy(src_1, dst);
  241|      0|            return;
  242|      0|        }
  243|     15|        if (if2) {
  ------------------
  |  Branch (243:13): [True: 15, False: 0]
  ------------------
  244|     15|            run_container_copy(src_2, dst);
  245|     15|            return;
  246|     15|        }
  247|     15|    }
  248|    710|    const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
  249|    710|    if (dst->capacity < neededcapacity)
  ------------------
  |  Branch (249:9): [True: 710, False: 0]
  ------------------
  250|    710|        run_container_grow(dst, neededcapacity, false);
  251|    710|    dst->n_runs = 0;
  252|    710|    int32_t rlepos = 0;
  253|    710|    int32_t xrlepos = 0;
  254|       |
  255|    710|    rle16_t previousrle;
  256|    710|    if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
  ------------------
  |  Branch (256:9): [True: 650, False: 60]
  ------------------
  257|    650|        previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
  258|    650|        rlepos++;
  259|    650|    } else {
  260|     60|        previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
  261|     60|        xrlepos++;
  262|     60|    }
  263|       |
  264|  88.4k|    while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
  ------------------
  |  Branch (264:12): [True: 88.1k, False: 312]
  |  Branch (264:41): [True: 87.7k, False: 398]
  ------------------
  265|  87.7k|        rle16_t newrl;
  266|  87.7k|        if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
  ------------------
  |  Branch (266:13): [True: 47.3k, False: 40.4k]
  ------------------
  267|  47.3k|            newrl = src_1->runs[rlepos];
  268|  47.3k|            rlepos++;
  269|  47.3k|        } else {
  270|  40.4k|            newrl = src_2->runs[xrlepos];
  271|  40.4k|            xrlepos++;
  272|  40.4k|        }
  273|  87.7k|        run_container_append(dst, newrl, &previousrle);
  274|  87.7k|    }
  275|  1.79k|    while (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (275:12): [True: 1.08k, False: 710]
  ------------------
  276|  1.08k|        run_container_append(dst, src_2->runs[xrlepos], &previousrle);
  277|  1.08k|        xrlepos++;
  278|  1.08k|    }
  279|  14.1k|    while (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (279:12): [True: 13.4k, False: 710]
  ------------------
  280|  13.4k|        run_container_append(dst, src_1->runs[rlepos], &previousrle);
  281|  13.4k|        rlepos++;
  282|  13.4k|    }
  283|    710|}
run_container_union_inplace:
  288|  1.74k|                                 const run_container_t *src_2) {
  289|       |    // TODO: this could be a lot more efficient
  290|       |
  291|       |    // we start out with inexpensive checks
  292|  1.74k|    const bool if1 = run_container_is_full(src_1);
  293|  1.74k|    const bool if2 = run_container_is_full(src_2);
  294|  1.74k|    if (if1 || if2) {
  ------------------
  |  Branch (294:9): [True: 0, False: 1.74k]
  |  Branch (294:16): [True: 40, False: 1.70k]
  ------------------
  295|     40|        if (if1) {
  ------------------
  |  Branch (295:13): [True: 0, False: 40]
  ------------------
  296|      0|            return;
  297|      0|        }
  298|     40|        if (if2) {
  ------------------
  |  Branch (298:13): [True: 40, False: 0]
  ------------------
  299|     40|            run_container_copy(src_2, src_1);
  300|     40|            return;
  301|     40|        }
  302|     40|    }
  303|       |    // we move the data to the end of the current array
  304|  1.70k|    const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
  305|  1.70k|    const int32_t neededcapacity = maxoutput + src_1->n_runs;
  306|  1.70k|    if (src_1->capacity < neededcapacity)
  ------------------
  |  Branch (306:9): [True: 1.70k, False: 0]
  ------------------
  307|  1.70k|        run_container_grow(src_1, neededcapacity, true);
  308|  1.70k|    memmove(src_1->runs + maxoutput, src_1->runs,
  309|  1.70k|            src_1->n_runs * sizeof(rle16_t));
  310|  1.70k|    rle16_t *inputsrc1 = src_1->runs + maxoutput;
  311|  1.70k|    const int32_t input1nruns = src_1->n_runs;
  312|  1.70k|    src_1->n_runs = 0;
  313|  1.70k|    int32_t rlepos = 0;
  314|  1.70k|    int32_t xrlepos = 0;
  315|       |
  316|  1.70k|    rle16_t previousrle;
  317|  1.70k|    if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
  ------------------
  |  Branch (317:9): [True: 1.61k, False: 91]
  ------------------
  318|  1.61k|        previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
  319|  1.61k|        rlepos++;
  320|  1.61k|    } else {
  321|     91|        previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
  322|     91|        xrlepos++;
  323|     91|    }
  324|   195k|    while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
  ------------------
  |  Branch (324:12): [True: 195k, False: 141]
  |  Branch (324:41): [True: 193k, False: 1.56k]
  ------------------
  325|   193k|        rle16_t newrl;
  326|   193k|        if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
  ------------------
  |  Branch (326:13): [True: 92.3k, False: 101k]
  ------------------
  327|  92.3k|            newrl = inputsrc1[rlepos];
  328|  92.3k|            rlepos++;
  329|   101k|        } else {
  330|   101k|            newrl = src_2->runs[xrlepos];
  331|   101k|            xrlepos++;
  332|   101k|        }
  333|   193k|        run_container_append(src_1, newrl, &previousrle);
  334|   193k|    }
  335|  4.40k|    while (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (335:12): [True: 2.69k, False: 1.70k]
  ------------------
  336|  2.69k|        run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
  337|  2.69k|        xrlepos++;
  338|  2.69k|    }
  339|  9.62k|    while (rlepos < input1nruns) {
  ------------------
  |  Branch (339:12): [True: 7.92k, False: 1.70k]
  ------------------
  340|  7.92k|        run_container_append(src_1, inputsrc1[rlepos], &previousrle);
  341|  7.92k|        rlepos++;
  342|  7.92k|    }
  343|  1.70k|}
run_container_xor:
  349|  60.2k|                       const run_container_t *src_2, run_container_t *dst) {
  350|       |    // don't bother to convert xor with full range into negation
  351|       |    // since negation is implemented similarly
  352|       |
  353|  60.2k|    const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
  354|  60.2k|    if (dst->capacity < neededcapacity)
  ------------------
  |  Branch (354:9): [True: 60.2k, False: 0]
  ------------------
  355|  60.2k|        run_container_grow(dst, neededcapacity, false);
  356|       |
  357|  60.2k|    int32_t pos1 = 0;
  358|  60.2k|    int32_t pos2 = 0;
  359|  60.2k|    dst->n_runs = 0;
  360|       |
  361|   247k|    while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) {
  ------------------
  |  Branch (361:12): [True: 187k, False: 59.9k]
  |  Branch (361:38): [True: 186k, False: 335]
  ------------------
  362|   186k|        if (src_1->runs[pos1].value <= src_2->runs[pos2].value) {
  ------------------
  |  Branch (362:13): [True: 125k, False: 61.7k]
  ------------------
  363|   125k|            run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
  364|   125k|                                                 src_1->runs[pos1].length);
  365|   125k|            pos1++;
  366|   125k|        } else {
  367|  61.7k|            run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
  368|  61.7k|                                                 src_2->runs[pos2].length);
  369|  61.7k|            pos2++;
  370|  61.7k|        }
  371|   186k|    }
  372|  74.7k|    while (pos1 < src_1->n_runs) {
  ------------------
  |  Branch (372:12): [True: 14.5k, False: 60.2k]
  ------------------
  373|  14.5k|        run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
  374|  14.5k|                                             src_1->runs[pos1].length);
  375|  14.5k|        pos1++;
  376|  14.5k|    }
  377|       |
  378|   122k|    while (pos2 < src_2->n_runs) {
  ------------------
  |  Branch (378:12): [True: 61.9k, False: 60.2k]
  ------------------
  379|  61.9k|        run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
  380|  61.9k|                                             src_2->runs[pos2].length);
  381|  61.9k|        pos2++;
  382|  61.9k|    }
  383|  60.2k|}
run_container_intersection:
  389|  61.3k|                                run_container_t *dst) {
  390|  61.3k|    const bool if1 = run_container_is_full(src_1);
  391|  61.3k|    const bool if2 = run_container_is_full(src_2);
  392|  61.3k|    if (if1 || if2) {
  ------------------
  |  Branch (392:9): [True: 57.6k, False: 3.69k]
  |  Branch (392:16): [True: 15, False: 3.67k]
  ------------------
  393|  57.6k|        if (if1) {
  ------------------
  |  Branch (393:13): [True: 57.6k, False: 15]
  ------------------
  394|  57.6k|            run_container_copy(src_2, dst);
  395|  57.6k|            return;
  396|  57.6k|        }
  397|     15|        if (if2) {
  ------------------
  |  Branch (397:13): [True: 15, False: 0]
  ------------------
  398|     15|            run_container_copy(src_1, dst);
  399|     15|            return;
  400|     15|        }
  401|     15|    }
  402|       |    // TODO: this could be a lot more efficient, could use SIMD optimizations
  403|  3.67k|    const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
  404|  3.67k|    if (dst->capacity < neededcapacity)
  ------------------
  |  Branch (404:9): [True: 3.67k, False: 0]
  ------------------
  405|  3.67k|        run_container_grow(dst, neededcapacity, false);
  406|  3.67k|    dst->n_runs = 0;
  407|  3.67k|    int32_t rlepos = 0;
  408|  3.67k|    int32_t xrlepos = 0;
  409|  3.67k|    int32_t start = src_1->runs[rlepos].value;
  410|  3.67k|    int32_t end = start + src_1->runs[rlepos].length + 1;
  411|  3.67k|    int32_t xstart = src_2->runs[xrlepos].value;
  412|  3.67k|    int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
  413|   192k|    while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
  ------------------
  |  Branch (413:12): [True: 189k, False: 3.17k]
  |  Branch (413:40): [True: 189k, False: 506]
  ------------------
  414|   189k|        if (end <= xstart) {
  ------------------
  |  Branch (414:13): [True: 46.4k, False: 142k]
  ------------------
  415|  46.4k|            ++rlepos;
  416|  46.4k|            if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (416:17): [True: 46.3k, False: 92]
  ------------------
  417|  46.3k|                start = src_1->runs[rlepos].value;
  418|  46.3k|                end = start + src_1->runs[rlepos].length + 1;
  419|  46.3k|            }
  420|   142k|        } else if (xend <= start) {
  ------------------
  |  Branch (420:20): [True: 13.7k, False: 128k]
  ------------------
  421|  13.7k|            ++xrlepos;
  422|  13.7k|            if (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (422:17): [True: 13.6k, False: 53]
  ------------------
  423|  13.6k|                xstart = src_2->runs[xrlepos].value;
  424|  13.6k|                xend = xstart + src_2->runs[xrlepos].length + 1;
  425|  13.6k|            }
  426|   128k|        } else {  // they overlap
  427|   128k|            const int32_t lateststart = start > xstart ? start : xstart;
  ------------------
  |  Branch (427:41): [True: 17.0k, False: 111k]
  ------------------
  428|   128k|            int32_t earliestend;
  429|   128k|            if (end == xend) {  // improbable
  ------------------
  |  Branch (429:17): [True: 96.5k, False: 32.3k]
  ------------------
  430|  96.5k|                earliestend = end;
  431|  96.5k|                rlepos++;
  432|  96.5k|                xrlepos++;
  433|  96.5k|                if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (433:21): [True: 93.6k, False: 2.92k]
  ------------------
  434|  93.6k|                    start = src_1->runs[rlepos].value;
  435|  93.6k|                    end = start + src_1->runs[rlepos].length + 1;
  436|  93.6k|                }
  437|  96.5k|                if (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (437:21): [True: 93.3k, False: 3.17k]
  ------------------
  438|  93.3k|                    xstart = src_2->runs[xrlepos].value;
  439|  93.3k|                    xend = xstart + src_2->runs[xrlepos].length + 1;
  440|  93.3k|                }
  441|  96.5k|            } else if (end < xend) {
  ------------------
  |  Branch (441:24): [True: 17.3k, False: 15.0k]
  ------------------
  442|  17.3k|                earliestend = end;
  443|  17.3k|                rlepos++;
  444|  17.3k|                if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (444:21): [True: 17.2k, False: 151]
  ------------------
  445|  17.2k|                    start = src_1->runs[rlepos].value;
  446|  17.2k|                    end = start + src_1->runs[rlepos].length + 1;
  447|  17.2k|                }
  448|       |
  449|  17.3k|            } else {  // end > xend
  450|  15.0k|                earliestend = xend;
  451|  15.0k|                xrlepos++;
  452|  15.0k|                if (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (452:21): [True: 14.8k, False: 131]
  ------------------
  453|  14.8k|                    xstart = src_2->runs[xrlepos].value;
  454|  14.8k|                    xend = xstart + src_2->runs[xrlepos].length + 1;
  455|  14.8k|                }
  456|  15.0k|            }
  457|   128k|            dst->runs[dst->n_runs].value = (uint16_t)lateststart;
  458|   128k|            dst->runs[dst->n_runs].length =
  459|   128k|                (uint16_t)(earliestend - lateststart - 1);
  460|   128k|            dst->n_runs++;
  461|   128k|        }
  462|   189k|    }
  463|  3.67k|}
run_container_intersection_cardinality:
  467|  2.90k|                                           const run_container_t *src_2) {
  468|  2.90k|    const bool if1 = run_container_is_full(src_1);
  469|  2.90k|    const bool if2 = run_container_is_full(src_2);
  470|  2.90k|    if (if1 || if2) {
  ------------------
  |  Branch (470:9): [True: 0, False: 2.90k]
  |  Branch (470:16): [True: 60, False: 2.84k]
  ------------------
  471|     60|        if (if1) {
  ------------------
  |  Branch (471:13): [True: 0, False: 60]
  ------------------
  472|      0|            return run_container_cardinality(src_2);
  473|      0|        }
  474|     60|        if (if2) {
  ------------------
  |  Branch (474:13): [True: 60, False: 0]
  ------------------
  475|     60|            return run_container_cardinality(src_1);
  476|     60|        }
  477|     60|    }
  478|  2.84k|    int answer = 0;
  479|  2.84k|    int32_t rlepos = 0;
  480|  2.84k|    int32_t xrlepos = 0;
  481|  2.84k|    int32_t start = src_1->runs[rlepos].value;
  482|  2.84k|    int32_t end = start + src_1->runs[rlepos].length + 1;
  483|  2.84k|    int32_t xstart = src_2->runs[xrlepos].value;
  484|  2.84k|    int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
  485|   307k|    while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
  ------------------
  |  Branch (485:12): [True: 305k, False: 1.94k]
  |  Branch (485:40): [True: 304k, False: 892]
  ------------------
  486|   304k|        if (end <= xstart) {
  ------------------
  |  Branch (486:13): [True: 71.9k, False: 232k]
  ------------------
  487|  71.9k|            ++rlepos;
  488|  71.9k|            if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (488:17): [True: 71.5k, False: 368]
  ------------------
  489|  71.5k|                start = src_1->runs[rlepos].value;
  490|  71.5k|                end = start + src_1->runs[rlepos].length + 1;
  491|  71.5k|            }
  492|   232k|        } else if (xend <= start) {
  ------------------
  |  Branch (492:20): [True: 54.8k, False: 177k]
  ------------------
  493|  54.8k|            ++xrlepos;
  494|  54.8k|            if (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (494:17): [True: 54.6k, False: 212]
  ------------------
  495|  54.6k|                xstart = src_2->runs[xrlepos].value;
  496|  54.6k|                xend = xstart + src_2->runs[xrlepos].length + 1;
  497|  54.6k|            }
  498|   177k|        } else {  // they overlap
  499|   177k|            const int32_t lateststart = start > xstart ? start : xstart;
  ------------------
  |  Branch (499:41): [True: 68.2k, False: 109k]
  ------------------
  500|   177k|            int32_t earliestend;
  501|   177k|            if (end == xend) {  // improbable
  ------------------
  |  Branch (501:17): [True: 88.7k, False: 88.8k]
  ------------------
  502|  88.7k|                earliestend = end;
  503|  88.7k|                rlepos++;
  504|  88.7k|                xrlepos++;
  505|  88.7k|                if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (505:21): [True: 87.7k, False: 976]
  ------------------
  506|  87.7k|                    start = src_1->runs[rlepos].value;
  507|  87.7k|                    end = start + src_1->runs[rlepos].length + 1;
  508|  87.7k|                }
  509|  88.7k|                if (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (509:21): [True: 87.5k, False: 1.15k]
  ------------------
  510|  87.5k|                    xstart = src_2->runs[xrlepos].value;
  511|  87.5k|                    xend = xstart + src_2->runs[xrlepos].length + 1;
  512|  87.5k|                }
  513|  88.8k|            } else if (end < xend) {
  ------------------
  |  Branch (513:24): [True: 69.4k, False: 19.3k]
  ------------------
  514|  69.4k|                earliestend = end;
  515|  69.4k|                rlepos++;
  516|  69.4k|                if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (516:21): [True: 68.8k, False: 604]
  ------------------
  517|  68.8k|                    start = src_1->runs[rlepos].value;
  518|  68.8k|                    end = start + src_1->runs[rlepos].length + 1;
  519|  68.8k|                }
  520|       |
  521|  69.4k|            } else {  // end > xend
  522|  19.3k|                earliestend = xend;
  523|  19.3k|                xrlepos++;
  524|  19.3k|                if (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (524:21): [True: 19.1k, False: 196]
  ------------------
  525|  19.1k|                    xstart = src_2->runs[xrlepos].value;
  526|  19.1k|                    xend = xstart + src_2->runs[xrlepos].length + 1;
  527|  19.1k|                }
  528|  19.3k|            }
  529|   177k|            answer += earliestend - lateststart;
  530|   177k|        }
  531|   304k|    }
  532|  2.84k|    return answer;
  533|  2.90k|}
run_container_intersect:
  536|    725|                             const run_container_t *src_2) {
  537|    725|    const bool if1 = run_container_is_full(src_1);
  538|    725|    const bool if2 = run_container_is_full(src_2);
  539|    725|    if (if1 || if2) {
  ------------------
  |  Branch (539:9): [True: 0, False: 725]
  |  Branch (539:16): [True: 15, False: 710]
  ------------------
  540|     15|        if (if1) {
  ------------------
  |  Branch (540:13): [True: 0, False: 15]
  ------------------
  541|      0|            return !run_container_empty(src_2);
  542|      0|        }
  543|     15|        if (if2) {
  ------------------
  |  Branch (543:13): [True: 15, False: 0]
  ------------------
  544|     15|            return !run_container_empty(src_1);
  545|     15|        }
  546|     15|    }
  547|    710|    int32_t rlepos = 0;
  548|    710|    int32_t xrlepos = 0;
  549|    710|    int32_t start = src_1->runs[rlepos].value;
  550|    710|    int32_t end = start + src_1->runs[rlepos].length + 1;
  551|    710|    int32_t xstart = src_2->runs[xrlepos].value;
  552|    710|    int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
  553|  3.79k|    while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
  ------------------
  |  Branch (553:12): [True: 3.77k, False: 17]
  |  Branch (553:40): [True: 3.77k, False: 6]
  ------------------
  554|  3.77k|        if (end <= xstart) {
  ------------------
  |  Branch (554:13): [True: 2.61k, False: 1.15k]
  ------------------
  555|  2.61k|            ++rlepos;
  556|  2.61k|            if (rlepos < src_1->n_runs) {
  ------------------
  |  Branch (556:17): [True: 2.60k, False: 17]
  ------------------
  557|  2.60k|                start = src_1->runs[rlepos].value;
  558|  2.60k|                end = start + src_1->runs[rlepos].length + 1;
  559|  2.60k|            }
  560|  2.61k|        } else if (xend <= start) {
  ------------------
  |  Branch (560:20): [True: 467, False: 687]
  ------------------
  561|    467|            ++xrlepos;
  562|    467|            if (xrlepos < src_2->n_runs) {
  ------------------
  |  Branch (562:17): [True: 461, False: 6]
  ------------------
  563|    461|                xstart = src_2->runs[xrlepos].value;
  564|    461|                xend = xstart + src_2->runs[xrlepos].length + 1;
  565|    461|            }
  566|    687|        } else {  // they overlap
  567|    687|            return true;
  568|    687|        }
  569|  3.77k|    }
  570|     23|    return false;
  571|    710|}
run_container_andnot:
  576|  1.18k|                          const run_container_t *src_2, run_container_t *dst) {
  577|       |    // following Java implementation as of June 2016
  578|       |
  579|  1.18k|    if (dst->capacity < src_1->n_runs + src_2->n_runs)
  ------------------
  |  Branch (579:9): [True: 1.18k, False: 0]
  ------------------
  580|  1.18k|        run_container_grow(dst, src_1->n_runs + src_2->n_runs, false);
  581|       |
  582|  1.18k|    dst->n_runs = 0;
  583|       |
  584|  1.18k|    int rlepos1 = 0;
  585|  1.18k|    int rlepos2 = 0;
  586|  1.18k|    int32_t start = src_1->runs[rlepos1].value;
  587|  1.18k|    int32_t end = start + src_1->runs[rlepos1].length + 1;
  588|  1.18k|    int32_t start2 = src_2->runs[rlepos2].value;
  589|  1.18k|    int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
  590|       |
  591|   169k|    while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
  ------------------
  |  Branch (591:12): [True: 168k, False: 718]
  |  Branch (591:41): [True: 168k, False: 462]
  ------------------
  592|   168k|        if (end <= start2) {
  ------------------
  |  Branch (592:13): [True: 24.8k, False: 143k]
  ------------------
  593|       |            // output the first run
  594|  24.8k|            dst->runs[dst->n_runs++] =
  595|  24.8k|                CROARING_MAKE_RLE16(start, end - start - 1);
  ------------------
  |  |   60|  24.8k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  596|  24.8k|            rlepos1++;
  597|  24.8k|            if (rlepos1 < src_1->n_runs) {
  ------------------
  |  Branch (597:17): [True: 24.8k, False: 92]
  ------------------
  598|  24.8k|                start = src_1->runs[rlepos1].value;
  599|  24.8k|                end = start + src_1->runs[rlepos1].length + 1;
  600|  24.8k|            }
  601|   143k|        } else if (end2 <= start) {
  ------------------
  |  Branch (601:20): [True: 69.6k, False: 73.7k]
  ------------------
  602|       |            // exit the second run
  603|  69.6k|            rlepos2++;
  604|  69.6k|            if (rlepos2 < src_2->n_runs) {
  ------------------
  |  Branch (604:17): [True: 69.1k, False: 462]
  ------------------
  605|  69.1k|                start2 = src_2->runs[rlepos2].value;
  606|  69.1k|                end2 = start2 + src_2->runs[rlepos2].length + 1;
  607|  69.1k|            }
  608|  73.7k|        } else {
  609|  73.7k|            if (start < start2) {
  ------------------
  |  Branch (609:17): [True: 20.4k, False: 53.3k]
  ------------------
  610|  20.4k|                dst->runs[dst->n_runs++] =
  611|  20.4k|                    CROARING_MAKE_RLE16(start, start2 - start - 1);
  ------------------
  |  |   60|  20.4k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  612|  20.4k|            }
  613|  73.7k|            if (end2 < end) {
  ------------------
  |  Branch (613:17): [True: 19.8k, False: 53.8k]
  ------------------
  614|  19.8k|                start = end2;
  615|  53.8k|            } else {
  616|  53.8k|                rlepos1++;
  617|  53.8k|                if (rlepos1 < src_1->n_runs) {
  ------------------
  |  Branch (617:21): [True: 53.2k, False: 626]
  ------------------
  618|  53.2k|                    start = src_1->runs[rlepos1].value;
  619|  53.2k|                    end = start + src_1->runs[rlepos1].length + 1;
  620|  53.2k|                }
  621|  53.8k|            }
  622|  73.7k|        }
  623|   168k|    }
  624|  1.18k|    if (rlepos1 < src_1->n_runs) {
  ------------------
  |  Branch (624:9): [True: 462, False: 718]
  ------------------
  625|    462|        dst->runs[dst->n_runs++] = CROARING_MAKE_RLE16(start, end - start - 1);
  ------------------
  |  |   60|    462|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  626|    462|        rlepos1++;
  627|    462|        if (rlepos1 < src_1->n_runs) {
  ------------------
  |  Branch (627:13): [True: 195, False: 267]
  ------------------
  628|    195|            memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1,
  629|    195|                   sizeof(rle16_t) * (src_1->n_runs - rlepos1));
  630|    195|            dst->n_runs += src_1->n_runs - rlepos1;
  631|    195|        }
  632|    462|    }
  633|  1.18k|}
run_container_read:
  739|     56|                           const char *buf) {
  740|     56|    (void)cardinality;
  741|     56|    uint16_t cast_16;
  742|     56|    memcpy(&cast_16, buf, sizeof(uint16_t));
  743|     56|    container->n_runs = croaring_letoh16(cast_16);
  ------------------
  |  |  516|     56|#define croaring_letoh16(x) croaring_htole16(x)
  |  |  ------------------
  |  |  |  |  510|     56|#define croaring_htole16(x) (x)
  |  |  ------------------
  ------------------
  744|     56|    if (container->n_runs > container->capacity)
  ------------------
  |  Branch (744:9): [True: 8, False: 48]
  ------------------
  745|      8|        run_container_grow(container, container->n_runs, false);
  746|     56|    if (container->n_runs > 0) {
  ------------------
  |  Branch (746:9): [True: 8, False: 48]
  ------------------
  747|       |#if CROARING_IS_BIG_ENDIAN
  748|       |        const char *in = buf + sizeof(uint16_t);
  749|       |        for (int32_t i = 0; i < container->n_runs; ++i) {
  750|       |            uint16_t v_le, l_le;
  751|       |            memcpy(&v_le, in, sizeof(uint16_t));
  752|       |            memcpy(&l_le, in + sizeof(uint16_t), sizeof(uint16_t));
  753|       |            container->runs[i].value = croaring_letoh16(v_le);
  754|       |            container->runs[i].length = croaring_letoh16(l_le);
  755|       |            in += sizeof(rle16_t);
  756|       |        }
  757|       |#else
  758|      8|        memcpy(container->runs, buf + sizeof(uint16_t),
  759|      8|               container->n_runs * sizeof(rle16_t));
  760|      8|#endif
  761|      8|    }
  762|     56|    return run_container_size_in_bytes(container);
  763|     56|}
run_container_iterate:
  766|  60.1k|                           roaring_iterator iterator, void *ptr) {
  767|   165k|    for (int i = 0; i < cont->n_runs; ++i) {
  ------------------
  |  Branch (767:21): [True: 104k, False: 60.1k]
  ------------------
  768|   104k|        uint32_t run_start = base + cont->runs[i].value;
  769|   104k|        uint16_t le = cont->runs[i].length;
  770|       |
  771|  3.84G|        for (int j = 0; j <= le; ++j)
  ------------------
  |  Branch (771:25): [True: 3.84G, False: 104k]
  ------------------
  772|  3.84G|            if (!iterator(run_start + j, ptr)) return false;
  ------------------
  |  Branch (772:17): [True: 0, False: 3.84G]
  ------------------
  773|   104k|    }
  774|  60.1k|    return true;
  775|  60.1k|}
run_container_is_subset:
  792|    234|                             const run_container_t *container2) {
  793|    234|    int i1 = 0, i2 = 0;
  794|    722|    while (i1 < container1->n_runs && i2 < container2->n_runs) {
  ------------------
  |  Branch (794:12): [True: 722, False: 0]
  |  Branch (794:39): [True: 706, False: 16]
  ------------------
  795|    706|        int start1 = container1->runs[i1].value;
  796|    706|        int stop1 = start1 + container1->runs[i1].length;
  797|    706|        int start2 = container2->runs[i2].value;
  798|    706|        int stop2 = start2 + container2->runs[i2].length;
  799|    706|        if (start1 < start2) {
  ------------------
  |  Branch (799:13): [True: 218, False: 488]
  ------------------
  800|    218|            return false;
  801|    488|        } else {  // start1 >= start2
  802|    488|            if (stop1 < stop2) {
  ------------------
  |  Branch (802:17): [True: 0, False: 488]
  ------------------
  803|      0|                i1++;
  804|    488|            } else if (stop1 == stop2) {
  ------------------
  |  Branch (804:24): [True: 0, False: 488]
  ------------------
  805|      0|                i1++;
  806|      0|                i2++;
  807|    488|            } else {  // stop1 > stop2
  808|    488|                i2++;
  809|    488|            }
  810|    488|        }
  811|    706|    }
  812|     16|    if (i1 == container1->n_runs) {
  ------------------
  |  Branch (812:9): [True: 0, False: 16]
  ------------------
  813|      0|        return true;
  814|     16|    } else {
  815|       |        return false;
  816|     16|    }
  817|     16|}
run_container_smart_append_exclusive:
  826|   403k|                                          const uint16_t length) {
  827|   403k|    int old_end;
  828|   403k|    rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
  ------------------
  |  Branch (828:25): [True: 330k, False: 72.9k]
  ------------------
  829|   403k|    rle16_t *appended_last_run = src->runs + src->n_runs;
  830|       |
  831|   403k|    if (!src->n_runs ||
  ------------------
  |  Branch (831:9): [True: 72.9k, False: 330k]
  ------------------
  832|   330k|        (start > (old_end = last_run->value + last_run->length + 1))) {
  ------------------
  |  Branch (832:9): [True: 181k, False: 148k]
  ------------------
  833|   254k|        *appended_last_run = CROARING_MAKE_RLE16(start, length);
  ------------------
  |  |   60|   254k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  834|   254k|        src->n_runs++;
  835|   254k|        return;
  836|   254k|    }
  837|   148k|    if (old_end == start) {
  ------------------
  |  Branch (837:9): [True: 4.06k, False: 144k]
  ------------------
  838|       |        // we merge
  839|  4.06k|        last_run->length += (length + 1);
  840|  4.06k|        return;
  841|  4.06k|    }
  842|   144k|    int new_end = start + length + 1;
  843|       |
  844|   144k|    if (start == last_run->value) {
  ------------------
  |  Branch (844:9): [True: 114k, False: 30.7k]
  ------------------
  845|       |        // wipe out previous
  846|   114k|        if (new_end < old_end) {
  ------------------
  |  Branch (846:13): [True: 7.07k, False: 107k]
  ------------------
  847|  7.07k|            *last_run = CROARING_MAKE_RLE16(new_end, old_end - new_end - 1);
  ------------------
  |  |   60|  7.07k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  848|  7.07k|            return;
  849|   107k|        } else if (new_end > old_end) {
  ------------------
  |  Branch (849:20): [True: 2.43k, False: 104k]
  ------------------
  850|  2.43k|            *last_run = CROARING_MAKE_RLE16(old_end, new_end - old_end - 1);
  ------------------
  |  |   60|  2.43k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  851|  2.43k|            return;
  852|   104k|        } else {
  853|   104k|            src->n_runs--;
  854|   104k|            return;
  855|   104k|        }
  856|   114k|    }
  857|  30.7k|    last_run->length = start - last_run->value - 1;
  858|  30.7k|    if (new_end < old_end) {
  ------------------
  |  Branch (858:9): [True: 24.4k, False: 6.25k]
  ------------------
  859|  24.4k|        *appended_last_run =
  860|  24.4k|            CROARING_MAKE_RLE16(new_end, old_end - new_end - 1);
  ------------------
  |  |   60|  24.4k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  861|  24.4k|        src->n_runs++;
  862|  24.4k|    } else if (new_end > old_end) {
  ------------------
  |  Branch (862:16): [True: 1.13k, False: 5.12k]
  ------------------
  863|  1.13k|        *appended_last_run =
  864|  1.13k|            CROARING_MAKE_RLE16(old_end, new_end - old_end - 1);
  ------------------
  |  |   60|  1.13k|    (rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
  ------------------
  865|  1.13k|        src->n_runs++;
  866|  1.13k|    }
  867|  30.7k|}
run_container_select:
  871|  2.64k|                          uint32_t *element) {
  872|  14.3k|    for (int i = 0; i < container->n_runs; i++) {
  ------------------
  |  Branch (872:21): [True: 14.2k, False: 95]
  ------------------
  873|  14.2k|        uint16_t length = container->runs[i].length;
  874|  14.2k|        if (rank <= *start_rank + length) {
  ------------------
  |  Branch (874:13): [True: 2.54k, False: 11.7k]
  ------------------
  875|  2.54k|            uint16_t value = container->runs[i].value;
  876|  2.54k|            *element = value + rank - (*start_rank);
  877|  2.54k|            return true;
  878|  2.54k|        } else
  879|  11.7k|            *start_rank += length + 1;
  880|  14.2k|    }
  881|     95|    return false;
  882|  2.64k|}
run_container_rank:
  884|  2.64k|int run_container_rank(const run_container_t *container, uint16_t x) {
  885|  2.64k|    int sum = 0;
  886|  2.64k|    uint32_t x32 = x;
  887|  11.6k|    for (int i = 0; i < container->n_runs; i++) {
  ------------------
  |  Branch (887:21): [True: 11.6k, False: 55]
  ------------------
  888|  11.6k|        uint32_t startpoint = container->runs[i].value;
  889|  11.6k|        uint32_t length = container->runs[i].length;
  890|  11.6k|        uint32_t endpoint = length + startpoint;
  891|  11.6k|        if (x <= endpoint) {
  ------------------
  |  Branch (891:13): [True: 2.58k, False: 9.02k]
  ------------------
  892|  2.58k|            if (x < startpoint) break;
  ------------------
  |  Branch (892:17): [True: 279, False: 2.30k]
  ------------------
  893|  2.30k|            return sum + (x32 - startpoint) + 1;
  894|  9.02k|        } else {
  895|  9.02k|            sum += length + 1;
  896|  9.02k|        }
  897|  11.6k|    }
  898|    334|    return sum;
  899|  2.64k|}
_avx2_run_container_to_uint32_array:
 1021|  60.7k|                                        uint32_t base) {
 1022|  60.7k|    int outpos = 0;
 1023|  60.7k|    uint32_t *out = (uint32_t *)vout;
 1024|       |
 1025|   211k|    for (int i = 0; i < cont->n_runs; ++i) {
  ------------------
  |  Branch (1025:21): [True: 150k, False: 60.7k]
  ------------------
 1026|   150k|        uint32_t run_start = base + cont->runs[i].value;
 1027|   150k|        uint16_t le = cont->runs[i].length;
 1028|   150k|        if (le < 8) {
  ------------------
  |  Branch (1028:13): [True: 85.9k, False: 64.5k]
  ------------------
 1029|   222k|            for (int j = 0; j <= le; ++j) {
  ------------------
  |  Branch (1029:29): [True: 136k, False: 85.9k]
  ------------------
 1030|   136k|                uint32_t val = run_start + j;
 1031|   136k|                memcpy(out + outpos, &val,
 1032|   136k|                       sizeof(uint32_t));  // should be compiled as a MOV on x64
 1033|   136k|                outpos++;
 1034|   136k|            }
 1035|  85.9k|        } else {
 1036|  64.5k|            int j = 0;
 1037|  64.5k|            __m256i run_start_v = _mm256_set1_epi32(run_start);
 1038|       |            // [8,8,8,8....]
 1039|  64.5k|            __m256i inc = _mm256_set1_epi32(8);
 1040|       |            // used for generate sequence:
 1041|       |            // [0, 1, 2, 3...], [8, 9, 10,...]
 1042|  64.5k|            __m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
 1043|   482M|            for (j = 0; j + 8 <= le; j += 8) {
  ------------------
  |  Branch (1043:25): [True: 482M, False: 64.5k]
  ------------------
 1044|   482M|                __m256i val_v = _mm256_add_epi32(run_start_v, delta);
 1045|   482M|                _mm256_storeu_si256((__m256i *)(out + outpos), val_v);
 1046|   482M|                delta = _mm256_add_epi32(inc, delta);
 1047|   482M|                outpos += 8;
 1048|   482M|            }
 1049|   555k|            for (; j <= le; ++j) {
  ------------------
  |  Branch (1049:20): [True: 490k, False: 64.5k]
  ------------------
 1050|   490k|                uint32_t val = run_start + j;
 1051|   490k|                memcpy(out + outpos, &val,
 1052|   490k|                       sizeof(uint32_t));  // should be compiled as a MOV on x64
 1053|   490k|                outpos++;
 1054|   490k|            }
 1055|  64.5k|        }
 1056|   150k|    }
 1057|  60.7k|    return outpos;
 1058|  60.7k|}
run_container_cardinality:
 1077|   540k|int run_container_cardinality(const run_container_t *run) {
 1078|       |    // Empirically AVX-512 is not always faster than AVX2
 1079|       |#if CROARING_COMPILER_SUPPORTS_AVX512 && \
 1080|       |    CROARING_ENABLE_AVX512_RUN_CONTAINER_CARDINALITY
 1081|       |    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
 1082|       |        return _avx512_run_container_cardinality(run);
 1083|       |    } else
 1084|       |#endif
 1085|   540k|        if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (1085:13): [True: 540k, False: 0]
  ------------------
 1086|   540k|        return _avx2_run_container_cardinality(run);
 1087|   540k|    } else {
 1088|      0|        return _scalar_run_container_cardinality(run);
 1089|      0|    }
 1090|   540k|}
run_container_to_uint32_array:
 1111|  60.7k|                                  uint32_t base) {
 1112|  60.7k|    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
  ------------------
  |  Branch (1112:9): [True: 60.7k, False: 0]
  ------------------
 1113|  60.7k|        return _avx2_run_container_to_uint32_array(vout, cont, base);
 1114|  60.7k|    } else {
 1115|      0|        return _scalar_run_container_to_uint32_array(vout, cont, base);
 1116|      0|    }
 1117|  60.7k|}
run.c:_avx2_run_container_cardinality:
  991|   540k|static inline int _avx2_run_container_cardinality(const run_container_t *run) {
  992|   540k|    const int32_t n_runs = run->n_runs;
  993|   540k|    const rle16_t *runs = run->runs;
  994|       |
  995|       |    /* by initializing with n_runs, we omit counting the +1 for each pair. */
  996|   540k|    int sum = n_runs;
  997|   540k|    int32_t k = 0;
  998|   540k|    const int32_t step = sizeof(__m256i) / sizeof(rle16_t);
  999|   540k|    if (n_runs > step) {
  ------------------
  |  Branch (999:9): [True: 34.1k, False: 506k]
  ------------------
 1000|  34.1k|        __m256i total = _mm256_setzero_si256();
 1001|   385k|        for (; k + step <= n_runs; k += step) {
  ------------------
  |  Branch (1001:16): [True: 350k, False: 34.1k]
  ------------------
 1002|   350k|            __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k));
 1003|   350k|            __m256i justlengths = _mm256_srli_epi32(ymm1, 16);
 1004|   350k|            total = _mm256_add_epi32(total, justlengths);
 1005|   350k|        }
 1006|       |        // a store might be faster than extract?
 1007|  34.1k|        uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
 1008|  34.1k|        _mm256_storeu_si256((__m256i *)buffer, total);
 1009|  34.1k|        sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
 1010|  34.1k|               (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
 1011|  34.1k|    }
 1012|  1.12M|    for (; k < n_runs; ++k) {
  ------------------
  |  Branch (1012:12): [True: 588k, False: 540k]
  ------------------
 1013|   588k|        sum += runs[k].length;
 1014|   588k|    }
 1015|       |
 1016|   540k|    return sum;
 1017|   540k|}

croaring_hardware_support:
  321|   964k|int croaring_hardware_support(void) {
  322|   964k|    static
  323|   964k|#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
  324|   964k|        _Atomic
  325|   964k|#endif
  326|   964k|        int support = 0xFFFFFFF;
  327|   964k|    if (support == 0xFFFFFFF) {
  ------------------
  |  Branch (327:9): [True: 1, False: 964k]
  ------------------
  328|      1|        bool has_avx2 = (croaring_detect_supported_architectures() &
  329|      1|                         CROARING_AVX2) == CROARING_AVX2;
  330|      1|        bool has_avx512 = false;
  331|      1|#if CROARING_COMPILER_SUPPORTS_AVX512
  332|      1|        has_avx512 = (croaring_detect_supported_architectures() &
  333|      1|                      CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED;
  334|      1|#endif  // CROARING_COMPILER_SUPPORTS_AVX512
  335|      1|        support = (has_avx2 ? ROARING_SUPPORTS_AVX2 : 0) |
  ------------------
  |  Branch (335:20): [True: 1, False: 0]
  ------------------
  336|      1|                  (has_avx512 ? ROARING_SUPPORTS_AVX512 : 0);
  ------------------
  |  Branch (336:20): [True: 0, False: 1]
  ------------------
  337|      1|    }
  338|   964k|    return support;
  339|   964k|}
isadetection.c:croaring_detect_supported_architectures:
  269|      2|static uint32_t croaring_detect_supported_architectures(void) {
  270|       |    // we use an atomic for thread safety
  271|      2|    static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
  272|      2|    if (buffer == CROARING_UNINITIALIZED) {
  ------------------
  |  Branch (272:9): [True: 1, False: 1]
  ------------------
  273|       |        // atomicity is sufficient
  274|      1|        buffer = dynamic_croaring_detect_supported_architectures();
  275|      1|    }
  276|      2|    return buffer;
  277|      2|}
isadetection.c:dynamic_croaring_detect_supported_architectures:
  154|      1|static inline uint32_t dynamic_croaring_detect_supported_architectures(void) {
  155|      1|    uint32_t eax, ebx, ecx, edx;
  156|      1|    uint32_t host_isa = 0x0;
  157|       |    // Can be found on Intel ISA Reference for CPUID
  158|      1|    static uint32_t cpuid_avx2_bit =
  159|      1|        1 << 5;  ///< @private Bit 5 of EBX for EAX=0x7
  160|      1|    static uint32_t cpuid_bmi1_bit =
  161|      1|        1 << 3;  ///< @private bit 3 of EBX for EAX=0x7
  162|      1|    static uint32_t cpuid_bmi2_bit =
  163|      1|        1 << 8;  ///< @private bit 8 of EBX for EAX=0x7
  164|      1|    static uint32_t cpuid_avx512f_bit =
  165|      1|        1 << 16;  ///< @private bit 16 of EBX for EAX=0x7
  166|      1|    static uint32_t cpuid_avx512dq_bit =
  167|      1|        1 << 17;  ///< @private bit 17 of EBX for EAX=0x7
  168|      1|    static uint32_t cpuid_avx512bw_bit =
  169|      1|        1 << 30;  ///< @private bit 30 of EBX for EAX=0x7
  170|      1|    static uint32_t cpuid_avx512vbmi2_bit =
  171|      1|        1 << 6;  ///< @private bit 6 of ECX for EAX=0x7
  172|      1|    static uint32_t cpuid_avx512bitalg_bit =
  173|      1|        1 << 12;  ///< @private bit 12 of ECX for EAX=0x7
  174|      1|    static uint32_t cpuid_avx512vpopcntdq_bit =
  175|      1|        1 << 14;  ///< @private bit 14 of ECX for EAX=0x7
  176|      1|    static uint64_t cpuid_avx256_saved = 1 << 2;  ///< @private bit 2 = AVX
  177|      1|    static uint64_t cpuid_avx512_saved =
  178|      1|        7 << 5;  ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
  179|      1|    static uint32_t cpuid_sse42_bit =
  180|      1|        1 << 20;  ///< @private bit 20 of ECX for EAX=0x1
  181|      1|    static uint32_t cpuid_osxsave =
  182|      1|        (1 << 26) | (1 << 27);  ///< @private bits 26+27 of ECX for EAX=0x1
  183|      1|    static uint32_t cpuid_pclmulqdq_bit =
  184|      1|        1 << 1;  ///< @private bit  1 of ECX for EAX=0x1
  185|       |
  186|       |    // EBX for EAX=0x1
  187|      1|    eax = 0x1;
  188|      1|    ecx = 0x0;
  189|      1|    cpuid(&eax, &ebx, &ecx, &edx);
  190|       |
  191|      1|    if (ecx & cpuid_sse42_bit) {
  ------------------
  |  Branch (191:9): [True: 1, False: 0]
  ------------------
  192|      1|        host_isa |= CROARING_SSE42;
  193|      1|    } else {
  194|      0|        return host_isa;  // everything after is redundant
  195|      0|    }
  196|       |
  197|      1|    if (ecx & cpuid_pclmulqdq_bit) {
  ------------------
  |  Branch (197:9): [True: 1, False: 0]
  ------------------
  198|      1|        host_isa |= CROARING_PCLMULQDQ;
  199|      1|    }
  200|       |
  201|      1|    if ((ecx & cpuid_osxsave) != cpuid_osxsave) {
  ------------------
  |  Branch (201:9): [True: 0, False: 1]
  ------------------
  202|      0|        return host_isa;
  203|      0|    }
  204|       |
  205|       |    // xgetbv for checking if the OS saves registers
  206|      1|    uint64_t xcr0 = xgetbv();
  207|       |
  208|      1|    if ((xcr0 & cpuid_avx256_saved) == 0) {
  ------------------
  |  Branch (208:9): [True: 0, False: 1]
  ------------------
  209|      0|        return host_isa;
  210|      0|    }
  211|       |
  212|       |    // ECX for EAX=0x7
  213|      1|    eax = 0x7;
  214|      1|    ecx = 0x0;
  215|      1|    cpuid(&eax, &ebx, &ecx, &edx);
  216|      1|    if (ebx & cpuid_avx2_bit) {
  ------------------
  |  Branch (216:9): [True: 1, False: 0]
  ------------------
  217|      1|        host_isa |= CROARING_AVX2;
  218|      1|    }
  219|      1|    if (ebx & cpuid_bmi1_bit) {
  ------------------
  |  Branch (219:9): [True: 1, False: 0]
  ------------------
  220|      1|        host_isa |= CROARING_BMI1;
  221|      1|    }
  222|       |
  223|      1|    if (ebx & cpuid_bmi2_bit) {
  ------------------
  |  Branch (223:9): [True: 1, False: 0]
  ------------------
  224|      1|        host_isa |= CROARING_BMI2;
  225|      1|    }
  226|       |
  227|      1|    if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) {
  ------------------
  |  Branch (227:9): [True: 1, False: 0]
  ------------------
  228|      1|        return host_isa;
  229|      1|    }
  230|       |
  231|      0|    if (ebx & cpuid_avx512f_bit) {
  ------------------
  |  Branch (231:9): [True: 0, False: 0]
  ------------------
  232|      0|        host_isa |= CROARING_AVX512F;
  233|      0|    }
  234|       |
  235|      0|    if (ebx & cpuid_avx512bw_bit) {
  ------------------
  |  Branch (235:9): [True: 0, False: 0]
  ------------------
  236|      0|        host_isa |= CROARING_AVX512BW;
  237|      0|    }
  238|       |
  239|      0|    if (ebx & cpuid_avx512dq_bit) {
  ------------------
  |  Branch (239:9): [True: 0, False: 0]
  ------------------
  240|      0|        host_isa |= CROARING_AVX512DQ;
  241|      0|    }
  242|       |
  243|      0|    if (ecx & cpuid_avx512vbmi2_bit) {
  ------------------
  |  Branch (243:9): [True: 0, False: 0]
  ------------------
  244|      0|        host_isa |= CROARING_AVX512VBMI2;
  245|      0|    }
  246|       |
  247|      0|    if (ecx & cpuid_avx512bitalg_bit) {
  ------------------
  |  Branch (247:9): [True: 0, False: 0]
  ------------------
  248|      0|        host_isa |= CROARING_AVX512BITALG;
  249|      0|    }
  250|       |
  251|      0|    if (ecx & cpuid_avx512vpopcntdq_bit) {
  ------------------
  |  Branch (251:9): [True: 0, False: 0]
  ------------------
  252|      0|        host_isa |= CROARING_AVX512VPOPCNTDQ;
  253|      0|    }
  254|       |
  255|      0|    return host_isa;
  256|      1|}
isadetection.c:cpuid:
  115|      2|                         uint32_t *edx) {
  116|       |#if CROARING_REGULAR_VISUAL_STUDIO
  117|       |    int cpu_info[4];
  118|       |    __cpuidex(cpu_info, *eax, *ecx);
  119|       |    *eax = cpu_info[0];
  120|       |    *ebx = cpu_info[1];
  121|       |    *ecx = cpu_info[2];
  122|       |    *edx = cpu_info[3];
  123|       |#elif (defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)) || \
  124|       |    defined(__FILC__)
  125|       |    uint32_t level = *eax;
  126|       |    __get_cpuid(level, eax, ebx, ecx, edx);
  127|       |#else
  128|      2|    uint32_t a = *eax, b, c = *ecx, d;
  129|      2|    __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  130|      2|    *eax = a;
  131|      2|    *ebx = b;
  132|      2|    *ecx = c;
  133|      2|    *edx = d;
  134|      2|#endif
  135|      2|}
isadetection.c:xgetbv:
  137|      1|static inline uint64_t xgetbv(void) {
  138|       |#if defined(_MSC_VER)
  139|       |    return _xgetbv(0);
  140|       |#elif defined(__FILC__)
  141|       |    return zxgetbv();
  142|       |#else
  143|      1|    uint32_t xcr0_lo, xcr0_hi;
  144|      1|    __asm__("xgetbv\n\t" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
  145|      1|    return xcr0_lo | ((uint64_t)xcr0_hi << 32);
  146|      1|#endif
  147|      1|}

roaring_malloc:
   48|  1.42M|void* roaring_malloc(size_t n) { return global_memory_hook.malloc(n); }
roaring_realloc:
   50|   120k|void* roaring_realloc(void* p, size_t new_sz) {
   51|   120k|    return global_memory_hook.realloc(p, new_sz);
   52|   120k|}
roaring_free:
   58|  1.67M|void roaring_free(void* p) { global_memory_hook.free(p); }
roaring_aligned_malloc:
   60|   296k|void* roaring_aligned_malloc(size_t alignment, size_t size) {
   61|   296k|    return global_memory_hook.aligned_malloc(alignment, size);
   62|   296k|}
roaring_aligned_free:
   64|   296k|void roaring_aligned_free(void* p) { global_memory_hook.aligned_free(p); }
memory.c:roaring_bitmap_aligned_malloc:
   11|   296k|static void* roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
   12|   296k|    void* p;
   13|       |#ifdef _MSC_VER
   14|       |    p = _aligned_malloc(size, alignment);
   15|       |#elif defined(__MINGW32__) || defined(__MINGW64__)
   16|       |    p = __mingw_aligned_malloc(size, alignment);
   17|       |#else
   18|       |    // somehow, if this is used before including "x86intrin.h", it creates an
   19|       |    // implicit defined warning.
   20|   296k|    if (posix_memalign(&p, alignment, size) != 0) return NULL;
  ------------------
  |  Branch (20:9): [True: 0, False: 296k]
  ------------------
   21|   296k|#endif
   22|   296k|    return p;
   23|   296k|}
memory.c:roaring_bitmap_aligned_free:
   25|   296k|static void roaring_bitmap_aligned_free(void* memblock) {
   26|       |#ifdef _MSC_VER
   27|       |    _aligned_free(memblock);
   28|       |#elif defined(__MINGW32__) || defined(__MINGW64__)
   29|       |    __mingw_aligned_free(memblock);
   30|       |#else
   31|   296k|    free(memblock);
   32|   296k|#endif
   33|   296k|}

roaring_bitmap_create_with_capacity:
   86|  25.9k|roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
   87|  25.9k|    roaring_bitmap_t *ans =
   88|  25.9k|        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
   89|  25.9k|    if (!ans) {
  ------------------
  |  Branch (89:9): [True: 0, False: 25.9k]
  ------------------
   90|      0|        return NULL;
   91|      0|    }
   92|  25.9k|    bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap);
   93|  25.9k|    if (!is_ok) {
  ------------------
  |  Branch (93:9): [True: 0, False: 25.9k]
  ------------------
   94|      0|        roaring_free(ans);
   95|      0|        return NULL;
   96|      0|    }
   97|  25.9k|    return ans;
   98|  25.9k|}
roaring_bitmap_init_with_capacity:
  100|  32.5k|bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) {
  101|  32.5k|    return ra_init_with_capacity(&r->high_low_container, cap);
  102|  32.5k|}
roaring_bitmap_add_many:
  135|  19.5k|                             const uint32_t *vals) {
  136|  19.5k|    uint32_t val;
  137|  19.5k|    const uint32_t *start = vals;
  138|  19.5k|    const uint32_t *end = vals + n_args;
  139|  19.5k|    const uint32_t *current_val = start;
  140|       |
  141|  19.5k|    if (n_args == 0) {
  ------------------
  |  Branch (141:9): [True: 0, False: 19.5k]
  ------------------
  142|      0|        return;
  143|      0|    }
  144|       |
  145|  19.5k|    uint8_t typecode;
  146|  19.5k|    int idx;
  147|  19.5k|    container_t *container;
  148|  19.5k|    val = *current_val;
  149|  19.5k|    container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
  150|  19.5k|    roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16),
  151|  19.5k|                                      typecode};
  152|       |
  153|  8.16M|    for (; current_val != end; current_val++) {
  ------------------
  |  Branch (153:12): [True: 8.14M, False: 19.5k]
  ------------------
  154|  8.14M|        memcpy(&val, current_val, sizeof(val));
  155|  8.14M|        add_bulk_impl(r, &context, val);
  156|  8.14M|    }
  157|  19.5k|}
roaring_bitmap_add_range_closed:
  252|  2.86k|                                     uint32_t max) {
  253|  2.86k|    if (min > max) {
  ------------------
  |  Branch (253:9): [True: 0, False: 2.86k]
  ------------------
  254|      0|        return;
  255|      0|    }
  256|       |
  257|  2.86k|    roaring_array_t *ra = &r->high_low_container;
  258|       |
  259|  2.86k|    uint32_t min_key = min >> 16;
  260|  2.86k|    uint32_t max_key = max >> 16;
  261|       |
  262|  2.86k|    int32_t num_required_containers = max_key - min_key + 1;
  263|  2.86k|    int32_t suffix_length =
  264|  2.86k|        count_greater(ra->keys, ra->size, (uint16_t)max_key);
  265|  2.86k|    int32_t prefix_length =
  266|  2.86k|        count_less(ra->keys, ra->size - suffix_length, (uint16_t)min_key);
  267|  2.86k|    int32_t common_length = ra->size - prefix_length - suffix_length;
  268|       |
  269|  2.86k|    if (num_required_containers > common_length) {
  ------------------
  |  Branch (269:9): [True: 1.32k, False: 1.54k]
  ------------------
  270|  1.32k|        ra_shift_tail(ra, suffix_length,
  271|  1.32k|                      num_required_containers - common_length);
  272|  1.32k|    }
  273|       |
  274|  2.86k|    int32_t src = prefix_length + common_length - 1;
  275|  2.86k|    int32_t dst = ra->size - suffix_length - 1;
  276|  80.6k|    for (uint32_t key = max_key; key != min_key - 1;
  ------------------
  |  Branch (276:34): [True: 77.8k, False: 2.86k]
  ------------------
  277|  77.8k|         key--) {  // beware of min_key==0
  278|  77.8k|        uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
  ------------------
  |  Branch (278:34): [True: 2.86k, False: 74.9k]
  ------------------
  279|  77.8k|        uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
  ------------------
  |  Branch (279:34): [True: 2.86k, False: 74.9k]
  ------------------
  280|  77.8k|        container_t *new_container;
  281|  77.8k|        uint8_t new_type;
  282|       |
  283|  77.8k|        if (src >= 0 && ra->keys[src] == key) {
  ------------------
  |  Branch (283:13): [True: 77.8k, False: 0]
  |  Branch (283:25): [True: 3.27k, False: 74.5k]
  ------------------
  284|  3.27k|            ra_unshare_container_at_index(ra, (uint16_t)src);
  285|  3.27k|            new_container =
  286|  3.27k|                container_add_range(ra->containers[src], ra->typecodes[src],
  287|  3.27k|                                    container_min, container_max, &new_type);
  288|  3.27k|            if (new_container != ra->containers[src]) {
  ------------------
  |  Branch (288:17): [True: 2.23k, False: 1.04k]
  ------------------
  289|  2.23k|                container_free(ra->containers[src], ra->typecodes[src]);
  290|  2.23k|            }
  291|  3.27k|            src--;
  292|  74.5k|        } else {
  293|  74.5k|            new_container = container_from_range(&new_type, container_min,
  294|  74.5k|                                                 container_max + 1, 1);
  295|  74.5k|        }
  296|  77.8k|        ra_replace_key_and_container_at_index(ra, dst, (uint16_t)key,
  297|  77.8k|                                              new_container, new_type);
  298|  77.8k|        dst--;
  299|  77.8k|    }
  300|  2.86k|}
roaring_bitmap_remove_range_closed:
  303|  7.52k|                                        uint32_t max) {
  304|  7.52k|    if (min > max) {
  ------------------
  |  Branch (304:9): [True: 785, False: 6.74k]
  ------------------
  305|    785|        return;
  306|    785|    }
  307|       |
  308|  6.74k|    roaring_array_t *ra = &r->high_low_container;
  309|       |
  310|  6.74k|    uint32_t min_key = min >> 16;
  311|  6.74k|    uint32_t max_key = max >> 16;
  312|       |
  313|  6.74k|    int32_t src = count_less(ra->keys, ra->size, (uint16_t)min_key);
  314|  6.74k|    int32_t dst = src;
  315|  29.3k|    while (src < ra->size && ra->keys[src] <= max_key) {
  ------------------
  |  Branch (315:12): [True: 24.9k, False: 4.44k]
  |  Branch (315:30): [True: 22.6k, False: 2.29k]
  ------------------
  316|  22.6k|        uint32_t container_min =
  317|  22.6k|            (min_key == ra->keys[src]) ? (min & 0xffff) : 0;
  ------------------
  |  Branch (317:13): [True: 6.24k, False: 16.3k]
  ------------------
  318|  22.6k|        uint32_t container_max =
  319|  22.6k|            (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff;
  ------------------
  |  Branch (319:13): [True: 5.96k, False: 16.6k]
  ------------------
  320|  22.6k|        ra_unshare_container_at_index(ra, (uint16_t)src);
  321|  22.6k|        container_t *new_container;
  322|  22.6k|        uint8_t new_type;
  323|  22.6k|        new_container =
  324|  22.6k|            container_remove_range(ra->containers[src], ra->typecodes[src],
  325|  22.6k|                                   container_min, container_max, &new_type);
  326|  22.6k|        if (new_container != ra->containers[src]) {
  ------------------
  |  Branch (326:13): [True: 16.4k, False: 6.20k]
  ------------------
  327|  16.4k|            container_free(ra->containers[src], ra->typecodes[src]);
  328|  16.4k|        }
  329|  22.6k|        if (new_container) {
  ------------------
  |  Branch (329:13): [True: 6.49k, False: 16.1k]
  ------------------
  330|  6.49k|            ra_replace_key_and_container_at_index(ra, dst, ra->keys[src],
  331|  6.49k|                                                  new_container, new_type);
  332|  6.49k|            dst++;
  333|  6.49k|        }
  334|  22.6k|        src++;
  335|  22.6k|    }
  336|  6.74k|    if (src > dst) {
  ------------------
  |  Branch (336:9): [True: 713, False: 6.02k]
  ------------------
  337|    713|        ra_shift_tail(ra, ra->size - src, dst - src);
  338|    713|    }
  339|  6.74k|}
roaring_bitmap_copy:
  525|    126|roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
  526|    126|    roaring_bitmap_t *ans =
  527|    126|        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
  528|    126|    if (!ans) {
  ------------------
  |  Branch (528:9): [True: 0, False: 126]
  ------------------
  529|      0|        return NULL;
  530|      0|    }
  531|    126|    if (!ra_init_with_capacity(  // allocation of list of containers can fail
  ------------------
  |  Branch (531:9): [True: 0, False: 126]
  ------------------
  532|    126|            &ans->high_low_container, r->high_low_container.size)) {
  533|      0|        roaring_free(ans);
  534|      0|        return NULL;
  535|      0|    }
  536|    126|    if (!ra_overwrite(  // memory allocation of individual containers may fail
  ------------------
  |  Branch (536:9): [True: 0, False: 126]
  ------------------
  537|    126|            &r->high_low_container, &ans->high_low_container, is_cow(r))) {
  538|      0|        roaring_bitmap_free(ans);  // overwrite should leave in freeable state
  539|      0|        return NULL;
  540|      0|    }
  541|    126|    roaring_bitmap_set_copy_on_write(ans, is_cow(r));
  542|    126|    return ans;
  543|    126|}
roaring_bitmap_overwrite:
  546|  14.5k|                              const roaring_bitmap_t *src) {
  547|  14.5k|    roaring_bitmap_set_copy_on_write(dest, is_cow(src));
  548|  14.5k|    return ra_overwrite(&src->high_low_container, &dest->high_low_container,
  549|  14.5k|                        is_cow(src));
  550|  14.5k|}
roaring_bitmap_clear:
  562|  65.1k|void roaring_bitmap_clear(roaring_bitmap_t *r) {
  563|  65.1k|    ra_reset(&r->high_low_container);
  564|  65.1k|}
roaring_bitmap_add:
  566|  6.51k|void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
  567|  6.51k|    roaring_array_t *ra = &r->high_low_container;
  568|       |
  569|  6.51k|    const uint16_t hb = val >> 16;
  570|  6.51k|    const int i = ra_get_index(ra, hb);
  571|  6.51k|    uint8_t typecode;
  572|  6.51k|    if (i >= 0) {
  ------------------
  |  Branch (572:9): [True: 4.70k, False: 1.80k]
  ------------------
  573|  4.70k|        ra_unshare_container_at_index(ra, (uint16_t)i);
  574|  4.70k|        container_t *container =
  575|  4.70k|            ra_get_container_at_index(ra, (uint16_t)i, &typecode);
  576|  4.70k|        uint8_t newtypecode = typecode;
  577|  4.70k|        container_t *container2 =
  578|  4.70k|            container_add(container, val & 0xFFFF, typecode, &newtypecode);
  579|  4.70k|        if (container2 != container) {
  ------------------
  |  Branch (579:13): [True: 0, False: 4.70k]
  ------------------
  580|      0|            container_free(container, typecode);
  581|      0|            ra_set_container_at_index(&r->high_low_container, i, container2,
  582|      0|                                      newtypecode);
  583|      0|        }
  584|  4.70k|    } else {
  585|  1.80k|        array_container_t *newac = array_container_create();
  586|  1.80k|        container_t *container =
  587|  1.80k|            container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode);
  ------------------
  |  |   49|  1.80k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  588|       |        // we could just assume that it stays an array container
  589|  1.80k|        ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
  590|  1.80k|                                   container, typecode);
  591|  1.80k|    }
  592|  6.51k|}
roaring_bitmap_add_checked:
  594|  6.51k|bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
  595|  6.51k|    const uint16_t hb = val >> 16;
  596|  6.51k|    const int i = ra_get_index(&r->high_low_container, hb);
  597|  6.51k|    uint8_t typecode;
  598|  6.51k|    bool result = false;
  599|  6.51k|    if (i >= 0) {
  ------------------
  |  Branch (599:9): [True: 4.89k, False: 1.62k]
  ------------------
  600|  4.89k|        ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
  601|  4.89k|        container_t *container = ra_get_container_at_index(
  602|  4.89k|            &r->high_low_container, (uint16_t)i, &typecode);
  603|       |
  604|  4.89k|        const int oldCardinality =
  605|  4.89k|            container_get_cardinality(container, typecode);
  606|       |
  607|  4.89k|        uint8_t newtypecode = typecode;
  608|  4.89k|        container_t *container2 =
  609|  4.89k|            container_add(container, val & 0xFFFF, typecode, &newtypecode);
  610|  4.89k|        if (container2 != container) {
  ------------------
  |  Branch (610:13): [True: 0, False: 4.89k]
  ------------------
  611|      0|            container_free(container, typecode);
  612|      0|            ra_set_container_at_index(&r->high_low_container, i, container2,
  613|      0|                                      newtypecode);
  614|      0|            result = true;
  615|  4.89k|        } else {
  616|  4.89k|            const int newCardinality =
  617|  4.89k|                container_get_cardinality(container, newtypecode);
  618|       |
  619|  4.89k|            result = oldCardinality != newCardinality;
  620|  4.89k|        }
  621|  4.89k|    } else {
  622|  1.62k|        array_container_t *newac = array_container_create();
  623|  1.62k|        container_t *container =
  624|  1.62k|            container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode);
  ------------------
  |  |   49|  1.62k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  625|       |        // we could just assume that it stays an array container
  626|  1.62k|        ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
  627|  1.62k|                                   container, typecode);
  628|  1.62k|        result = true;
  629|  1.62k|    }
  630|       |
  631|  6.51k|    return result;
  632|  6.51k|}
roaring_bitmap_remove:
  634|  6.51k|void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
  635|  6.51k|    const uint16_t hb = val >> 16;
  636|  6.51k|    const int i = ra_get_index(&r->high_low_container, hb);
  637|  6.51k|    uint8_t typecode;
  638|  6.51k|    if (i >= 0) {
  ------------------
  |  Branch (638:9): [True: 5.81k, False: 701]
  ------------------
  639|  5.81k|        ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
  640|  5.81k|        container_t *container = ra_get_container_at_index(
  641|  5.81k|            &r->high_low_container, (uint16_t)i, &typecode);
  642|  5.81k|        uint8_t newtypecode = typecode;
  643|  5.81k|        container_t *container2 =
  644|  5.81k|            container_remove(container, val & 0xFFFF, typecode, &newtypecode);
  645|  5.81k|        if (container2 != container) {
  ------------------
  |  Branch (645:13): [True: 13, False: 5.80k]
  ------------------
  646|     13|            container_free(container, typecode);
  647|     13|            ra_set_container_at_index(&r->high_low_container, i, container2,
  648|     13|                                      newtypecode);
  649|     13|        }
  650|  5.81k|        if (container_nonzero_cardinality(container2, newtypecode)) {
  ------------------
  |  Branch (650:13): [True: 5.76k, False: 51]
  ------------------
  651|  5.76k|            ra_set_container_at_index(&r->high_low_container, i, container2,
  652|  5.76k|                                      newtypecode);
  653|  5.76k|        } else {
  654|     51|            ra_remove_at_index_and_free(&r->high_low_container, i);
  655|     51|        }
  656|  5.81k|    }
  657|  6.51k|}
roaring_bitmap_remove_checked:
  659|  6.51k|bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
  660|  6.51k|    const uint16_t hb = val >> 16;
  661|  6.51k|    const int i = ra_get_index(&r->high_low_container, hb);
  662|  6.51k|    uint8_t typecode;
  663|  6.51k|    bool result = false;
  664|  6.51k|    if (i >= 0) {
  ------------------
  |  Branch (664:9): [True: 5.85k, False: 666]
  ------------------
  665|  5.85k|        ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
  666|  5.85k|        container_t *container = ra_get_container_at_index(
  667|  5.85k|            &r->high_low_container, (uint16_t)i, &typecode);
  668|       |
  669|  5.85k|        const int oldCardinality =
  670|  5.85k|            container_get_cardinality(container, typecode);
  671|       |
  672|  5.85k|        uint8_t newtypecode = typecode;
  673|  5.85k|        container_t *container2 =
  674|  5.85k|            container_remove(container, val & 0xFFFF, typecode, &newtypecode);
  675|  5.85k|        if (container2 != container) {
  ------------------
  |  Branch (675:13): [True: 4, False: 5.84k]
  ------------------
  676|      4|            container_free(container, typecode);
  677|      4|            ra_set_container_at_index(&r->high_low_container, i, container2,
  678|      4|                                      newtypecode);
  679|      4|        }
  680|       |
  681|  5.85k|        const int newCardinality =
  682|  5.85k|            container_get_cardinality(container2, newtypecode);
  683|       |
  684|  5.85k|        if (newCardinality != 0) {
  ------------------
  |  Branch (684:13): [True: 5.83k, False: 11]
  ------------------
  685|  5.83k|            ra_set_container_at_index(&r->high_low_container, i, container2,
  686|  5.83k|                                      newtypecode);
  687|  5.83k|        } else {
  688|     11|            ra_remove_at_index_and_free(&r->high_low_container, i);
  689|     11|        }
  690|       |
  691|  5.85k|        result = oldCardinality != newCardinality;
  692|  5.85k|    }
  693|  6.51k|    return result;
  694|  6.51k|}
roaring_bitmap_and:
  732|  6.51k|                                     const roaring_bitmap_t *x2) {
  733|  6.51k|    uint8_t result_type = 0;
  734|  6.51k|    const int length1 = x1->high_low_container.size,
  735|  6.51k|              length2 = x2->high_low_container.size;
  736|  6.51k|    uint32_t neededcap = length1 > length2 ? length2 : length1;
  ------------------
  |  Branch (736:26): [True: 42, False: 6.47k]
  ------------------
  737|  6.51k|    roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
  738|  6.51k|    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
  ------------------
  |  Branch (738:46): [True: 0, False: 6.51k]
  |  Branch (738:60): [True: 0, False: 6.51k]
  ------------------
  739|       |
  740|  6.51k|    int pos1 = 0, pos2 = 0;
  741|       |
  742|  12.9k|    while (pos1 < length1 && pos2 < length2) {
  ------------------
  |  Branch (742:12): [True: 6.51k, False: 6.47k]
  |  Branch (742:30): [True: 6.47k, False: 42]
  ------------------
  743|  6.47k|        const uint16_t s1 =
  744|  6.47k|            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
  745|  6.47k|        const uint16_t s2 =
  746|  6.47k|            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
  747|       |
  748|  6.47k|        if (s1 == s2) {
  ------------------
  |  Branch (748:13): [True: 6.40k, False: 72]
  ------------------
  749|  6.40k|            uint8_t type1, type2;
  750|  6.40k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
  751|  6.40k|                                                        (uint16_t)pos1, &type1);
  752|  6.40k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
  753|  6.40k|                                                        (uint16_t)pos2, &type2);
  754|  6.40k|            container_t *c = container_and(c1, type1, c2, type2, &result_type);
  755|       |
  756|  6.40k|            if (container_nonzero_cardinality(c, result_type)) {
  ------------------
  |  Branch (756:17): [True: 6.06k, False: 335]
  ------------------
  757|  6.06k|                ra_append(&answer->high_low_container, s1, c, result_type);
  758|  6.06k|            } else {
  759|    335|                container_free(c, result_type);  // otherwise: memory leak!
  760|    335|            }
  761|  6.40k|            ++pos1;
  762|  6.40k|            ++pos2;
  763|  6.40k|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (763:20): [True: 72, False: 0]
  ------------------
  764|     72|            pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
  765|     72|        } else {  // s1 > s2
  766|      0|            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
  767|      0|        }
  768|  6.47k|    }
  769|  6.51k|    return answer;
  770|  6.51k|}
roaring_bitmap_and_inplace:
  813|  6.51k|                                const roaring_bitmap_t *x2) {
  814|  6.51k|    if (x1 == x2) return;
  ------------------
  |  Branch (814:9): [True: 0, False: 6.51k]
  ------------------
  815|  6.51k|    int pos1 = 0, pos2 = 0, intersection_size = 0;
  816|  6.51k|    const int length1 = ra_get_size(&x1->high_low_container);
  817|  6.51k|    const int length2 = ra_get_size(&x2->high_low_container);
  818|       |
  819|       |    // any skipped-over or newly emptied containers in x1
  820|       |    // have to be freed.
  821|  74.9k|    while (pos1 < length1 && pos2 < length2) {
  ------------------
  |  Branch (821:12): [True: 68.4k, False: 6.47k]
  |  Branch (821:30): [True: 68.3k, False: 42]
  ------------------
  822|  68.3k|        const uint16_t s1 =
  823|  68.3k|            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
  824|  68.3k|        const uint16_t s2 =
  825|  68.3k|            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
  826|       |
  827|  68.3k|        if (s1 == s2) {
  ------------------
  |  Branch (827:13): [True: 68.3k, False: 72]
  ------------------
  828|  68.3k|            uint8_t type1, type2, result_type;
  829|  68.3k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
  830|  68.3k|                                                        (uint16_t)pos1, &type1);
  831|  68.3k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
  832|  68.3k|                                                        (uint16_t)pos2, &type2);
  833|       |
  834|       |            // We do the computation "in place" only when c1 is not a shared
  835|       |            // container. Rationale: using a shared container safely with in
  836|       |            // place computation would require making a copy and then doing the
  837|       |            // computation in place which is likely less efficient than avoiding
  838|       |            // in place entirely and always generating a new container.
  839|  68.3k|            container_t *c =
  840|  68.3k|                (type1 == SHARED_CONTAINER_TYPE)
  ------------------
  |  |   51|  68.3k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (840:17): [True: 0, False: 68.3k]
  ------------------
  841|  68.3k|                    ? container_and(c1, type1, c2, type2, &result_type)
  842|  68.3k|                    : container_iand(c1, type1, c2, type2, &result_type);
  843|       |
  844|  68.3k|            if (c != c1) {  // in this instance a new container was created, and
  ------------------
  |  Branch (844:17): [True: 61.8k, False: 6.46k]
  ------------------
  845|       |                            // we need to free the old one
  846|  61.8k|                container_free(c1, type1);
  847|  61.8k|            }
  848|  68.3k|            if (container_nonzero_cardinality(c, result_type)) {
  ------------------
  |  Branch (848:17): [True: 68.3k, False: 0]
  ------------------
  849|  68.3k|                ra_replace_key_and_container_at_index(&x1->high_low_container,
  850|  68.3k|                                                      intersection_size, s1, c,
  851|  68.3k|                                                      result_type);
  852|  68.3k|                intersection_size++;
  853|  68.3k|            } else {
  854|      0|                container_free(c, result_type);
  855|      0|            }
  856|  68.3k|            ++pos1;
  857|  68.3k|            ++pos2;
  858|  68.3k|        } else if (s1 < s2) {
  ------------------
  |  Branch (858:20): [True: 72, False: 0]
  ------------------
  859|     72|            pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
  860|     72|        } else {  // s1 > s2
  861|      0|            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
  862|      0|        }
  863|  68.3k|    }
  864|       |
  865|       |    // if we ended early because x2 ran out, then all remaining in x1 should be
  866|       |    // freed
  867|  6.55k|    while (pos1 < length1) {
  ------------------
  |  Branch (867:12): [True: 42, False: 6.51k]
  ------------------
  868|     42|        container_free(x1->high_low_container.containers[pos1],
  869|     42|                       x1->high_low_container.typecodes[pos1]);
  870|     42|        ++pos1;
  871|     42|    }
  872|       |
  873|       |    // all containers after this have either been copied or freed
  874|  6.51k|    ra_downsize(&x1->high_low_container, intersection_size);
  875|  6.51k|}
roaring_bitmap_or:
  878|  6.51k|                                    const roaring_bitmap_t *x2) {
  879|  6.51k|    uint8_t result_type = 0;
  880|  6.51k|    const int length1 = x1->high_low_container.size,
  881|  6.51k|              length2 = x2->high_low_container.size;
  882|  6.51k|    if (0 == length1) {
  ------------------
  |  Branch (882:9): [True: 0, False: 6.51k]
  ------------------
  883|      0|        return roaring_bitmap_copy(x2);
  884|      0|    }
  885|  6.51k|    if (0 == length2) {
  ------------------
  |  Branch (885:9): [True: 42, False: 6.47k]
  ------------------
  886|     42|        return roaring_bitmap_copy(x1);
  887|     42|    }
  888|  6.47k|    roaring_bitmap_t *answer =
  889|  6.47k|        roaring_bitmap_create_with_capacity(length1 + length2);
  890|  6.47k|    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
  ------------------
  |  Branch (890:46): [True: 0, False: 6.47k]
  |  Branch (890:60): [True: 0, False: 6.47k]
  ------------------
  891|  6.47k|    int pos1 = 0, pos2 = 0;
  892|  6.47k|    uint8_t type1, type2;
  893|  6.47k|    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
  894|  6.47k|    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
  895|  6.47k|    while (true) {
  ------------------
  |  Branch (895:12): [True: 6.47k, Folded]
  ------------------
  896|  6.47k|        if (s1 == s2) {
  ------------------
  |  Branch (896:13): [True: 6.40k, False: 72]
  ------------------
  897|  6.40k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
  898|  6.40k|                                                        (uint16_t)pos1, &type1);
  899|  6.40k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
  900|  6.40k|                                                        (uint16_t)pos2, &type2);
  901|  6.40k|            container_t *c = container_or(c1, type1, c2, type2, &result_type);
  902|       |
  903|       |            // since we assume that the initial containers are non-empty, the
  904|       |            // result here
  905|       |            // can only be non-empty
  906|  6.40k|            ra_append(&answer->high_low_container, s1, c, result_type);
  907|  6.40k|            ++pos1;
  908|  6.40k|            ++pos2;
  909|  6.40k|            if (pos1 == length1) break;
  ------------------
  |  Branch (909:17): [True: 6.40k, False: 0]
  ------------------
  910|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (910:17): [True: 0, False: 0]
  ------------------
  911|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
  912|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
  913|       |
  914|     72|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (914:20): [True: 72, False: 0]
  ------------------
  915|     72|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
  916|     72|                                                        (uint16_t)pos1, &type1);
  917|       |            // c1 = container_clone(c1, type1);
  918|     72|            c1 = get_copy_of_container(c1, &type1, is_cow(x1));
  919|     72|            if (is_cow(x1)) {
  ------------------
  |  Branch (919:17): [True: 0, False: 72]
  ------------------
  920|      0|                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
  921|      0|                                          type1);
  922|      0|            }
  923|     72|            ra_append(&answer->high_low_container, s1, c1, type1);
  924|     72|            pos1++;
  925|     72|            if (pos1 == length1) break;
  ------------------
  |  Branch (925:17): [True: 72, False: 0]
  ------------------
  926|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
  927|       |
  928|      0|        } else {  // s1 > s2
  929|      0|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
  930|      0|                                                        (uint16_t)pos2, &type2);
  931|       |            // c2 = container_clone(c2, type2);
  932|      0|            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
  933|      0|            if (is_cow(x2)) {
  ------------------
  |  Branch (933:17): [True: 0, False: 0]
  ------------------
  934|      0|                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
  935|      0|                                          type2);
  936|      0|            }
  937|      0|            ra_append(&answer->high_low_container, s2, c2, type2);
  938|      0|            pos2++;
  939|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (939:17): [True: 0, False: 0]
  ------------------
  940|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
  941|      0|        }
  942|  6.47k|    }
  943|  6.47k|    if (pos1 == length1) {
  ------------------
  |  Branch (943:9): [True: 6.47k, False: 0]
  ------------------
  944|  6.47k|        ra_append_copy_range(&answer->high_low_container,
  945|  6.47k|                             &x2->high_low_container, pos2, length2,
  946|  6.47k|                             is_cow(x2));
  947|  6.47k|    } else if (pos2 == length2) {
  ------------------
  |  Branch (947:16): [True: 0, False: 0]
  ------------------
  948|      0|        ra_append_copy_range(&answer->high_low_container,
  949|      0|                             &x1->high_low_container, pos1, length1,
  950|      0|                             is_cow(x1));
  951|      0|    }
  952|  6.47k|    return answer;
  953|  6.51k|}
roaring_bitmap_or_inplace:
  957|  6.51k|                               const roaring_bitmap_t *x2) {
  958|  6.51k|    uint8_t result_type = 0;
  959|  6.51k|    int length1 = x1->high_low_container.size;
  960|  6.51k|    const int length2 = x2->high_low_container.size;
  961|       |
  962|  6.51k|    if (0 == length2) return;
  ------------------
  |  Branch (962:9): [True: 0, False: 6.51k]
  ------------------
  963|       |
  964|  6.51k|    if (0 == length1) {
  ------------------
  |  Branch (964:9): [True: 0, False: 6.51k]
  ------------------
  965|      0|        roaring_bitmap_overwrite(x1, x2);
  966|      0|        return;
  967|      0|    }
  968|  6.51k|    int pos1 = 0, pos2 = 0;
  969|  6.51k|    uint8_t type1, type2;
  970|  6.51k|    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
  971|  6.51k|    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
  972|  6.51k|    while (true) {
  ------------------
  |  Branch (972:12): [True: 6.51k, Folded]
  ------------------
  973|  6.51k|        if (s1 == s2) {
  ------------------
  |  Branch (973:13): [True: 6.51k, False: 0]
  ------------------
  974|  6.51k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
  975|  6.51k|                                                        (uint16_t)pos1, &type1);
  976|  6.51k|            if (!container_is_full(c1, type1)) {
  ------------------
  |  Branch (976:17): [True: 6.51k, False: 0]
  ------------------
  977|  6.51k|                container_t *c2 = ra_get_container_at_index(
  978|  6.51k|                    &x2->high_low_container, (uint16_t)pos2, &type2);
  979|  6.51k|                container_t *c =
  980|  6.51k|                    (type1 == SHARED_CONTAINER_TYPE)
  ------------------
  |  |   51|  6.51k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (980:21): [True: 0, False: 6.51k]
  ------------------
  981|  6.51k|                        ? container_or(c1, type1, c2, type2, &result_type)
  982|  6.51k|                        : container_ior(c1, type1, c2, type2, &result_type);
  983|       |
  984|  6.51k|                if (c != c1) {  // in this instance a new container was created,
  ------------------
  |  Branch (984:21): [True: 4.68k, False: 1.83k]
  ------------------
  985|       |                                // and we need to free the old one
  986|  4.68k|                    container_free(c1, type1);
  987|  4.68k|                }
  988|  6.51k|                ra_set_container_at_index(&x1->high_low_container, pos1, c,
  989|  6.51k|                                          result_type);
  990|  6.51k|            }
  991|  6.51k|            ++pos1;
  992|  6.51k|            ++pos2;
  993|  6.51k|            if (pos1 == length1) break;
  ------------------
  |  Branch (993:17): [True: 6.51k, False: 0]
  ------------------
  994|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (994:17): [True: 0, False: 0]
  ------------------
  995|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
  996|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
  997|       |
  998|      0|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (998:20): [True: 0, False: 0]
  ------------------
  999|      0|            pos1++;
 1000|      0|            if (pos1 == length1) break;
  ------------------
  |  Branch (1000:17): [True: 0, False: 0]
  ------------------
 1001|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1002|       |
 1003|      0|        } else {  // s1 > s2
 1004|      0|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 1005|      0|                                                        (uint16_t)pos2, &type2);
 1006|      0|            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
 1007|      0|            if (is_cow(x2)) {
  ------------------
  |  Branch (1007:17): [True: 0, False: 0]
  ------------------
 1008|      0|                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
 1009|      0|                                          type2);
 1010|      0|            }
 1011|       |
 1012|       |            // container_t *c2_clone = container_clone(c2, type2);
 1013|      0|            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
 1014|      0|                                       type2);
 1015|      0|            pos1++;
 1016|      0|            length1++;
 1017|      0|            pos2++;
 1018|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (1018:17): [True: 0, False: 0]
  ------------------
 1019|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1020|      0|        }
 1021|  6.51k|    }
 1022|  6.51k|    if (pos1 == length1) {
  ------------------
  |  Branch (1022:9): [True: 6.51k, False: 0]
  ------------------
 1023|  6.51k|        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
 1024|  6.51k|                             pos2, length2, is_cow(x2));
 1025|  6.51k|    }
 1026|  6.51k|}
roaring_bitmap_xor:
 1029|  6.51k|                                     const roaring_bitmap_t *x2) {
 1030|  6.51k|    uint8_t result_type = 0;
 1031|  6.51k|    const int length1 = x1->high_low_container.size,
 1032|  6.51k|              length2 = x2->high_low_container.size;
 1033|  6.51k|    if (0 == length1) {
  ------------------
  |  Branch (1033:9): [True: 0, False: 6.51k]
  ------------------
 1034|      0|        return roaring_bitmap_copy(x2);
 1035|      0|    }
 1036|  6.51k|    if (0 == length2) {
  ------------------
  |  Branch (1036:9): [True: 42, False: 6.47k]
  ------------------
 1037|     42|        return roaring_bitmap_copy(x1);
 1038|     42|    }
 1039|  6.47k|    roaring_bitmap_t *answer =
 1040|  6.47k|        roaring_bitmap_create_with_capacity(length1 + length2);
 1041|  6.47k|    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
  ------------------
  |  Branch (1041:46): [True: 0, False: 6.47k]
  |  Branch (1041:60): [True: 0, False: 6.47k]
  ------------------
 1042|  6.47k|    int pos1 = 0, pos2 = 0;
 1043|  6.47k|    uint8_t type1, type2;
 1044|  6.47k|    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1045|  6.47k|    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1046|  6.47k|    while (true) {
  ------------------
  |  Branch (1046:12): [True: 6.47k, Folded]
  ------------------
 1047|  6.47k|        if (s1 == s2) {
  ------------------
  |  Branch (1047:13): [True: 6.40k, False: 72]
  ------------------
 1048|  6.40k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
 1049|  6.40k|                                                        (uint16_t)pos1, &type1);
 1050|  6.40k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 1051|  6.40k|                                                        (uint16_t)pos2, &type2);
 1052|  6.40k|            container_t *c = container_xor(c1, type1, c2, type2, &result_type);
 1053|       |
 1054|  6.40k|            if (container_nonzero_cardinality(c, result_type)) {
  ------------------
  |  Branch (1054:17): [True: 6.38k, False: 19]
  ------------------
 1055|  6.38k|                ra_append(&answer->high_low_container, s1, c, result_type);
 1056|  6.38k|            } else {
 1057|     19|                container_free(c, result_type);
 1058|     19|            }
 1059|  6.40k|            ++pos1;
 1060|  6.40k|            ++pos2;
 1061|  6.40k|            if (pos1 == length1) break;
  ------------------
  |  Branch (1061:17): [True: 6.40k, False: 0]
  ------------------
 1062|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (1062:17): [True: 0, False: 0]
  ------------------
 1063|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1064|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1065|       |
 1066|     72|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (1066:20): [True: 72, False: 0]
  ------------------
 1067|     72|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
 1068|     72|                                                        (uint16_t)pos1, &type1);
 1069|     72|            c1 = get_copy_of_container(c1, &type1, is_cow(x1));
 1070|     72|            if (is_cow(x1)) {
  ------------------
  |  Branch (1070:17): [True: 0, False: 72]
  ------------------
 1071|      0|                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
 1072|      0|                                          type1);
 1073|      0|            }
 1074|     72|            ra_append(&answer->high_low_container, s1, c1, type1);
 1075|     72|            pos1++;
 1076|     72|            if (pos1 == length1) break;
  ------------------
  |  Branch (1076:17): [True: 72, False: 0]
  ------------------
 1077|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1078|       |
 1079|      0|        } else {  // s1 > s2
 1080|      0|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 1081|      0|                                                        (uint16_t)pos2, &type2);
 1082|      0|            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
 1083|      0|            if (is_cow(x2)) {
  ------------------
  |  Branch (1083:17): [True: 0, False: 0]
  ------------------
 1084|      0|                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
 1085|      0|                                          type2);
 1086|      0|            }
 1087|      0|            ra_append(&answer->high_low_container, s2, c2, type2);
 1088|      0|            pos2++;
 1089|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (1089:17): [True: 0, False: 0]
  ------------------
 1090|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1091|      0|        }
 1092|  6.47k|    }
 1093|  6.47k|    if (pos1 == length1) {
  ------------------
  |  Branch (1093:9): [True: 6.47k, False: 0]
  ------------------
 1094|  6.47k|        ra_append_copy_range(&answer->high_low_container,
 1095|  6.47k|                             &x2->high_low_container, pos2, length2,
 1096|  6.47k|                             is_cow(x2));
 1097|  6.47k|    } else if (pos2 == length2) {
  ------------------
  |  Branch (1097:16): [True: 0, False: 0]
  ------------------
 1098|      0|        ra_append_copy_range(&answer->high_low_container,
 1099|      0|                             &x1->high_low_container, pos1, length1,
 1100|      0|                             is_cow(x1));
 1101|      0|    }
 1102|  6.47k|    return answer;
 1103|  6.51k|}
roaring_bitmap_xor_inplace:
 1108|  6.51k|                                const roaring_bitmap_t *x2) {
 1109|  6.51k|    assert(x1 != x2);
 1110|  6.51k|    uint8_t result_type = 0;
 1111|  6.51k|    int length1 = x1->high_low_container.size;
 1112|  6.51k|    const int length2 = x2->high_low_container.size;
 1113|       |
 1114|  6.51k|    if (0 == length2) return;
  ------------------
  |  Branch (1114:9): [True: 14, False: 6.50k]
  ------------------
 1115|       |
 1116|  6.50k|    if (0 == length1) {
  ------------------
  |  Branch (1116:9): [True: 1.53k, False: 4.97k]
  ------------------
 1117|  1.53k|        roaring_bitmap_overwrite(x1, x2);
 1118|  1.53k|        return;
 1119|  1.53k|    }
 1120|       |
 1121|       |    // XOR can have new containers inserted from x2, but can also
 1122|       |    // lose containers when x1 and x2 are nonempty and identical.
 1123|       |
 1124|  4.97k|    int pos1 = 0, pos2 = 0;
 1125|  4.97k|    uint8_t type1, type2;
 1126|  4.97k|    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1127|  4.97k|    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1128|  66.8k|    while (true) {
  ------------------
  |  Branch (1128:12): [True: 66.8k, Folded]
  ------------------
 1129|  66.8k|        if (s1 == s2) {
  ------------------
  |  Branch (1129:13): [True: 66.7k, False: 102]
  ------------------
 1130|  66.7k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
 1131|  66.7k|                                                        (uint16_t)pos1, &type1);
 1132|  66.7k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 1133|  66.7k|                                                        (uint16_t)pos2, &type2);
 1134|       |
 1135|       |            // We do the computation "in place" only when c1 is not a shared
 1136|       |            // container. Rationale: using a shared container safely with in
 1137|       |            // place computation would require making a copy and then doing the
 1138|       |            // computation in place which is likely less efficient than avoiding
 1139|       |            // in place entirely and always generating a new container.
 1140|       |
 1141|  66.7k|            container_t *c;
 1142|  66.7k|            if (type1 == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|  66.7k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (1142:17): [True: 0, False: 66.7k]
  ------------------
 1143|      0|                c = container_xor(c1, type1, c2, type2, &result_type);
 1144|      0|                shared_container_free(CAST_shared(c1));  // so release
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1145|  66.7k|            } else {
 1146|  66.7k|                c = container_ixor(c1, type1, c2, type2, &result_type);
 1147|  66.7k|            }
 1148|       |
 1149|  66.7k|            if (container_nonzero_cardinality(c, result_type)) {
  ------------------
  |  Branch (1149:17): [True: 4.65k, False: 62.1k]
  ------------------
 1150|  4.65k|                ra_set_container_at_index(&x1->high_low_container, pos1, c,
 1151|  4.65k|                                          result_type);
 1152|  4.65k|                ++pos1;
 1153|  62.1k|            } else {
 1154|  62.1k|                container_free(c, result_type);
 1155|  62.1k|                ra_remove_at_index(&x1->high_low_container, pos1);
 1156|  62.1k|                --length1;
 1157|  62.1k|            }
 1158|       |
 1159|  66.7k|            ++pos2;
 1160|  66.7k|            if (pos1 == length1) break;
  ------------------
  |  Branch (1160:17): [True: 4.97k, False: 61.8k]
  ------------------
 1161|  61.8k|            if (pos2 == length2) break;
  ------------------
  |  Branch (1161:17): [True: 0, False: 61.8k]
  ------------------
 1162|  61.8k|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1163|  61.8k|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1164|       |
 1165|  61.8k|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (1165:20): [True: 0, False: 102]
  ------------------
 1166|      0|            pos1++;
 1167|      0|            if (pos1 == length1) break;
  ------------------
  |  Branch (1167:17): [True: 0, False: 0]
  ------------------
 1168|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1169|       |
 1170|    102|        } else {  // s1 > s2
 1171|    102|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 1172|    102|                                                        (uint16_t)pos2, &type2);
 1173|    102|            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
 1174|    102|            if (is_cow(x2)) {
  ------------------
  |  Branch (1174:17): [True: 0, False: 102]
  ------------------
 1175|      0|                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
 1176|      0|                                          type2);
 1177|      0|            }
 1178|       |
 1179|    102|            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
 1180|    102|                                       type2);
 1181|    102|            pos1++;
 1182|    102|            length1++;
 1183|    102|            pos2++;
 1184|    102|            if (pos2 == length2) break;
  ------------------
  |  Branch (1184:17): [True: 0, False: 102]
  ------------------
 1185|    102|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1186|    102|        }
 1187|  66.8k|    }
 1188|  4.97k|    if (pos1 == length1) {
  ------------------
  |  Branch (1188:9): [True: 4.97k, False: 0]
  ------------------
 1189|  4.97k|        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
 1190|  4.97k|                             pos2, length2, is_cow(x2));
 1191|  4.97k|    }
 1192|  4.97k|}
roaring_bitmap_andnot:
 1195|  6.51k|                                        const roaring_bitmap_t *x2) {
 1196|  6.51k|    uint8_t result_type = 0;
 1197|  6.51k|    const int length1 = x1->high_low_container.size,
 1198|  6.51k|              length2 = x2->high_low_container.size;
 1199|  6.51k|    if (0 == length1) {
  ------------------
  |  Branch (1199:9): [True: 0, False: 6.51k]
  ------------------
 1200|      0|        roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
 1201|      0|        roaring_bitmap_set_copy_on_write(empty_bitmap,
 1202|      0|                                         is_cow(x1) || is_cow(x2));
  ------------------
  |  Branch (1202:42): [True: 0, False: 0]
  |  Branch (1202:56): [True: 0, False: 0]
  ------------------
 1203|      0|        return empty_bitmap;
 1204|      0|    }
 1205|  6.51k|    if (0 == length2) {
  ------------------
  |  Branch (1205:9): [True: 42, False: 6.47k]
  ------------------
 1206|     42|        return roaring_bitmap_copy(x1);
 1207|     42|    }
 1208|  6.47k|    roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
 1209|  6.47k|    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
  ------------------
  |  Branch (1209:46): [True: 0, False: 6.47k]
  |  Branch (1209:60): [True: 0, False: 6.47k]
  ------------------
 1210|       |
 1211|  6.47k|    int pos1 = 0, pos2 = 0;
 1212|  6.47k|    uint8_t type1, type2;
 1213|  6.47k|    uint16_t s1 = 0;
 1214|  6.47k|    uint16_t s2 = 0;
 1215|  6.47k|    while (true) {
  ------------------
  |  Branch (1215:12): [True: 6.47k, Folded]
  ------------------
 1216|  6.47k|        s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1217|  6.47k|        s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1218|       |
 1219|  6.47k|        if (s1 == s2) {
  ------------------
  |  Branch (1219:13): [True: 6.40k, False: 72]
  ------------------
 1220|  6.40k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
 1221|  6.40k|                                                        (uint16_t)pos1, &type1);
 1222|  6.40k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 1223|  6.40k|                                                        (uint16_t)pos2, &type2);
 1224|  6.40k|            container_t *c =
 1225|  6.40k|                container_andnot(c1, type1, c2, type2, &result_type);
 1226|       |
 1227|  6.40k|            if (container_nonzero_cardinality(c, result_type)) {
  ------------------
  |  Branch (1227:17): [True: 6.17k, False: 228]
  ------------------
 1228|  6.17k|                ra_append(&answer->high_low_container, s1, c, result_type);
 1229|  6.17k|            } else {
 1230|    228|                container_free(c, result_type);
 1231|    228|            }
 1232|  6.40k|            ++pos1;
 1233|  6.40k|            ++pos2;
 1234|  6.40k|            if (pos1 == length1) break;
  ------------------
  |  Branch (1234:17): [True: 6.40k, False: 0]
  ------------------
 1235|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (1235:17): [True: 0, False: 0]
  ------------------
 1236|     72|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (1236:20): [True: 72, False: 0]
  ------------------
 1237|     72|            const int next_pos1 =
 1238|     72|                ra_advance_until(&x1->high_low_container, s2, pos1);
 1239|     72|            ra_append_copy_range(&answer->high_low_container,
 1240|     72|                                 &x1->high_low_container, pos1, next_pos1,
 1241|     72|                                 is_cow(x1));
 1242|       |            // TODO : perhaps some of the copy_on_write should be based on
 1243|       |            // answer rather than x1 (more stringent?).  Many similar cases
 1244|     72|            pos1 = next_pos1;
 1245|     72|            if (pos1 == length1) break;
  ------------------
  |  Branch (1245:17): [True: 72, False: 0]
  ------------------
 1246|     72|        } else {  // s1 > s2
 1247|      0|            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
 1248|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (1248:17): [True: 0, False: 0]
  ------------------
 1249|      0|        }
 1250|  6.47k|    }
 1251|  6.47k|    if (pos2 == length2) {
  ------------------
  |  Branch (1251:9): [True: 4.01k, False: 2.45k]
  ------------------
 1252|  4.01k|        ra_append_copy_range(&answer->high_low_container,
 1253|  4.01k|                             &x1->high_low_container, pos1, length1,
 1254|  4.01k|                             is_cow(x1));
 1255|  4.01k|    }
 1256|  6.47k|    return answer;
 1257|  6.51k|}
roaring_bitmap_andnot_inplace:
 1262|  6.51k|                                   const roaring_bitmap_t *x2) {
 1263|  6.51k|    assert(x1 != x2);
 1264|       |
 1265|  6.51k|    uint8_t result_type = 0;
 1266|  6.51k|    int length1 = x1->high_low_container.size;
 1267|  6.51k|    const int length2 = x2->high_low_container.size;
 1268|  6.51k|    int intersection_size = 0;
 1269|       |
 1270|  6.51k|    if (0 == length2) return;
  ------------------
  |  Branch (1270:9): [True: 449, False: 6.06k]
  ------------------
 1271|       |
 1272|  6.06k|    if (0 == length1) {
  ------------------
  |  Branch (1272:9): [True: 0, False: 6.06k]
  ------------------
 1273|      0|        roaring_bitmap_clear(x1);
 1274|      0|        return;
 1275|      0|    }
 1276|       |
 1277|  6.06k|    int pos1 = 0, pos2 = 0;
 1278|  6.06k|    uint8_t type1, type2;
 1279|  6.06k|    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1280|  6.06k|    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1281|  6.06k|    while (true) {
  ------------------
  |  Branch (1281:12): [True: 6.06k, Folded]
  ------------------
 1282|  6.06k|        if (s1 == s2) {
  ------------------
  |  Branch (1282:13): [True: 6.06k, False: 0]
  ------------------
 1283|  6.06k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
 1284|  6.06k|                                                        (uint16_t)pos1, &type1);
 1285|  6.06k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 1286|  6.06k|                                                        (uint16_t)pos2, &type2);
 1287|       |
 1288|       |            // We do the computation "in place" only when c1 is not a shared
 1289|       |            // container. Rationale: using a shared container safely with in
 1290|       |            // place computation would require making a copy and then doing the
 1291|       |            // computation in place which is likely less efficient than avoiding
 1292|       |            // in place entirely and always generating a new container.
 1293|       |
 1294|  6.06k|            container_t *c;
 1295|  6.06k|            if (type1 == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|  6.06k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (1295:17): [True: 0, False: 6.06k]
  ------------------
 1296|      0|                c = container_andnot(c1, type1, c2, type2, &result_type);
 1297|      0|                shared_container_free(CAST_shared(c1));  // release
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1298|  6.06k|            } else {
 1299|  6.06k|                c = container_iandnot(c1, type1, c2, type2, &result_type);
 1300|  6.06k|            }
 1301|       |
 1302|  6.06k|            if (container_nonzero_cardinality(c, result_type)) {
  ------------------
  |  Branch (1302:17): [True: 4.53k, False: 1.53k]
  ------------------
 1303|  4.53k|                ra_replace_key_and_container_at_index(&x1->high_low_container,
 1304|  4.53k|                                                      intersection_size++, s1,
 1305|  4.53k|                                                      c, result_type);
 1306|  4.53k|            } else {
 1307|  1.53k|                container_free(c, result_type);
 1308|  1.53k|            }
 1309|       |
 1310|  6.06k|            ++pos1;
 1311|  6.06k|            ++pos2;
 1312|  6.06k|            if (pos1 == length1) break;
  ------------------
  |  Branch (1312:17): [True: 3.79k, False: 2.27k]
  ------------------
 1313|  2.27k|            if (pos2 == length2) break;
  ------------------
  |  Branch (1313:17): [True: 2.27k, False: 0]
  ------------------
 1314|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1315|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1316|       |
 1317|      0|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (1317:20): [True: 0, False: 0]
  ------------------
 1318|      0|            if (pos1 != intersection_size) {
  ------------------
  |  Branch (1318:17): [True: 0, False: 0]
  ------------------
 1319|      0|                container_t *c1 = ra_get_container_at_index(
 1320|      0|                    &x1->high_low_container, (uint16_t)pos1, &type1);
 1321|       |
 1322|      0|                ra_replace_key_and_container_at_index(
 1323|      0|                    &x1->high_low_container, intersection_size, s1, c1, type1);
 1324|      0|            }
 1325|      0|            intersection_size++;
 1326|      0|            pos1++;
 1327|      0|            if (pos1 == length1) break;
  ------------------
  |  Branch (1327:17): [True: 0, False: 0]
  ------------------
 1328|      0|            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 1329|       |
 1330|      0|        } else {  // s1 > s2
 1331|      0|            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
 1332|      0|            if (pos2 == length2) break;
  ------------------
  |  Branch (1332:17): [True: 0, False: 0]
  ------------------
 1333|      0|            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 1334|      0|        }
 1335|  6.06k|    }
 1336|       |
 1337|  6.06k|    if (pos1 < length1) {
  ------------------
  |  Branch (1337:9): [True: 2.27k, False: 3.79k]
  ------------------
 1338|       |        // all containers between intersection_size and
 1339|       |        // pos1 are junk.  However, they have either been moved
 1340|       |        // (thus still referenced) or involved in an iandnot
 1341|       |        // that will clean up all containers that could not be reused.
 1342|       |        // Thus we should not free the junk containers between
 1343|       |        // intersection_size and pos1.
 1344|  2.27k|        if (pos1 > intersection_size) {
  ------------------
  |  Branch (1344:13): [True: 35, False: 2.23k]
  ------------------
 1345|       |            // left slide of remaining items
 1346|     35|            ra_copy_range(&x1->high_low_container, pos1, length1,
 1347|     35|                          intersection_size);
 1348|     35|        }
 1349|       |        // else current placement is fine
 1350|  2.27k|        intersection_size += (length1 - pos1);
 1351|  2.27k|    }
 1352|  6.06k|    ra_downsize(&x1->high_low_container, intersection_size);
 1353|  6.06k|}
roaring_bitmap_get_cardinality:
 1355|  65.1k|uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) {
 1356|  65.1k|    const roaring_array_t *ra = &r->high_low_container;
 1357|       |
 1358|  65.1k|    uint64_t card = 0;
 1359|   439k|    for (int i = 0; i < ra->size; ++i)
  ------------------
  |  Branch (1359:21): [True: 373k, False: 65.1k]
  ------------------
 1360|   373k|        card += container_get_cardinality(ra->containers[i], ra->typecodes[i]);
 1361|  65.1k|    return card;
 1362|  65.1k|}
roaring_bitmap_is_empty:
 1425|  13.0k|bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) {
 1426|  13.0k|    return r->high_low_container.size == 0;
 1427|  13.0k|}
roaring_bitmap_to_uint32_array:
 1429|  6.51k|void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
 1430|  6.51k|    ra_to_uint32_array(&r->high_low_container, ans);
 1431|  6.51k|}
roaring_bitmap_run_optimize:
 1449|  13.0k|bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
 1450|  13.0k|    bool answer = false;
 1451|  26.0k|    for (int i = 0; i < r->high_low_container.size; i++) {
  ------------------
  |  Branch (1451:21): [True: 13.0k, False: 13.0k]
  ------------------
 1452|  13.0k|        uint8_t type_original, type_after;
 1453|  13.0k|        ra_unshare_container_at_index(
 1454|  13.0k|            &r->high_low_container,
 1455|  13.0k|            (uint16_t)i);  // TODO: this introduces extra cloning!
 1456|  13.0k|        container_t *c = ra_get_container_at_index(&r->high_low_container,
 1457|  13.0k|                                                   (uint16_t)i, &type_original);
 1458|  13.0k|        container_t *c1 = convert_run_optimize(c, type_original, &type_after);
 1459|  13.0k|        if (type_after == RUN_CONTAINER_TYPE) {
  ------------------
  |  |   50|  13.0k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (1459:13): [True: 4.09k, False: 8.93k]
  ------------------
 1460|       |            answer = true;
 1461|  4.09k|        }
 1462|  13.0k|        ra_set_container_at_index(&r->high_low_container, i, c1, type_after);
 1463|  13.0k|    }
 1464|  13.0k|    return answer;
 1465|  13.0k|}
roaring_bitmap_shrink_to_fit:
 1467|  6.51k|size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
 1468|  6.51k|    size_t answer = 0;
 1469|  13.0k|    for (int i = 0; i < r->high_low_container.size; i++) {
  ------------------
  |  Branch (1469:21): [True: 6.51k, False: 6.51k]
  ------------------
 1470|  6.51k|        uint8_t type_original;
 1471|  6.51k|        container_t *c = ra_get_container_at_index(&r->high_low_container,
 1472|  6.51k|                                                   (uint16_t)i, &type_original);
 1473|  6.51k|        answer += container_shrink_to_fit(c, type_original);
 1474|  6.51k|    }
 1475|  6.51k|    answer += ra_shrink_to_fit(&r->high_low_container);
 1476|  6.51k|    return answer;
 1477|  6.51k|}
roaring_bitmap_remove_run_compression:
 1483|  6.51k|bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
 1484|  6.51k|    bool answer = false;
 1485|  86.3k|    for (int i = 0; i < r->high_low_container.size; i++) {
  ------------------
  |  Branch (1485:21): [True: 79.8k, False: 6.51k]
  ------------------
 1486|  79.8k|        uint8_t type_original, type_after;
 1487|  79.8k|        container_t *c = ra_get_container_at_index(&r->high_low_container,
 1488|  79.8k|                                                   (uint16_t)i, &type_original);
 1489|  79.8k|        if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) {
  ------------------
  |  |   50|  79.8k|#define RUN_CONTAINER_TYPE 3
  ------------------
  |  Branch (1489:13): [True: 72.0k, False: 7.77k]
  ------------------
 1490|  72.0k|            answer = true;
 1491|  72.0k|            if (type_original == SHARED_CONTAINER_TYPE) {
  ------------------
  |  |   51|  72.0k|#define SHARED_CONTAINER_TYPE 4
  ------------------
  |  Branch (1491:17): [True: 0, False: 72.0k]
  ------------------
 1492|      0|                run_container_t *truec = CAST_run(CAST_shared(c)->container);
  ------------------
  |  |   77|      0|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1493|      0|                int32_t card = run_container_cardinality(truec);
 1494|      0|                container_t *c1 = convert_to_bitset_or_array_container(
 1495|      0|                    truec, card, &type_after);
 1496|      0|                shared_container_free(CAST_shared(c));  // frees run as needed
  ------------------
  |  |   79|      0|#define CAST_shared(c) CAST(shared_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|      0|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1497|      0|                ra_set_container_at_index(&r->high_low_container, i, c1,
 1498|      0|                                          type_after);
 1499|       |
 1500|  72.0k|            } else {
 1501|  72.0k|                int32_t card = run_container_cardinality(CAST_run(c));
  ------------------
  |  |   77|  72.0k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  72.0k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1502|  72.0k|                container_t *c1 = convert_to_bitset_or_array_container(
 1503|  72.0k|                    CAST_run(c), card, &type_after);
  ------------------
  |  |   77|  72.0k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  72.0k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1504|  72.0k|                run_container_free(CAST_run(c));
  ------------------
  |  |   77|  72.0k|#define CAST_run(c) CAST(run_container_t *, c)  // safer downcast
  |  |  ------------------
  |  |  |  |   86|  72.0k|#define CAST(type, value) ((type)value)
  |  |  ------------------
  ------------------
 1505|  72.0k|                ra_set_container_at_index(&r->high_low_container, i, c1,
 1506|  72.0k|                                          type_after);
 1507|  72.0k|            }
 1508|  72.0k|        }
 1509|  79.8k|    }
 1510|  6.51k|    return answer;
 1511|  6.51k|}
roaring_bitmap_portable_size_in_bytes:
 1546|  13.0k|size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) {
 1547|  13.0k|    return ra_portable_size_in_bytes(&r->high_low_container);
 1548|  13.0k|}
roaring_bitmap_portable_deserialize_safe:
 1551|  13.0k|                                                           size_t maxbytes) {
 1552|  13.0k|    roaring_bitmap_t *ans =
 1553|  13.0k|        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
 1554|  13.0k|    if (ans == NULL) {
  ------------------
  |  Branch (1554:9): [True: 0, False: 13.0k]
  ------------------
 1555|      0|        return NULL;
 1556|      0|    }
 1557|  13.0k|    size_t bytesread;
 1558|  13.0k|    bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf,
 1559|  13.0k|                                         maxbytes, &bytesread);
 1560|  13.0k|    if (!is_ok) {
  ------------------
  |  Branch (1560:9): [True: 6.50k, False: 6.52k]
  ------------------
 1561|  6.50k|        roaring_free(ans);
 1562|  6.50k|        return NULL;
 1563|  6.50k|    }
 1564|  6.52k|    roaring_bitmap_set_copy_on_write(ans, false);
 1565|  6.52k|    if (!is_ok) {
  ------------------
  |  Branch (1565:9): [True: 0, False: 6.52k]
  ------------------
 1566|      0|        roaring_free(ans);
 1567|      0|        return NULL;
 1568|      0|    }
 1569|  6.52k|    return ans;
 1570|  6.52k|}
roaring_bitmap_portable_serialize:
 1581|  6.51k|size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf) {
 1582|  6.51k|    return ra_portable_serialize(&r->high_low_container, buf);
 1583|  6.51k|}
roaring_iterate:
 1664|  6.50k|                     void *ptr) {
 1665|  6.50k|    const roaring_array_t *ra = &r->high_low_container;
 1666|       |
 1667|  74.9k|    for (int i = 0; i < ra->size; ++i)
  ------------------
  |  Branch (1667:21): [True: 68.4k, False: 6.50k]
  ------------------
 1668|  68.4k|        if (!container_iterate(ra->containers[i], ra->typecodes[i],
  ------------------
  |  Branch (1668:13): [True: 0, False: 68.4k]
  ------------------
 1669|  68.4k|                               ((uint32_t)ra->keys[i]) << 16, iterator, ptr)) {
 1670|      0|            return false;
 1671|      0|        }
 1672|  6.50k|    return true;
 1673|  6.50k|}
roaring_iterator_init:
 1780|  13.0k|                           roaring_uint32_iterator_t *newit) {
 1781|  13.0k|    newit->parent = r;
 1782|  13.0k|    newit->container_index = 0;
 1783|  13.0k|    newit->has_value = loadfirstvalue(newit);
 1784|  13.0k|}
roaring_uint32_iterator_move_equalorlarger:
 1812|  6.51k|                                                uint32_t val) {
 1813|  6.51k|    uint16_t hb = val >> 16;
 1814|  6.51k|    const int i = ra_get_index(&it->parent->high_low_container, hb);
 1815|  6.51k|    if (i >= 0) {
  ------------------
  |  Branch (1815:9): [True: 6.43k, False: 84]
  ------------------
 1816|  6.43k|        uint32_t lowvalue =
 1817|  6.43k|            container_maximum(it->parent->high_low_container.containers[i],
 1818|  6.43k|                              it->parent->high_low_container.typecodes[i]);
 1819|  6.43k|        uint16_t lb = val & 0xFFFF;
 1820|  6.43k|        if (lowvalue < lb) {
  ------------------
  |  Branch (1820:13): [True: 9, False: 6.42k]
  ------------------
 1821|       |            // will have to load first value of next container
 1822|      9|            it->container_index = i + 1;
 1823|  6.42k|        } else {
 1824|       |            // the value is necessarily within the range of the container
 1825|  6.42k|            it->container_index = i;
 1826|  6.42k|            it->has_value = loadfirstvalue_largeorequal(it, val);
 1827|  6.42k|            return it->has_value;
 1828|  6.42k|        }
 1829|  6.43k|    } else {
 1830|       |        // there is no matching, so we are going for the next container
 1831|     84|        it->container_index = -i - 1;
 1832|     84|    }
 1833|     93|    it->has_value = loadfirstvalue(it);
 1834|     93|    return it->has_value;
 1835|  6.51k|}
roaring_uint32_iterator_advance:
 1837|   336k|bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) {
 1838|   336k|    if (it->container_index >= it->parent->high_low_container.size) {
  ------------------
  |  Branch (1838:9): [True: 0, False: 336k]
  ------------------
 1839|      0|        return (it->has_value = false);
 1840|      0|    }
 1841|   336k|    if (it->container_index < 0) {
  ------------------
  |  Branch (1841:9): [True: 0, False: 336k]
  ------------------
 1842|      0|        it->container_index = 0;
 1843|      0|        return (it->has_value = loadfirstvalue(it));
 1844|      0|    }
 1845|   336k|    uint16_t low16 = (uint16_t)it->current_value;
 1846|   336k|    if (container_iterator_next(it->container, it->typecode, &it->container_it,
  ------------------
  |  Branch (1846:9): [True: 330k, False: 6.28k]
  ------------------
 1847|   336k|                                &low16)) {
 1848|   330k|        it->current_value = it->highbits | low16;
 1849|   330k|        return (it->has_value = true);
 1850|   330k|    }
 1851|  6.28k|    it->container_index++;
 1852|  6.28k|    return (it->has_value = loadfirstvalue(it));
 1853|   336k|}
roaring_bitmap_equals:
 2048|  13.0k|                           const roaring_bitmap_t *r2) {
 2049|  13.0k|    const roaring_array_t *ra1 = &r1->high_low_container;
 2050|  13.0k|    const roaring_array_t *ra2 = &r2->high_low_container;
 2051|       |
 2052|  13.0k|    if (ra1->size != ra2->size) {
  ------------------
  |  Branch (2052:9): [True: 2.56k, False: 10.4k]
  ------------------
 2053|  2.56k|        return false;
 2054|  2.56k|    }
 2055|  94.2k|    for (int i = 0; i < ra1->size; ++i) {
  ------------------
  |  Branch (2055:21): [True: 83.8k, False: 10.4k]
  ------------------
 2056|  83.8k|        if (ra1->keys[i] != ra2->keys[i]) {
  ------------------
  |  Branch (2056:13): [True: 26, False: 83.7k]
  ------------------
 2057|     26|            return false;
 2058|     26|        }
 2059|  83.8k|    }
 2060|  90.3k|    for (int i = 0; i < ra1->size; ++i) {
  ------------------
  |  Branch (2060:21): [True: 83.7k, False: 6.51k]
  ------------------
 2061|  83.7k|        bool areequal = container_equals(ra1->containers[i], ra1->typecodes[i],
 2062|  83.7k|                                         ra2->containers[i], ra2->typecodes[i]);
 2063|  83.7k|        if (!areequal) {
  ------------------
  |  Branch (2063:13): [True: 3.92k, False: 79.8k]
  ------------------
 2064|  3.92k|            return false;
 2065|  3.92k|        }
 2066|  83.7k|    }
 2067|  6.51k|    return true;
 2068|  10.4k|}
roaring_bitmap_is_subset:
 2071|  11.8k|                              const roaring_bitmap_t *r2) {
 2072|  11.8k|    const roaring_array_t *ra1 = &r1->high_low_container;
 2073|  11.8k|    const roaring_array_t *ra2 = &r2->high_low_container;
 2074|       |
 2075|  11.8k|    const int length1 = ra1->size, length2 = ra2->size;
 2076|       |
 2077|  11.8k|    int pos1 = 0, pos2 = 0;
 2078|       |
 2079|  11.8k|    while (pos1 < length1 && pos2 < length2) {
  ------------------
  |  Branch (2079:12): [True: 11.3k, False: 456]
  |  Branch (2079:30): [True: 11.3k, False: 42]
  ------------------
 2080|  11.3k|        const uint16_t s1 = ra_get_key_at_index(ra1, (uint16_t)pos1);
 2081|  11.3k|        const uint16_t s2 = ra_get_key_at_index(ra2, (uint16_t)pos2);
 2082|       |
 2083|  11.3k|        if (s1 == s2) {
  ------------------
  |  Branch (2083:13): [True: 11.2k, False: 107]
  ------------------
 2084|  11.2k|            uint8_t type1, type2;
 2085|  11.2k|            container_t *c1 =
 2086|  11.2k|                ra_get_container_at_index(ra1, (uint16_t)pos1, &type1);
 2087|  11.2k|            container_t *c2 =
 2088|  11.2k|                ra_get_container_at_index(ra2, (uint16_t)pos2, &type2);
 2089|  11.2k|            if (!container_is_subset(c1, type1, c2, type2)) return false;
  ------------------
  |  Branch (2089:17): [True: 11.2k, False: 0]
  ------------------
 2090|      0|            ++pos1;
 2091|      0|            ++pos2;
 2092|    107|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (2092:20): [True: 107, False: 0]
  ------------------
 2093|    107|            return false;
 2094|    107|        } else {  // s1 > s2
 2095|      0|            pos2 = ra_advance_until(ra2, s1, pos2);
 2096|      0|        }
 2097|  11.3k|    }
 2098|    498|    if (pos1 == length1)
  ------------------
  |  Branch (2098:9): [True: 456, False: 42]
  ------------------
 2099|    456|        return true;
 2100|     42|    else
 2101|     42|        return false;
 2102|    498|}
roaring_bitmap_flip_inplace:
 2271|  6.51k|                                 uint64_t range_end) {
 2272|  6.51k|    if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) {
  ------------------
  |  Branch (2272:9): [True: 6.00k, False: 509]
  |  Branch (2272:37): [True: 0, False: 509]
  ------------------
 2273|  6.00k|        return;
 2274|  6.00k|    }
 2275|    509|    roaring_bitmap_flip_inplace_closed(x1, (uint32_t)range_start,
 2276|    509|                                       (uint32_t)(range_end - 1));
 2277|    509|}
roaring_bitmap_flip_inplace_closed:
 2281|  7.02k|                                        uint32_t range_end) {
 2282|  7.02k|    if (range_start > range_end) {
  ------------------
  |  Branch (2282:9): [True: 165, False: 6.86k]
  ------------------
 2283|    165|        return;  // empty range
 2284|    165|    }
 2285|       |
 2286|  6.86k|    uint16_t hb_start = (uint16_t)(range_start >> 16);
 2287|  6.86k|    const uint16_t lb_start = (uint16_t)range_start;
 2288|  6.86k|    uint16_t hb_end = (uint16_t)(range_end >> 16);
 2289|  6.86k|    const uint16_t lb_end = (uint16_t)range_end;
 2290|       |
 2291|  6.86k|    if (hb_start == hb_end) {
  ------------------
  |  Branch (2291:9): [True: 6.41k, False: 443]
  ------------------
 2292|  6.41k|        inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
 2293|  6.41k|                               lb_end);
 2294|  6.41k|    } else {
 2295|       |        // start and end containers are distinct
 2296|    443|        if (lb_start > 0) {
  ------------------
  |  Branch (2296:13): [True: 362, False: 81]
  ------------------
 2297|       |            // handle first (partial) container
 2298|    362|            inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
 2299|    362|                                   0xFFFF);
 2300|    362|            ++hb_start;  // for the full containers.  Can't wrap.
 2301|    362|        }
 2302|       |
 2303|    443|        if (lb_end != 0xFFFF) --hb_end;
  ------------------
  |  Branch (2303:13): [True: 423, False: 20]
  ------------------
 2304|       |
 2305|  22.2k|        for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
  ------------------
  |  Branch (2305:38): [True: 21.7k, False: 443]
  ------------------
 2306|  21.7k|            inplace_fully_flip_container(&x1->high_low_container, (uint16_t)hb);
 2307|  21.7k|        }
 2308|       |        // handle a partial final container
 2309|    443|        if (lb_end != 0xFFFF) {
  ------------------
  |  Branch (2309:13): [True: 423, False: 20]
  ------------------
 2310|    423|            inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
 2311|    423|                                   lb_end);
 2312|    423|            ++hb_end;
 2313|    423|        }
 2314|    443|    }
 2315|  6.86k|}
roaring_bitmap_rank:
 2781|  6.51k|uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
 2782|  6.51k|    uint64_t size = 0;
 2783|  6.51k|    uint32_t xhigh = x >> 16;
 2784|  6.51k|    for (int i = 0; i < bm->high_low_container.size; i++) {
  ------------------
  |  Branch (2784:21): [True: 6.51k, False: 0]
  ------------------
 2785|  6.51k|        uint32_t key = bm->high_low_container.keys[i];
 2786|  6.51k|        if (xhigh > key) {
  ------------------
  |  Branch (2786:13): [True: 0, False: 6.51k]
  ------------------
 2787|      0|            size +=
 2788|      0|                container_get_cardinality(bm->high_low_container.containers[i],
 2789|      0|                                          bm->high_low_container.typecodes[i]);
 2790|  6.51k|        } else if (xhigh == key) {
  ------------------
  |  Branch (2790:20): [True: 6.51k, False: 0]
  ------------------
 2791|  6.51k|            return size + container_rank(bm->high_low_container.containers[i],
 2792|  6.51k|                                         bm->high_low_container.typecodes[i],
 2793|  6.51k|                                         x & 0xFFFF);
 2794|  6.51k|        } else {
 2795|      0|            return size;
 2796|      0|        }
 2797|  6.51k|    }
 2798|      0|    return size;
 2799|  6.51k|}
roaring_bitmap_minimum:
 2860|  6.51k|uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
 2861|  6.51k|    if (bm->high_low_container.size > 0) {
  ------------------
  |  Branch (2861:9): [True: 6.47k, False: 42]
  ------------------
 2862|  6.47k|        container_t *c = bm->high_low_container.containers[0];
 2863|  6.47k|        uint8_t type = bm->high_low_container.typecodes[0];
 2864|  6.47k|        uint32_t key = bm->high_low_container.keys[0];
 2865|  6.47k|        uint32_t lowvalue = container_minimum(c, type);
 2866|  6.47k|        return lowvalue | (key << 16);
 2867|  6.47k|    }
 2868|     42|    return UINT32_MAX;
 2869|  6.51k|}
roaring_bitmap_maximum:
 2875|  6.51k|uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
 2876|  6.51k|    if (bm->high_low_container.size > 0) {
  ------------------
  |  Branch (2876:9): [True: 6.47k, False: 42]
  ------------------
 2877|  6.47k|        container_t *container =
 2878|  6.47k|            bm->high_low_container.containers[bm->high_low_container.size - 1];
 2879|  6.47k|        uint8_t typecode =
 2880|  6.47k|            bm->high_low_container.typecodes[bm->high_low_container.size - 1];
 2881|  6.47k|        uint32_t key =
 2882|  6.47k|            bm->high_low_container.keys[bm->high_low_container.size - 1];
 2883|  6.47k|        uint32_t lowvalue = container_maximum(container, typecode);
 2884|  6.47k|        return lowvalue | (key << 16);
 2885|  6.47k|    }
 2886|     42|    return 0;
 2887|  6.51k|}
roaring_bitmap_select:
 2890|  6.51k|                           uint32_t *element) {
 2891|  6.51k|    container_t *container;
 2892|  6.51k|    uint8_t typecode;
 2893|  6.51k|    uint16_t key;
 2894|  6.51k|    uint32_t start_rank = 0;
 2895|  6.51k|    int i = 0;
 2896|  6.51k|    bool valid = false;
 2897|  13.0k|    while (!valid && i < bm->high_low_container.size) {
  ------------------
  |  Branch (2897:12): [True: 6.95k, False: 6.07k]
  |  Branch (2897:22): [True: 6.51k, False: 437]
  ------------------
 2898|  6.51k|        container = bm->high_low_container.containers[i];
 2899|  6.51k|        typecode = bm->high_low_container.typecodes[i];
 2900|  6.51k|        valid =
 2901|  6.51k|            container_select(container, typecode, &start_rank, rank, element);
 2902|  6.51k|        i++;
 2903|  6.51k|    }
 2904|       |
 2905|  6.51k|    if (valid) {
  ------------------
  |  Branch (2905:9): [True: 6.07k, False: 437]
  ------------------
 2906|  6.07k|        key = bm->high_low_container.keys[i - 1];
 2907|  6.07k|        *element |= (((uint32_t)key) << 16);  // w/o cast, key promotes signed
 2908|  6.07k|        return true;
 2909|  6.07k|    } else
 2910|    437|        return false;
 2911|  6.51k|}
roaring_bitmap_intersect:
 2914|  6.51k|                              const roaring_bitmap_t *x2) {
 2915|  6.51k|    const int length1 = x1->high_low_container.size,
 2916|  6.51k|              length2 = x2->high_low_container.size;
 2917|  6.51k|    uint64_t answer = 0;
 2918|  6.51k|    int pos1 = 0, pos2 = 0;
 2919|       |
 2920|  6.92k|    while (pos1 < length1 && pos2 < length2) {
  ------------------
  |  Branch (2920:12): [True: 6.51k, False: 407]
  |  Branch (2920:30): [True: 6.47k, False: 42]
  ------------------
 2921|  6.47k|        const uint16_t s1 =
 2922|  6.47k|            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 2923|  6.47k|        const uint16_t s2 =
 2924|  6.47k|            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 2925|       |
 2926|  6.47k|        if (s1 == s2) {
  ------------------
  |  Branch (2926:13): [True: 6.40k, False: 72]
  ------------------
 2927|  6.40k|            uint8_t type1, type2;
 2928|  6.40k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
 2929|  6.40k|                                                        (uint16_t)pos1, &type1);
 2930|  6.40k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 2931|  6.40k|                                                        (uint16_t)pos2, &type2);
 2932|  6.40k|            if (container_intersect(c1, type1, c2, type2)) return true;
  ------------------
  |  Branch (2932:17): [True: 6.06k, False: 335]
  ------------------
 2933|    335|            ++pos1;
 2934|    335|            ++pos2;
 2935|    335|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (2935:20): [True: 72, False: 0]
  ------------------
 2936|     72|            pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
 2937|     72|        } else {  // s1 > s2
 2938|      0|            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
 2939|      0|        }
 2940|  6.47k|    }
 2941|    449|    return answer != 0;
 2942|  6.51k|}
roaring_bitmap_and_cardinality:
 2964|  26.0k|                                        const roaring_bitmap_t *x2) {
 2965|  26.0k|    const int length1 = x1->high_low_container.size,
 2966|  26.0k|              length2 = x2->high_low_container.size;
 2967|  26.0k|    uint64_t answer = 0;
 2968|  26.0k|    int pos1 = 0, pos2 = 0;
 2969|  51.9k|    while (pos1 < length1 && pos2 < length2) {
  ------------------
  |  Branch (2969:12): [True: 26.0k, False: 25.8k]
  |  Branch (2969:30): [True: 25.8k, False: 168]
  ------------------
 2970|  25.8k|        const uint16_t s1 =
 2971|  25.8k|            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
 2972|  25.8k|        const uint16_t s2 =
 2973|  25.8k|            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
 2974|       |
 2975|  25.8k|        if (s1 == s2) {
  ------------------
  |  Branch (2975:13): [True: 25.6k, False: 288]
  ------------------
 2976|  25.6k|            uint8_t type1, type2;
 2977|  25.6k|            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
 2978|  25.6k|                                                        (uint16_t)pos1, &type1);
 2979|  25.6k|            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
 2980|  25.6k|                                                        (uint16_t)pos2, &type2);
 2981|  25.6k|            answer += container_and_cardinality(c1, type1, c2, type2);
 2982|  25.6k|            ++pos1;
 2983|  25.6k|            ++pos2;
 2984|  25.6k|        } else if (s1 < s2) {  // s1 < s2
  ------------------
  |  Branch (2984:20): [True: 288, False: 0]
  ------------------
 2985|    288|            pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
 2986|    288|        } else {  // s1 > s2
 2987|      0|            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
 2988|      0|        }
 2989|  25.8k|    }
 2990|  26.0k|    return answer;
 2991|  26.0k|}
roaring_bitmap_jaccard_index:
 2994|  6.51k|                                    const roaring_bitmap_t *x2) {
 2995|  6.51k|    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
 2996|  6.51k|    const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
 2997|  6.51k|    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
 2998|  6.51k|    return (double)inter / (double)(c1 + c2 - inter);
 2999|  6.51k|}
roaring_bitmap_or_cardinality:
 3002|  6.51k|                                       const roaring_bitmap_t *x2) {
 3003|  6.51k|    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
 3004|  6.51k|    const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
 3005|  6.51k|    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
 3006|  6.51k|    return c1 + c2 - inter;
 3007|  6.51k|}
roaring_bitmap_andnot_cardinality:
 3010|  6.51k|                                           const roaring_bitmap_t *x2) {
 3011|  6.51k|    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
 3012|  6.51k|    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
 3013|  6.51k|    return c1 - inter;
 3014|  6.51k|}
roaring_bitmap_xor_cardinality:
 3017|  6.51k|                                        const roaring_bitmap_t *x2) {
 3018|  6.51k|    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
 3019|  6.51k|    const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
 3020|  6.51k|    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
 3021|  6.51k|    return c1 + c2 - 2 * inter;
 3022|  6.51k|}
roaring_bitmap_contains_range:
 3029|  6.51k|                                   uint64_t range_start, uint64_t range_end) {
 3030|  6.51k|    if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) {
  ------------------
  |  Branch (3030:9): [True: 5.70k, False: 815]
  |  Branch (3030:37): [True: 0, False: 815]
  ------------------
 3031|  5.70k|        return true;
 3032|  5.70k|    }
 3033|    815|    return roaring_bitmap_contains_range_closed(r, (uint32_t)range_start,
 3034|    815|                                                (uint32_t)(range_end - 1));
 3035|  6.51k|}
roaring_bitmap_contains_range_closed:
 3043|    815|                                          uint32_t range_end) {
 3044|    815|    if (range_start > range_end) {
  ------------------
  |  Branch (3044:9): [True: 0, False: 815]
  ------------------
 3045|      0|        return true;
 3046|      0|    }  // empty range are always contained!
 3047|    815|    if (range_end == range_start) {
  ------------------
  |  Branch (3047:9): [True: 68, False: 747]
  ------------------
 3048|     68|        return roaring_bitmap_contains(r, (uint32_t)range_start);
 3049|     68|    }
 3050|    747|    uint16_t hb_rs = (uint16_t)(range_start >> 16);
 3051|    747|    uint16_t hb_re = (uint16_t)(range_end >> 16);
 3052|    747|    const int32_t span = hb_re - hb_rs;
 3053|    747|    const int32_t hlc_sz = ra_get_size(&r->high_low_container);
 3054|    747|    if (hlc_sz < span + 1) {
  ------------------
  |  Branch (3054:9): [True: 218, False: 529]
  ------------------
 3055|    218|        return false;
 3056|    218|    }
 3057|    529|    int32_t is = ra_get_index(&r->high_low_container, hb_rs);
 3058|    529|    int32_t ie = ra_get_index(&r->high_low_container, hb_re);
 3059|    529|    if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
  ------------------
  |  Branch (3059:9): [True: 51, False: 478]
  |  Branch (3059:21): [True: 15, False: 463]
  |  Branch (3059:33): [True: 7, False: 456]
  |  Branch (3059:56): [True: 0, False: 456]
  ------------------
 3060|     73|        return false;
 3061|     73|    }
 3062|    456|    const uint32_t lb_rs = range_start & 0xFFFF;
 3063|    456|    const uint32_t lb_re = (range_end & 0xFFFF) + 1;
 3064|    456|    uint8_t type;
 3065|    456|    container_t *c =
 3066|    456|        ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type);
 3067|    456|    if (hb_rs == hb_re) {
  ------------------
  |  Branch (3067:9): [True: 387, False: 69]
  ------------------
 3068|    387|        return container_contains_range(c, lb_rs, lb_re, type);
 3069|    387|    }
 3070|     69|    if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
  ------------------
  |  Branch (3070:9): [True: 15, False: 54]
  ------------------
 3071|     15|        return false;
 3072|     15|    }
 3073|     54|    c = ra_get_container_at_index(&r->high_low_container, (uint16_t)ie, &type);
 3074|     54|    if (!container_contains_range(c, 0, lb_re, type)) {
  ------------------
  |  Branch (3074:9): [True: 7, False: 47]
  ------------------
 3075|      7|        return false;
 3076|      7|    }
 3077|  1.25k|    for (int32_t i = is + 1; i < ie; ++i) {
  ------------------
  |  Branch (3077:30): [True: 1.23k, False: 24]
  ------------------
 3078|  1.23k|        c = ra_get_container_at_index(&r->high_low_container, (uint16_t)i,
 3079|  1.23k|                                      &type);
 3080|  1.23k|        if (!container_is_full(c, type)) {
  ------------------
  |  Branch (3080:13): [True: 23, False: 1.20k]
  ------------------
 3081|     23|            return false;
 3082|     23|        }
 3083|  1.23k|    }
 3084|     24|    return true;
 3085|     47|}
roaring_bitmap_is_strict_subset:
 3088|  6.51k|                                     const roaring_bitmap_t *r2) {
 3089|  6.51k|    return (roaring_bitmap_get_cardinality(r2) >
  ------------------
  |  Branch (3089:13): [True: 5.30k, False: 1.20k]
  ------------------
 3090|  6.51k|                roaring_bitmap_get_cardinality(r1) &&
 3091|  5.30k|            roaring_bitmap_is_subset(r1, r2));
  ------------------
  |  Branch (3091:13): [True: 228, False: 5.07k]
  ------------------
 3092|  6.51k|}
roaring.c:containerptr_roaring_bitmap_add:
   56|  19.5k|                                                           int *index) {
   57|  19.5k|    roaring_array_t *ra = &r->high_low_container;
   58|       |
   59|  19.5k|    uint16_t hb = val >> 16;
   60|  19.5k|    const int i = ra_get_index(ra, hb);
   61|  19.5k|    if (i >= 0) {
  ------------------
  |  Branch (61:9): [True: 6.51k, False: 13.0k]
  ------------------
   62|  6.51k|        ra_unshare_container_at_index(ra, (uint16_t)i);
   63|  6.51k|        container_t *c = ra_get_container_at_index(ra, (uint16_t)i, type);
   64|  6.51k|        uint8_t new_type = *type;
   65|  6.51k|        container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type);
   66|  6.51k|        *index = i;
   67|  6.51k|        if (c2 != c) {
  ------------------
  |  Branch (67:13): [True: 2, False: 6.51k]
  ------------------
   68|      2|            container_free(c, *type);
   69|      2|            ra_set_container_at_index(ra, i, c2, new_type);
   70|      2|            *type = new_type;
   71|      2|            return c2;
   72|  6.51k|        } else {
   73|  6.51k|            return c;
   74|  6.51k|        }
   75|  13.0k|    } else {
   76|  13.0k|        array_container_t *new_ac = array_container_create();
   77|  13.0k|        container_t *c =
   78|  13.0k|            container_add(new_ac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, type);
  ------------------
  |  |   49|  13.0k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
   79|       |        // we could just assume that it stays an array container
   80|  13.0k|        ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type);
   81|  13.0k|        *index = -i - 1;
   82|  13.0k|        return c;
   83|  13.0k|    }
   84|  19.5k|}
roaring.c:add_bulk_impl:
  106|  8.14M|                                 uint32_t val) {
  107|  8.14M|    uint16_t key = val >> 16;
  108|  8.14M|    if (context->container == NULL || context->key != key) {
  ------------------
  |  Branch (108:9): [True: 0, False: 8.14M]
  |  Branch (108:39): [True: 0, False: 8.14M]
  ------------------
  109|      0|        uint8_t typecode;
  110|      0|        int idx;
  111|      0|        context->container =
  112|      0|            containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
  113|      0|        context->typecode = typecode;
  114|      0|        context->idx = idx;
  115|      0|        context->key = key;
  116|  8.14M|    } else {
  117|       |        // no need to seek the container, it is at hand
  118|       |        // because we already have the container at hand, we can do the
  119|       |        // insertion directly, bypassing the roaring_bitmap_add call
  120|  8.14M|        uint8_t new_typecode;
  121|  8.14M|        container_t *container2 = container_add(
  122|  8.14M|            context->container, val & 0xFFFF, context->typecode, &new_typecode);
  123|  8.14M|        if (container2 != context->container) {
  ------------------
  |  Branch (123:13): [True: 125, False: 8.14M]
  ------------------
  124|       |            // rare instance when we need to change the container type
  125|    125|            container_free(context->container, context->typecode);
  126|    125|            ra_set_container_at_index(&r->high_low_container, context->idx,
  127|    125|                                      container2, new_typecode);
  128|    125|            context->typecode = new_typecode;
  129|    125|            context->container = container2;
  130|    125|        }
  131|  8.14M|    }
  132|  8.14M|}
roaring.c:is_cow:
   41|   110k|static inline bool is_cow(const roaring_bitmap_t *r) {
   42|       |    return r->high_low_container.flags & ROARING_FLAG_COW;
  ------------------
  |  |   46|   110k|#define ROARING_FLAG_COW UINT8_C(0x1)
  ------------------
   43|   110k|}
roaring.c:loadfirstvalue:
 1728|  19.4k|    roaring_uint32_iterator_t *newit) {
 1729|  19.4k|    if (iter_new_container_partial_init(newit)) {
  ------------------
  |  Branch (1729:9): [True: 12.8k, False: 6.55k]
  ------------------
 1730|  12.8k|        uint16_t value = 0;
 1731|  12.8k|        newit->container_it =
 1732|  12.8k|            container_init_iterator(newit->container, newit->typecode, &value);
 1733|  12.8k|        newit->current_value = newit->highbits | value;
 1734|  12.8k|    }
 1735|  19.4k|    return newit->has_value;
 1736|  19.4k|}
roaring.c:iter_new_container_partial_init:
 1698|  25.8k|    roaring_uint32_iterator_t *newit) {
 1699|  25.8k|    newit->current_value = 0;
 1700|  25.8k|    if (newit->container_index >= newit->parent->high_low_container.size ||
  ------------------
  |  Branch (1700:9): [True: 6.55k, False: 19.2k]
  ------------------
 1701|  19.2k|        newit->container_index < 0) {
  ------------------
  |  Branch (1701:9): [True: 0, False: 19.2k]
  ------------------
 1702|  6.55k|        newit->current_value = UINT32_MAX;
 1703|  6.55k|        return (newit->has_value = false);
 1704|  6.55k|    }
 1705|  19.2k|    newit->has_value = true;
 1706|       |    // we precompute container, typecode and highbits so that successive
 1707|       |    // iterators do not have to grab them from odd memory locations
 1708|       |    // and have to worry about the (easily predicted) container_unwrap_shared
 1709|       |    // call.
 1710|  19.2k|    newit->container =
 1711|  19.2k|        newit->parent->high_low_container.containers[newit->container_index];
 1712|  19.2k|    newit->typecode =
 1713|  19.2k|        newit->parent->high_low_container.typecodes[newit->container_index];
 1714|  19.2k|    newit->highbits =
 1715|  19.2k|        ((uint32_t)
 1716|  19.2k|             newit->parent->high_low_container.keys[newit->container_index])
 1717|  19.2k|        << 16;
 1718|  19.2k|    newit->container =
 1719|  19.2k|        container_unwrap_shared(newit->container, &(newit->typecode));
 1720|       |    return true;
 1721|  25.8k|}
roaring.c:loadfirstvalue_largeorequal:
 1759|  6.42k|    roaring_uint32_iterator_t *newit, uint32_t val) {
 1760|  6.42k|    bool partial_init = iter_new_container_partial_init(newit);
 1761|  6.42k|    assert(partial_init);
 1762|  6.42k|    if (!partial_init) {
  ------------------
  |  Branch (1762:9): [True: 0, False: 6.42k]
  ------------------
 1763|      0|        return false;
 1764|      0|    }
 1765|  6.42k|    uint16_t value = 0;
 1766|  6.42k|    newit->container_it =
 1767|  6.42k|        container_init_iterator(newit->container, newit->typecode, &value);
 1768|  6.42k|    bool found = container_iterator_lower_bound(
 1769|  6.42k|        newit->container, newit->typecode, &newit->container_it, &value,
 1770|  6.42k|        val & 0xFFFF);
 1771|  6.42k|    assert(found);
 1772|  6.42k|    if (!found) {
  ------------------
  |  Branch (1772:9): [True: 0, False: 6.42k]
  ------------------
 1773|      0|        return false;
 1774|      0|    }
 1775|  6.42k|    newit->current_value = newit->highbits | value;
 1776|       |    return true;
 1777|  6.42k|}
roaring.c:inplace_flip_container:
 2133|  7.20k|                                   uint16_t lb_start, uint16_t lb_end) {
 2134|  7.20k|    const int i = ra_get_index(x1_arr, hb);
 2135|  7.20k|    uint8_t ctype_in, ctype_out;
 2136|  7.20k|    container_t *flipped_container = NULL;
 2137|  7.20k|    if (i >= 0) {
  ------------------
  |  Branch (2137:9): [True: 6.67k, False: 524]
  ------------------
 2138|  6.67k|        container_t *container_to_flip =
 2139|  6.67k|            ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
 2140|  6.67k|        flipped_container = container_inot_range(
 2141|  6.67k|            container_to_flip, ctype_in, (uint32_t)lb_start,
 2142|  6.67k|            (uint32_t)(lb_end + 1), &ctype_out);
 2143|       |        // if a new container was created, the old one was already freed
 2144|  6.67k|        if (container_nonzero_cardinality(flipped_container, ctype_out)) {
  ------------------
  |  Branch (2144:13): [True: 6.65k, False: 26]
  ------------------
 2145|  6.65k|            ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
 2146|  6.65k|        } else {
 2147|     26|            container_free(flipped_container, ctype_out);
 2148|     26|            ra_remove_at_index(x1_arr, i);
 2149|     26|        }
 2150|       |
 2151|  6.67k|    } else {
 2152|    524|        flipped_container = container_range_of_ones(
 2153|    524|            (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
 2154|    524|        ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
 2155|    524|                                   ctype_out);
 2156|    524|    }
 2157|  7.20k|}
roaring.c:inplace_fully_flip_container:
 2184|  21.7k|static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
 2185|  21.7k|    const int i = ra_get_index(x1_arr, hb);
 2186|  21.7k|    uint8_t ctype_in, ctype_out;
 2187|  21.7k|    container_t *flipped_container = NULL;
 2188|  21.7k|    if (i >= 0) {
  ------------------
  |  Branch (2188:9): [True: 5.61k, False: 16.1k]
  ------------------
 2189|  5.61k|        container_t *container_to_flip =
 2190|  5.61k|            ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
 2191|  5.61k|        flipped_container =
 2192|  5.61k|            container_inot(container_to_flip, ctype_in, &ctype_out);
 2193|       |
 2194|  5.61k|        if (container_nonzero_cardinality(flipped_container, ctype_out)) {
  ------------------
  |  Branch (2194:13): [True: 476, False: 5.13k]
  ------------------
 2195|    476|            ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
 2196|  5.13k|        } else {
 2197|  5.13k|            container_free(flipped_container, ctype_out);
 2198|  5.13k|            ra_remove_at_index(x1_arr, i);
 2199|  5.13k|        }
 2200|       |
 2201|  16.1k|    } else {
 2202|  16.1k|        flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
 2203|  16.1k|        ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
 2204|  16.1k|                                   ctype_out);
 2205|  16.1k|    }
 2206|  21.7k|}

ra_init_with_capacity:
   80|  65.2k|bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
   81|  65.2k|    if (!new_ra) return false;
  ------------------
  |  Branch (81:9): [True: 0, False: 65.2k]
  ------------------
   82|  65.2k|    ra_init(new_ra);
   83|       |
   84|       |    // Containers hold 64Ki elements, so 64Ki containers is enough to hold
   85|       |    // `0x10000 * 0x10000` (all 2^32) elements
   86|  65.2k|    if (cap > 0x10000) {
  ------------------
  |  Branch (86:9): [True: 0, False: 65.2k]
  ------------------
   87|      0|        cap = 0x10000;
   88|      0|    }
   89|       |
   90|  65.2k|    if (cap > 0) {
  ------------------
  |  Branch (90:9): [True: 32.5k, False: 32.6k]
  ------------------
   91|  32.5k|        void *bigalloc = roaring_malloc(
   92|  32.5k|            cap * (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)));
   93|  32.5k|        if (bigalloc == NULL) return false;
  ------------------
  |  Branch (93:13): [True: 0, False: 32.5k]
  ------------------
   94|  32.5k|        new_ra->containers = (container_t **)bigalloc;
   95|  32.5k|        new_ra->keys = (uint16_t *)(new_ra->containers + cap);
   96|  32.5k|        new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
   97|       |        // Narrowing is safe because of above check
   98|  32.5k|        new_ra->allocation_size = (int32_t)cap;
   99|  32.5k|    }
  100|  65.2k|    return true;
  101|  65.2k|}
ra_shrink_to_fit:
  103|  71.6k|int ra_shrink_to_fit(roaring_array_t *ra) {
  104|  71.6k|    int savings = (ra->allocation_size - ra->size) *
  105|  71.6k|                  (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
  106|  71.6k|    if (!realloc_array(ra, ra->size)) {
  ------------------
  |  Branch (106:9): [True: 0, False: 71.6k]
  ------------------
  107|      0|        return 0;
  108|      0|    }
  109|  71.6k|    ra->allocation_size = ra->size;
  110|  71.6k|    return savings;
  111|  71.6k|}
ra_init:
  113|  65.2k|void ra_init(roaring_array_t *new_ra) {
  114|  65.2k|    if (!new_ra) {
  ------------------
  |  Branch (114:9): [True: 0, False: 65.2k]
  ------------------
  115|      0|        return;
  116|      0|    }
  117|  65.2k|    new_ra->keys = NULL;
  118|  65.2k|    new_ra->containers = NULL;
  119|  65.2k|    new_ra->typecodes = NULL;
  120|       |
  121|  65.2k|    new_ra->allocation_size = 0;
  122|  65.2k|    new_ra->size = 0;
  123|  65.2k|    new_ra->flags = 0;
  124|  65.2k|}
ra_overwrite:
  127|  14.6k|                  bool copy_on_write) {
  128|  14.6k|    ra_clear_containers(dest);  // we are going to overwrite them
  129|  14.6k|    if (source->size == 0) {    // Note: can't call memcpy(NULL), even w/size
  ------------------
  |  Branch (129:9): [True: 22, False: 14.6k]
  ------------------
  130|     22|        dest->size = 0;         // <--- This is important.
  131|     22|        return true;            // output was just cleared, so they match
  132|     22|    }
  133|  14.6k|    if (dest->allocation_size < source->size) {
  ------------------
  |  Branch (133:9): [True: 13.0k, False: 1.65k]
  ------------------
  134|  13.0k|        if (!realloc_array(dest, source->size)) {
  ------------------
  |  Branch (134:13): [True: 0, False: 13.0k]
  ------------------
  135|      0|            return false;
  136|      0|        }
  137|  13.0k|    }
  138|  14.6k|    dest->size = source->size;
  139|  14.6k|    memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
  140|       |    // we go through the containers, turning them into shared containers...
  141|  14.6k|    if (copy_on_write) {
  ------------------
  |  Branch (141:9): [True: 0, False: 14.6k]
  ------------------
  142|      0|        for (int32_t i = 0; i < dest->size; ++i) {
  ------------------
  |  Branch (142:29): [True: 0, False: 0]
  ------------------
  143|      0|            source->containers[i] = get_copy_of_container(
  144|      0|                source->containers[i], &source->typecodes[i], copy_on_write);
  145|      0|        }
  146|       |        // we do a shallow copy to the other bitmap
  147|      0|        memcpy(dest->containers, source->containers,
  148|      0|               dest->size * sizeof(container_t *));
  149|      0|        memcpy(dest->typecodes, source->typecodes,
  150|      0|               dest->size * sizeof(uint8_t));
  151|  14.6k|    } else {
  152|  14.6k|        memcpy(dest->typecodes, source->typecodes,
  153|  14.6k|               dest->size * sizeof(uint8_t));
  154|   176k|        for (int32_t i = 0; i < dest->size; i++) {
  ------------------
  |  Branch (154:29): [True: 161k, False: 14.6k]
  ------------------
  155|   161k|            dest->containers[i] =
  156|   161k|                container_clone(source->containers[i], source->typecodes[i]);
  157|   161k|            if (dest->containers[i] == NULL) {
  ------------------
  |  Branch (157:17): [True: 0, False: 161k]
  ------------------
  158|      0|                for (int32_t j = 0; j < i; j++) {
  ------------------
  |  Branch (158:37): [True: 0, False: 0]
  ------------------
  159|      0|                    container_free(dest->containers[j], dest->typecodes[j]);
  160|      0|                }
  161|      0|                ra_clear_without_containers(dest);
  162|      0|                return false;
  163|      0|            }
  164|   161k|        }
  165|  14.6k|    }
  166|  14.6k|    return true;
  167|  14.6k|}
ra_clear_containers:
  169|  79.9k|void ra_clear_containers(roaring_array_t *ra) {
  170|   554k|    for (int32_t i = 0; i < ra->size; ++i) {
  ------------------
  |  Branch (170:25): [True: 475k, False: 79.9k]
  ------------------
  171|   475k|        container_free(ra->containers[i], ra->typecodes[i]);
  172|   475k|    }
  173|  79.9k|}
ra_reset:
  175|  65.1k|void ra_reset(roaring_array_t *ra) {
  176|  65.1k|    ra_clear_containers(ra);
  177|  65.1k|    ra->size = 0;
  178|  65.1k|    ra_shrink_to_fit(ra);
  179|  65.1k|}
ra_clear_without_containers:
  181|     43|void ra_clear_without_containers(roaring_array_t *ra) {
  182|     43|    roaring_free(
  183|     43|        ra->containers);  // keys and typecodes are allocated with containers
  184|     43|    ra->size = 0;
  185|     43|    ra->allocation_size = 0;
  186|     43|    ra->containers = NULL;
  187|     43|    ra->keys = NULL;
  188|       |    ra->typecodes = NULL;
  189|     43|}
ra_clear:
  191|     43|void ra_clear(roaring_array_t *ra) {
  192|     43|    ra_clear_containers(ra);
  193|     43|    ra_clear_without_containers(ra);
  194|     43|}
extend_array:
  196|  88.2k|bool extend_array(roaring_array_t *ra, int32_t k) {
  197|  88.2k|    int32_t desired_size = ra->size + k;
  198|  88.2k|    const int32_t max_containers = 65536;
  199|  88.2k|    assert(desired_size <= max_containers);
  200|  88.2k|    if (desired_size > ra->allocation_size) {
  ------------------
  |  Branch (200:9): [True: 18.5k, False: 69.7k]
  ------------------
  201|  18.5k|        int32_t new_capacity =
  202|  18.5k|            (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4;
  ------------------
  |  Branch (202:13): [True: 18.5k, False: 0]
  ------------------
  203|  18.5k|        if (new_capacity > max_containers) {
  ------------------
  |  Branch (203:13): [True: 0, False: 18.5k]
  ------------------
  204|      0|            new_capacity = max_containers;
  205|      0|        }
  206|       |
  207|  18.5k|        return realloc_array(ra, new_capacity);
  208|  18.5k|    }
  209|  69.7k|    return true;
  210|  88.2k|}
ra_append:
  213|  25.1k|               uint8_t typecode) {
  214|  25.1k|    extend_array(ra, 1);
  215|  25.1k|    const int32_t pos = ra->size;
  216|       |
  217|  25.1k|    ra->keys[pos] = key;
  218|  25.1k|    ra->containers[pos] = c;
  219|  25.1k|    ra->typecodes[pos] = typecode;
  220|  25.1k|    ra->size++;
  221|  25.1k|}
ra_append_copy_range:
  254|  28.5k|                          bool copy_on_write) {
  255|  28.5k|    extend_array(ra, end_index - start_index);
  256|   214k|    for (int32_t i = start_index; i < end_index; ++i) {
  ------------------
  |  Branch (256:35): [True: 185k, False: 28.5k]
  ------------------
  257|   185k|        const int32_t pos = ra->size;
  258|   185k|        ra->keys[pos] = sa->keys[i];
  259|   185k|        if (copy_on_write) {
  ------------------
  |  Branch (259:13): [True: 0, False: 185k]
  ------------------
  260|      0|            sa->containers[i] = get_copy_of_container(
  261|      0|                sa->containers[i], &sa->typecodes[i], copy_on_write);
  262|      0|            ra->containers[pos] = sa->containers[i];
  263|      0|            ra->typecodes[pos] = sa->typecodes[i];
  264|   185k|        } else {
  265|   185k|            ra->containers[pos] =
  266|   185k|                container_clone(sa->containers[i], sa->typecodes[i]);
  267|   185k|            ra->typecodes[pos] = sa->typecodes[i];
  268|   185k|        }
  269|   185k|        ra->size++;
  270|   185k|    }
  271|  28.5k|}
ra_advance_until_freeing:
  340|     72|int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
  341|    144|    while (pos < ra->size && ra->keys[pos] < x) {
  ------------------
  |  Branch (341:12): [True: 144, False: 0]
  |  Branch (341:30): [True: 72, False: 72]
  ------------------
  342|     72|        container_free(ra->containers[pos], ra->typecodes[pos]);
  343|     72|        ++pos;
  344|     72|    }
  345|     72|    return pos;
  346|     72|}
ra_insert_new_key_value_at:
  349|  33.2k|                                container_t *c, uint8_t typecode) {
  350|  33.2k|    extend_array(ra, 1);
  351|       |    // May be an optimization opportunity with DIY memmove
  352|  33.2k|    memmove(&(ra->keys[i + 1]), &(ra->keys[i]),
  353|  33.2k|            sizeof(uint16_t) * (ra->size - i));
  354|  33.2k|    memmove(&(ra->containers[i + 1]), &(ra->containers[i]),
  355|  33.2k|            sizeof(container_t *) * (ra->size - i));
  356|  33.2k|    memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]),
  357|  33.2k|            sizeof(uint8_t) * (ra->size - i));
  358|  33.2k|    ra->keys[i] = key;
  359|  33.2k|    ra->containers[i] = c;
  360|  33.2k|    ra->typecodes[i] = typecode;
  361|  33.2k|    ra->size++;
  362|  33.2k|}
ra_downsize:
  369|  12.5k|void ra_downsize(roaring_array_t *ra, int32_t new_length) {
  370|       |    assert(new_length <= ra->size);
  371|  12.5k|    ra->size = new_length;
  372|  12.5k|}
ra_remove_at_index:
  374|  67.3k|void ra_remove_at_index(roaring_array_t *ra, int32_t i) {
  375|  67.3k|    memmove(&(ra->containers[i]), &(ra->containers[i + 1]),
  376|  67.3k|            sizeof(container_t *) * (ra->size - i - 1));
  377|  67.3k|    memmove(&(ra->keys[i]), &(ra->keys[i + 1]),
  378|  67.3k|            sizeof(uint16_t) * (ra->size - i - 1));
  379|  67.3k|    memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]),
  380|  67.3k|            sizeof(uint8_t) * (ra->size - i - 1));
  381|  67.3k|    ra->size--;
  382|  67.3k|}
ra_remove_at_index_and_free:
  384|     62|void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) {
  385|     62|    container_free(ra->containers[i], ra->typecodes[i]);
  386|     62|    ra_remove_at_index(ra, i);
  387|     62|}
ra_copy_range:
  395|     35|                   uint32_t new_begin) {
  396|     35|    assert(begin <= end);
  397|     35|    assert(new_begin < begin);
  398|       |
  399|     35|    const int range = end - begin;
  400|       |
  401|       |    // We ensure to previously have freed overwritten containers
  402|       |    // that are not copied elsewhere
  403|       |
  404|     35|    memmove(&(ra->containers[new_begin]), &(ra->containers[begin]),
  405|     35|            sizeof(container_t *) * range);
  406|     35|    memmove(&(ra->keys[new_begin]), &(ra->keys[begin]),
  407|     35|            sizeof(uint16_t) * range);
  408|     35|    memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]),
  409|     35|            sizeof(uint8_t) * range);
  410|     35|}
ra_shift_tail:
  412|  2.03k|void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
  413|  2.03k|    if (distance > 0) {
  ------------------
  |  Branch (413:9): [True: 1.32k, False: 713]
  ------------------
  414|  1.32k|        extend_array(ra, distance);
  415|  1.32k|    }
  416|  2.03k|    int32_t srcpos = ra->size - count;
  417|  2.03k|    int32_t dstpos = srcpos + distance;
  418|  2.03k|    memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), sizeof(uint16_t) * count);
  419|  2.03k|    memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]),
  420|  2.03k|            sizeof(container_t *) * count);
  421|  2.03k|    memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]),
  422|  2.03k|            sizeof(uint8_t) * count);
  423|  2.03k|    ra->size += distance;
  424|  2.03k|}
ra_to_uint32_array:
  426|  6.51k|void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
  427|  6.51k|    size_t ctr = 0;
  428|  74.8k|    for (int32_t i = 0; i < ra->size; ++i) {
  ------------------
  |  Branch (428:25): [True: 68.3k, False: 6.51k]
  ------------------
  429|  68.3k|        int num_added = container_to_uint32_array(
  430|  68.3k|            ans + ctr, ra->containers[i], ra->typecodes[i],
  431|  68.3k|            ((uint32_t)ra->keys[i]) << 16);
  432|  68.3k|        ctr += num_added;
  433|  68.3k|    }
  434|  6.51k|}
ra_has_run_container:
  436|  19.5k|bool ra_has_run_container(const roaring_array_t *ra) {
  437|   183k|    for (int32_t k = 0; k < ra->size; ++k) {
  ------------------
  |  Branch (437:25): [True: 166k, False: 16.9k]
  ------------------
  438|   166k|        if (get_container_type(ra->containers[k], ra->typecodes[k]) ==
  ------------------
  |  Branch (438:13): [True: 2.64k, False: 163k]
  ------------------
  439|   166k|            RUN_CONTAINER_TYPE)
  ------------------
  |  |   50|   166k|#define RUN_CONTAINER_TYPE 3
  ------------------
  440|  2.64k|            return true;
  441|   166k|    }
  442|  16.9k|    return false;
  443|  19.5k|}
ra_portable_header_size:
  445|  13.0k|uint32_t ra_portable_header_size(const roaring_array_t *ra) {
  446|  13.0k|    if (ra_has_run_container(ra)) {
  ------------------
  |  Branch (446:9): [True: 2.64k, False: 10.3k]
  ------------------
  447|  2.64k|        if (ra->size <
  ------------------
  |  Branch (447:13): [True: 2.64k, False: 0]
  ------------------
  448|  2.64k|            NO_OFFSET_THRESHOLD) {  // for small bitmaps, we omit the offsets
  449|  2.64k|            return 4 + (ra->size + 7) / 8 + 4 * ra->size;
  450|  2.64k|        }
  451|      0|        return 4 + (ra->size + 7) / 8 +
  452|      0|               8 * ra->size;  // - 4 because we pack the size with the cookie
  453|  10.3k|    } else {
  454|  10.3k|        return 4 + 4 + 8 * ra->size;
  455|  10.3k|    }
  456|  13.0k|}
ra_portable_size_in_bytes:
  458|  13.0k|size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
  459|  13.0k|    size_t count = ra_portable_header_size(ra);
  460|       |
  461|  99.4k|    for (int32_t k = 0; k < ra->size; ++k) {
  ------------------
  |  Branch (461:25): [True: 86.3k, False: 13.0k]
  ------------------
  462|  86.3k|        count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
  463|  86.3k|    }
  464|  13.0k|    return count;
  465|  13.0k|}
ra_portable_serialize:
  469|  6.51k|size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
  470|  6.51k|    char *initbuf = buf;
  471|  6.51k|    uint32_t startOffset = 0;
  472|  6.51k|    bool hasrun = ra_has_run_container(ra);
  473|  6.51k|    if (hasrun) {
  ------------------
  |  Branch (473:9): [True: 0, False: 6.51k]
  ------------------
  474|      0|        uint32_t cookie = SERIAL_COOKIE | ((uint32_t)(ra->size - 1) << 16);
  475|      0|        uint32_t cookie_le = croaring_htole32(cookie);
  ------------------
  |  |  511|      0|#define croaring_htole32(x) (x)
  ------------------
  476|      0|        memcpy(buf, &cookie_le, sizeof(cookie_le));
  477|      0|        buf += sizeof(cookie_le);
  478|      0|        uint32_t s = (ra->size + 7) / 8;
  479|      0|        memset(buf, 0, s);
  480|      0|        for (int32_t i = 0; i < ra->size; ++i) {
  ------------------
  |  Branch (480:29): [True: 0, False: 0]
  ------------------
  481|      0|            if (get_container_type(ra->containers[i], ra->typecodes[i]) ==
  ------------------
  |  Branch (481:17): [True: 0, False: 0]
  ------------------
  482|      0|                RUN_CONTAINER_TYPE) {
  ------------------
  |  |   50|      0|#define RUN_CONTAINER_TYPE 3
  ------------------
  483|      0|                buf[i / 8] |= 1 << (i % 8);
  484|      0|            }
  485|      0|        }
  486|      0|        buf += s;
  487|      0|        if (ra->size < NO_OFFSET_THRESHOLD) {
  ------------------
  |  Branch (487:13): [True: 0, False: 0]
  ------------------
  488|      0|            startOffset = 4 + 4 * ra->size + s;
  489|      0|        } else {
  490|      0|            startOffset = 4 + 8 * ra->size + s;
  491|      0|        }
  492|  6.51k|    } else {  // backwards compatibility
  493|  6.51k|        uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER;
  494|  6.51k|        uint32_t cookie_le = croaring_htole32(cookie);
  ------------------
  |  |  511|  6.51k|#define croaring_htole32(x) (x)
  ------------------
  495|  6.51k|        memcpy(buf, &cookie_le, sizeof(cookie_le));
  496|  6.51k|        buf += sizeof(cookie_le);
  497|  6.51k|        uint32_t size_le = croaring_htole32((uint32_t)ra->size);
  ------------------
  |  |  511|  6.51k|#define croaring_htole32(x) (x)
  ------------------
  498|  6.51k|        memcpy(buf, &size_le, sizeof(size_le));
  499|  6.51k|        buf += sizeof(size_le);
  500|       |
  501|  6.51k|        startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
  502|  6.51k|    }
  503|  86.3k|    for (int32_t k = 0; k < ra->size; ++k) {
  ------------------
  |  Branch (503:25): [True: 79.8k, False: 6.51k]
  ------------------
  504|  79.8k|        uint16_t key_le = croaring_htole16(ra->keys[k]);
  ------------------
  |  |  510|  79.8k|#define croaring_htole16(x) (x)
  ------------------
  505|  79.8k|        memcpy(buf, &key_le, sizeof(key_le));
  506|  79.8k|        buf += sizeof(key_le);
  507|       |        // get_cardinality returns a value in [1,1<<16], subtracting one
  508|       |        // we get [0,1<<16 - 1] which fits in 16 bits
  509|  79.8k|        uint16_t card = (uint16_t)(container_get_cardinality(ra->containers[k],
  510|  79.8k|                                                             ra->typecodes[k]) -
  511|  79.8k|                                   1);
  512|  79.8k|        uint16_t card_le = croaring_htole16(card);
  ------------------
  |  |  510|  79.8k|#define croaring_htole16(x) (x)
  ------------------
  513|  79.8k|        memcpy(buf, &card_le, sizeof(card_le));
  514|  79.8k|        buf += sizeof(card_le);
  515|  79.8k|    }
  516|  6.51k|    if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
  ------------------
  |  Branch (516:9): [True: 6.51k, False: 0]
  |  Branch (516:22): [True: 0, False: 0]
  ------------------
  517|       |        // writing the containers offsets
  518|  86.3k|        for (int32_t k = 0; k < ra->size; k++) {
  ------------------
  |  Branch (518:29): [True: 79.8k, False: 6.51k]
  ------------------
  519|  79.8k|            uint32_t off_le = croaring_htole32(startOffset);
  ------------------
  |  |  511|  79.8k|#define croaring_htole32(x) (x)
  ------------------
  520|  79.8k|            memcpy(buf, &off_le, sizeof(off_le));
  521|  79.8k|            buf += sizeof(off_le);
  522|  79.8k|            startOffset =
  523|  79.8k|                startOffset +
  524|  79.8k|                container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
  525|  79.8k|        }
  526|  6.51k|    }
  527|  86.3k|    for (int32_t k = 0; k < ra->size; ++k) {
  ------------------
  |  Branch (527:25): [True: 79.8k, False: 6.51k]
  ------------------
  528|  79.8k|        buf += container_write(ra->containers[k], ra->typecodes[k], buf);
  529|  79.8k|    }
  530|  6.51k|    return buf - initbuf;
  531|  6.51k|}
ra_portable_deserialize:
  634|  13.0k|                             const size_t maxbytes, size_t *readbytes) {
  635|  13.0k|    *readbytes = sizeof(int32_t);  // for cookie
  636|  13.0k|    if (*readbytes > maxbytes) {
  ------------------
  |  Branch (636:9): [True: 6.13k, False: 6.89k]
  ------------------
  637|       |        // Ran out of bytes while reading first 4 bytes.
  638|  6.13k|        return false;
  639|  6.13k|    }
  640|  6.89k|    uint32_t cookie;
  641|  6.89k|    memcpy(&cookie, buf, sizeof(int32_t));
  642|  6.89k|    cookie = croaring_letoh32(cookie);
  ------------------
  |  |  517|  6.89k|#define croaring_letoh32(x) croaring_htole32(x)
  |  |  ------------------
  |  |  |  |  511|  6.89k|#define croaring_htole32(x) (x)
  |  |  ------------------
  ------------------
  643|  6.89k|    buf += sizeof(uint32_t);
  644|  6.89k|    if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
  ------------------
  |  Branch (644:9): [True: 6.84k, False: 51]
  ------------------
  645|  6.84k|        cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
  ------------------
  |  Branch (645:9): [True: 254, False: 6.58k]
  ------------------
  646|       |        // "I failed to find one of the right cookies.
  647|    254|        return false;
  648|    254|    }
  649|  6.64k|    int32_t size;
  650|       |
  651|  6.64k|    if ((cookie & 0xFFFF) == SERIAL_COOKIE)
  ------------------
  |  Branch (651:9): [True: 51, False: 6.58k]
  ------------------
  652|     51|        size = (cookie >> 16) + 1;
  653|  6.58k|    else {
  654|  6.58k|        *readbytes += sizeof(int32_t);
  655|  6.58k|        if (*readbytes > maxbytes) {
  ------------------
  |  Branch (655:13): [True: 4, False: 6.58k]
  ------------------
  656|       |            // Ran out of bytes while reading second part of the cookie.
  657|      4|            return false;
  658|      4|        }
  659|  6.58k|        uint32_t size_le;
  660|  6.58k|        memcpy(&size_le, buf, sizeof(int32_t));
  661|  6.58k|        size = (int32_t)croaring_letoh32(size_le);
  ------------------
  |  |  517|  6.58k|#define croaring_letoh32(x) croaring_htole32(x)
  |  |  ------------------
  |  |  |  |  511|  6.58k|#define croaring_htole32(x) (x)
  |  |  ------------------
  ------------------
  662|  6.58k|        buf += sizeof(uint32_t);
  663|  6.58k|    }
  664|  6.63k|    if (size < 0) {
  ------------------
  |  Branch (664:9): [True: 26, False: 6.61k]
  ------------------
  665|       |        // You cannot have a negative number of containers, the data must be
  666|       |        // corrupted.
  667|     26|        return false;
  668|     26|    }
  669|  6.61k|    if (size > (1 << 16)) {
  ------------------
  |  Branch (669:9): [True: 17, False: 6.59k]
  ------------------
  670|       |        // You cannot have so many containers, the data must be corrupted.
  671|     17|        return false;
  672|     17|    }
  673|  6.59k|    const char *bitmapOfRunContainers = NULL;
  674|  6.59k|    bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
  675|  6.59k|    if (hasrun) {
  ------------------
  |  Branch (675:9): [True: 51, False: 6.54k]
  ------------------
  676|     51|        int32_t s = (size + 7) / 8;
  677|     51|        *readbytes += s;
  678|     51|        if (*readbytes > maxbytes) {  // data is corrupted?
  ------------------
  |  Branch (678:13): [True: 1, False: 50]
  ------------------
  679|       |            // Ran out of bytes while reading run bitmap.
  680|      1|            return false;
  681|      1|        }
  682|     50|        bitmapOfRunContainers = buf;
  683|     50|        buf += s;
  684|     50|    }
  685|  6.59k|    const char *keyscards = buf;
  686|       |
  687|  6.59k|    *readbytes += size * 2 * sizeof(uint16_t);
  688|  6.59k|    if (*readbytes > maxbytes) {
  ------------------
  |  Branch (688:9): [True: 22, False: 6.57k]
  ------------------
  689|       |        // Ran out of bytes while reading key-cardinality array.
  690|     22|        return false;
  691|     22|    }
  692|  6.57k|    buf += size * 2 * sizeof(uint16_t);
  693|       |
  694|  6.57k|    bool is_ok = ra_init_with_capacity(answer, size);
  695|  6.57k|    if (!is_ok) {
  ------------------
  |  Branch (695:9): [True: 0, False: 6.57k]
  ------------------
  696|       |        // Failed to allocate memory for roaring array. Bailing out.
  697|      0|        return false;
  698|      0|    }
  699|       |
  700|  86.6k|    for (int32_t k = 0; k < size; ++k) {
  ------------------
  |  Branch (700:25): [True: 80.0k, False: 6.57k]
  ------------------
  701|  80.0k|        uint16_t tmp;
  702|  80.0k|        memcpy(&tmp, keyscards + 4 * k, sizeof(tmp));
  703|  80.0k|        answer->keys[k] = croaring_letoh16(tmp);
  ------------------
  |  |  516|  80.0k|#define croaring_letoh16(x) croaring_htole16(x)
  |  |  ------------------
  |  |  |  |  510|  80.0k|#define croaring_htole16(x) (x)
  |  |  ------------------
  ------------------
  704|  80.0k|    }
  705|  6.57k|    if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
  ------------------
  |  Branch (705:9): [True: 6.52k, False: 49]
  |  Branch (705:22): [True: 22, False: 27]
  ------------------
  706|  6.54k|        *readbytes += size * 4;
  707|  6.54k|        if (*readbytes > maxbytes) {  // data is corrupted?
  ------------------
  |  Branch (707:13): [True: 6, False: 6.53k]
  ------------------
  708|       |            // Ran out of bytes while reading offsets.
  709|      6|            ra_clear(answer);  // we need to clear the containers already
  710|       |                               // allocated, and the roaring array
  711|      6|            return false;
  712|      6|        }
  713|       |
  714|       |        // skipping the offsets
  715|  6.53k|        buf += size * 4;
  716|  6.53k|    }
  717|       |    // Reading the containers
  718|  86.5k|    for (int32_t k = 0; k < size; ++k) {
  ------------------
  |  Branch (718:25): [True: 80.0k, False: 6.52k]
  ------------------
  719|  80.0k|        uint16_t tmp;
  720|  80.0k|        memcpy(&tmp, keyscards + 4 * k + 2, sizeof(tmp));
  721|  80.0k|        tmp = croaring_letoh16(tmp);
  ------------------
  |  |  516|  80.0k|#define croaring_letoh16(x) croaring_htole16(x)
  |  |  ------------------
  |  |  |  |  510|  80.0k|#define croaring_htole16(x) (x)
  |  |  ------------------
  ------------------
  722|  80.0k|        uint32_t thiscard = tmp + 1;
  723|  80.0k|        bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
  724|  80.0k|        bool isrun = false;
  725|  80.0k|        if (hasrun) {
  ------------------
  |  Branch (725:13): [True: 135, False: 79.8k]
  ------------------
  726|    135|            if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
  ------------------
  |  Branch (726:17): [True: 74, False: 61]
  ------------------
  727|     74|                isbitmap = false;
  728|     74|                isrun = true;
  729|     74|            }
  730|    135|        }
  731|  80.0k|        if (isbitmap) {
  ------------------
  |  Branch (731:13): [True: 72.5k, False: 7.44k]
  ------------------
  732|       |            // we check that the read is allowed
  733|  72.5k|            size_t containersize =
  734|  72.5k|                BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
  735|  72.5k|            *readbytes += containersize;
  736|  72.5k|            if (*readbytes > maxbytes) {
  ------------------
  |  Branch (736:17): [True: 9, False: 72.5k]
  ------------------
  737|       |                // Running out of bytes while reading a bitset container.
  738|      9|                ra_clear(answer);  // we need to clear the containers already
  739|       |                                   // allocated, and the roaring array
  740|      9|                return false;
  741|      9|            }
  742|       |            // it is now safe to read
  743|  72.5k|            bitset_container_t *c = bitset_container_create();
  744|  72.5k|            if (c == NULL) {  // memory allocation failure
  ------------------
  |  Branch (744:17): [True: 0, False: 72.5k]
  ------------------
  745|       |                // Failed to allocate memory for a bitset container.
  746|      0|                ra_clear(answer);  // we need to clear the containers already
  747|       |                                   // allocated, and the roaring array
  748|      0|                return false;
  749|      0|            }
  750|  72.5k|            answer->size++;
  751|  72.5k|            buf += bitset_container_read(thiscard, c, buf);
  752|  72.5k|            answer->containers[k] = c;
  753|  72.5k|            answer->typecodes[k] = BITSET_CONTAINER_TYPE;
  ------------------
  |  |   48|  72.5k|#define BITSET_CONTAINER_TYPE 1
  ------------------
  754|  72.5k|        } else if (isrun) {
  ------------------
  |  Branch (754:20): [True: 74, False: 7.37k]
  ------------------
  755|       |            // we check that the read is allowed
  756|     74|            *readbytes += sizeof(uint16_t);
  757|     74|            if (*readbytes > maxbytes) {
  ------------------
  |  Branch (757:17): [True: 9, False: 65]
  ------------------
  758|       |                // Running out of bytes while reading a run container (header).
  759|      9|                ra_clear(answer);  // we need to clear the containers already
  760|       |                                   // allocated, and the roaring array
  761|      9|                return false;
  762|      9|            }
  763|     65|            uint16_t n_runs;
  764|     65|            memcpy(&n_runs, buf, sizeof(uint16_t));
  765|     65|            n_runs = croaring_letoh16(n_runs);
  ------------------
  |  |  516|     65|#define croaring_letoh16(x) croaring_htole16(x)
  |  |  ------------------
  |  |  |  |  510|     65|#define croaring_htole16(x) (x)
  |  |  ------------------
  ------------------
  766|     65|            size_t containersize = n_runs * sizeof(rle16_t);
  767|     65|            *readbytes += containersize;
  768|     65|            if (*readbytes > maxbytes) {  // data is corrupted?
  ------------------
  |  Branch (768:17): [True: 9, False: 56]
  ------------------
  769|       |                // Running out of bytes while reading a run container.
  770|      9|                ra_clear(answer);  // we need to clear the containers already
  771|       |                                   // allocated, and the roaring array
  772|      9|                return false;
  773|      9|            }
  774|       |            // it is now safe to read
  775|       |
  776|     56|            run_container_t *c = run_container_create();
  777|     56|            if (c == NULL) {  // memory allocation failure
  ------------------
  |  Branch (777:17): [True: 0, False: 56]
  ------------------
  778|       |                // Failed to allocate memory for a run container.
  779|      0|                ra_clear(answer);  // we need to clear the containers already
  780|       |                                   // allocated, and the roaring array
  781|      0|                return false;
  782|      0|            }
  783|     56|            answer->size++;
  784|     56|            buf += run_container_read(thiscard, c, buf);
  785|     56|            answer->containers[k] = c;
  786|     56|            answer->typecodes[k] = RUN_CONTAINER_TYPE;
  ------------------
  |  |   50|     56|#define RUN_CONTAINER_TYPE 3
  ------------------
  787|  7.37k|        } else {
  788|       |            // we check that the read is allowed
  789|  7.37k|            size_t containersize = thiscard * sizeof(uint16_t);
  790|  7.37k|            *readbytes += containersize;
  791|  7.37k|            if (*readbytes > maxbytes) {  // data is corrupted?
  ------------------
  |  Branch (791:17): [True: 10, False: 7.36k]
  ------------------
  792|       |                // Running out of bytes while reading an array container.
  793|     10|                ra_clear(answer);  // we need to clear the containers already
  794|       |                                   // allocated, and the roaring array
  795|     10|                return false;
  796|     10|            }
  797|       |            // it is now safe to read
  798|  7.36k|            array_container_t *c =
  799|  7.36k|                array_container_create_given_capacity(thiscard);
  800|  7.36k|            if (c == NULL) {  // memory allocation failure
  ------------------
  |  Branch (800:17): [True: 0, False: 7.36k]
  ------------------
  801|       |                // Failed to allocate memory for an array container.
  802|      0|                ra_clear(answer);  // we need to clear the containers already
  803|       |                                   // allocated, and the roaring array
  804|      0|                return false;
  805|      0|            }
  806|  7.36k|            answer->size++;
  807|  7.36k|            buf += array_container_read(thiscard, c, buf);
  808|  7.36k|            answer->containers[k] = c;
  809|  7.36k|            answer->typecodes[k] = ARRAY_CONTAINER_TYPE;
  ------------------
  |  |   49|  7.36k|#define ARRAY_CONTAINER_TYPE 2
  ------------------
  810|  7.36k|        }
  811|  80.0k|    }
  812|  6.52k|    return true;
  813|  6.56k|}
roaring_array.c:realloc_array:
   42|   103k|static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
   43|       |    //
   44|       |    // Note: not implemented using C's realloc(), because the memory layout is
   45|       |    // Struct-of-Arrays vs. Array-of-Structs:
   46|       |    // https://github.com/RoaringBitmap/CRoaring/issues/256
   47|       |
   48|   103k|    if (new_capacity == 0) {
  ------------------
  |  Branch (48:9): [True: 65.1k, False: 38.1k]
  ------------------
   49|  65.1k|        roaring_free(ra->containers);
   50|  65.1k|        ra->containers = NULL;
   51|  65.1k|        ra->keys = NULL;
   52|  65.1k|        ra->typecodes = NULL;
   53|  65.1k|        ra->allocation_size = 0;
   54|  65.1k|        return true;
   55|  65.1k|    }
   56|  38.1k|    const size_t memoryneeded =
   57|  38.1k|        new_capacity *
   58|  38.1k|        (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t));
   59|  38.1k|    void *bigalloc = roaring_malloc(memoryneeded);
   60|  38.1k|    if (!bigalloc) return false;
  ------------------
  |  Branch (60:9): [True: 0, False: 38.1k]
  ------------------
   61|  38.1k|    void *oldbigalloc = ra->containers;
   62|  38.1k|    container_t **newcontainers = (container_t **)bigalloc;
   63|  38.1k|    uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity);
   64|  38.1k|    uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity);
   65|  38.1k|    assert((char *)(newtypecodes + new_capacity) ==
   66|  38.1k|           (char *)bigalloc + memoryneeded);
   67|  38.1k|    if (ra->size > 0) {
  ------------------
  |  Branch (67:9): [True: 12.0k, False: 26.0k]
  ------------------
   68|  12.0k|        memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size);
   69|  12.0k|        memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size);
   70|  12.0k|        memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
   71|  12.0k|    }
   72|  38.1k|    ra->containers = newcontainers;
   73|  38.1k|    ra->keys = newkeys;
   74|  38.1k|    ra->typecodes = newtypecodes;
   75|  38.1k|    ra->allocation_size = new_capacity;
   76|  38.1k|    roaring_free(oldbigalloc);
   77|       |    return true;
   78|  38.1k|}

_Z17ConsumeVecInRangeR18FuzzedDataProvidermjj:
   24|  13.0k|                                        uint32_t max_value) {
   25|  13.0k|    std::vector<uint32_t> result = {0};
   26|  13.0k|    result.resize(length);
   27|  13.0k|    std::generate(result.begin(), result.end(), [&]() {
   28|  13.0k|        return fdp.ConsumeIntegralInRange<uint32_t>(min_value, max_value);
   29|  13.0k|    });
   30|  13.0k|    return result;
   31|  13.0k|}
LLVMFuzzerTestOneInput:
   33|  6.51k|extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   34|       |    /**
   35|       |     * A bitmap may contain up to 2**32 elements. Later this function will
   36|       |     * output the content to an array where each element uses 32 bits of
   37|       |     * storage. That would use 16 GB. Thus this function is bound to run out of
   38|       |     * memory.
   39|       |     *
   40|       |     * Even without the full serialization to a 32-bit array, a bitmap may still
   41|       |     * use over 512 MB in the normal course of operation: that is to be expected
   42|       |     * since it can represent all sets of integers in [0,2**32]. This function
   43|       |     * may hold several bitmaps in memory at once, so it can require gigabytes
   44|       |     * of memory (without bugs). Hence, unless it has a generous memory
   45|       |     * capacity, this function will run out of memory almost certainly.
   46|       |     *
   47|       |     * For sanity, we may limit the range to, say, 10,000,000 which will use 38
   48|       |     * MB or so. With such a limited range, if we run out of memory, then we can
   49|       |     * almost certain that it has to do with a genuine bug.
   50|       |     */
   51|       |
   52|  6.51k|    uint32_t range_start = 0;
   53|  6.51k|    uint32_t range_end = 10'000'000;
   54|       |
   55|       |    /**
   56|       |     * We are not solely dependent on the range [range_start, range_end) because
   57|       |     * ConsumeVecInRange below produce integers in a small range starting at 0.
   58|       |     */
   59|       |
   60|  6.51k|    FuzzedDataProvider fdp(data, size);
   61|       |    /**
   62|       |     * The next line was ConsumeVecInRange(fdp, 500, 0, 1000) but it would pick
   63|       |     * 500 values at random from 0, 1000, making almost certain that all of the
   64|       |     * values are picked. It seems more useful to pick 500 values in the range
   65|       |     * 0,1000.
   66|       |     */
   67|  6.51k|    std::vector<uint32_t> bitmap_data_a = ConsumeVecInRange(fdp, 500, 0, 1000);
   68|  6.51k|    roaring::Roaring a(bitmap_data_a.size(), bitmap_data_a.data());
   69|  6.51k|    a.runOptimize();
   70|  6.51k|    a.shrinkToFit();
   71|       |
   72|  6.51k|    std::vector<uint32_t> bitmap_data_b = ConsumeVecInRange(fdp, 500, 0, 1000);
   73|  6.51k|    roaring::Roaring b(bitmap_data_b.size(), bitmap_data_b.data());
   74|  6.51k|    b.runOptimize();
   75|  6.51k|    b.add(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   76|  6.51k|    b.addChecked(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   77|  6.51k|    b.addRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
   78|  6.51k|               fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   79|       |    // add half of a to b.
   80|  6.51k|    b.addMany(bitmap_data_a.size() / 2, bitmap_data_a.data());
   81|  6.51k|    b.remove(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   82|  6.51k|    b.removeChecked(
   83|  6.51k|        fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   84|  6.51k|    b.removeRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
   85|  6.51k|                  fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   86|  6.51k|    b.removeRangeClosed(
   87|  6.51k|        fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
   88|  6.51k|        fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   89|  6.51k|    b.maximum();
   90|  6.51k|    b.minimum();
   91|  6.51k|    b.contains(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   92|  6.51k|    b.containsRange(
   93|  6.51k|        fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
   94|  6.51k|        fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
   95|       |
   96|  6.51k|    uint32_t element = 0;
   97|  6.51k|    a.select(fdp.ConsumeIntegralInRange<uint32_t>(0, 1000), &element);
   98|  6.51k|    a.intersect(b);
   99|  6.51k|    a.jaccard_index(b);
  100|  6.51k|    a.or_cardinality(b);
  101|  6.51k|    a.andnot_cardinality(b);
  102|  6.51k|    a.xor_cardinality(b);
  103|  6.51k|    a.rank(fdp.ConsumeIntegralInRange<uint32_t>(0, 5000));
  104|  6.51k|    a.getSizeInBytes();
  105|       |
  106|  6.51k|    roaring::Roaring c = a & b;
  107|  6.51k|    roaring::Roaring d = a - b;
  108|  6.51k|    roaring::Roaring e = a | b;
  109|  6.51k|    roaring::Roaring f = a ^ b;
  110|  6.51k|    a |= e;
  111|  6.51k|    a &= b;
  112|  6.51k|    a -= c;
  113|  6.51k|    a ^= f;
  114|       |
  115|  6.51k|    volatile bool is_equal = (a == b);
  116|       |
  117|  6.51k|    std::vector<uint32_t> b_as_array = {0};
  118|  6.51k|    b_as_array.resize(b.cardinality());
  119|  6.51k|    b.isEmpty();
  120|  6.51k|    b.toUint32Array(b_as_array.data());
  121|       |
  122|  6.51k|    a.isSubset(b);
  123|  6.51k|    a.isStrictSubset(b);
  124|  6.51k|    b.flip(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
  125|  6.51k|           fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
  126|  6.51k|    b.flipClosed(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
  127|  6.51k|                 fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
  128|  6.51k|    b.removeRunCompression();
  129|       |
  130|       |    // Move/copy constructors
  131|  6.51k|    roaring::Roaring copied = b;
  132|  6.51k|    roaring::Roaring moved = std::move(b);
  133|       |
  134|       |    // Asignment operators
  135|  6.51k|    b = copied;
  136|  6.51k|    b = std::move(moved);
  137|       |
  138|       |    // Safe read from serialized
  139|  6.51k|    std::vector<char> read_buffer = fdp.ConsumeBytes<char>(100);
  140|  6.51k|    try {
  141|  6.51k|        roaring::Roaring read_safely =
  142|  6.51k|            roaring::Roaring::readSafe(read_buffer.data(), read_buffer.size());
  143|       |        // The above is guaranteed to be safe. However, read_safely is maybe
  144|       |        // in an improper state and it cannot be used safely (including for
  145|       |        // reserialization).
  146|  6.51k|    } catch (...) {
  147|  6.50k|    }
  148|       |
  149|       |    // The bitmap b can be serialized and re-read.
  150|  6.51k|    std::size_t expected_size_in_bytes = b.getSizeInBytes();
  151|  6.51k|    std::vector<char> buffer(expected_size_in_bytes);
  152|  6.51k|    std::size_t size_in_bytes = b.write(buffer.data());
  153|  6.51k|    assert(expected_size_in_bytes == size_in_bytes);
  ------------------
  |  Branch (153:5): [True: 6.51k, False: 0]
  ------------------
  154|  6.51k|    roaring::Roaring bread =
  155|  6.51k|        roaring::Roaring::readSafe(buffer.data(), size_in_bytes);
  156|  6.51k|    assert(bread == b);
  ------------------
  |  Branch (156:5): [True: 6.51k, False: 0]
  ------------------
  157|       |
  158|  6.51k|    f.toString();
  159|       |
  160|  6.51k|    volatile int unused = 0;
  161|       |
  162|   342k|    for (roaring::Roaring::const_iterator i = a.begin(); i != a.end(); i++) {
  ------------------
  |  Branch (162:58): [True: 336k, False: 6.51k]
  ------------------
  163|   336k|        unused++;
  164|   336k|    }
  165|       |
  166|  6.51k|    roaring::Roaring::const_iterator b_iter = b.begin();
  167|  6.51k|    b_iter.equalorlarger(
  168|  6.51k|        fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
  169|       |
  170|  6.51k|    return 0;
  171|  6.51k|}
croaring_fuzzer_cc.cc:_ZZ17ConsumeVecInRangeR18FuzzedDataProvidermjjENK3$_0clEv:
   27|  6.51M|    std::generate(result.begin(), result.end(), [&]() {
   28|  6.51M|        return fdp.ConsumeIntegralInRange<uint32_t>(min_value, max_value);
   29|  6.51M|    });

