Coverage Report

Created: 2024-09-08 06:23

/src/git/xdiff/xhistogram.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) 2010, Google Inc.
3
 * and other copyright owners as documented in JGit's IP log.
4
 *
5
 * This program and the accompanying materials are made available
6
 * under the terms of the Eclipse Distribution License v1.0 which
7
 * accompanies this distribution, is reproduced below, and is
8
 * available at http://www.eclipse.org/org/documents/edl-v10.php
9
 *
10
 * All rights reserved.
11
 *
12
 * Redistribution and use in source and binary forms, with or
13
 * without modification, are permitted provided that the following
14
 * conditions are met:
15
 *
16
 * - Redistributions of source code must retain the above copyright
17
 *   notice, this list of conditions and the following disclaimer.
18
 *
19
 * - Redistributions in binary form must reproduce the above
20
 *   copyright notice, this list of conditions and the following
21
 *   disclaimer in the documentation and/or other materials provided
22
 *   with the distribution.
23
 *
24
 * - Neither the name of the Eclipse Foundation, Inc. nor the
25
 *   names of its contributors may be used to endorse or promote
26
 *   products derived from this software without specific prior
27
 *   written permission.
28
 *
29
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42
 */
43
44
#include "xinclude.h"
45
46
0
#define MAX_PTR UINT_MAX
47
#define MAX_CNT UINT_MAX
48
49
0
#define LINE_END(n) (line##n + count##n - 1)
50
#define LINE_END_PTR(n) (*line##n + *count##n - 1)
51
52
struct histindex {
53
  struct record {
54
    unsigned int ptr, cnt;
55
    struct record *next;
56
  } **records, /* an occurrence */
57
    **line_map; /* map of line to record chain */
58
  chastore_t rcha;
59
  unsigned int *next_ptrs;
60
  unsigned int table_bits,
61
         records_size,
62
         line_map_size;
63
64
  unsigned int max_chain_length,
65
         key_shift,
66
         ptr_shift;
67
68
  unsigned int cnt,
69
         has_common;
70
71
  xdfenv_t *env;
72
  xpparam_t const *xpp;
73
};
74
75
struct region {
76
  unsigned int begin1, end1;
77
  unsigned int begin2, end2;
78
};
79
80
0
#define LINE_MAP(i, a) (i->line_map[(a) - i->ptr_shift])
81
82
#define NEXT_PTR(index, ptr) \
83
0
  (index->next_ptrs[(ptr) - index->ptr_shift])
84
85
#define CNT(index, ptr) \
86
  ((LINE_MAP(index, ptr))->cnt)
87
88
#define REC(env, s, l) \
89
0
  (env->xdf##s.recs[l - 1])
90
91
static int cmp_recs(xrecord_t *r1, xrecord_t *r2)
92
0
{
93
0
  return r1->ha == r2->ha;
94
95
0
}
96
97
#define CMP(i, s1, l1, s2, l2) \
98
0
  (cmp_recs(REC(i->env, s1, l1), REC(i->env, s2, l2)))
99
100
#define TABLE_HASH(index, side, line) \
101
0
  XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits)
102
103
static int scanA(struct histindex *index, int line1, int count1)
104
0
{
105
0
  unsigned int ptr, tbl_idx;
106
0
  unsigned int chain_len;
107
0
  struct record **rec_chain, *rec;
108
109
0
  for (ptr = LINE_END(1); line1 <= ptr; ptr--) {
110
0
    tbl_idx = TABLE_HASH(index, 1, ptr);
111
0
    rec_chain = index->records + tbl_idx;
112
0
    rec = *rec_chain;
113
114
0
    chain_len = 0;
115
0
    while (rec) {
116
0
      if (CMP(index, 1, rec->ptr, 1, ptr)) {
117
        /*
118
         * ptr is identical to another element. Insert
119
         * it onto the front of the existing element
120
         * chain.
121
         */
122
0
        NEXT_PTR(index, ptr) = rec->ptr;
123
0
        rec->ptr = ptr;
124
        /* cap rec->cnt at MAX_CNT */
125
0
        rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1);
126
0
        LINE_MAP(index, ptr) = rec;
127
0
        goto continue_scan;
128
0
      }
129
130
0
      rec = rec->next;
131
0
      chain_len++;
132
0
    }
133
134
0
    if (chain_len == index->max_chain_length)
135
0
      return -1;
136
137
    /*
138
     * This is the first time we have ever seen this particular
139
     * element in the sequence. Construct a new chain for it.
140
     */
141
0
    if (!(rec = xdl_cha_alloc(&index->rcha)))
142
0
      return -1;
143
0
    rec->ptr = ptr;
144
0
    rec->cnt = 1;
145
0
    rec->next = *rec_chain;
146
0
    *rec_chain = rec;
147
0
    LINE_MAP(index, ptr) = rec;
148
149
0
continue_scan:
150
0
    ; /* no op */
151
0
  }
152
153
0
  return 0;
154
0
}
155
156
static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr,
157
  int line1, int count1, int line2, int count2)
158
0
{
159
0
  unsigned int b_next = b_ptr + 1;
160
0
  struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)];
161
0
  unsigned int as, ae, bs, be, np, rc;
162
0
  int should_break;
163
164
0
  for (; rec; rec = rec->next) {
165
0
    if (rec->cnt > index->cnt) {
166
0
      if (!index->has_common)
167
0
        index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr);
168
0
      continue;
169
0
    }
170
171
0
    as = rec->ptr;
172
0
    if (!CMP(index, 1, as, 2, b_ptr))
173
0
      continue;
174
175
0
    index->has_common = 1;
176
0
    for (;;) {
177
0
      should_break = 0;
178
0
      np = NEXT_PTR(index, as);
179
0
      bs = b_ptr;
180
0
      ae = as;
181
0
      be = bs;
182
0
      rc = rec->cnt;
183
184
0
      while (line1 < as && line2 < bs
185
0
        && CMP(index, 1, as - 1, 2, bs - 1)) {
186
0
        as--;
187
0
        bs--;
188
0
        if (1 < rc)
189
0
          rc = XDL_MIN(rc, CNT(index, as));
190
0
      }
191
0
      while (ae < LINE_END(1) && be < LINE_END(2)
192
0
        && CMP(index, 1, ae + 1, 2, be + 1)) {
193
0
        ae++;
194
0
        be++;
195
0
        if (1 < rc)
196
0
          rc = XDL_MIN(rc, CNT(index, ae));
197
0
      }
198
199
0
      if (b_next <= be)
200
0
        b_next = be + 1;
201
0
      if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) {
202
0
        lcs->begin1 = as;
203
0
        lcs->begin2 = bs;
204
0
        lcs->end1 = ae;
205
0
        lcs->end2 = be;
206
0
        index->cnt = rc;
207
0
      }
208
209
0
      if (np == 0)
210
0
        break;
211
212
0
      while (np <= ae) {
213
0
        np = NEXT_PTR(index, np);
214
0
        if (np == 0) {
215
0
          should_break = 1;
216
0
          break;
217
0
        }
218
0
      }
219
220
0
      if (should_break)
221
0
        break;
222
223
0
      as = np;
224
0
    }
225
0
  }
226
0
  return b_next;
227
0
}
228
229
static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
230
    int line1, int count1, int line2, int count2)
231
0
{
232
0
  xpparam_t xpparam;
233
234
0
  memset(&xpparam, 0, sizeof(xpparam));
235
0
  xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
236
237
0
  return xdl_fall_back_diff(env, &xpparam,
238
0
          line1, count1, line2, count2);
239
0
}
240
241
static inline void free_index(struct histindex *index)
242
0
{
243
0
  xdl_free(index->records);
244
0
  xdl_free(index->line_map);
245
0
  xdl_free(index->next_ptrs);
246
0
  xdl_cha_free(&index->rcha);
247
0
}
248
249
static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
250
        struct region *lcs,
251
        int line1, int count1, int line2, int count2)
252
0
{
253
0
  int b_ptr;
254
0
  int ret = -1;
255
0
  struct histindex index;
256
257
0
  memset(&index, 0, sizeof(index));
258
259
0
  index.env = env;
260
0
  index.xpp = xpp;
261
262
0
  index.records = NULL;
263
0
  index.line_map = NULL;
264
  /* in case of early xdl_cha_free() */
265
0
  index.rcha.head = NULL;
266
267
0
  index.table_bits = xdl_hashbits(count1);
268
0
  index.records_size = 1 << index.table_bits;
269
0
  if (!XDL_CALLOC_ARRAY(index.records, index.records_size))
270
0
    goto cleanup;
271
272
0
  index.line_map_size = count1;
273
0
  if (!XDL_CALLOC_ARRAY(index.line_map, index.line_map_size))
274
0
    goto cleanup;
275
276
0
  if (!XDL_CALLOC_ARRAY(index.next_ptrs, index.line_map_size))
277
0
    goto cleanup;
278
279
  /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */
280
0
  if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0)
281
0
    goto cleanup;
282
283
0
  index.ptr_shift = line1;
284
0
  index.max_chain_length = 64;
285
286
0
  if (scanA(&index, line1, count1))
287
0
    goto cleanup;
288
289
0
  index.cnt = index.max_chain_length + 1;
290
291
0
  for (b_ptr = line2; b_ptr <= LINE_END(2); )
292
0
    b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2);
293
294
0
  if (index.has_common && index.max_chain_length < index.cnt)
295
0
    ret = 1;
296
0
  else
297
0
    ret = 0;
298
299
0
cleanup:
300
0
  free_index(&index);
301
0
  return ret;
302
0
}
303
304
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
305
  int line1, int count1, int line2, int count2)
306
0
{
307
0
  struct region lcs;
308
0
  int lcs_found;
309
0
  int result;
310
0
redo:
311
0
  result = -1;
312
313
0
  if (count1 <= 0 && count2 <= 0)
314
0
    return 0;
315
316
0
  if (LINE_END(1) >= MAX_PTR)
317
0
    return -1;
318
319
0
  if (!count1) {
320
0
    while(count2--)
321
0
      env->xdf2.rchg[line2++ - 1] = 1;
322
0
    return 0;
323
0
  } else if (!count2) {
324
0
    while(count1--)
325
0
      env->xdf1.rchg[line1++ - 1] = 1;
326
0
    return 0;
327
0
  }
328
329
0
  memset(&lcs, 0, sizeof(lcs));
330
0
  lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
331
0
  if (lcs_found < 0)
332
0
    goto out;
333
0
  else if (lcs_found)
334
0
    result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);
335
0
  else {
336
0
    if (lcs.begin1 == 0 && lcs.begin2 == 0) {
337
0
      while (count1--)
338
0
        env->xdf1.rchg[line1++ - 1] = 1;
339
0
      while (count2--)
340
0
        env->xdf2.rchg[line2++ - 1] = 1;
341
0
      result = 0;
342
0
    } else {
343
0
      result = histogram_diff(xpp, env,
344
0
            line1, lcs.begin1 - line1,
345
0
            line2, lcs.begin2 - line2);
346
0
      if (result)
347
0
        goto out;
348
      /*
349
       * result = histogram_diff(xpp, env,
350
       *            lcs.end1 + 1, LINE_END(1) - lcs.end1,
351
       *            lcs.end2 + 1, LINE_END(2) - lcs.end2);
352
       * but let's optimize tail recursion ourself:
353
      */
354
0
      count1 = LINE_END(1) - lcs.end1;
355
0
      line1 = lcs.end1 + 1;
356
0
      count2 = LINE_END(2) - lcs.end2;
357
0
      line2 = lcs.end2 + 1;
358
0
      goto redo;
359
0
    }
360
0
  }
361
0
out:
362
0
  return result;
363
0
}
364
365
int xdl_do_histogram_diff(xpparam_t const *xpp, xdfenv_t *env)
366
0
{
367
0
  return histogram_diff(xpp, env,
368
0
    env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1,
369
0
    env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1);
370
0
}