Coverage Report

Created: 2023-02-27 06:35

/src/git/convert.c
Line
Count
Source (jump to first uncovered line)
1
#include "cache.h"
2
#include "config.h"
3
#include "object-store.h"
4
#include "attr.h"
5
#include "run-command.h"
6
#include "quote.h"
7
#include "sigchain.h"
8
#include "pkt-line.h"
9
#include "sub-process.h"
10
#include "utf8.h"
11
#include "ll-merge.h"
12
13
/*
14
 * convert.c - convert a file when checking it out and checking it in.
15
 *
16
 * This should use the pathname to decide on whether it wants to do some
17
 * more interesting conversions (automatic gzip/unzip, general format
18
 * conversions etc etc), but by default it just does automatic CRLF<->LF
19
 * translation when the "text" attribute or "auto_crlf" option is set.
20
 */
21
22
/*
 * Bit flags describing the line endings found in a buffer.
 * Consumers in this file test the BIN flag before looking at the
 * TXT flags.
 */
#define CONVERT_STAT_BITS_TXT_LF    (1 << 0)
#define CONVERT_STAT_BITS_TXT_CRLF  (1 << 1)
#define CONVERT_STAT_BITS_BIN       (1 << 2)
26
27
/* Byte-class tallies collected over one buffer by gather_stats(). */
struct text_stat {
  /* Line-ending and NUL tallies */
  unsigned nul;     /* NUL bytes */
  unsigned lonecr;  /* CR not immediately followed by LF */
  unsigned lonelf;  /* LF not immediately preceded by CR */
  unsigned crlf;    /* CR LF pairs */

  /* Character-class tallies; these are just approximations! */
  unsigned printable;
  unsigned nonprintable;
};
34
35
static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
36
0
{
37
0
  unsigned long i;
38
39
0
  memset(stats, 0, sizeof(*stats));
40
41
0
  for (i = 0; i < size; i++) {
42
0
    unsigned char c = buf[i];
43
0
    if (c == '\r') {
44
0
      if (i+1 < size && buf[i+1] == '\n') {
45
0
        stats->crlf++;
46
0
        i++;
47
0
      } else
48
0
        stats->lonecr++;
49
0
      continue;
50
0
    }
51
0
    if (c == '\n') {
52
0
      stats->lonelf++;
53
0
      continue;
54
0
    }
55
0
    if (c == 127)
56
      /* DEL */
57
0
      stats->nonprintable++;
58
0
    else if (c < 32) {
59
0
      switch (c) {
60
        /* BS, HT, ESC and FF */
61
0
      case '\b': case '\t': case '\033': case '\014':
62
0
        stats->printable++;
63
0
        break;
64
0
      case 0:
65
0
        stats->nul++;
66
        /* fall through */
67
0
      default:
68
0
        stats->nonprintable++;
69
0
      }
70
0
    }
71
0
    else
72
0
      stats->printable++;
73
0
  }
74
75
  /* If file ends with EOF then don't count this EOF as non-printable. */
76
0
  if (size >= 1 && buf[size-1] == '\032')
77
0
    stats->nonprintable--;
78
0
}
79
80
/*
81
 * The same heuristics as diff.c::mmfile_is_binary()
82
 * We treat files with bare CR as binary
83
 */
84
static int convert_is_binary(const struct text_stat *stats)
85
0
{
86
0
  if (stats->lonecr)
87
0
    return 1;
88
0
  if (stats->nul)
89
0
    return 1;
90
0
  if ((stats->printable >> 7) < stats->nonprintable)
91
0
    return 1;
92
0
  return 0;
93
0
}
94
95
static unsigned int gather_convert_stats(const char *data, unsigned long size)
96
0
{
97
0
  struct text_stat stats;
98
0
  int ret = 0;
99
0
  if (!data || !size)
100
0
    return 0;
101
0
  gather_stats(data, size, &stats);
102
0
  if (convert_is_binary(&stats))
103
0
    ret |= CONVERT_STAT_BITS_BIN;
104
0
  if (stats.crlf)
105
0
    ret |= CONVERT_STAT_BITS_TXT_CRLF;
106
0
  if (stats.lonelf)
107
0
    ret |=  CONVERT_STAT_BITS_TXT_LF;
108
109
0
  return ret;
110
0
}
111
112
static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
113
0
{
114
0
  unsigned int convert_stats = gather_convert_stats(data, size);
115
116
0
  if (convert_stats & CONVERT_STAT_BITS_BIN)
117
0
    return "-text";
118
0
  switch (convert_stats) {
119
0
  case CONVERT_STAT_BITS_TXT_LF:
120
0
    return "lf";
121
0
  case CONVERT_STAT_BITS_TXT_CRLF:
122
0
    return "crlf";
123
0
  case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF:
124
0
    return "mixed";
125
0
  default:
126
0
    return "none";
127
0
  }
128
0
}
129
130
/*
 * Describe the line endings of the blob data the index holds for `path`.
 *
 * Returns one of the string literals produced by
 * gather_convert_stats_ascii(); the caller must not free it. When no
 * blob data can be read the result is "none".
 */
const char *get_cached_convert_stats_ascii(struct index_state *istate,
             const char *path)
{
  /*
   * Start from zero: sz is handed on even when no data came back, and
   * nothing here guarantees that read_blob_data_from_index() stores a
   * size in that case.
   */
  unsigned long sz = 0;
  void *data = read_blob_data_from_index(istate, path, &sz);
  const char *ret = gather_convert_stats_ascii(data, sz);

  free(data);
  return ret;
}
140
141
const char *get_wt_convert_stats_ascii(const char *path)
142
0
{
143
0
  const char *ret = "";
144
0
  struct strbuf sb = STRBUF_INIT;
145
0
  if (strbuf_read_file(&sb, path, 0) >= 0)
146
0
    ret = gather_convert_stats_ascii(sb.buf, sb.len);
147
0
  strbuf_release(&sb);
148
0
  return ret;
149
0
}
150
151
static int text_eol_is_crlf(void)
152
0
{
153
0
  if (auto_crlf == AUTO_CRLF_TRUE)
154
0
    return 1;
155
0
  else if (auto_crlf == AUTO_CRLF_INPUT)
156
0
    return 0;
157
0
  if (core_eol == EOL_CRLF)
158
0
    return 1;
159
0
  if (core_eol == EOL_UNSET && EOL_NATIVE == EOL_CRLF)
160
0
    return 1;
161
0
  return 0;
162
0
}
163
164
static enum eol output_eol(enum convert_crlf_action crlf_action)
165
0
{
166
0
  switch (crlf_action) {
167
0
  case CRLF_BINARY:
168
0
    return EOL_UNSET;
169
0
  case CRLF_TEXT_CRLF:
170
0
    return EOL_CRLF;
171
0
  case CRLF_TEXT_INPUT:
172
0
    return EOL_LF;
173
0
  case CRLF_UNDEFINED:
174
0
  case CRLF_AUTO_CRLF:
175
0
    return EOL_CRLF;
176
0
  case CRLF_AUTO_INPUT:
177
0
    return EOL_LF;
178
0
  case CRLF_TEXT:
179
0
  case CRLF_AUTO:
180
    /* fall through */
181
0
    return text_eol_is_crlf() ? EOL_CRLF : EOL_LF;
182
0
  }
183
0
  warning(_("illegal crlf_action %d"), (int)crlf_action);
184
0
  return core_eol;
185
0
}
186
187
static void check_global_conv_flags_eol(const char *path,
188
          struct text_stat *old_stats, struct text_stat *new_stats,
189
          int conv_flags)
190
0
{
191
0
  if (old_stats->crlf && !new_stats->crlf ) {
192
    /*
193
     * CRLFs would not be restored by checkout
194
     */
195
0
    if (conv_flags & CONV_EOL_RNDTRP_DIE)
196
0
      die(_("CRLF would be replaced by LF in %s"), path);
197
0
    else if (conv_flags & CONV_EOL_RNDTRP_WARN)
198
0
      warning(_("in the working copy of '%s', CRLF will be"
199
0
          " replaced by LF the next time Git touches"
200
0
          " it"), path);
201
0
  } else if (old_stats->lonelf && !new_stats->lonelf ) {
202
    /*
203
     * CRLFs would be added by checkout
204
     */
205
0
    if (conv_flags & CONV_EOL_RNDTRP_DIE)
206
0
      die(_("LF would be replaced by CRLF in %s"), path);
207
0
    else if (conv_flags & CONV_EOL_RNDTRP_WARN)
208
0
      warning(_("in the working copy of '%s', LF will be"
209
0
          " replaced by CRLF the next time Git touches"
210
0
          " it"), path);
211
0
  }
212
0
}
213
214
static int has_crlf_in_index(struct index_state *istate, const char *path)
215
0
{
216
0
  unsigned long sz;
217
0
  void *data;
218
0
  const char *crp;
219
0
  int has_crlf = 0;
220
221
0
  data = read_blob_data_from_index(istate, path, &sz);
222
0
  if (!data)
223
0
    return 0;
224
225
0
  crp = memchr(data, '\r', sz);
226
0
  if (crp) {
227
0
    unsigned int ret_stats;
228
0
    ret_stats = gather_convert_stats(data, sz);
229
0
    if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&
230
0
        (ret_stats & CONVERT_STAT_BITS_TXT_CRLF))
231
0
      has_crlf = 1;
232
0
  }
233
0
  free(data);
234
0
  return has_crlf;
235
0
}
236
237
static int will_convert_lf_to_crlf(struct text_stat *stats,
238
           enum convert_crlf_action crlf_action)
239
0
{
240
0
  if (output_eol(crlf_action) != EOL_CRLF)
241
0
    return 0;
242
  /* No "naked" LF? Nothing to convert, regardless. */
243
0
  if (!stats->lonelf)
244
0
    return 0;
245
246
0
  if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {
247
    /* If we have any CR or CRLF line endings, we do not touch it */
248
    /* This is the new safer autocrlf-handling */
249
0
    if (stats->lonecr || stats->crlf)
250
0
      return 0;
251
252
0
    if (convert_is_binary(stats))
253
0
      return 0;
254
0
  }
255
0
  return 1;
256
257
0
}
258
259
/*
 * Check `data` for byte order mark problems detectable for UTF
 * encodings.
 *
 * Returns 0 when `enc` is not a UTF encoding or no problem is found.
 * On a problem an advice message is shown; then the function dies when
 * die_on_error is set, and otherwise returns the result of error().
 */
static int validate_encoding(const char *path, const char *enc,
          const char *data, size_t len, int die_on_error)
{
  const char *suffix;
  const char *error_msg;

  /* We only check for UTF here as UTF?? can be an alias for UTF-?? */
  if (!skip_iprefix(enc, "UTF", &suffix))
    return 0;
  skip_prefix(suffix, "-", &suffix);

  if (has_prohibited_utf_bom(enc, data, len)) {
    /*
     * This advice is shown for UTF-??BE and UTF-??LE encodings.
     * We cut off the last two characters of the encoding name
     * to generate the encoding name suitable for BOMs.
     */
    int bom_name_len = strlen(suffix) - strlen("BE");

    error_msg = _(
      "BOM is prohibited in '%s' if encoded as %s");
    advise(_(
      "The file '%s' contains a byte order "
      "mark (BOM). Please use UTF-%.*s as "
      "working-tree-encoding."),
           path, bom_name_len, suffix);
  } else if (is_missing_required_utf_bom(enc, data, len)) {
    error_msg = _(
      "BOM is required in '%s' if encoded as %s");
    advise(_(
      "The file '%s' is missing a byte order "
      "mark (BOM). Please use UTF-%sBE or UTF-%sLE "
      "(depending on the byte order) as "
      "working-tree-encoding."),
           path, suffix, suffix);
  } else {
    return 0;
  }

  if (die_on_error)
    die(error_msg, path, enc);
  else
    return error(error_msg, path, enc);
}
310
311
static void trace_encoding(const char *context, const char *path,
312
         const char *encoding, const char *buf, size_t len)
313
0
{
314
0
  static struct trace_key coe = TRACE_KEY_INIT(WORKING_TREE_ENCODING);
315
0
  struct strbuf trace = STRBUF_INIT;
316
0
  int i;
317
318
0
  strbuf_addf(&trace, "%s (%s, considered %s):\n", context, path, encoding);
319
0
  for (i = 0; i < len && buf; ++i) {
320
0
    strbuf_addf(
321
0
      &trace, "| \033[2m%2i:\033[0m %2x \033[2m%c\033[0m%c",
322
0
      i,
323
0
      (unsigned char) buf[i],
324
0
      (buf[i] > 32 && buf[i] < 127 ? buf[i] : ' '),
325
0
      ((i+1) % 8 && (i+1) < len ? ' ' : '\n')
326
0
    );
327
0
  }
328
0
  strbuf_addchars(&trace, '\n', 1);
329
330
0
  trace_strbuf(&coe, &trace);
331
0
  strbuf_release(&trace);
332
0
}
333
334
static int check_roundtrip(const char *enc_name)
335
0
{
336
  /*
337
   * check_roundtrip_encoding contains a string of comma and/or
338
   * space separated encodings (eg. "UTF-16, ASCII, CP1125").
339
   * Search for the given encoding in that string.
340
   */
341
0
  const char *found = strcasestr(check_roundtrip_encoding, enc_name);
342
0
  const char *next;
343
0
  int len;
344
0
  if (!found)
345
0
    return 0;
346
0
  next = found + strlen(enc_name);
347
0
  len = strlen(check_roundtrip_encoding);
348
0
  return (found && (
349
      /*
350
       * check that the found encoding is at the
351
       * beginning of check_roundtrip_encoding or
352
       * that it is prefixed with a space or comma
353
       */
354
0
      found == check_roundtrip_encoding || (
355
0
        (isspace(found[-1]) || found[-1] == ',')
356
0
      )
357
0
    ) && (
358
      /*
359
       * check that the found encoding is at the
360
       * end of check_roundtrip_encoding or
361
       * that it is suffixed with a space or comma
362
       */
363
0
      next == check_roundtrip_encoding + len || (
364
0
        next < check_roundtrip_encoding + len &&
365
0
        (isspace(next[0]) || next[0] == ',')
366
0
      )
367
0
    ));
368
0
}
369
370
/*
 * The encoding on the "Git side" of every re-encoding in this file:
 * encode_to_git() converts into it, encode_to_worktree() converts from it.
 */
static const char *default_encoding = "UTF-8";
371
372
static int encode_to_git(const char *path, const char *src, size_t src_len,
373
       struct strbuf *buf, const char *enc, int conv_flags)
374
28.8k
{
375
28.8k
  char *dst;
376
28.8k
  size_t dst_len;
377
28.8k
  int die_on_error = conv_flags & CONV_WRITE_OBJECT;
378
379
  /*
380
   * No encoding is specified or there is nothing to encode.
381
   * Tell the caller that the content was not modified.
382
   */
383
28.8k
  if (!enc || (src && !src_len))
384
28.8k
    return 0;
385
386
  /*
387
   * Looks like we got called from "would_convert_to_git()".
388
   * This means Git wants to know if it would encode (= modify!)
389
   * the content. Let's answer with "yes", since an encoding was
390
   * specified.
391
   */
392
0
  if (!buf && !src)
393
0
    return 1;
394
395
0
  if (validate_encoding(path, enc, src, src_len, die_on_error))
396
0
    return 0;
397
398
0
  trace_encoding("source", path, enc, src, src_len);
399
0
  dst = reencode_string_len(src, src_len, default_encoding, enc,
400
0
          &dst_len);
401
0
  if (!dst) {
402
    /*
403
     * We could add the blob "as-is" to Git. However, on checkout
404
     * we would try to re-encode to the original encoding. This
405
     * would fail and we would leave the user with a messed-up
406
     * working tree. Let's try to avoid this by screaming loud.
407
     */
408
0
    const char* msg = _("failed to encode '%s' from %s to %s");
409
0
    if (die_on_error)
410
0
      die(msg, path, enc, default_encoding);
411
0
    else {
412
0
      error(msg, path, enc, default_encoding);
413
0
      return 0;
414
0
    }
415
0
  }
416
0
  trace_encoding("destination", path, default_encoding, dst, dst_len);
417
418
  /*
419
   * UTF supports lossless conversion round tripping [1] and conversions
420
   * between UTF and other encodings are mostly round trip safe as
421
   * Unicode aims to be a superset of all other character encodings.
422
   * However, certain encodings (e.g. SHIFT-JIS) are known to have round
423
   * trip issues [2]. Check the round trip conversion for all encodings
424
   * listed in core.checkRoundtripEncoding.
425
   *
426
   * The round trip check is only performed if content is written to Git.
427
   * This ensures that no information is lost during conversion to/from
428
   * the internal UTF-8 representation.
429
   *
430
   * Please note, the code below is not tested because I was not able to
431
   * generate a faulty round trip without an iconv error. Iconv errors
432
   * are already caught above.
433
   *
434
   * [1] http://unicode.org/faq/utf_bom.html#gen2
435
   * [2] https://support.microsoft.com/en-us/help/170559/prb-conversion-problem-between-shift-jis-and-unicode
436
   */
437
0
  if (die_on_error && check_roundtrip(enc)) {
438
0
    char *re_src;
439
0
    size_t re_src_len;
440
441
0
    re_src = reencode_string_len(dst, dst_len,
442
0
               enc, default_encoding,
443
0
               &re_src_len);
444
445
0
    trace_printf("Checking roundtrip encoding for %s...\n", enc);
446
0
    trace_encoding("reencoded source", path, enc,
447
0
             re_src, re_src_len);
448
449
0
    if (!re_src || src_len != re_src_len ||
450
0
        memcmp(src, re_src, src_len)) {
451
0
      const char* msg = _("encoding '%s' from %s to %s and "
452
0
              "back is not the same");
453
0
      die(msg, path, enc, default_encoding);
454
0
    }
455
456
0
    free(re_src);
457
0
  }
458
459
0
  strbuf_attach(buf, dst, dst_len, dst_len + 1);
460
0
  return 1;
461
0
}
462
463
static int encode_to_worktree(const char *path, const char *src, size_t src_len,
464
            struct strbuf *buf, const char *enc)
465
0
{
466
0
  char *dst;
467
0
  size_t dst_len;
468
469
  /*
470
   * No encoding is specified or there is nothing to encode.
471
   * Tell the caller that the content was not modified.
472
   */
473
0
  if (!enc || (src && !src_len))
474
0
    return 0;
475
476
0
  dst = reencode_string_len(src, src_len, enc, default_encoding,
477
0
          &dst_len);
478
0
  if (!dst) {
479
0
    error(_("failed to encode '%s' from %s to %s"),
480
0
          path, default_encoding, enc);
481
0
    return 0;
482
0
  }
483
484
0
  strbuf_attach(buf, dst, dst_len, dst_len + 1);
485
0
  return 1;
486
0
}
487
488
static int crlf_to_git(struct index_state *istate,
489
           const char *path, const char *src, size_t len,
490
           struct strbuf *buf,
491
           enum convert_crlf_action crlf_action, int conv_flags)
492
28.8k
{
493
28.8k
  struct text_stat stats;
494
28.8k
  char *dst;
495
28.8k
  int convert_crlf_into_lf;
496
497
28.8k
  if (crlf_action == CRLF_BINARY ||
498
28.8k
      (src && !len))
499
28.8k
    return 0;
500
501
  /*
502
   * If we are doing a dry-run and have no source buffer, there is
503
   * nothing to analyze; we must assume we would convert.
504
   */
505
0
  if (!buf && !src)
506
0
    return 1;
507
508
0
  gather_stats(src, len, &stats);
509
  /* Optimization: No CRLF? Nothing to convert, regardless. */
510
0
  convert_crlf_into_lf = !!stats.crlf;
511
512
0
  if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {
513
0
    if (convert_is_binary(&stats))
514
0
      return 0;
515
    /*
516
     * If the file in the index has any CR in it, do not
517
     * convert.  This is the new safer autocrlf handling,
518
     * unless we want to renormalize in a merge or
519
     * cherry-pick.
520
     */
521
0
    if ((!(conv_flags & CONV_EOL_RENORMALIZE)) &&
522
0
        has_crlf_in_index(istate, path))
523
0
      convert_crlf_into_lf = 0;
524
0
  }
525
0
  if (((conv_flags & CONV_EOL_RNDTRP_WARN) ||
526
0
       ((conv_flags & CONV_EOL_RNDTRP_DIE) && len))) {
527
0
    struct text_stat new_stats;
528
0
    memcpy(&new_stats, &stats, sizeof(new_stats));
529
    /* simulate "git add" */
530
0
    if (convert_crlf_into_lf) {
531
0
      new_stats.lonelf += new_stats.crlf;
532
0
      new_stats.crlf = 0;
533
0
    }
534
    /* simulate "git checkout" */
535
0
    if (will_convert_lf_to_crlf(&new_stats, crlf_action)) {
536
0
      new_stats.crlf += new_stats.lonelf;
537
0
      new_stats.lonelf = 0;
538
0
    }
539
0
    check_global_conv_flags_eol(path, &stats, &new_stats, conv_flags);
540
0
  }
541
0
  if (!convert_crlf_into_lf)
542
0
    return 0;
543
544
  /*
545
   * At this point all of our source analysis is done, and we are sure we
546
   * would convert. If we are in dry-run mode, we can give an answer.
547
   */
548
0
  if (!buf)
549
0
    return 1;
550
551
  /* only grow if not in place */
552
0
  if (strbuf_avail(buf) + buf->len < len)
553
0
    strbuf_grow(buf, len - buf->len);
554
0
  dst = buf->buf;
555
0
  if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {
556
    /*
557
     * If we guessed, we already know we rejected a file with
558
     * lone CR, and we can strip a CR without looking at what
559
     * follow it.
560
     */
561
0
    do {
562
0
      unsigned char c = *src++;
563
0
      if (c != '\r')
564
0
        *dst++ = c;
565
0
    } while (--len);
566
0
  } else {
567
0
    do {
568
0
      unsigned char c = *src++;
569
0
      if (! (c == '\r' && (1 < len && *src == '\n')))
570
0
        *dst++ = c;
571
0
    } while (--len);
572
0
  }
573
0
  strbuf_setlen(buf, dst - buf->buf);
574
0
  return 1;
575
0
}
576
577
static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf,
578
          enum convert_crlf_action crlf_action)
579
0
{
580
0
  char *to_free = NULL;
581
0
  struct text_stat stats;
582
583
0
  if (!len || output_eol(crlf_action) != EOL_CRLF)
584
0
    return 0;
585
586
0
  gather_stats(src, len, &stats);
587
0
  if (!will_convert_lf_to_crlf(&stats, crlf_action))
588
0
    return 0;
589
590
  /* are we "faking" in place editing ? */
591
0
  if (src == buf->buf)
592
0
    to_free = strbuf_detach(buf, NULL);
593
594
0
  strbuf_grow(buf, len + stats.lonelf);
595
0
  for (;;) {
596
0
    const char *nl = memchr(src, '\n', len);
597
0
    if (!nl)
598
0
      break;
599
0
    if (nl > src && nl[-1] == '\r') {
600
0
      strbuf_add(buf, src, nl + 1 - src);
601
0
    } else {
602
0
      strbuf_add(buf, src, nl - src);
603
0
      strbuf_addstr(buf, "\r\n");
604
0
    }
605
0
    len -= nl + 1 - src;
606
0
    src  = nl + 1;
607
0
  }
608
0
  strbuf_add(buf, src, len);
609
610
0
  free(to_free);
611
0
  return 1;
612
0
}
613
614
/* Arguments handed to filter_buffer_or_fd() through its void pointer. */
struct filter_params {
  const char *src;   /* content to feed to the filter; NULL to copy from fd */
  size_t size;       /* number of bytes at src */
  int fd;            /* descriptor to copy from when src is NULL */
  const char *cmd;   /* filter command line; %f is expanded to the path */
  const char *path;  /* path substituted (shell-quoted) for %f */
};
621
622
/*
 * Spawn the filter command through the shell and feed it its input:
 * either the buffer in the filter_params passed as `data` or, when that
 * buffer is NULL, the contents of its fd. The command's output goes to
 * `out`.
 *
 * Returns non-zero when feeding the input failed or the command
 * finished with a non-zero status. A filter that stops reading early
 * (EPIPE) is not treated as a write failure.
 */
static int filter_buffer_or_fd(int in UNUSED, int out, void *data)
{
  struct filter_params *fp = (struct filter_params *)data;
  struct child_process filter = CHILD_PROCESS_INIT;
  int write_err, status;

  /* apply % substitution to cmd */
  struct strbuf expanded_cmd = STRBUF_INIT;
  struct strbuf quoted_path = STRBUF_INIT;
  struct strbuf_expand_dict_entry dict[] = {
    { "f", NULL, },
    { NULL, NULL, },
  };

  /* quote the path to preserve spaces, etc. */
  sq_quote_buf(&quoted_path, fp->path);
  dict[0].value = quoted_path.buf;

  /* expand all %f with the quoted path */
  strbuf_expand(&expanded_cmd, fp->cmd, strbuf_expand_dict_cb, &dict);
  strbuf_release(&quoted_path);

  strvec_push(&filter.args, expanded_cmd.buf);
  filter.use_shell = 1;
  filter.in = -1;
  filter.out = out;

  if (start_command(&filter)) {
    strbuf_release(&expanded_cmd);
    return error(_("cannot fork to run external filter '%s'"),
           fp->cmd);
  }

  /* A filter that exits early must not kill us with SIGPIPE. */
  sigchain_push(SIGPIPE, SIG_IGN);

  if (fp->src) {
    write_err = (write_in_full(filter.in,
             fp->src, fp->size) < 0);
    if (errno == EPIPE)
      write_err = 0;
  } else {
    write_err = copy_fd(fp->fd, filter.in);
    if (write_err == COPY_WRITE_ERROR && errno == EPIPE)
      write_err = 0;
  }

  if (close(filter.in))
    write_err = 1;
  if (write_err)
    error(_("cannot feed the input to external filter '%s'"),
          fp->cmd);

  sigchain_pop(SIGPIPE);

  status = finish_command(&filter);
  if (status)
    error(_("external filter '%s' failed %d"), fp->cmd, status);

  strbuf_release(&expanded_cmd);
  return (write_err || status);
}
686
687
static int apply_single_file_filter(const char *path, const char *src, size_t len, int fd,
688
            struct strbuf *dst, const char *cmd)
689
0
{
690
  /*
691
   * Create a pipeline to have the command filter the buffer's
692
   * contents.
693
   *
694
   * (child --> cmd) --> us
695
   */
696
0
  int err = 0;
697
0
  struct strbuf nbuf = STRBUF_INIT;
698
0
  struct async async;
699
0
  struct filter_params params;
700
701
0
  memset(&async, 0, sizeof(async));
702
0
  async.proc = filter_buffer_or_fd;
703
0
  async.data = &params;
704
0
  async.out = -1;
705
0
  params.src = src;
706
0
  params.size = len;
707
0
  params.fd = fd;
708
0
  params.cmd = cmd;
709
0
  params.path = path;
710
711
0
  fflush(NULL);
712
0
  if (start_async(&async))
713
0
    return 0; /* error was already reported */
714
715
0
  if (strbuf_read(&nbuf, async.out, 0) < 0) {
716
0
    err = error(_("read from external filter '%s' failed"), cmd);
717
0
  }
718
0
  if (close(async.out)) {
719
0
    err = error(_("read from external filter '%s' failed"), cmd);
720
0
  }
721
0
  if (finish_async(&async)) {
722
0
    err = error(_("external filter '%s' failed"), cmd);
723
0
  }
724
725
0
  if (!err) {
726
0
    strbuf_swap(dst, &nbuf);
727
0
  }
728
0
  strbuf_release(&nbuf);
729
0
  return !err;
730
0
}
731
732
28.8k
/* Capability bits a long-running filter process can announce. */
#define CAP_CLEAN    0x1u
#define CAP_SMUDGE   0x2u
#define CAP_DELAY    0x4u
735
736
struct cmd2process {
737
  struct subprocess_entry subprocess; /* must be the first member! */
738
  unsigned int supported_capabilities;
739
};
740
741
static int subprocess_map_initialized;
742
static struct hashmap subprocess_map;
743
744
static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
745
0
{
746
0
  static int versions[] = {2, 0};
747
0
  static struct subprocess_capability capabilities[] = {
748
0
    { "clean",  CAP_CLEAN  },
749
0
    { "smudge", CAP_SMUDGE },
750
0
    { "delay",  CAP_DELAY  },
751
0
    { NULL, 0 }
752
0
  };
753
0
  struct cmd2process *entry = (struct cmd2process *)subprocess;
754
0
  return subprocess_handshake(subprocess, "git-filter", versions, NULL,
755
0
            capabilities,
756
0
            &entry->supported_capabilities);
757
0
}
758
759
static void handle_filter_error(const struct strbuf *filter_status,
760
        struct cmd2process *entry,
761
        const unsigned int wanted_capability)
762
0
{
763
0
  if (!strcmp(filter_status->buf, "error"))
764
0
    ; /* The filter signaled a problem with the file. */
765
0
  else if (!strcmp(filter_status->buf, "abort") && wanted_capability) {
766
    /*
767
     * The filter signaled a permanent problem. Don't try to filter
768
     * files with the same command for the lifetime of the current
769
     * Git process.
770
     */
771
0
     entry->supported_capabilities &= ~wanted_capability;
772
0
  } else {
773
    /*
774
     * Something went wrong with the protocol filter.
775
     * Force shutdown and restart if another blob requires filtering.
776
     */
777
0
    error(_("external filter '%s' failed"), entry->subprocess.cmd);
778
0
    subprocess_stop(&subprocess_map, &entry->subprocess);
779
0
    free(entry);
780
0
  }
781
0
}
782
783
static int apply_multi_file_filter(const char *path, const char *src, size_t len,
784
           int fd, struct strbuf *dst, const char *cmd,
785
           const unsigned int wanted_capability,
786
           const struct checkout_metadata *meta,
787
           struct delayed_checkout *dco)
788
0
{
789
0
  int err;
790
0
  int can_delay = 0;
791
0
  struct cmd2process *entry;
792
0
  struct child_process *process;
793
0
  struct strbuf nbuf = STRBUF_INIT;
794
0
  struct strbuf filter_status = STRBUF_INIT;
795
0
  const char *filter_type;
796
797
0
  if (!subprocess_map_initialized) {
798
0
    subprocess_map_initialized = 1;
799
0
    hashmap_init(&subprocess_map, cmd2process_cmp, NULL, 0);
800
0
    entry = NULL;
801
0
  } else {
802
0
    entry = (struct cmd2process *)subprocess_find_entry(&subprocess_map, cmd);
803
0
  }
804
805
0
  fflush(NULL);
806
807
0
  if (!entry) {
808
0
    entry = xmalloc(sizeof(*entry));
809
0
    entry->supported_capabilities = 0;
810
811
0
    if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, start_multi_file_filter_fn)) {
812
0
      free(entry);
813
0
      return 0;
814
0
    }
815
0
  }
816
0
  process = &entry->subprocess.process;
817
818
0
  if (!(entry->supported_capabilities & wanted_capability))
819
0
    return 0;
820
821
0
  if (wanted_capability & CAP_CLEAN)
822
0
    filter_type = "clean";
823
0
  else if (wanted_capability & CAP_SMUDGE)
824
0
    filter_type = "smudge";
825
0
  else
826
0
    die(_("unexpected filter type"));
827
828
0
  sigchain_push(SIGPIPE, SIG_IGN);
829
830
0
  assert(strlen(filter_type) < LARGE_PACKET_DATA_MAX - strlen("command=\n"));
831
0
  err = packet_write_fmt_gently(process->in, "command=%s\n", filter_type);
832
0
  if (err)
833
0
    goto done;
834
835
0
  err = strlen(path) > LARGE_PACKET_DATA_MAX - strlen("pathname=\n");
836
0
  if (err) {
837
0
    error(_("path name too long for external filter"));
838
0
    goto done;
839
0
  }
840
841
0
  err = packet_write_fmt_gently(process->in, "pathname=%s\n", path);
842
0
  if (err)
843
0
    goto done;
844
845
0
  if (meta && meta->refname) {
846
0
    err = packet_write_fmt_gently(process->in, "ref=%s\n", meta->refname);
847
0
    if (err)
848
0
      goto done;
849
0
  }
850
851
0
  if (meta && !is_null_oid(&meta->treeish)) {
852
0
    err = packet_write_fmt_gently(process->in, "treeish=%s\n", oid_to_hex(&meta->treeish));
853
0
    if (err)
854
0
      goto done;
855
0
  }
856
857
0
  if (meta && !is_null_oid(&meta->blob)) {
858
0
    err = packet_write_fmt_gently(process->in, "blob=%s\n", oid_to_hex(&meta->blob));
859
0
    if (err)
860
0
      goto done;
861
0
  }
862
863
0
  if ((entry->supported_capabilities & CAP_DELAY) &&
864
0
      dco && dco->state == CE_CAN_DELAY) {
865
0
    can_delay = 1;
866
0
    err = packet_write_fmt_gently(process->in, "can-delay=1\n");
867
0
    if (err)
868
0
      goto done;
869
0
  }
870
871
0
  err = packet_flush_gently(process->in);
872
0
  if (err)
873
0
    goto done;
874
875
0
  if (fd >= 0)
876
0
    err = write_packetized_from_fd_no_flush(fd, process->in);
877
0
  else
878
0
    err = write_packetized_from_buf_no_flush(src, len, process->in);
879
0
  if (err)
880
0
    goto done;
881
882
0
  err = packet_flush_gently(process->in);
883
0
  if (err)
884
0
    goto done;
885
886
0
  err = subprocess_read_status(process->out, &filter_status);
887
0
  if (err)
888
0
    goto done;
889
890
0
  if (can_delay && !strcmp(filter_status.buf, "delayed")) {
891
0
    string_list_insert(&dco->filters, cmd);
892
0
    string_list_insert(&dco->paths, path);
893
0
  } else {
894
    /* The filter got the blob and wants to send us a response. */
895
0
    err = strcmp(filter_status.buf, "success");
896
0
    if (err)
897
0
      goto done;
898
899
0
    err = read_packetized_to_strbuf(process->out, &nbuf,
900
0
            PACKET_READ_GENTLE_ON_EOF) < 0;
901
0
    if (err)
902
0
      goto done;
903
904
0
    err = subprocess_read_status(process->out, &filter_status);
905
0
    if (err)
906
0
      goto done;
907
908
0
    err = strcmp(filter_status.buf, "success");
909
0
  }
910
911
0
done:
912
0
  sigchain_pop(SIGPIPE);
913
914
0
  if (err)
915
0
    handle_filter_error(&filter_status, entry, wanted_capability);
916
0
  else
917
0
    strbuf_swap(dst, &nbuf);
918
0
  strbuf_release(&nbuf);
919
0
  strbuf_release(&filter_status);
920
0
  return !err;
921
0
}
922
923
924
int async_query_available_blobs(const char *cmd, struct string_list *available_paths)
925
0
{
926
0
  int err;
927
0
  char *line;
928
0
  struct cmd2process *entry;
929
0
  struct child_process *process;
930
0
  struct strbuf filter_status = STRBUF_INIT;
931
932
0
  assert(subprocess_map_initialized);
933
0
  entry = (struct cmd2process *)subprocess_find_entry(&subprocess_map, cmd);
934
0
  if (!entry) {
935
0
    error(_("external filter '%s' is not available anymore although "
936
0
      "not all paths have been filtered"), cmd);
937
0
    return 0;
938
0
  }
939
0
  process = &entry->subprocess.process;
940
0
  sigchain_push(SIGPIPE, SIG_IGN);
941
942
0
  err = packet_write_fmt_gently(
943
0
    process->in, "command=list_available_blobs\n");
944
0
  if (err)
945
0
    goto done;
946
947
0
  err = packet_flush_gently(process->in);
948
0
  if (err)
949
0
    goto done;
950
951
0
  while ((line = packet_read_line(process->out, NULL))) {
952
0
    const char *path;
953
0
    if (skip_prefix(line, "pathname=", &path))
954
0
      string_list_insert(available_paths, xstrdup(path));
955
0
    else
956
0
      ; /* ignore unknown keys */
957
0
  }
958
959
0
  err = subprocess_read_status(process->out, &filter_status);
960
0
  if (err)
961
0
    goto done;
962
963
0
  err = strcmp(filter_status.buf, "success");
964
965
0
done:
966
0
  sigchain_pop(SIGPIPE);
967
968
0
  if (err)
969
0
    handle_filter_error(&filter_status, entry, 0);
970
0
  strbuf_release(&filter_status);
971
0
  return !err;
972
0
}
973
974
/* One configured "filter.<name>.*" conversion driver. */
struct convert_driver {
  const char *name;             /* the <name> part of the config key */
  struct convert_driver *next;  /* next driver in the list */
  const char *smudge;           /* one-shot smudge command line */
  const char *clean;            /* one-shot clean command line */
  const char *process;          /* long-running filter process command line */
  int required;                 /* NOTE(review): presumably filter.<name>.required; confirm */
};

/* Singly linked list of configured drivers and the link to append at. */
static struct convert_driver *user_convert;
static struct convert_driver **user_convert_tail;
982
983
static int apply_filter(const char *path, const char *src, size_t len,
984
      int fd, struct strbuf *dst, struct convert_driver *drv,
985
      const unsigned int wanted_capability,
986
      const struct checkout_metadata *meta,
987
      struct delayed_checkout *dco)
988
28.8k
{
989
28.8k
  const char *cmd = NULL;
990
991
28.8k
  if (!drv)
992
28.8k
    return 0;
993
994
0
  if (!dst)
995
0
    return 1;
996
997
0
  if ((wanted_capability & CAP_CLEAN) && !drv->process && drv->clean)
998
0
    cmd = drv->clean;
999
0
  else if ((wanted_capability & CAP_SMUDGE) && !drv->process && drv->smudge)
1000
0
    cmd = drv->smudge;
1001
1002
0
  if (cmd && *cmd)
1003
0
    return apply_single_file_filter(path, src, len, fd, dst, cmd);
1004
0
  else if (drv->process && *drv->process)
1005
0
    return apply_multi_file_filter(path, src, len, fd, dst,
1006
0
      drv->process, wanted_capability, meta, dco);
1007
1008
0
  return 0;
1009
0
}
1010
1011
/*
 * Config callback that collects external conversion drivers, which are
 * configured using "filter.<name>.variable".  The driver for <name> is
 * looked up in the user_convert list and appended if it does not exist
 * yet; the recognized variables are "smudge", "clean", "process" and
 * "required".  Anything else is ignored and reported as success (0).
 */
static int read_convert_config(const char *var, const char *value, void *cb UNUSED)
{
  const char *key, *name;
  size_t namelen;
  struct convert_driver *drv = user_convert;

  if (parse_config_key(var, "filter", &name, &namelen, &key) < 0 || !name)
    return 0;

  /* find an existing driver whose name is exactly name[0..namelen) */
  while (drv && (strncmp(drv->name, name, namelen) || drv->name[namelen]))
    drv = drv->next;

  if (!drv) {
    CALLOC_ARRAY(drv, 1);
    drv->name = xmemdupz(name, namelen);
    *user_convert_tail = drv;
    user_convert_tail = &(drv->next);
  }

  /*
   * filter.<name>.smudge, filter.<name>.clean and filter.<name>.process
   * hold a command line, which is stored as-is without any
   * interpolation.
   */
  if (!strcmp("smudge", key))
    return git_config_string(&drv->smudge, var, value);
  if (!strcmp("clean", key))
    return git_config_string(&drv->clean, var, value);
  if (!strcmp("process", key))
    return git_config_string(&drv->process, var, value);

  if (!strcmp("required", key))
    drv->required = git_config_bool(var, value);
  return 0;
}
1058
1059
/*
 * Count the ident keywords in the `size` bytes at `cp`.  Both the
 * collapsed form "$Id$" and the expanded form "$Id: ... $" count; an
 * expanded form only counts when its closing '$' appears before the
 * next newline.
 */
static int count_ident(const char *cp, unsigned long size)
{
  int found = 0;

  while (size) {
    char tag;

    size--;
    if (*cp++ != '$')
      continue;
    /* need room for "Id" plus the character that follows it */
    if (size < 3)
      break;
    if (cp[0] != 'I' || cp[1] != 'd')
      continue;

    tag = cp[2];
    cp += 3;
    size -= 3;

    if (tag == '$') {
      found++; /* $Id$ */
      continue;
    }
    if (tag != ':')
      continue;

    /* "$Id: ... "; scan up to the closing dollar sign and discard. */
    for (; size; ) {
      char c = *cp++;

      size--;
      if (c == '$') {
        found++;
        break;
      }
      if (c == '\n')
        break;
    }
  }
  return found;
}
1100
1101
static int ident_to_git(const char *src, size_t len,
1102
      struct strbuf *buf, int ident)
1103
28.8k
{
1104
28.8k
  char *dst, *dollar;
1105
1106
28.8k
  if (!ident || (src && !count_ident(src, len)))
1107
28.8k
    return 0;
1108
1109
0
  if (!buf)
1110
0
    return 1;
1111
1112
  /* only grow if not in place */
1113
0
  if (strbuf_avail(buf) + buf->len < len)
1114
0
    strbuf_grow(buf, len - buf->len);
1115
0
  dst = buf->buf;
1116
0
  for (;;) {
1117
0
    dollar = memchr(src, '$', len);
1118
0
    if (!dollar)
1119
0
      break;
1120
0
    memmove(dst, src, dollar + 1 - src);
1121
0
    dst += dollar + 1 - src;
1122
0
    len -= dollar + 1 - src;
1123
0
    src  = dollar + 1;
1124
1125
0
    if (len > 3 && !memcmp(src, "Id:", 3)) {
1126
0
      dollar = memchr(src + 3, '$', len - 3);
1127
0
      if (!dollar)
1128
0
        break;
1129
0
      if (memchr(src + 3, '\n', dollar - src - 3)) {
1130
        /* Line break before the next dollar. */
1131
0
        continue;
1132
0
      }
1133
1134
0
      memcpy(dst, "Id$", 3);
1135
0
      dst += 3;
1136
0
      len -= dollar + 1 - src;
1137
0
      src  = dollar + 1;
1138
0
    }
1139
0
  }
1140
0
  memmove(dst, src, len);
1141
0
  strbuf_setlen(buf, dst + len - buf->buf);
1142
0
  return 1;
1143
0
}
1144
1145
static int ident_to_worktree(const char *src, size_t len,
1146
           struct strbuf *buf, int ident)
1147
0
{
1148
0
  struct object_id oid;
1149
0
  char *to_free = NULL, *dollar, *spc;
1150
0
  int cnt;
1151
1152
0
  if (!ident)
1153
0
    return 0;
1154
1155
0
  cnt = count_ident(src, len);
1156
0
  if (!cnt)
1157
0
    return 0;
1158
1159
  /* are we "faking" in place editing ? */
1160
0
  if (src == buf->buf)
1161
0
    to_free = strbuf_detach(buf, NULL);
1162
0
  hash_object_file(the_hash_algo, src, len, OBJ_BLOB, &oid);
1163
1164
0
  strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + 3));
1165
0
  for (;;) {
1166
    /* step 1: run to the next '$' */
1167
0
    dollar = memchr(src, '$', len);
1168
0
    if (!dollar)
1169
0
      break;
1170
0
    strbuf_add(buf, src, dollar + 1 - src);
1171
0
    len -= dollar + 1 - src;
1172
0
    src  = dollar + 1;
1173
1174
    /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
1175
0
    if (len < 3 || memcmp("Id", src, 2))
1176
0
      continue;
1177
1178
    /* step 3: skip over Id$ or Id:xxxxx$ */
1179
0
    if (src[2] == '$') {
1180
0
      src += 3;
1181
0
      len -= 3;
1182
0
    } else if (src[2] == ':') {
1183
      /*
1184
       * It's possible that an expanded Id has crept its way into the
1185
       * repository, we cope with that by stripping the expansion out.
1186
       * This is probably not a good idea, since it will cause changes
1187
       * on checkout, which won't go away by stash, but let's keep it
1188
       * for git-style ids.
1189
       */
1190
0
      dollar = memchr(src + 3, '$', len - 3);
1191
0
      if (!dollar) {
1192
        /* incomplete keyword, no more '$', so just quit the loop */
1193
0
        break;
1194
0
      }
1195
1196
0
      if (memchr(src + 3, '\n', dollar - src - 3)) {
1197
        /* Line break before the next dollar. */
1198
0
        continue;
1199
0
      }
1200
1201
0
      spc = memchr(src + 4, ' ', dollar - src - 4);
1202
0
      if (spc && spc < dollar-1) {
1203
        /* There are spaces in unexpected places.
1204
         * This is probably an id from some other
1205
         * versioning system. Keep it for now.
1206
         */
1207
0
        continue;
1208
0
      }
1209
1210
0
      len -= dollar + 1 - src;
1211
0
      src  = dollar + 1;
1212
0
    } else {
1213
      /* it wasn't a "Id$" or "Id:xxxx$" */
1214
0
      continue;
1215
0
    }
1216
1217
    /* step 4: substitute */
1218
0
    strbuf_addstr(buf, "Id: ");
1219
0
    strbuf_addstr(buf, oid_to_hex(&oid));
1220
0
    strbuf_addstr(buf, " $");
1221
0
  }
1222
0
  strbuf_add(buf, src, len);
1223
1224
0
  free(to_free);
1225
0
  return 1;
1226
0
}
1227
1228
static const char *git_path_check_encoding(struct attr_check_item *check)
1229
40.2k
{
1230
40.2k
  const char *value = check->value;
1231
1232
40.2k
  if (ATTR_UNSET(value) || !strlen(value))
1233
40.2k
    return NULL;
1234
1235
0
  if (ATTR_TRUE(value) || ATTR_FALSE(value)) {
1236
0
    die(_("true/false are no valid working-tree-encodings"));
1237
0
  }
1238
1239
  /* Don't encode to the default encoding */
1240
0
  if (same_encoding(value, default_encoding))
1241
0
    return NULL;
1242
1243
0
  return value;
1244
0
}
1245
1246
static enum convert_crlf_action git_path_check_crlf(struct attr_check_item *check)
1247
80.4k
{
1248
80.4k
  const char *value = check->value;
1249
1250
80.4k
  if (ATTR_TRUE(value))
1251
0
    return CRLF_TEXT;
1252
80.4k
  else if (ATTR_FALSE(value))
1253
0
    return CRLF_BINARY;
1254
80.4k
  else if (ATTR_UNSET(value))
1255
80.4k
    ;
1256
0
  else if (!strcmp(value, "input"))
1257
0
    return CRLF_TEXT_INPUT;
1258
0
  else if (!strcmp(value, "auto"))
1259
0
    return CRLF_AUTO;
1260
80.4k
  return CRLF_UNDEFINED;
1261
80.4k
}
1262
1263
static enum eol git_path_check_eol(struct attr_check_item *check)
1264
40.2k
{
1265
40.2k
  const char *value = check->value;
1266
1267
40.2k
  if (ATTR_UNSET(value))
1268
40.2k
    ;
1269
0
  else if (!strcmp(value, "lf"))
1270
0
    return EOL_LF;
1271
0
  else if (!strcmp(value, "crlf"))
1272
0
    return EOL_CRLF;
1273
40.2k
  return EOL_UNSET;
1274
40.2k
}
1275
1276
static struct convert_driver *git_path_check_convert(struct attr_check_item *check)
1277
40.2k
{
1278
40.2k
  const char *value = check->value;
1279
40.2k
  struct convert_driver *drv;
1280
1281
40.2k
  if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
1282
40.2k
    return NULL;
1283
0
  for (drv = user_convert; drv; drv = drv->next)
1284
0
    if (!strcmp(value, drv->name))
1285
0
      return drv;
1286
0
  return NULL;
1287
0
}
1288
1289
static int git_path_check_ident(struct attr_check_item *check)
1290
40.2k
{
1291
40.2k
  const char *value = check->value;
1292
1293
40.2k
  return !!ATTR_TRUE(value);
1294
40.2k
}
1295
1296
/*
 * Attribute query shared by all convert_attrs() calls; created on first
 * use there and released by reset_parsed_attributes().
 */
static struct attr_check *check;
1297
1298
void convert_attrs(struct index_state *istate,
1299
       struct conv_attrs *ca, const char *path)
1300
40.2k
{
1301
40.2k
  struct attr_check_item *ccheck = NULL;
1302
1303
40.2k
  if (!check) {
1304
1
    check = attr_check_initl("crlf", "ident", "filter",
1305
1
           "eol", "text", "working-tree-encoding",
1306
1
           NULL);
1307
1
    user_convert_tail = &user_convert;
1308
1
    git_config(read_convert_config, NULL);
1309
1
  }
1310
1311
40.2k
  git_check_attr(istate, NULL, path, check);
1312
40.2k
  ccheck = check->items;
1313
40.2k
  ca->crlf_action = git_path_check_crlf(ccheck + 4);
1314
40.2k
  if (ca->crlf_action == CRLF_UNDEFINED)
1315
40.2k
    ca->crlf_action = git_path_check_crlf(ccheck + 0);
1316
40.2k
  ca->ident = git_path_check_ident(ccheck + 1);
1317
40.2k
  ca->drv = git_path_check_convert(ccheck + 2);
1318
40.2k
  if (ca->crlf_action != CRLF_BINARY) {
1319
40.2k
    enum eol eol_attr = git_path_check_eol(ccheck + 3);
1320
40.2k
    if (ca->crlf_action == CRLF_AUTO && eol_attr == EOL_LF)
1321
0
      ca->crlf_action = CRLF_AUTO_INPUT;
1322
40.2k
    else if (ca->crlf_action == CRLF_AUTO && eol_attr == EOL_CRLF)
1323
0
      ca->crlf_action = CRLF_AUTO_CRLF;
1324
40.2k
    else if (eol_attr == EOL_LF)
1325
0
      ca->crlf_action = CRLF_TEXT_INPUT;
1326
40.2k
    else if (eol_attr == EOL_CRLF)
1327
0
      ca->crlf_action = CRLF_TEXT_CRLF;
1328
40.2k
  }
1329
40.2k
  ca->working_tree_encoding = git_path_check_encoding(ccheck + 5);
1330
1331
  /* Save attr and make a decision for action */
1332
40.2k
  ca->attr_action = ca->crlf_action;
1333
40.2k
  if (ca->crlf_action == CRLF_TEXT)
1334
0
    ca->crlf_action = text_eol_is_crlf() ? CRLF_TEXT_CRLF : CRLF_TEXT_INPUT;
1335
40.2k
  if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_FALSE)
1336
40.2k
    ca->crlf_action = CRLF_BINARY;
1337
40.2k
  if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_TRUE)
1338
0
    ca->crlf_action = CRLF_AUTO_CRLF;
1339
40.2k
  if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_INPUT)
1340
0
    ca->crlf_action = CRLF_AUTO_INPUT;
1341
40.2k
}
1342
1343
void reset_parsed_attributes(void)
1344
0
{
1345
0
  struct convert_driver *drv, *next;
1346
1347
0
  attr_check_free(check);
1348
0
  check = NULL;
1349
0
  reset_merge_attributes();
1350
1351
0
  for (drv = user_convert; drv; drv = next) {
1352
0
    next = drv->next;
1353
0
    free((void *)drv->name);
1354
0
    free(drv);
1355
0
  }
1356
0
  user_convert = NULL;
1357
0
  user_convert_tail = NULL;
1358
0
}
1359
1360
int would_convert_to_git_filter_fd(struct index_state *istate, const char *path)
1361
11.3k
{
1362
11.3k
  struct conv_attrs ca;
1363
1364
11.3k
  convert_attrs(istate, &ca, path);
1365
11.3k
  if (!ca.drv)
1366
11.3k
    return 0;
1367
1368
  /*
1369
   * Apply a filter to an fd only if the filter is required to succeed.
1370
   * We must die if the filter fails, because the original data before
1371
   * filtering is not available.
1372
   */
1373
0
  if (!ca.drv->required)
1374
0
    return 0;
1375
1376
0
  return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN, NULL, NULL);
1377
0
}
1378
1379
const char *get_convert_attr_ascii(struct index_state *istate, const char *path)
1380
0
{
1381
0
  struct conv_attrs ca;
1382
1383
0
  convert_attrs(istate, &ca, path);
1384
0
  switch (ca.attr_action) {
1385
0
  case CRLF_UNDEFINED:
1386
0
    return "";
1387
0
  case CRLF_BINARY:
1388
0
    return "-text";
1389
0
  case CRLF_TEXT:
1390
0
    return "text";
1391
0
  case CRLF_TEXT_INPUT:
1392
0
    return "text eol=lf";
1393
0
  case CRLF_TEXT_CRLF:
1394
0
    return "text eol=crlf";
1395
0
  case CRLF_AUTO:
1396
0
    return "text=auto";
1397
0
  case CRLF_AUTO_CRLF:
1398
0
    return "text=auto eol=crlf";
1399
0
  case CRLF_AUTO_INPUT:
1400
0
    return "text=auto eol=lf";
1401
0
  }
1402
0
  return "";
1403
0
}
1404
1405
int convert_to_git(struct index_state *istate,
1406
       const char *path, const char *src, size_t len,
1407
       struct strbuf *dst, int conv_flags)
1408
28.8k
{
1409
28.8k
  int ret = 0;
1410
28.8k
  struct conv_attrs ca;
1411
1412
28.8k
  convert_attrs(istate, &ca, path);
1413
1414
28.8k
  ret |= apply_filter(path, src, len, -1, dst, ca.drv, CAP_CLEAN, NULL, NULL);
1415
28.8k
  if (!ret && ca.drv && ca.drv->required)
1416
0
    die(_("%s: clean filter '%s' failed"), path, ca.drv->name);
1417
1418
28.8k
  if (ret && dst) {
1419
0
    src = dst->buf;
1420
0
    len = dst->len;
1421
0
  }
1422
1423
28.8k
  ret |= encode_to_git(path, src, len, dst, ca.working_tree_encoding, conv_flags);
1424
28.8k
  if (ret && dst) {
1425
0
    src = dst->buf;
1426
0
    len = dst->len;
1427
0
  }
1428
1429
28.8k
  if (!(conv_flags & CONV_EOL_KEEP_CRLF)) {
1430
28.8k
    ret |= crlf_to_git(istate, path, src, len, dst, ca.crlf_action, conv_flags);
1431
28.8k
    if (ret && dst) {
1432
0
      src = dst->buf;
1433
0
      len = dst->len;
1434
0
    }
1435
28.8k
  }
1436
28.8k
  return ret | ident_to_git(src, len, dst, ca.ident);
1437
28.8k
}
1438
1439
void convert_to_git_filter_fd(struct index_state *istate,
1440
            const char *path, int fd, struct strbuf *dst,
1441
            int conv_flags)
1442
0
{
1443
0
  struct conv_attrs ca;
1444
0
  convert_attrs(istate, &ca, path);
1445
1446
0
  assert(ca.drv);
1447
1448
0
  if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN, NULL, NULL))
1449
0
    die(_("%s: clean filter '%s' failed"), path, ca.drv->name);
1450
1451
0
  encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
1452
0
  crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);
1453
0
  ident_to_git(dst->buf, dst->len, dst, ca.ident);
1454
0
}
1455
1456
static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
1457
                 const char *path, const char *src,
1458
                 size_t len, struct strbuf *dst,
1459
                 int normalizing,
1460
                 const struct checkout_metadata *meta,
1461
                 struct delayed_checkout *dco)
1462
0
{
1463
0
  int ret = 0, ret_filter = 0;
1464
1465
0
  ret |= ident_to_worktree(src, len, dst, ca->ident);
1466
0
  if (ret) {
1467
0
    src = dst->buf;
1468
0
    len = dst->len;
1469
0
  }
1470
  /*
1471
   * CRLF conversion can be skipped if normalizing, unless there
1472
   * is a smudge or process filter (even if the process filter doesn't
1473
   * support smudge).  The filters might expect CRLFs.
1474
   */
1475
0
  if ((ca->drv && (ca->drv->smudge || ca->drv->process)) || !normalizing) {
1476
0
    ret |= crlf_to_worktree(src, len, dst, ca->crlf_action);
1477
0
    if (ret) {
1478
0
      src = dst->buf;
1479
0
      len = dst->len;
1480
0
    }
1481
0
  }
1482
1483
0
  ret |= encode_to_worktree(path, src, len, dst, ca->working_tree_encoding);
1484
0
  if (ret) {
1485
0
    src = dst->buf;
1486
0
    len = dst->len;
1487
0
  }
1488
1489
0
  ret_filter = apply_filter(
1490
0
    path, src, len, -1, dst, ca->drv, CAP_SMUDGE, meta, dco);
1491
0
  if (!ret_filter && ca->drv && ca->drv->required)
1492
0
    die(_("%s: smudge filter %s failed"), path, ca->drv->name);
1493
1494
0
  return ret | ret_filter;
1495
0
}
1496
1497
/*
 * Working tree conversion (not normalizing) that hands the `dco`
 * pointer through to the smudge filter stage.
 */
int async_convert_to_working_tree_ca(const struct conv_attrs *ca,
             const char *path, const char *src,
             size_t len, struct strbuf *dst,
             const struct checkout_metadata *meta,
             void *dco)
{
  int converted;

  converted = convert_to_working_tree_ca_internal(ca, path, src, len, dst,
              0, meta, dco);
  return converted;
}
1506
1507
/*
 * Working tree conversion (not normalizing) without a delayed checkout
 * object.
 */
int convert_to_working_tree_ca(const struct conv_attrs *ca,
             const char *path, const char *src,
             size_t len, struct strbuf *dst,
             const struct checkout_metadata *meta)
{
  int converted;

  converted = convert_to_working_tree_ca_internal(ca, path, src, len, dst,
              0, meta, NULL);
  return converted;
}
1515
1516
int renormalize_buffer(struct index_state *istate, const char *path,
1517
           const char *src, size_t len, struct strbuf *dst)
1518
0
{
1519
0
  struct conv_attrs ca;
1520
0
  int ret;
1521
1522
0
  convert_attrs(istate, &ca, path);
1523
0
  ret = convert_to_working_tree_ca_internal(&ca, path, src, len, dst, 1,
1524
0
              NULL, NULL);
1525
0
  if (ret) {
1526
0
    src = dst->buf;
1527
0
    len = dst->len;
1528
0
  }
1529
0
  return ret | convert_to_git(istate, path, src, len, dst, CONV_EOL_RENORMALIZE);
1530
0
}
1531
1532
/*****************************************************************
1533
 *
1534
 * Streaming conversion support
1535
 *
1536
 *****************************************************************/
1537
1538
typedef int (*filter_fn)(struct stream_filter *,
1539
       const char *input, size_t *isize_p,
1540
       char *output, size_t *osize_p);
1541
typedef void (*free_fn)(struct stream_filter *);
1542
1543
/*
 * Operations of a stream filter: `filter` is what stream_filter()
 * dispatches to, `free` is what free_stream_filter() dispatches to.
 */
struct stream_filter_vtbl {
1544
  filter_fn filter;
1545
  free_fn free;
1546
};
1547
1548
/*
 * Common header of every stream filter.  The concrete filters in this
 * file embed it as their first member and are cast to and from it.
 */
struct stream_filter {
1549
  struct stream_filter_vtbl *vtbl;
1550
};
1551
1552
/*
 * Pass-through filter: copy as many bytes as both the input and the
 * output have room for, and shrink *isize_p and *osize_p by that
 * amount.  A NULL input (drain request) is a no-op.  Always returns 0.
 */
static int null_filter_fn(struct stream_filter *filter UNUSED,
        const char *input, size_t *isize_p,
        char *output, size_t *osize_p)
{
  size_t n;

  /* we do not keep any states */
  if (!input)
    return 0;

  n = *isize_p < *osize_p ? *isize_p : *osize_p;
  if (!n)
    return 0;

  memmove(output, input, n);
  *isize_p -= n;
  *osize_p -= n;
  return 0;
}
1570
1571
/* Releasing a null filter does nothing -- null instances are shared. */
static void null_free_fn(struct stream_filter *filter UNUSED)
{
}
1575
1576
/* Operations of the pass-through (null) filter. */
static struct stream_filter_vtbl null_vtbl = {
1577
  .filter = null_filter_fn,
1578
  .free = null_free_fn,
1579
};
1580
1581
/*
 * The one shared null filter instance; is_null_stream_filter()
 * recognizes it by address.
 */
static struct stream_filter null_filter_singleton = {
1582
  .vtbl = &null_vtbl,
1583
};
1584
1585
int is_null_stream_filter(struct stream_filter *filter)
1586
0
{
1587
0
  return filter == &null_filter_singleton;
1588
0
}
1589
1590
1591
/*
1592
 * LF-to-CRLF filter
1593
 */
1594
1595
/*
 * State of the LF-to-CRLF stream filter.  `held` is a single byte the
 * filter could not emit yet (either a CR waiting to see whether a LF
 * follows, or a byte that did not fit into the output); it is only
 * meaningful while `has_held` is set.
 */
struct lf_to_crlf_filter {
1596
  struct stream_filter filter;
1597
  unsigned has_held:1;
1598
  char held;
1599
};
1600
1601
static int lf_to_crlf_filter_fn(struct stream_filter *filter,
1602
        const char *input, size_t *isize_p,
1603
        char *output, size_t *osize_p)
1604
0
{
1605
0
  size_t count, o = 0;
1606
0
  struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter;
1607
1608
  /*
1609
   * We may be holding onto the CR to see if it is followed by a
1610
   * LF, in which case we would need to go to the main loop.
1611
   * Otherwise, just emit it to the output stream.
1612
   */
1613
0
  if (lf_to_crlf->has_held && (lf_to_crlf->held != '\r' || !input)) {
1614
0
    output[o++] = lf_to_crlf->held;
1615
0
    lf_to_crlf->has_held = 0;
1616
0
  }
1617
1618
  /* We are told to drain */
1619
0
  if (!input) {
1620
0
    *osize_p -= o;
1621
0
    return 0;
1622
0
  }
1623
1624
0
  count = *isize_p;
1625
0
  if (count || lf_to_crlf->has_held) {
1626
0
    size_t i;
1627
0
    int was_cr = 0;
1628
1629
0
    if (lf_to_crlf->has_held) {
1630
0
      was_cr = 1;
1631
0
      lf_to_crlf->has_held = 0;
1632
0
    }
1633
1634
0
    for (i = 0; o < *osize_p && i < count; i++) {
1635
0
      char ch = input[i];
1636
1637
0
      if (ch == '\n') {
1638
0
        output[o++] = '\r';
1639
0
      } else if (was_cr) {
1640
        /*
1641
         * Previous round saw CR and it is not followed
1642
         * by a LF; emit the CR before processing the
1643
         * current character.
1644
         */
1645
0
        output[o++] = '\r';
1646
0
      }
1647
1648
      /*
1649
       * We may have consumed the last output slot,
1650
       * in which case we need to break out of this
1651
       * loop; hold the current character before
1652
       * returning.
1653
       */
1654
0
      if (*osize_p <= o) {
1655
0
        lf_to_crlf->has_held = 1;
1656
0
        lf_to_crlf->held = ch;
1657
0
        continue; /* break but increment i */
1658
0
      }
1659
1660
0
      if (ch == '\r') {
1661
0
        was_cr = 1;
1662
0
        continue;
1663
0
      }
1664
1665
0
      was_cr = 0;
1666
0
      output[o++] = ch;
1667
0
    }
1668
1669
0
    *osize_p -= o;
1670
0
    *isize_p -= i;
1671
1672
0
    if (!lf_to_crlf->has_held && was_cr) {
1673
0
      lf_to_crlf->has_held = 1;
1674
0
      lf_to_crlf->held = '\r';
1675
0
    }
1676
0
  }
1677
0
  return 0;
1678
0
}
1679
1680
/* Release a filter created by lf_to_crlf_filter(). */
static void lf_to_crlf_free_fn(struct stream_filter *filter)
{
  struct lf_to_crlf_filter *state = (struct lf_to_crlf_filter *)filter;

  free(state);
}
1684
1685
/* Operations of the LF-to-CRLF filter. */
static struct stream_filter_vtbl lf_to_crlf_vtbl = {
1686
  .filter = lf_to_crlf_filter_fn,
1687
  .free = lf_to_crlf_free_fn,
1688
};
1689
1690
static struct stream_filter *lf_to_crlf_filter(void)
1691
0
{
1692
0
  struct lf_to_crlf_filter *lf_to_crlf = xcalloc(1, sizeof(*lf_to_crlf));
1693
1694
0
  lf_to_crlf->filter.vtbl = &lf_to_crlf_vtbl;
1695
0
  return (struct stream_filter *)lf_to_crlf;
1696
0
}
1697
1698
/*
1699
 * Cascade filter
1700
 */
1701
#define FILTER_BUFFER 1024
1702
/*
 * Two filters chained through an intermediate buffer:
 *
 *   input -- (one) --> buf -- (two) --> output
 *
 * buf[ptr..end) is the output of `one` that `two` has not consumed yet.
 */
struct cascade_filter {
1703
  struct stream_filter filter;
1704
  struct stream_filter *one;
1705
  struct stream_filter *two;
1706
  char buf[FILTER_BUFFER];
1707
  int end, ptr;
1708
};
1709
1710
static int cascade_filter_fn(struct stream_filter *filter,
1711
           const char *input, size_t *isize_p,
1712
           char *output, size_t *osize_p)
1713
0
{
1714
0
  struct cascade_filter *cas = (struct cascade_filter *) filter;
1715
0
  size_t filled = 0;
1716
0
  size_t sz = *osize_p;
1717
0
  size_t to_feed, remaining;
1718
1719
  /*
1720
   * input -- (one) --> buf -- (two) --> output
1721
   */
1722
0
  while (filled < sz) {
1723
0
    remaining = sz - filled;
1724
1725
    /* do we already have something to feed two with? */
1726
0
    if (cas->ptr < cas->end) {
1727
0
      to_feed = cas->end - cas->ptr;
1728
0
      if (stream_filter(cas->two,
1729
0
            cas->buf + cas->ptr, &to_feed,
1730
0
            output + filled, &remaining))
1731
0
        return -1;
1732
0
      cas->ptr += (cas->end - cas->ptr) - to_feed;
1733
0
      filled = sz - remaining;
1734
0
      continue;
1735
0
    }
1736
1737
    /* feed one from upstream and have it emit into our buffer */
1738
0
    to_feed = input ? *isize_p : 0;
1739
0
    if (input && !to_feed)
1740
0
      break;
1741
0
    remaining = sizeof(cas->buf);
1742
0
    if (stream_filter(cas->one,
1743
0
          input, &to_feed,
1744
0
          cas->buf, &remaining))
1745
0
      return -1;
1746
0
    cas->end = sizeof(cas->buf) - remaining;
1747
0
    cas->ptr = 0;
1748
0
    if (input) {
1749
0
      size_t fed = *isize_p - to_feed;
1750
0
      *isize_p -= fed;
1751
0
      input += fed;
1752
0
    }
1753
1754
    /* do we know that we drained one completely? */
1755
0
    if (input || cas->end)
1756
0
      continue;
1757
1758
    /* tell two to drain; we have nothing more to give it */
1759
0
    to_feed = 0;
1760
0
    remaining = sz - filled;
1761
0
    if (stream_filter(cas->two,
1762
0
          NULL, &to_feed,
1763
0
          output + filled, &remaining))
1764
0
      return -1;
1765
0
    if (remaining == (sz - filled))
1766
0
      break; /* completely drained two */
1767
0
    filled = sz - remaining;
1768
0
  }
1769
0
  *osize_p -= filled;
1770
0
  return 0;
1771
0
}
1772
1773
static void cascade_free_fn(struct stream_filter *filter)
1774
0
{
1775
0
  struct cascade_filter *cas = (struct cascade_filter *)filter;
1776
0
  free_stream_filter(cas->one);
1777
0
  free_stream_filter(cas->two);
1778
0
  free(filter);
1779
0
}
1780
1781
/* Operations of the cascade filter. */
static struct stream_filter_vtbl cascade_vtbl = {
1782
  .filter = cascade_filter_fn,
1783
  .free = cascade_free_fn,
1784
};
1785
1786
static struct stream_filter *cascade_filter(struct stream_filter *one,
1787
              struct stream_filter *two)
1788
0
{
1789
0
  struct cascade_filter *cascade;
1790
1791
0
  if (!one || is_null_stream_filter(one))
1792
0
    return two;
1793
0
  if (!two || is_null_stream_filter(two))
1794
0
    return one;
1795
1796
0
  cascade = xmalloc(sizeof(*cascade));
1797
0
  cascade->one = one;
1798
0
  cascade->two = two;
1799
0
  cascade->end = cascade->ptr = 0;
1800
0
  cascade->filter.vtbl = &cascade_vtbl;
1801
0
  return (struct stream_filter *)cascade;
1802
0
}
1803
1804
/*
1805
 * ident filter
1806
 */
1807
0
#define IDENT_DRAINING (-1)
1808
0
#define IDENT_SKIPPING (-2)
1809
/*
 * State of the streaming ident filter.  `left` collects bytes that
 * still have to be written to the output.  `state` is IDENT_DRAINING,
 * IDENT_SKIPPING, or the number of characters of "$Id" matched so far.
 * `ident` is the replacement text ": <hex object id> $" that follows
 * "$Id" in an expanded keyword.
 */
struct ident_filter {
1810
  struct stream_filter filter;
1811
  struct strbuf left;
1812
  int state;
1813
  char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
1814
};
1815
1816
/*
 * Return 1 when `str` starts with "$Id: " and the text after that
 * prefix contains a whitespace character that is not directly followed
 * by '$'; return 0 otherwise.
 */
static int is_foreign_ident(const char *str)
{
  const char *p;

  if (!skip_prefix(str, "$Id: ", &p))
    return 0;
  for (; *p; p++) {
    if (isspace(*p) && p[1] != '$')
      return 1;
  }
  return 0;
}
1828
1829
static void ident_drain(struct ident_filter *ident, char **output_p, size_t *osize_p)
1830
0
{
1831
0
  size_t to_drain = ident->left.len;
1832
1833
0
  if (*osize_p < to_drain)
1834
0
    to_drain = *osize_p;
1835
0
  if (to_drain) {
1836
0
    memcpy(*output_p, ident->left.buf, to_drain);
1837
0
    strbuf_remove(&ident->left, 0, to_drain);
1838
0
    *output_p += to_drain;
1839
0
    *osize_p -= to_drain;
1840
0
  }
1841
0
  if (!ident->left.len)
1842
0
    ident->state = 0;
1843
0
}
1844
1845
static int ident_filter_fn(struct stream_filter *filter,
1846
         const char *input, size_t *isize_p,
1847
         char *output, size_t *osize_p)
1848
0
{
1849
0
  struct ident_filter *ident = (struct ident_filter *)filter;
1850
0
  static const char head[] = "$Id";
1851
1852
0
  if (!input) {
1853
    /* drain upon eof */
1854
0
    switch (ident->state) {
1855
0
    default:
1856
0
      strbuf_add(&ident->left, head, ident->state);
1857
      /* fallthrough */
1858
0
    case IDENT_SKIPPING:
1859
      /* fallthrough */
1860
0
    case IDENT_DRAINING:
1861
0
      ident_drain(ident, &output, osize_p);
1862
0
    }
1863
0
    return 0;
1864
0
  }
1865
1866
0
  while (*isize_p || (ident->state == IDENT_DRAINING)) {
1867
0
    int ch;
1868
1869
0
    if (ident->state == IDENT_DRAINING) {
1870
0
      ident_drain(ident, &output, osize_p);
1871
0
      if (!*osize_p)
1872
0
        break;
1873
0
      continue;
1874
0
    }
1875
1876
0
    ch = *(input++);
1877
0
    (*isize_p)--;
1878
1879
0
    if (ident->state == IDENT_SKIPPING) {
1880
      /*
1881
       * Skipping until '$' or LF, but keeping them
1882
       * in case it is a foreign ident.
1883
       */
1884
0
      strbuf_addch(&ident->left, ch);
1885
0
      if (ch != '\n' && ch != '$')
1886
0
        continue;
1887
0
      if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
1888
0
        strbuf_setlen(&ident->left, sizeof(head) - 1);
1889
0
        strbuf_addstr(&ident->left, ident->ident);
1890
0
      }
1891
0
      ident->state = IDENT_DRAINING;
1892
0
      continue;
1893
0
    }
1894
1895
0
    if (ident->state < sizeof(head) &&
1896
0
        head[ident->state] == ch) {
1897
0
      ident->state++;
1898
0
      continue;
1899
0
    }
1900
1901
0
    if (ident->state)
1902
0
      strbuf_add(&ident->left, head, ident->state);
1903
0
    if (ident->state == sizeof(head) - 1) {
1904
0
      if (ch != ':' && ch != '$') {
1905
0
        strbuf_addch(&ident->left, ch);
1906
0
        ident->state = 0;
1907
0
        continue;
1908
0
      }
1909
1910
0
      if (ch == ':') {
1911
0
        strbuf_addch(&ident->left, ch);
1912
0
        ident->state = IDENT_SKIPPING;
1913
0
      } else {
1914
0
        strbuf_addstr(&ident->left, ident->ident);
1915
0
        ident->state = IDENT_DRAINING;
1916
0
      }
1917
0
      continue;
1918
0
    }
1919
1920
0
    strbuf_addch(&ident->left, ch);
1921
0
    ident->state = IDENT_DRAINING;
1922
0
  }
1923
0
  return 0;
1924
0
}
1925
1926
static void ident_free_fn(struct stream_filter *filter)
1927
0
{
1928
0
  struct ident_filter *ident = (struct ident_filter *)filter;
1929
0
  strbuf_release(&ident->left);
1930
0
  free(filter);
1931
0
}
1932
1933
/* Operations of the streaming ident filter. */
static struct stream_filter_vtbl ident_vtbl = {
1934
  .filter = ident_filter_fn,
  1935
  .free = ident_free_fn,
1936
};
1937
1938
static struct stream_filter *ident_filter(const struct object_id *oid)
1939
0
{
1940
0
  struct ident_filter *ident = xmalloc(sizeof(*ident));
1941
1942
0
  xsnprintf(ident->ident, sizeof(ident->ident),
1943
0
      ": %s $", oid_to_hex(oid));
1944
0
  strbuf_init(&ident->left, 0);
1945
0
  ident->filter.vtbl = &ident_vtbl;
1946
0
  ident->state = 0;
1947
0
  return (struct stream_filter *)ident;
1948
0
}
1949
1950
/*
1951
 * Return an appropriately constructed filter for the given ca, or NULL if
1952
 * the contents cannot be filtered without reading the whole thing
1953
 * in-core.
1954
 *
1955
 * Note that you would be crazy to set CRLF, smudge/clean or ident to a
1956
 * large binary blob you would want us not to slurp into the memory!
1957
 */
1958
struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca,
1959
             const struct object_id *oid)
1960
0
{
1961
0
  struct stream_filter *filter = NULL;
1962
1963
0
  if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE)
1964
0
    return NULL;
1965
1966
0
  if (ca->ident)
1967
0
    filter = ident_filter(oid);
1968
1969
0
  if (output_eol(ca->crlf_action) == EOL_CRLF)
1970
0
    filter = cascade_filter(filter, lf_to_crlf_filter());
1971
0
  else
1972
0
    filter = cascade_filter(filter, &null_filter_singleton);
1973
1974
0
  return filter;
1975
0
}
1976
1977
struct stream_filter *get_stream_filter(struct index_state *istate,
1978
          const char *path,
1979
          const struct object_id *oid)
1980
0
{
1981
0
  struct conv_attrs ca;
1982
0
  convert_attrs(istate, &ca, path);
1983
0
  return get_stream_filter_ca(&ca, oid);
1984
0
}
1985
1986
void free_stream_filter(struct stream_filter *filter)
1987
0
{
1988
0
  filter->vtbl->free(filter);
1989
0
}
1990
1991
int stream_filter(struct stream_filter *filter,
1992
      const char *input, size_t *isize_p,
1993
      char *output, size_t *osize_p)
1994
0
{
1995
0
  return filter->vtbl->filter(filter, input, isize_p, output, osize_p);
1996
0
}
1997
1998
void init_checkout_metadata(struct checkout_metadata *meta, const char *refname,
1999
          const struct object_id *treeish,
2000
          const struct object_id *blob)
2001
0
{
2002
0
  memset(meta, 0, sizeof(*meta));
2003
0
  if (refname)
2004
0
    meta->refname = refname;
2005
0
  if (treeish)
2006
0
    oidcpy(&meta->treeish, treeish);
2007
0
  if (blob)
2008
0
    oidcpy(&meta->blob, blob);
2009
0
}
2010
2011
void clone_checkout_metadata(struct checkout_metadata *dst,
2012
           const struct checkout_metadata *src,
2013
           const struct object_id *blob)
2014
9.68k
{
2015
9.68k
  memcpy(dst, src, sizeof(*dst));
2016
9.68k
  if (blob)
2017
0
    oidcpy(&dst->blob, blob);
2018
9.68k
}
2019
2020
enum conv_attrs_classification classify_conv_attrs(const struct conv_attrs *ca)
2021
0
{
2022
0
  if (ca->drv) {
2023
0
    if (ca->drv->process)
2024
0
      return CA_CLASS_INCORE_PROCESS;
2025
0
    if (ca->drv->smudge || ca->drv->clean)
2026
0
      return CA_CLASS_INCORE_FILTER;
2027
0
  }
2028
2029
0
  if (ca->working_tree_encoding)
2030
0
    return CA_CLASS_INCORE;
2031
2032
0
  if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF)
2033
0
    return CA_CLASS_INCORE;
2034
2035
0
  return CA_CLASS_STREAMABLE;
2036
0
}