Coverage Report

Created: 2024-09-08 06:23

/src/git/mailinfo.c
Line
Count
Source (jump to first uncovered line)
1
#define USE_THE_REPOSITORY_VARIABLE
2
3
#include "git-compat-util.h"
4
#include "config.h"
5
#include "gettext.h"
6
#include "hex-ll.h"
7
#include "utf8.h"
8
#include "strbuf.h"
9
#include "mailinfo.h"
10
11
static void cleanup_space(struct strbuf *sb)
12
0
{
13
0
  size_t pos, cnt;
14
0
  for (pos = 0; pos < sb->len; pos++) {
15
0
    if (isspace(sb->buf[pos])) {
16
0
      sb->buf[pos] = ' ';
17
0
      for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
18
0
      strbuf_remove(sb, pos + 1, cnt);
19
0
    }
20
0
  }
21
0
}
22
23
static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
24
0
{
25
0
  struct strbuf *src = name;
26
0
  if (!name->len || 60 < name->len || strpbrk(name->buf, "@<>"))
27
0
    src = email;
28
0
  else if (name == out)
29
0
    return;
30
0
  strbuf_reset(out);
31
0
  strbuf_addbuf(out, src);
32
0
}
33
34
static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
35
0
{
36
  /* John Doe <johndoe> */
37
38
0
  char *bra, *ket;
39
  /* This is fallback, so do not bother if we already have an
40
   * e-mail address.
41
   */
42
0
  if (mi->email.len)
43
0
    return;
44
45
0
  bra = strchr(line->buf, '<');
46
0
  if (!bra)
47
0
    return;
48
0
  ket = strchr(bra, '>');
49
0
  if (!ket)
50
0
    return;
51
52
0
  strbuf_reset(&mi->email);
53
0
  strbuf_add(&mi->email, bra + 1, ket - bra - 1);
54
55
0
  strbuf_reset(&mi->name);
56
0
  strbuf_add(&mi->name, line->buf, bra - line->buf);
57
0
  strbuf_trim(&mi->name);
58
0
  get_sane_name(&mi->name, &mi->name, &mi->email);
59
0
}
60
61
/*
 * Copy a parenthesised comment into "outbuf", resolving backslash
 * escapes.  "in" points just past the opening '(' (which is re-emitted
 * here).  Nested parentheses are tracked; the function returns the
 * position just after the ')' that closes the outermost level, or the
 * terminating NUL if the comment is never closed.
 */
static const char *unquote_comment(struct strbuf *outbuf, const char *in)
{
  int escaped = 0;
  int nesting = 1;

  strbuf_addch(outbuf, '(');

  while (*in) {
    int ch = *in++;

    if (escaped) {
      /* The byte after a backslash is taken verbatim. */
      escaped = 0;
      strbuf_addch(outbuf, ch);
    } else if (ch == '\\') {
      escaped = 1;
    } else if (ch == '(') {
      strbuf_addch(outbuf, '(');
      nesting++;
    } else if (ch == ')') {
      strbuf_addch(outbuf, ')');
      if (--nesting == 0)
        return in;
    } else {
      strbuf_addch(outbuf, ch);
    }
  }

  return in;
}
94
95
/*
 * Copy the contents of a double-quoted string into "outbuf", resolving
 * backslash escapes and dropping the quotes.  "in" points just past
 * the opening '"'.  Returns the position just after the closing '"',
 * or the terminating NUL if the string is never closed.
 */
static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
{
  int escaped = 0;

  while (*in) {
    int ch = *in++;

    if (escaped) {
      /* The byte after a backslash is taken verbatim. */
      escaped = 0;
    } else if (ch == '\\') {
      escaped = 1;
      continue;
    } else if (ch == '"') {
      return in;
    }

    strbuf_addch(outbuf, ch);
  }

  return in;
}
118
119
static void unquote_quoted_pair(struct strbuf *line)
120
0
{
121
0
  struct strbuf outbuf;
122
0
  const char *in = line->buf;
123
0
  int c;
124
125
0
  strbuf_init(&outbuf, line->len);
126
127
0
  while ((c = *in++) != 0) {
128
0
    switch (c) {
129
0
    case '"':
130
0
      in = unquote_quoted_string(&outbuf, in);
131
0
      continue;
132
0
    case '(':
133
0
      in = unquote_comment(&outbuf, in);
134
0
      continue;
135
0
    }
136
137
0
    strbuf_addch(&outbuf, c);
138
0
  }
139
140
0
  strbuf_swap(&outbuf, line);
141
0
  strbuf_release(&outbuf);
142
143
0
}
144
145
static void handle_from(struct mailinfo *mi, const struct strbuf *from)
146
0
{
147
0
  char *at;
148
0
  size_t el;
149
0
  struct strbuf f;
150
151
0
  strbuf_init(&f, from->len);
152
0
  strbuf_addbuf(&f, from);
153
154
0
  unquote_quoted_pair(&f);
155
156
0
  at = strchr(f.buf, '@');
157
0
  if (!at) {
158
0
    parse_bogus_from(mi, from);
159
0
    goto out;
160
0
  }
161
162
  /*
163
   * If we already have one email, don't take any confusing lines
164
   */
165
0
  if (mi->email.len && strchr(at + 1, '@'))
166
0
    goto out;
167
168
  /* Pick up the string around '@', possibly delimited with <>
169
   * pair; that is the email part.
170
   */
171
0
  while (at > f.buf) {
172
0
    char c = at[-1];
173
0
    if (isspace(c))
174
0
      break;
175
0
    if (c == '<') {
176
0
      at[-1] = ' ';
177
0
      break;
178
0
    }
179
0
    at--;
180
0
  }
181
0
  el = strcspn(at, " \n\t\r\v\f>");
182
0
  strbuf_reset(&mi->email);
183
0
  strbuf_add(&mi->email, at, el);
184
0
  strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
185
186
  /* The remainder is name.  It could be
187
   *
188
   * - "John Doe <john.doe@xz>"     (a), or
189
   * - "john.doe@xz (John Doe)"     (b), or
190
   * - "John (zzz) Doe <john.doe@xz> (Comment)" (c)
191
   *
192
   * but we have removed the email part, so
193
   *
194
   * - remove extra spaces which could stay after email (case 'c'), and
195
   * - trim from both ends, possibly removing the () pair at the end
196
   *   (cases 'a' and 'b').
197
   */
198
0
  cleanup_space(&f);
199
0
  strbuf_trim(&f);
200
0
  if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
201
0
    strbuf_remove(&f, 0, 1);
202
0
    strbuf_setlen(&f, f.len - 1);
203
0
  }
204
205
0
  get_sane_name(&mi->name, &f, &mi->email);
206
0
out:
207
0
  strbuf_release(&f);
208
0
}
209
210
static void handle_header(struct strbuf **out, const struct strbuf *line)
211
0
{
212
0
  if (!*out) {
213
0
    *out = xmalloc(sizeof(struct strbuf));
214
0
    strbuf_init(*out, line->len);
215
0
  } else
216
0
    strbuf_reset(*out);
217
218
0
  strbuf_addbuf(*out, line);
219
0
}
220
221
/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
222
 * to have enough heuristics to grok MIME encoded patches often found
223
 * on our mailing lists.  For example, we do not even treat header lines
224
 * case insensitively.
225
 */
226
227
/*
 * Look for "name" (matched case-insensitively, anywhere in "line") and
 * copy the value that follows it into "attr".  A value starting with
 * '"' runs to the next '"'; otherwise it runs to the next ';', space
 * or tab.  "attr" is always emptied first.  Returns 1 if "name" was
 * found, 0 otherwise.
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
  const char *value = strcasestr(line, name);
  const char *terminators = "; \t";

  strbuf_setlen(attr, 0);
  if (!value)
    return 0;

  value += strlen(name);
  if (*value == '"') {
    terminators = "\"";
    value++;
  }

  strbuf_add(attr, value, strcspn(value, terminators));
  return 1;
}
246
247
static int has_attr_value(const char *line, const char *name, const char *value)
248
0
{
249
0
  struct strbuf sb = STRBUF_INIT;
250
0
  int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
251
0
  strbuf_release(&sb);
252
0
  return rc;
253
0
}
254
255
static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
256
0
{
257
0
  struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
258
0
  strbuf_init(boundary, line->len);
259
260
0
  mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
261
0
  mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
262
263
0
  if (slurp_attr(line->buf, "boundary=", boundary)) {
264
0
    strbuf_insertstr(boundary, 0, "--");
265
0
    if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
266
0
      error("Too many boundaries to handle");
267
0
      mi->input_error = -1;
268
0
      mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
269
0
      return;
270
0
    }
271
0
    *(mi->content_top) = boundary;
272
0
    boundary = NULL;
273
0
  }
274
0
  slurp_attr(line->buf, "charset=", &mi->charset);
275
276
0
  if (boundary) {
277
0
    strbuf_release(boundary);
278
0
    free(boundary);
279
0
  }
280
0
}
281
282
static void handle_content_transfer_encoding(struct mailinfo *mi,
283
               const struct strbuf *line)
284
0
{
285
0
  if (strcasestr(line->buf, "base64"))
286
0
    mi->transfer_encoding = TE_BASE64;
287
0
  else if (strcasestr(line->buf, "quoted-printable"))
288
0
    mi->transfer_encoding = TE_QP;
289
0
  else
290
0
    mi->transfer_encoding = TE_DONTCARE;
291
0
}
292
293
static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
294
0
{
295
0
  struct strbuf *content_top = *(mi->content_top);
296
297
0
  return ((content_top->len <= line->len) &&
298
0
    !memcmp(line->buf, content_top->buf, content_top->len));
299
0
}
300
301
static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
302
0
{
303
0
  size_t at = 0;
304
305
0
  while (at < subject->len) {
306
0
    char *pos;
307
0
    size_t remove;
308
309
0
    switch (subject->buf[at]) {
310
0
    case 'r': case 'R':
311
0
      if (subject->len <= at + 3)
312
0
        break;
313
0
      if ((subject->buf[at + 1] == 'e' ||
314
0
           subject->buf[at + 1] == 'E') &&
315
0
          subject->buf[at + 2] == ':') {
316
0
        strbuf_remove(subject, at, 3);
317
0
        continue;
318
0
      }
319
0
      at++;
320
0
      break;
321
0
    case ' ': case '\t': case ':':
322
0
      strbuf_remove(subject, at, 1);
323
0
      continue;
324
0
    case '[':
325
0
      pos = strchr(subject->buf + at, ']');
326
0
      if (!pos)
327
0
        break;
328
0
      remove = pos - (subject->buf + at) + 1;
329
0
      if (!mi->keep_non_patch_brackets_in_subject ||
330
0
          (7 <= remove &&
331
0
           memmem(subject->buf + at, remove, "PATCH", 5)))
332
0
        strbuf_remove(subject, at, remove);
333
0
      else {
334
0
        at += remove;
335
        /*
336
         * If the input had a space after the ], keep
337
         * it.  We don't bother with finding the end of
338
         * the space, since we later normalize it
339
         * anyway.
340
         */
341
0
        if (isspace(subject->buf[at]))
342
0
          at += 1;
343
0
      }
344
0
      continue;
345
0
    }
346
0
    break;
347
0
  }
348
0
  strbuf_trim(subject);
349
0
}
350
351
/*
 * Names of the headers collected into the p_hdr_data / s_hdr_data
 * arrays; the index of a name here is its index in those arrays.
 */
static const char * const header[] = {
  "From",
  "Subject",
  "Date",
};
354
355
static inline int skip_header(const struct strbuf *line, const char *hdr,
356
            const char **outval)
357
0
{
358
0
  const char *val;
359
0
  if (!skip_iprefix(line->buf, hdr, &val) ||
360
0
      *val++ != ':')
361
0
    return 0;
362
0
  while (isspace(*val))
363
0
    val++;
364
0
  *outval = val;
365
0
  return 1;
366
0
}
367
368
/*
 * Returns non-zero when the "len" bytes at "line" look exactly like the
 * "From <40 lowercase hex digits> Mon Sep 17 00:00:00 2001\n" line
 * shown in SAMPLE below; only the hex digits may differ.
 */
static int is_format_patch_separator(const char *line, int len)
{
  static const char SAMPLE[] =
    "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
  const size_t sample_len = strlen(SAMPLE);
  const char *cp;
  size_t offset;

  if ((size_t)len != sample_len)
    return 0;
  if (strncmp(line, "From ", 5))
    return 0;

  cp = line + 5;
  if (strspn(cp, "0123456789abcdef") != 40)
    return 0;
  cp += 40;

  /* Everything after the object name must match the sample. */
  offset = cp - line;
  return !memcmp(SAMPLE + offset, cp, sample_len - offset);
}
383
384
static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
385
0
{
386
0
  const char *in = q_seg->buf;
387
0
  int c;
388
0
  struct strbuf *out = xmalloc(sizeof(struct strbuf));
389
0
  strbuf_init(out, q_seg->len);
390
391
0
  while ((c = *in++) != 0) {
392
0
    if (c == '=') {
393
0
      int ch, d = *in;
394
0
      if (d == '\n' || !d)
395
0
        break; /* drop trailing newline */
396
0
      ch = hex2chr(in);
397
0
      if (ch >= 0) {
398
0
        strbuf_addch(out, ch);
399
0
        in += 2;
400
0
        continue;
401
0
      }
402
      /* garbage -- fall through */
403
0
    }
404
0
    if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
405
0
      c = 0x20;
406
0
    strbuf_addch(out, c);
407
0
  }
408
0
  return out;
409
0
}
410
411
static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
412
0
{
413
  /* Decode in..ep, possibly in-place to ot */
414
0
  int c, pos = 0, acc = 0;
415
0
  const char *in = b_seg->buf;
416
0
  struct strbuf *out = xmalloc(sizeof(struct strbuf));
417
0
  strbuf_init(out, b_seg->len);
418
419
0
  while ((c = *in++) != 0) {
420
0
    if (c == '+')
421
0
      c = 62;
422
0
    else if (c == '/')
423
0
      c = 63;
424
0
    else if ('A' <= c && c <= 'Z')
425
0
      c -= 'A';
426
0
    else if ('a' <= c && c <= 'z')
427
0
      c -= 'a' - 26;
428
0
    else if ('0' <= c && c <= '9')
429
0
      c -= '0' - 52;
430
0
    else
431
0
      continue; /* garbage */
432
0
    switch (pos++) {
433
0
    case 0:
434
0
      acc = (c << 2);
435
0
      break;
436
0
    case 1:
437
0
      strbuf_addch(out, (acc | (c >> 4)));
438
0
      acc = (c & 15) << 4;
439
0
      break;
440
0
    case 2:
441
0
      strbuf_addch(out, (acc | (c >> 2)));
442
0
      acc = (c & 3) << 6;
443
0
      break;
444
0
    case 3:
445
0
      strbuf_addch(out, (acc | c));
446
0
      acc = pos = 0;
447
0
      break;
448
0
    }
449
0
  }
450
0
  return out;
451
0
}
452
453
static int convert_to_utf8(struct mailinfo *mi,
454
         struct strbuf *line, const char *charset)
455
0
{
456
0
  char *out;
457
0
  size_t out_len;
458
459
0
  if (!mi->metainfo_charset || !charset || !*charset)
460
0
    return 0;
461
462
0
  if (same_encoding(mi->metainfo_charset, charset))
463
0
    return 0;
464
0
  out = reencode_string_len(line->buf, line->len,
465
0
          mi->metainfo_charset, charset, &out_len);
466
0
  if (!out) {
467
0
    mi->input_error = -1;
468
0
    return error("cannot convert from %s to %s",
469
0
           charset, mi->metainfo_charset);
470
0
  }
471
0
  strbuf_attach(line, out, out_len, out_len);
472
0
  return 0;
473
0
}
474
475
static void decode_header(struct mailinfo *mi, struct strbuf *it)
476
0
{
477
0
  char *in, *ep, *cp;
478
0
  struct strbuf outbuf = STRBUF_INIT, *dec;
479
0
  struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
480
0
  int found_error = 1; /* pessimism */
481
482
0
  in = it->buf;
483
0
  while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
484
0
    int encoding;
485
0
    strbuf_reset(&charset_q);
486
0
    strbuf_reset(&piecebuf);
487
488
0
    if (in != ep) {
489
      /*
490
       * We are about to process an encoded-word
491
       * that begins at ep, but there is something
492
       * before the encoded word.
493
       */
494
0
      char *scan;
495
0
      for (scan = in; scan < ep; scan++)
496
0
        if (!isspace(*scan))
497
0
          break;
498
499
0
      if (scan != ep || in == it->buf) {
500
        /*
501
         * We should not lose that "something",
502
         * unless we have just processed an
503
         * encoded-word, and there is only LWS
504
         * before the one we are about to process.
505
         */
506
0
        strbuf_add(&outbuf, in, ep - in);
507
0
      }
508
0
    }
509
    /* E.g.
510
     * ep : "=?iso-2022-jp?B?GyR...?= foo"
511
     * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
512
     */
513
0
    ep += 2;
514
515
0
    if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
516
0
      goto release_return;
517
518
0
    if (cp + 3 - it->buf > it->len)
519
0
      goto release_return;
520
0
    strbuf_add(&charset_q, ep, cp - ep);
521
522
0
    encoding = cp[1];
523
0
    if (!encoding || cp[2] != '?')
524
0
      goto release_return;
525
0
    ep = strstr(cp + 3, "?=");
526
0
    if (!ep)
527
0
      goto release_return;
528
0
    strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
529
0
    switch (tolower(encoding)) {
530
0
    default:
531
0
      goto release_return;
532
0
    case 'b':
533
0
      dec = decode_b_segment(&piecebuf);
534
0
      break;
535
0
    case 'q':
536
0
      dec = decode_q_segment(&piecebuf, 1);
537
0
      break;
538
0
    }
539
0
    if (convert_to_utf8(mi, dec, charset_q.buf))
540
0
      goto release_return;
541
542
0
    strbuf_addbuf(&outbuf, dec);
543
0
    strbuf_release(dec);
544
0
    free(dec);
545
0
    in = ep + 2;
546
0
  }
547
0
  strbuf_addstr(&outbuf, in);
548
0
  strbuf_reset(it);
549
0
  strbuf_addbuf(it, &outbuf);
550
0
  found_error = 0;
551
0
release_return:
552
0
  strbuf_release(&outbuf);
553
0
  strbuf_release(&charset_q);
554
0
  strbuf_release(&piecebuf);
555
556
0
  if (found_error)
557
0
    mi->input_error = -1;
558
0
}
559
560
/*
 * If "line" is a "hdr:" header, append its value to "val", run
 * decode_header() on "val" (RFC 2047 B/Q unwrapping plus optional
 * charset normalization) and return 1.  Otherwise return 0 without
 * touching "val".
 */
static int parse_header(const struct strbuf *line,
      const char *hdr,
      struct mailinfo *mi,
      struct strbuf *val)
{
  const char *value;

  if (skip_header(line, hdr, &value)) {
    strbuf_addstr(val, value);
    decode_header(mi, val);
    return 1;
  }
  return 0;
}
578
579
static int check_header(struct mailinfo *mi,
580
      const struct strbuf *line,
581
      struct strbuf *hdr_data[], int overwrite)
582
0
{
583
0
  int i, ret = 0;
584
0
  struct strbuf sb = STRBUF_INIT;
585
586
  /* search for the interesting parts */
587
0
  for (i = 0; i < ARRAY_SIZE(header); i++) {
588
0
    if ((!hdr_data[i] || overwrite) &&
589
0
        parse_header(line, header[i], mi, &sb)) {
590
0
      handle_header(&hdr_data[i], &sb);
591
0
      ret = 1;
592
0
      goto check_header_out;
593
0
    }
594
0
  }
595
596
  /* Content stuff */
597
0
  if (parse_header(line, "Content-Type", mi, &sb)) {
598
0
    handle_content_type(mi, &sb);
599
0
    ret = 1;
600
0
    goto check_header_out;
601
0
  }
602
0
  if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
603
0
    handle_content_transfer_encoding(mi, &sb);
604
0
    ret = 1;
605
0
    goto check_header_out;
606
0
  }
607
0
  if (parse_header(line, "Message-ID", mi, &sb)) {
608
0
    if (mi->add_message_id)
609
0
      mi->message_id = strbuf_detach(&sb, NULL);
610
0
    ret = 1;
611
0
    goto check_header_out;
612
0
  }
613
614
0
check_header_out:
615
0
  strbuf_release(&sb);
616
0
  return ret;
617
0
}
618
619
/*
620
 * Returns 1 if the given line or any line beginning with the given line is an
621
 * in-body header (that is, check_header will succeed when passed
622
 * mi->s_hdr_data).
623
 */
624
static int is_inbody_header(const struct mailinfo *mi,
625
          const struct strbuf *line)
626
0
{
627
0
  int i;
628
0
  const char *val;
629
0
  for (i = 0; i < ARRAY_SIZE(header); i++)
630
0
    if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
631
0
      return 1;
632
0
  return 0;
633
0
}
634
635
static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
636
0
{
637
0
  struct strbuf *ret;
638
639
0
  switch (mi->transfer_encoding) {
640
0
  case TE_QP:
641
0
    ret = decode_q_segment(line, 0);
642
0
    break;
643
0
  case TE_BASE64:
644
0
    ret = decode_b_segment(line);
645
0
    break;
646
0
  case TE_DONTCARE:
647
0
  default:
648
0
    return;
649
0
  }
650
0
  strbuf_reset(line);
651
0
  strbuf_addbuf(line, ret);
652
0
  strbuf_release(ret);
653
0
  free(ret);
654
0
}
655
656
static inline int patchbreak(const struct strbuf *line)
657
0
{
658
0
  size_t i;
659
660
  /* Beginning of a "diff -" header? */
661
0
  if (starts_with(line->buf, "diff -"))
662
0
    return 1;
663
664
  /* CVS "Index: " line? */
665
0
  if (starts_with(line->buf, "Index: "))
666
0
    return 1;
667
668
  /*
669
   * "--- <filename>" starts patches without headers
670
   * "---<sp>*" is a manual separator
671
   */
672
0
  if (line->len < 4)
673
0
    return 0;
674
675
0
  if (starts_with(line->buf, "---")) {
676
    /* space followed by a filename? */
677
0
    if (line->buf[3] == ' ' && !isspace(line->buf[4]))
678
0
      return 1;
679
    /* Just whitespace? */
680
0
    for (i = 3; i < line->len; i++) {
681
0
      unsigned char c = line->buf[i];
682
0
      if (c == '\n')
683
0
        return 1;
684
0
      if (!isspace(c))
685
0
        break;
686
0
    }
687
0
    return 0;
688
0
  }
689
0
  return 0;
690
0
}
691
692
/*
 * Returns non-zero when "line" looks like a scissors mark such as
 * "-- >8 --".
 *
 * A perforation is made of dashes, the scissors ">8", "8<", ">%" or
 * "%<", and whitespace that follows them.  The mark must be at least
 * 8 bytes wide (first to last non-blank byte).  Other cruft such as
 * "cut here" is allowed on the line, but to avoid misidentification
 * the perforation must cover more than a third of the visible width,
 * and whitespace must make up less than half of the perforation.
 */
static int is_scissors_line(const char *line)
{
  const char *p = line;
  const char *first_nonblank = NULL, *last_nonblank = NULL;
  int scissors = 0, gap = 0;
  int perforation = 0, in_perforation = 0;
  int visible = 0;

  while (*p) {
    if (isspace(*p)) {
      /* Whitespace only counts while inside a perforation. */
      if (in_perforation) {
        perforation++;
        gap++;
      }
      p++;
      continue;
    }

    last_nonblank = p;
    if (!first_nonblank)
      first_nonblank = p;

    if (*p == '-') {
      in_perforation = 1;
      perforation++;
      p++;
    } else if ((p[0] == '>' && (p[1] == '8' || p[1] == '%')) ||
         (p[1] == '<' && (p[0] == '8' || p[0] == '%'))) {
      in_perforation = 1;
      perforation += 2;
      scissors += 2;
      p += 2;
    } else {
      in_perforation = 0;
      p++;
    }
  }

  if (first_nonblank && last_nonblank)
    visible = last_nonblank - first_nonblank + 1;

  return (scissors && 8 <= visible &&
    visible < perforation * 3 &&
    gap * 2 < perforation);
}
743
744
static void flush_inbody_header_accum(struct mailinfo *mi)
745
0
{
746
0
  if (!mi->inbody_header_accum.len)
747
0
    return;
748
0
  if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
749
0
    BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
750
0
  strbuf_reset(&mi->inbody_header_accum);
751
0
}
752
753
static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
754
0
{
755
0
  if (mi->inbody_header_accum.len &&
756
0
      (line->buf[0] == ' ' || line->buf[0] == '\t')) {
757
0
    if (mi->use_scissors && is_scissors_line(line->buf)) {
758
      /*
759
       * This is a scissors line; do not consider this line
760
       * as a header continuation line.
761
       */
762
0
      flush_inbody_header_accum(mi);
763
0
      return 0;
764
0
    }
765
0
    strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
766
0
    strbuf_addbuf(&mi->inbody_header_accum, line);
767
0
    return 1;
768
0
  }
769
770
0
  flush_inbody_header_accum(mi);
771
772
0
  if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
773
0
    return is_format_patch_separator(line->buf + 1, line->len - 1);
774
0
  if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
775
0
    int i;
776
0
    for (i = 0; i < ARRAY_SIZE(header); i++)
777
0
      if (!strcmp("Subject", header[i])) {
778
0
        handle_header(&mi->s_hdr_data[i], line);
779
0
        return 1;
780
0
      }
781
0
    return 0;
782
0
  }
783
0
  if (is_inbody_header(mi, line)) {
784
0
    strbuf_addbuf(&mi->inbody_header_accum, line);
785
0
    return 1;
786
0
  }
787
0
  return 0;
788
0
}
789
790
static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
791
0
{
792
0
  assert(!mi->filter_stage);
793
794
0
  if (mi->header_stage) {
795
0
    if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
796
0
      if (mi->inbody_header_accum.len) {
797
0
        flush_inbody_header_accum(mi);
798
0
        mi->header_stage = 0;
799
0
      }
800
0
      return 0;
801
0
    }
802
0
  }
803
804
0
  if (mi->use_inbody_headers && mi->header_stage) {
805
0
    mi->header_stage = check_inbody_header(mi, line);
806
0
    if (mi->header_stage)
807
0
      return 0;
808
0
  } else
809
    /* Only trim the first (blank) line of the commit message
810
     * when ignoring in-body headers.
811
     */
812
0
    mi->header_stage = 0;
813
814
  /* normalize the log message to UTF-8. */
815
0
  if (convert_to_utf8(mi, line, mi->charset.buf))
816
0
    return 0; /* mi->input_error already set */
817
818
0
  if (mi->use_scissors && is_scissors_line(line->buf)) {
819
0
    int i;
820
821
0
    strbuf_setlen(&mi->log_message, 0);
822
0
    mi->header_stage = 1;
823
824
    /*
825
     * We may have already read "secondary headers"; purge
826
     * them to give ourselves a clean restart.
827
     */
828
0
    for (i = 0; i < ARRAY_SIZE(header); i++) {
829
0
      if (mi->s_hdr_data[i])
830
0
        strbuf_release(mi->s_hdr_data[i]);
831
0
      FREE_AND_NULL(mi->s_hdr_data[i]);
832
0
    }
833
0
    return 0;
834
0
  }
835
836
0
  if (patchbreak(line)) {
837
0
    if (mi->message_id)
838
0
      strbuf_addf(&mi->log_message,
839
0
            "Message-ID: %s\n", mi->message_id);
840
0
    return 1;
841
0
  }
842
843
0
  strbuf_addbuf(&mi->log_message, line);
844
0
  return 0;
845
0
}
846
847
static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
848
0
{
849
0
  fwrite(line->buf, 1, line->len, mi->patchfile);
850
0
  mi->patch_lines++;
851
0
}
852
853
static void handle_filter(struct mailinfo *mi, struct strbuf *line)
854
0
{
855
0
  switch (mi->filter_stage) {
856
0
  case 0:
857
0
    if (!handle_commit_msg(mi, line))
858
0
      break;
859
0
    mi->filter_stage++;
860
    /* fallthrough */
861
0
  case 1:
862
0
    handle_patch(mi, line);
863
0
    break;
864
0
  }
865
0
}
866
867
static int is_rfc2822_header(const struct strbuf *line)
868
0
{
869
  /*
870
   * The section that defines the loosest possible
871
   * field name is "3.6.8 Optional fields".
872
   *
873
   * optional-field = field-name ":" unstructured CRLF
874
   * field-name = 1*ftext
875
   * ftext = %d33-57 / %59-126
876
   */
877
0
  int ch;
878
0
  char *cp = line->buf;
879
880
  /* Count mbox From headers as headers */
881
0
  if (starts_with(cp, "From ") || starts_with(cp, ">From "))
882
0
    return 1;
883
884
0
  while ((ch = *cp++)) {
885
0
    if (ch == ':')
886
0
      return 1;
887
0
    if ((33 <= ch && ch <= 57) ||
888
0
        (59 <= ch && ch <= 126))
889
0
      continue;
890
0
    break;
891
0
  }
892
0
  return 0;
893
0
}
894
895
static int read_one_header_line(struct strbuf *line, FILE *in)
896
0
{
897
0
  struct strbuf continuation = STRBUF_INIT;
898
899
  /* Get the first part of the line. */
900
0
  if (strbuf_getline_lf(line, in))
901
0
    return 0;
902
903
  /*
904
   * Is it an empty line or not a valid rfc2822 header?
905
   * If so, stop here, and return false ("not a header")
906
   */
907
0
  strbuf_rtrim(line);
908
0
  if (!line->len || !is_rfc2822_header(line)) {
909
    /* Re-add the newline */
910
0
    strbuf_addch(line, '\n');
911
0
    return 0;
912
0
  }
913
914
  /*
915
   * Now we need to eat all the continuation lines..
916
   * Yuck, 2822 header "folding"
917
   */
918
0
  for (;;) {
919
0
    int peek;
920
921
0
    peek = fgetc(in);
922
0
    if (peek == EOF)
923
0
      break;
924
0
    ungetc(peek, in);
925
0
    if (peek != ' ' && peek != '\t')
926
0
      break;
927
0
    if (strbuf_getline_lf(&continuation, in))
928
0
      break;
929
0
    continuation.buf[0] = ' ';
930
0
    strbuf_rtrim(&continuation);
931
0
    strbuf_addbuf(line, &continuation);
932
0
  }
933
0
  strbuf_release(&continuation);
934
935
0
  return 1;
936
0
}
937
938
static int find_boundary(struct mailinfo *mi, struct strbuf *line)
939
0
{
940
0
  while (!strbuf_getline_lf(line, mi->input)) {
941
0
    if (*(mi->content_top) && is_multipart_boundary(mi, line))
942
0
      return 1;
943
0
  }
944
0
  return 0;
945
0
}
946
947
static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
948
0
{
949
0
  struct strbuf newline = STRBUF_INIT;
950
951
0
  strbuf_addch(&newline, '\n');
952
0
again:
953
0
  if (line->len >= (*(mi->content_top))->len + 2 &&
954
0
      !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
955
    /* we hit an end boundary */
956
    /* pop the current boundary off the stack */
957
0
    strbuf_release(*(mi->content_top));
958
0
    FREE_AND_NULL(*(mi->content_top));
959
960
    /* technically won't happen as is_multipart_boundary()
961
       will fail first.  But just in case..
962
     */
963
0
    if (--mi->content_top < mi->content) {
964
0
      error("Detected mismatched boundaries, can't recover");
965
0
      mi->input_error = -1;
966
0
      mi->content_top = mi->content;
967
0
      strbuf_release(&newline);
968
0
      return 0;
969
0
    }
970
0
    handle_filter(mi, &newline);
971
0
    strbuf_release(&newline);
972
0
    if (mi->input_error)
973
0
      return 0;
974
975
    /* skip to the next boundary */
976
0
    if (!find_boundary(mi, line))
977
0
      return 0;
978
0
    goto again;
979
0
  }
980
981
  /* set some defaults */
982
0
  mi->transfer_encoding = TE_DONTCARE;
983
0
  strbuf_reset(&mi->charset);
984
985
  /* slurp in this section's info */
986
0
  while (read_one_header_line(line, mi->input))
987
0
    check_header(mi, line, mi->p_hdr_data, 0);
988
989
0
  strbuf_release(&newline);
990
  /* replenish line */
991
0
  if (strbuf_getline_lf(line, mi->input))
992
0
    return 0;
993
0
  strbuf_addch(line, '\n');
994
0
  return 1;
995
0
}
996
997
static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
998
         struct strbuf *prev)
999
0
{
1000
0
  size_t len = line->len;
1001
0
  const char *rest;
1002
1003
0
  if (!mi->format_flowed) {
1004
0
    if (len >= 2 &&
1005
0
        line->buf[len - 2] == '\r' &&
1006
0
        line->buf[len - 1] == '\n') {
1007
0
      mi->have_quoted_cr = 1;
1008
0
      if (mi->quoted_cr == quoted_cr_strip) {
1009
0
        strbuf_setlen(line, len - 2);
1010
0
        strbuf_addch(line, '\n');
1011
0
        len--;
1012
0
      }
1013
0
    }
1014
0
    handle_filter(mi, line);
1015
0
    return;
1016
0
  }
1017
1018
0
  if (line->buf[len - 1] == '\n') {
1019
0
    len--;
1020
0
    if (len && line->buf[len - 1] == '\r')
1021
0
      len--;
1022
0
  }
1023
1024
  /* Keep signature separator as-is. */
1025
0
  if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
1026
0
    if (prev->len) {
1027
0
      handle_filter(mi, prev);
1028
0
      strbuf_reset(prev);
1029
0
    }
1030
0
    handle_filter(mi, line);
1031
0
    return;
1032
0
  }
1033
1034
  /* Unstuff space-stuffed line. */
1035
0
  if (len && line->buf[0] == ' ') {
1036
0
    strbuf_remove(line, 0, 1);
1037
0
    len--;
1038
0
  }
1039
1040
  /* Save flowed line for later, but without the soft line break. */
1041
0
  if (len && line->buf[len - 1] == ' ') {
1042
0
    strbuf_add(prev, line->buf, len - !!mi->delsp);
1043
0
    return;
1044
0
  }
1045
1046
  /* Prepend any previous partial lines */
1047
0
  strbuf_insert(line, 0, prev->buf, prev->len);
1048
0
  strbuf_reset(prev);
1049
1050
0
  handle_filter(mi, line);
1051
0
}
1052
1053
static void summarize_quoted_cr(struct mailinfo *mi)
1054
0
{
1055
0
  if (mi->have_quoted_cr &&
1056
0
      mi->quoted_cr == quoted_cr_warn)
1057
0
    warning(_("quoted CRLF detected"));
1058
0
}
1059
1060
static void handle_body(struct mailinfo *mi, struct strbuf *line)
1061
0
{
1062
0
  struct strbuf prev = STRBUF_INIT;
1063
1064
  /* Skip up to the first boundary */
1065
0
  if (*(mi->content_top)) {
1066
0
    if (!find_boundary(mi, line))
1067
0
      goto handle_body_out;
1068
0
  }
1069
1070
0
  do {
1071
    /* process any boundary lines */
1072
0
    if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
1073
      /* flush any leftover */
1074
0
      if (prev.len) {
1075
0
        handle_filter(mi, &prev);
1076
0
        strbuf_reset(&prev);
1077
0
      }
1078
0
      summarize_quoted_cr(mi);
1079
0
      mi->have_quoted_cr = 0;
1080
0
      if (!handle_boundary(mi, line))
1081
0
        goto handle_body_out;
1082
0
    }
1083
1084
    /* Unwrap transfer encoding */
1085
0
    decode_transfer_encoding(mi, line);
1086
1087
0
    switch (mi->transfer_encoding) {
1088
0
    case TE_BASE64:
1089
0
    case TE_QP:
1090
0
    {
1091
0
      struct strbuf **lines, **it, *sb;
1092
1093
      /* Prepend any previous partial lines */
1094
0
      strbuf_insert(line, 0, prev.buf, prev.len);
1095
0
      strbuf_reset(&prev);
1096
1097
      /*
1098
       * This is a decoded line that may contain
1099
       * multiple new lines.  Pass only one chunk
1100
       * at a time to handle_filter()
1101
       */
1102
0
      lines = strbuf_split(line, '\n');
1103
0
      for (it = lines; (sb = *it); it++) {
1104
0
        if (!*(it + 1)) /* The last line */
1105
0
          if (sb->buf[sb->len - 1] != '\n') {
1106
            /* Partial line, save it for later. */
1107
0
            strbuf_addbuf(&prev, sb);
1108
0
            break;
1109
0
          }
1110
0
        handle_filter_flowed(mi, sb, &prev);
1111
0
      }
1112
      /*
1113
       * The partial chunk is saved in "prev" and will be
1114
       * appended by the next iteration of read_line_with_nul().
1115
       */
1116
0
      strbuf_list_free(lines);
1117
0
      break;
1118
0
    }
1119
0
    default:
1120
0
      handle_filter_flowed(mi, line, &prev);
1121
0
    }
1122
1123
0
    if (mi->input_error)
1124
0
      break;
1125
0
  } while (!strbuf_getwholeline(line, mi->input, '\n'));
1126
1127
0
  if (prev.len)
1128
0
    handle_filter(mi, &prev);
1129
0
  summarize_quoted_cr(mi);
1130
1131
0
  flush_inbody_header_accum(mi);
1132
1133
0
handle_body_out:
1134
0
  strbuf_release(&prev);
1135
0
}
1136
1137
static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
1138
0
{
1139
0
  const char *sp = data->buf;
1140
0
  while (1) {
1141
0
    char *ep = strchr(sp, '\n');
1142
0
    int len;
1143
0
    if (!ep)
1144
0
      len = strlen(sp);
1145
0
    else
1146
0
      len = ep - sp;
1147
0
    fprintf(fout, "%s: %.*s\n", hdr, len, sp);
1148
0
    if (!ep)
1149
0
      break;
1150
0
    sp = ep + 1;
1151
0
  }
1152
0
}
1153
1154
static void handle_info(struct mailinfo *mi)
1155
0
{
1156
0
  struct strbuf *hdr;
1157
0
  int i;
1158
1159
0
  for (i = 0; i < ARRAY_SIZE(header); i++) {
1160
    /* only print inbody headers if we output a patch file */
1161
0
    if (mi->patch_lines && mi->s_hdr_data[i])
1162
0
      hdr = mi->s_hdr_data[i];
1163
0
    else if (mi->p_hdr_data[i])
1164
0
      hdr = mi->p_hdr_data[i];
1165
0
    else
1166
0
      continue;
1167
1168
0
    if (memchr(hdr->buf, '\0', hdr->len)) {
1169
0
      error("a NUL byte in '%s' is not allowed.", header[i]);
1170
0
      mi->input_error = -1;
1171
0
    }
1172
1173
0
    if (!strcmp(header[i], "Subject")) {
1174
0
      if (!mi->keep_subject) {
1175
0
        cleanup_subject(mi, hdr);
1176
0
        cleanup_space(hdr);
1177
0
      }
1178
0
      output_header_lines(mi->output, "Subject", hdr);
1179
0
    } else if (!strcmp(header[i], "From")) {
1180
0
      cleanup_space(hdr);
1181
0
      handle_from(mi, hdr);
1182
0
      fprintf(mi->output, "Author: %s\n", mi->name.buf);
1183
0
      fprintf(mi->output, "Email: %s\n", mi->email.buf);
1184
0
    } else {
1185
0
      cleanup_space(hdr);
1186
0
      fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
1187
0
    }
1188
0
  }
1189
0
  fprintf(mi->output, "\n");
1190
0
}
1191
1192
int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
1193
0
{
1194
0
  FILE *cmitmsg;
1195
0
  int peek;
1196
0
  struct strbuf line = STRBUF_INIT;
1197
1198
0
  cmitmsg = fopen(msg, "w");
1199
0
  if (!cmitmsg) {
1200
0
    perror(msg);
1201
0
    return -1;
1202
0
  }
1203
0
  mi->patchfile = fopen(patch, "w");
1204
0
  if (!mi->patchfile) {
1205
0
    perror(patch);
1206
0
    fclose(cmitmsg);
1207
0
    return -1;
1208
0
  }
1209
1210
0
  mi->p_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->p_hdr_data)));
1211
0
  mi->s_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->s_hdr_data)));
1212
1213
0
  do {
1214
0
    peek = fgetc(mi->input);
1215
0
    if (peek == EOF) {
1216
0
      fclose(cmitmsg);
1217
0
      return error("empty patch: '%s'", patch);
1218
0
    }
1219
0
  } while (isspace(peek));
1220
0
  ungetc(peek, mi->input);
1221
1222
  /* process the email header */
1223
0
  while (read_one_header_line(&line, mi->input))
1224
0
    check_header(mi, &line, mi->p_hdr_data, 1);
1225
1226
0
  handle_body(mi, &line);
1227
0
  fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
1228
0
  fclose(cmitmsg);
1229
0
  fclose(mi->patchfile);
1230
1231
0
  handle_info(mi);
1232
0
  strbuf_release(&line);
1233
0
  return mi->input_error;
1234
0
}
1235
1236
int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
1237
0
{
1238
0
  if (!strcmp(actionstr, "nowarn"))
1239
0
    *action = quoted_cr_nowarn;
1240
0
  else if (!strcmp(actionstr, "warn"))
1241
0
    *action = quoted_cr_warn;
1242
0
  else if (!strcmp(actionstr, "strip"))
1243
0
    *action = quoted_cr_strip;
1244
0
  else
1245
0
    return -1;
1246
0
  return 0;
1247
0
}
1248
1249
static int git_mailinfo_config(const char *var, const char *value,
1250
             const struct config_context *ctx, void *mi_)
1251
0
{
1252
0
  struct mailinfo *mi = mi_;
1253
1254
0
  if (!starts_with(var, "mailinfo."))
1255
0
    return git_default_config(var, value, ctx, NULL);
1256
0
  if (!strcmp(var, "mailinfo.scissors")) {
1257
0
    mi->use_scissors = git_config_bool(var, value);
1258
0
    return 0;
1259
0
  }
1260
0
  if (!strcmp(var, "mailinfo.quotedcr")) {
1261
0
    if (!value)
1262
0
      return config_error_nonbool(var);
1263
0
    if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
1264
0
      return error(_("bad action '%s' for '%s'"), value, var);
1265
0
    return 0;
1266
0
  }
1267
  /* perhaps others here */
1268
0
  return 0;
1269
0
}
1270
1271
void setup_mailinfo(struct mailinfo *mi)
1272
0
{
1273
0
  memset(mi, 0, sizeof(*mi));
1274
0
  strbuf_init(&mi->name, 0);
1275
0
  strbuf_init(&mi->email, 0);
1276
0
  strbuf_init(&mi->charset, 0);
1277
0
  strbuf_init(&mi->log_message, 0);
1278
0
  strbuf_init(&mi->inbody_header_accum, 0);
1279
0
  mi->quoted_cr = quoted_cr_warn;
1280
0
  mi->header_stage = 1;
1281
0
  mi->use_inbody_headers = 1;
1282
0
  mi->content_top = mi->content;
1283
0
  git_config(git_mailinfo_config, mi);
1284
0
}
1285
1286
void clear_mailinfo(struct mailinfo *mi)
1287
0
{
1288
0
  strbuf_release(&mi->name);
1289
0
  strbuf_release(&mi->email);
1290
0
  strbuf_release(&mi->charset);
1291
0
  strbuf_release(&mi->inbody_header_accum);
1292
0
  free(mi->message_id);
1293
1294
0
  for (size_t i = 0; i < ARRAY_SIZE(header); i++) {
1295
0
    if (!mi->p_hdr_data[i])
1296
0
      continue;
1297
0
    strbuf_release(mi->p_hdr_data[i]);
1298
0
    free(mi->p_hdr_data[i]);
1299
0
  }
1300
0
  free(mi->p_hdr_data);
1301
1302
0
  for (size_t i = 0; i < ARRAY_SIZE(header); i++) {
1303
0
    if (!mi->s_hdr_data[i])
1304
0
      continue;
1305
0
    strbuf_release(mi->s_hdr_data[i]);
1306
0
    free(mi->s_hdr_data[i]);
1307
0
  }
1308
0
  free(mi->s_hdr_data);
1309
1310
0
  while (mi->content < mi->content_top) {
1311
0
    free(*(mi->content_top));
1312
0
    mi->content_top--;
1313
0
  }
1314
1315
0
  strbuf_release(&mi->log_message);
1316
0
}