Coverage Report

Created: 2024-09-08 06:23

/src/git/ws.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Whitespace rules
3
 *
4
 * Copyright (c) 2007 Junio C Hamano
5
 */
6
#include "git-compat-util.h"
7
#include "attr.h"
8
#include "strbuf.h"
9
#include "ws.h"
10
11
/*
 * Rule set that whitespace_rule() returns as-is when the "whitespace"
 * attribute is unset for a path; its tab width is also reused when the
 * attribute is plainly true or false.  Starts out as WS_DEFAULT_RULE.
 */
unsigned whitespace_rule_cfg = WS_DEFAULT_RULE;
12
13
static struct whitespace_rule {
14
  const char *rule_name;
15
  unsigned rule_bits;
16
  unsigned loosens_error:1,
17
    exclude_default:1;
18
} whitespace_rule_names[] = {
19
  { "trailing-space", WS_TRAILING_SPACE, 0 },
20
  { "space-before-tab", WS_SPACE_BEFORE_TAB, 0 },
21
  { "indent-with-non-tab", WS_INDENT_WITH_NON_TAB, 0 },
22
  { "cr-at-eol", WS_CR_AT_EOL, 1 },
23
  { "blank-at-eol", WS_BLANK_AT_EOL, 0 },
24
  { "blank-at-eof", WS_BLANK_AT_EOF, 0 },
25
  { "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 },
26
};
27
28
unsigned parse_whitespace_rule(const char *string)
29
0
{
30
0
  unsigned rule = WS_DEFAULT_RULE;
31
32
0
  while (string) {
33
0
    int i;
34
0
    size_t len;
35
0
    const char *ep;
36
0
    const char *arg;
37
0
    int negated = 0;
38
39
0
    string = string + strspn(string, ", \t\n\r");
40
0
    ep = strchrnul(string, ',');
41
0
    len = ep - string;
42
43
0
    if (*string == '-') {
44
0
      negated = 1;
45
0
      string++;
46
0
      len--;
47
0
    }
48
0
    if (!len)
49
0
      break;
50
0
    for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++) {
51
0
      if (strncmp(whitespace_rule_names[i].rule_name,
52
0
            string, len))
53
0
        continue;
54
0
      if (negated)
55
0
        rule &= ~whitespace_rule_names[i].rule_bits;
56
0
      else
57
0
        rule |= whitespace_rule_names[i].rule_bits;
58
0
      break;
59
0
    }
60
0
    if (skip_prefix(string, "tabwidth=", &arg)) {
61
0
      unsigned tabwidth = atoi(arg);
62
0
      if (0 < tabwidth && tabwidth < 0100) {
63
0
        rule &= ~WS_TAB_WIDTH_MASK;
64
0
        rule |= tabwidth;
65
0
      }
66
0
      else
67
0
        warning("tabwidth %.*s out of range",
68
0
          (int)(ep - arg), arg);
69
0
    }
70
0
    string = ep;
71
0
  }
72
73
0
  if (rule & WS_TAB_IN_INDENT && rule & WS_INDENT_WITH_NON_TAB)
74
0
    die("cannot enforce both tab-in-indent and indent-with-non-tab");
75
0
  return rule;
76
0
}
77
78
unsigned whitespace_rule(struct index_state *istate, const char *pathname)
79
0
{
80
0
  static struct attr_check *attr_whitespace_rule;
81
0
  const char *value;
82
83
0
  if (!attr_whitespace_rule)
84
0
    attr_whitespace_rule = attr_check_initl("whitespace", NULL);
85
86
0
  git_check_attr(istate, pathname, attr_whitespace_rule);
87
0
  value = attr_whitespace_rule->items[0].value;
88
0
  if (ATTR_TRUE(value)) {
89
    /* true (whitespace) */
90
0
    unsigned all_rule = ws_tab_width(whitespace_rule_cfg);
91
0
    int i;
92
0
    for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++)
93
0
      if (!whitespace_rule_names[i].loosens_error &&
94
0
          !whitespace_rule_names[i].exclude_default)
95
0
        all_rule |= whitespace_rule_names[i].rule_bits;
96
0
    return all_rule;
97
0
  } else if (ATTR_FALSE(value)) {
98
    /* false (-whitespace) */
99
0
    return ws_tab_width(whitespace_rule_cfg);
100
0
  } else if (ATTR_UNSET(value)) {
101
    /* reset to default (!whitespace) */
102
0
    return whitespace_rule_cfg;
103
0
  } else {
104
    /* string */
105
0
    return parse_whitespace_rule(value);
106
0
  }
107
0
}
108
109
/* The returned string should be freed by the caller. */
110
char *whitespace_error_string(unsigned ws)
111
0
{
112
0
  struct strbuf err = STRBUF_INIT;
113
0
  if ((ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE)
114
0
    strbuf_addstr(&err, "trailing whitespace");
115
0
  else {
116
0
    if (ws & WS_BLANK_AT_EOL)
117
0
      strbuf_addstr(&err, "trailing whitespace");
118
0
    if (ws & WS_BLANK_AT_EOF) {
119
0
      if (err.len)
120
0
        strbuf_addstr(&err, ", ");
121
0
      strbuf_addstr(&err, "new blank line at EOF");
122
0
    }
123
0
  }
124
0
  if (ws & WS_SPACE_BEFORE_TAB) {
125
0
    if (err.len)
126
0
      strbuf_addstr(&err, ", ");
127
0
    strbuf_addstr(&err, "space before tab in indent");
128
0
  }
129
0
  if (ws & WS_INDENT_WITH_NON_TAB) {
130
0
    if (err.len)
131
0
      strbuf_addstr(&err, ", ");
132
0
    strbuf_addstr(&err, "indent with spaces");
133
0
  }
134
0
  if (ws & WS_TAB_IN_INDENT) {
135
0
    if (err.len)
136
0
      strbuf_addstr(&err, ", ");
137
0
    strbuf_addstr(&err, "tab in indent");
138
0
  }
139
0
  return strbuf_detach(&err, NULL);
140
0
}
141
142
/* If stream is non-NULL, emits the line after checking. */
143
static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
144
        FILE *stream, const char *set,
145
        const char *reset, const char *ws)
146
0
{
147
0
  unsigned result = 0;
148
0
  int written = 0;
149
0
  int trailing_whitespace = -1;
150
0
  int trailing_newline = 0;
151
0
  int trailing_carriage_return = 0;
152
0
  int i;
153
154
  /* Logic is simpler if we temporarily ignore the trailing newline. */
155
0
  if (len > 0 && line[len - 1] == '\n') {
156
0
    trailing_newline = 1;
157
0
    len--;
158
0
  }
159
0
  if ((ws_rule & WS_CR_AT_EOL) &&
160
0
      len > 0 && line[len - 1] == '\r') {
161
0
    trailing_carriage_return = 1;
162
0
    len--;
163
0
  }
164
165
  /* Check for trailing whitespace. */
166
0
  if (ws_rule & WS_BLANK_AT_EOL) {
167
0
    for (i = len - 1; i >= 0; i--) {
168
0
      if (isspace(line[i])) {
169
0
        trailing_whitespace = i;
170
0
        result |= WS_BLANK_AT_EOL;
171
0
      }
172
0
      else
173
0
        break;
174
0
    }
175
0
  }
176
177
0
  if (trailing_whitespace == -1)
178
0
    trailing_whitespace = len;
179
180
  /* Check indentation */
181
0
  for (i = 0; i < trailing_whitespace; i++) {
182
0
    if (line[i] == ' ')
183
0
      continue;
184
0
    if (line[i] != '\t')
185
0
      break;
186
0
    if ((ws_rule & WS_SPACE_BEFORE_TAB) && written < i) {
187
0
      result |= WS_SPACE_BEFORE_TAB;
188
0
      if (stream) {
189
0
        fputs(ws, stream);
190
0
        fwrite(line + written, i - written, 1, stream);
191
0
        fputs(reset, stream);
192
0
        fwrite(line + i, 1, 1, stream);
193
0
      }
194
0
    } else if (ws_rule & WS_TAB_IN_INDENT) {
195
0
      result |= WS_TAB_IN_INDENT;
196
0
      if (stream) {
197
0
        fwrite(line + written, i - written, 1, stream);
198
0
        fputs(ws, stream);
199
0
        fwrite(line + i, 1, 1, stream);
200
0
        fputs(reset, stream);
201
0
      }
202
0
    } else if (stream) {
203
0
      fwrite(line + written, i - written + 1, 1, stream);
204
0
    }
205
0
    written = i + 1;
206
0
  }
207
208
  /* Check for indent using non-tab. */
209
0
  if ((ws_rule & WS_INDENT_WITH_NON_TAB) && i - written >= ws_tab_width(ws_rule)) {
210
0
    result |= WS_INDENT_WITH_NON_TAB;
211
0
    if (stream) {
212
0
      fputs(ws, stream);
213
0
      fwrite(line + written, i - written, 1, stream);
214
0
      fputs(reset, stream);
215
0
    }
216
0
    written = i;
217
0
  }
218
219
0
  if (stream) {
220
    /*
221
     * Now the rest of the line starts at "written".
222
     * The non-highlighted part ends at "trailing_whitespace".
223
     */
224
225
    /* Emit non-highlighted (middle) segment. */
226
0
    if (trailing_whitespace - written > 0) {
227
0
      fputs(set, stream);
228
0
      fwrite(line + written,
229
0
          trailing_whitespace - written, 1, stream);
230
0
      fputs(reset, stream);
231
0
    }
232
233
    /* Highlight errors in trailing whitespace. */
234
0
    if (trailing_whitespace != len) {
235
0
      fputs(ws, stream);
236
0
      fwrite(line + trailing_whitespace,
237
0
          len - trailing_whitespace, 1, stream);
238
0
      fputs(reset, stream);
239
0
    }
240
0
    if (trailing_carriage_return)
241
0
      fputc('\r', stream);
242
0
    if (trailing_newline)
243
0
      fputc('\n', stream);
244
0
  }
245
0
  return result;
246
0
}
247
248
/*
 * Check the line against ws_rule and write it to stream with the
 * set/reset/ws strings applied by ws_check_emit_1().  The violation
 * bits are not reported back to the caller.
 */
void ws_check_emit(const char *line, int len, unsigned ws_rule,
       FILE *stream, const char *set,
       const char *reset, const char *ws)
{
  /* Only the emitted output matters here; the result is dropped. */
  ws_check_emit_1(line, len, ws_rule, stream, set, reset, ws);
}
254
255
/*
 * Check the line against ws_rule without emitting anything and return
 * the violation bits computed by ws_check_emit_1().
 */
unsigned ws_check(const char *line, int len, unsigned ws_rule)
{
  unsigned violations;

  /* A NULL stream puts the checker in "check only" mode. */
  violations = ws_check_emit_1(line, len, ws_rule, NULL, NULL, NULL, NULL);
  return violations;
}
259
260
/*
 * Return 1 when the first len bytes of line are all whitespace (this
 * includes len <= 0), and 0 as soon as a non-whitespace byte is seen.
 */
int ws_blank_line(const char *line, int len)
{
  int i;

  /*
   * We _might_ want to treat CR differently from other
   * whitespace characters when ws_rule has WS_CR_AT_EOL, but
   * for now we just use this stupid definition.
   */
  for (i = 0; i < len; i++) {
    /*
     * Go through unsigned char: handing a negative char value
     * to the standard isspace() is undefined behaviour, and
     * the cast is harmless if isspace is a project macro.
     */
    if (!isspace((unsigned char)line[i]))
      return 0;
  }
  return 1;
}
274
275
/* Copy the line onto the end of the strbuf while fixing whitespaces */
276
void ws_fix_copy(struct strbuf *dst, const char *src, int len, unsigned ws_rule, int *error_count)
277
0
{
278
  /*
279
   * len is number of bytes to be copied from src, starting
280
   * at src.  Typically src[len-1] is '\n', unless this is
281
   * the incomplete last line.
282
   */
283
0
  int i;
284
0
  int add_nl_to_tail = 0;
285
0
  int add_cr_to_tail = 0;
286
0
  int fixed = 0;
287
0
  int last_tab_in_indent = -1;
288
0
  int last_space_in_indent = -1;
289
0
  int need_fix_leading_space = 0;
290
291
  /*
292
   * Strip trailing whitespace
293
   */
294
0
  if (ws_rule & WS_BLANK_AT_EOL) {
295
0
    if (0 < len && src[len - 1] == '\n') {
296
0
      add_nl_to_tail = 1;
297
0
      len--;
298
0
      if (0 < len && src[len - 1] == '\r') {
299
0
        add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL);
300
0
        len--;
301
0
      }
302
0
    }
303
0
    if (0 < len && isspace(src[len - 1])) {
304
0
      while (0 < len && isspace(src[len-1]))
305
0
        len--;
306
0
      fixed = 1;
307
0
    }
308
0
  }
309
310
  /*
311
   * Check leading whitespaces (indent)
312
   */
313
0
  for (i = 0; i < len; i++) {
314
0
    char ch = src[i];
315
0
    if (ch == '\t') {
316
0
      last_tab_in_indent = i;
317
0
      if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
318
0
          0 <= last_space_in_indent)
319
0
          need_fix_leading_space = 1;
320
0
    } else if (ch == ' ') {
321
0
      last_space_in_indent = i;
322
0
      if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
323
0
          ws_tab_width(ws_rule) <= i - last_tab_in_indent)
324
0
        need_fix_leading_space = 1;
325
0
    } else
326
0
      break;
327
0
  }
328
329
0
  if (need_fix_leading_space) {
330
    /* Process indent ourselves */
331
0
    int consecutive_spaces = 0;
332
0
    int last = last_tab_in_indent + 1;
333
334
0
    if (ws_rule & WS_INDENT_WITH_NON_TAB) {
335
      /* have "last" point at one past the indent */
336
0
      if (last_tab_in_indent < last_space_in_indent)
337
0
        last = last_space_in_indent + 1;
338
0
      else
339
0
        last = last_tab_in_indent + 1;
340
0
    }
341
342
    /*
343
     * between src[0..last-1], strip the funny spaces,
344
     * updating them to tab as needed.
345
     */
346
0
    for (i = 0; i < last; i++) {
347
0
      char ch = src[i];
348
0
      if (ch != ' ') {
349
0
        consecutive_spaces = 0;
350
0
        strbuf_addch(dst, ch);
351
0
      } else {
352
0
        consecutive_spaces++;
353
0
        if (consecutive_spaces == ws_tab_width(ws_rule)) {
354
0
          strbuf_addch(dst, '\t');
355
0
          consecutive_spaces = 0;
356
0
        }
357
0
      }
358
0
    }
359
0
    while (0 < consecutive_spaces--)
360
0
      strbuf_addch(dst, ' ');
361
0
    len -= last;
362
0
    src += last;
363
0
    fixed = 1;
364
0
  } else if ((ws_rule & WS_TAB_IN_INDENT) && last_tab_in_indent >= 0) {
365
    /* Expand tabs into spaces */
366
0
    int start = dst->len;
367
0
    int last = last_tab_in_indent + 1;
368
0
    for (i = 0; i < last; i++) {
369
0
      if (src[i] == '\t')
370
0
        do {
371
0
          strbuf_addch(dst, ' ');
372
0
        } while ((dst->len - start) % ws_tab_width(ws_rule));
373
0
      else
374
0
        strbuf_addch(dst, src[i]);
375
0
    }
376
0
    len -= last;
377
0
    src += last;
378
0
    fixed = 1;
379
0
  }
380
381
0
  strbuf_add(dst, src, len);
382
0
  if (add_cr_to_tail)
383
0
    strbuf_addch(dst, '\r');
384
0
  if (add_nl_to_tail)
385
0
    strbuf_addch(dst, '\n');
386
0
  if (fixed && error_count)
387
0
    (*error_count)++;
388
0
}