Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Whitespace rules  | 
3  |  |  *  | 
4  |  |  * Copyright (c) 2007 Junio C Hamano  | 
5  |  |  */  | 
6  |  | #include "git-compat-util.h"  | 
7  |  | #include "attr.h"  | 
8  |  | #include "strbuf.h"  | 
9  |  | #include "ws.h"  | 
10  |  |  | 
11  |  | unsigned whitespace_rule_cfg = WS_DEFAULT_RULE;  | 
12  |  |  | 
13  |  | static struct whitespace_rule { | 
14  |  |   const char *rule_name;  | 
15  |  |   unsigned rule_bits;  | 
16  |  |   unsigned loosens_error:1,  | 
17  |  |     exclude_default:1;  | 
18  |  | } whitespace_rule_names[] = { | 
19  |  |   { "trailing-space", WS_TRAILING_SPACE, 0 }, | 
20  |  |   { "space-before-tab", WS_SPACE_BEFORE_TAB, 0 }, | 
21  |  |   { "indent-with-non-tab", WS_INDENT_WITH_NON_TAB, 0 }, | 
22  |  |   { "cr-at-eol", WS_CR_AT_EOL, 1 }, | 
23  |  |   { "blank-at-eol", WS_BLANK_AT_EOL, 0 }, | 
24  |  |   { "blank-at-eof", WS_BLANK_AT_EOF, 0 }, | 
25  |  |   { "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 }, | 
26  |  | };  | 
27  |  |  | 
28  |  | unsigned parse_whitespace_rule(const char *string)  | 
29  | 0  | { | 
30  | 0  |   unsigned rule = WS_DEFAULT_RULE;  | 
31  |  | 
  | 
32  | 0  |   while (string) { | 
33  | 0  |     int i;  | 
34  | 0  |     size_t len;  | 
35  | 0  |     const char *ep;  | 
36  | 0  |     const char *arg;  | 
37  | 0  |     int negated = 0;  | 
38  |  | 
  | 
39  | 0  |     string = string + strspn(string, ", \t\n\r");  | 
40  | 0  |     ep = strchrnul(string, ',');  | 
41  | 0  |     len = ep - string;  | 
42  |  | 
  | 
43  | 0  |     if (*string == '-') { | 
44  | 0  |       negated = 1;  | 
45  | 0  |       string++;  | 
46  | 0  |       len--;  | 
47  | 0  |     }  | 
48  | 0  |     if (!len)  | 
49  | 0  |       break;  | 
50  | 0  |     for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++) { | 
51  | 0  |       if (strncmp(whitespace_rule_names[i].rule_name,  | 
52  | 0  |             string, len))  | 
53  | 0  |         continue;  | 
54  | 0  |       if (negated)  | 
55  | 0  |         rule &= ~whitespace_rule_names[i].rule_bits;  | 
56  | 0  |       else  | 
57  | 0  |         rule |= whitespace_rule_names[i].rule_bits;  | 
58  | 0  |       break;  | 
59  | 0  |     }  | 
60  | 0  |     if (skip_prefix(string, "tabwidth=", &arg)) { | 
61  | 0  |       unsigned tabwidth = atoi(arg);  | 
62  | 0  |       if (0 < tabwidth && tabwidth < 0100) { | 
63  | 0  |         rule &= ~WS_TAB_WIDTH_MASK;  | 
64  | 0  |         rule |= tabwidth;  | 
65  | 0  |       }  | 
66  | 0  |       else  | 
67  | 0  |         warning("tabwidth %.*s out of range", | 
68  | 0  |           (int)(ep - arg), arg);  | 
69  | 0  |     }  | 
70  | 0  |     string = ep;  | 
71  | 0  |   }  | 
72  |  | 
  | 
73  | 0  |   if (rule & WS_TAB_IN_INDENT && rule & WS_INDENT_WITH_NON_TAB)  | 
74  | 0  |     die("cannot enforce both tab-in-indent and indent-with-non-tab"); | 
75  | 0  |   return rule;  | 
76  | 0  | }  | 
77  |  |  | 
78  |  | unsigned whitespace_rule(struct index_state *istate, const char *pathname)  | 
79  | 0  | { | 
80  | 0  |   static struct attr_check *attr_whitespace_rule;  | 
81  | 0  |   const char *value;  | 
82  |  | 
  | 
83  | 0  |   if (!attr_whitespace_rule)  | 
84  | 0  |     attr_whitespace_rule = attr_check_initl("whitespace", NULL); | 
85  |  | 
  | 
86  | 0  |   git_check_attr(istate, pathname, attr_whitespace_rule);  | 
87  | 0  |   value = attr_whitespace_rule->items[0].value;  | 
88  | 0  |   if (ATTR_TRUE(value)) { | 
89  |  |     /* true (whitespace) */  | 
90  | 0  |     unsigned all_rule = ws_tab_width(whitespace_rule_cfg);  | 
91  | 0  |     int i;  | 
92  | 0  |     for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++)  | 
93  | 0  |       if (!whitespace_rule_names[i].loosens_error &&  | 
94  | 0  |           !whitespace_rule_names[i].exclude_default)  | 
95  | 0  |         all_rule |= whitespace_rule_names[i].rule_bits;  | 
96  | 0  |     return all_rule;  | 
97  | 0  |   } else if (ATTR_FALSE(value)) { | 
98  |  |     /* false (-whitespace) */  | 
99  | 0  |     return ws_tab_width(whitespace_rule_cfg);  | 
100  | 0  |   } else if (ATTR_UNSET(value)) { | 
101  |  |     /* reset to default (!whitespace) */  | 
102  | 0  |     return whitespace_rule_cfg;  | 
103  | 0  |   } else { | 
104  |  |     /* string */  | 
105  | 0  |     return parse_whitespace_rule(value);  | 
106  | 0  |   }  | 
107  | 0  | }  | 
108  |  |  | 
109  |  | /* The returned string should be freed by the caller. */  | 
110  |  | char *whitespace_error_string(unsigned ws)  | 
111  | 0  | { | 
112  | 0  |   struct strbuf err = STRBUF_INIT;  | 
113  | 0  |   if ((ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE)  | 
114  | 0  |     strbuf_addstr(&err, "trailing whitespace");  | 
115  | 0  |   else { | 
116  | 0  |     if (ws & WS_BLANK_AT_EOL)  | 
117  | 0  |       strbuf_addstr(&err, "trailing whitespace");  | 
118  | 0  |     if (ws & WS_BLANK_AT_EOF) { | 
119  | 0  |       if (err.len)  | 
120  | 0  |         strbuf_addstr(&err, ", ");  | 
121  | 0  |       strbuf_addstr(&err, "new blank line at EOF");  | 
122  | 0  |     }  | 
123  | 0  |   }  | 
124  | 0  |   if (ws & WS_SPACE_BEFORE_TAB) { | 
125  | 0  |     if (err.len)  | 
126  | 0  |       strbuf_addstr(&err, ", ");  | 
127  | 0  |     strbuf_addstr(&err, "space before tab in indent");  | 
128  | 0  |   }  | 
129  | 0  |   if (ws & WS_INDENT_WITH_NON_TAB) { | 
130  | 0  |     if (err.len)  | 
131  | 0  |       strbuf_addstr(&err, ", ");  | 
132  | 0  |     strbuf_addstr(&err, "indent with spaces");  | 
133  | 0  |   }  | 
134  | 0  |   if (ws & WS_TAB_IN_INDENT) { | 
135  | 0  |     if (err.len)  | 
136  | 0  |       strbuf_addstr(&err, ", ");  | 
137  | 0  |     strbuf_addstr(&err, "tab in indent");  | 
138  | 0  |   }  | 
139  | 0  |   return strbuf_detach(&err, NULL);  | 
140  | 0  | }  | 
141  |  |  | 
142  |  | /* If stream is non-NULL, emits the line after checking. */  | 
143  |  | static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,  | 
144  |  |         FILE *stream, const char *set,  | 
145  |  |         const char *reset, const char *ws)  | 
146  | 0  | { | 
147  | 0  |   unsigned result = 0;  | 
148  | 0  |   int written = 0;  | 
149  | 0  |   int trailing_whitespace = -1;  | 
150  | 0  |   int trailing_newline = 0;  | 
151  | 0  |   int trailing_carriage_return = 0;  | 
152  | 0  |   int i;  | 
153  |  |  | 
154  |  |   /* Logic is simpler if we temporarily ignore the trailing newline. */  | 
155  | 0  |   if (len > 0 && line[len - 1] == '\n') { | 
156  | 0  |     trailing_newline = 1;  | 
157  | 0  |     len--;  | 
158  | 0  |   }  | 
159  | 0  |   if ((ws_rule & WS_CR_AT_EOL) &&  | 
160  | 0  |       len > 0 && line[len - 1] == '\r') { | 
161  | 0  |     trailing_carriage_return = 1;  | 
162  | 0  |     len--;  | 
163  | 0  |   }  | 
164  |  |  | 
165  |  |   /* Check for trailing whitespace. */  | 
166  | 0  |   if (ws_rule & WS_BLANK_AT_EOL) { | 
167  | 0  |     for (i = len - 1; i >= 0; i--) { | 
168  | 0  |       if (isspace(line[i])) { | 
169  | 0  |         trailing_whitespace = i;  | 
170  | 0  |         result |= WS_BLANK_AT_EOL;  | 
171  | 0  |       }  | 
172  | 0  |       else  | 
173  | 0  |         break;  | 
174  | 0  |     }  | 
175  | 0  |   }  | 
176  |  | 
  | 
177  | 0  |   if (trailing_whitespace == -1)  | 
178  | 0  |     trailing_whitespace = len;  | 
179  |  |  | 
180  |  |   /* Check indentation */  | 
181  | 0  |   for (i = 0; i < trailing_whitespace; i++) { | 
182  | 0  |     if (line[i] == ' ')  | 
183  | 0  |       continue;  | 
184  | 0  |     if (line[i] != '\t')  | 
185  | 0  |       break;  | 
186  | 0  |     if ((ws_rule & WS_SPACE_BEFORE_TAB) && written < i) { | 
187  | 0  |       result |= WS_SPACE_BEFORE_TAB;  | 
188  | 0  |       if (stream) { | 
189  | 0  |         fputs(ws, stream);  | 
190  | 0  |         fwrite(line + written, i - written, 1, stream);  | 
191  | 0  |         fputs(reset, stream);  | 
192  | 0  |         fwrite(line + i, 1, 1, stream);  | 
193  | 0  |       }  | 
194  | 0  |     } else if (ws_rule & WS_TAB_IN_INDENT) { | 
195  | 0  |       result |= WS_TAB_IN_INDENT;  | 
196  | 0  |       if (stream) { | 
197  | 0  |         fwrite(line + written, i - written, 1, stream);  | 
198  | 0  |         fputs(ws, stream);  | 
199  | 0  |         fwrite(line + i, 1, 1, stream);  | 
200  | 0  |         fputs(reset, stream);  | 
201  | 0  |       }  | 
202  | 0  |     } else if (stream) { | 
203  | 0  |       fwrite(line + written, i - written + 1, 1, stream);  | 
204  | 0  |     }  | 
205  | 0  |     written = i + 1;  | 
206  | 0  |   }  | 
207  |  |  | 
208  |  |   /* Check for indent using non-tab. */  | 
209  | 0  |   if ((ws_rule & WS_INDENT_WITH_NON_TAB) && i - written >= ws_tab_width(ws_rule)) { | 
210  | 0  |     result |= WS_INDENT_WITH_NON_TAB;  | 
211  | 0  |     if (stream) { | 
212  | 0  |       fputs(ws, stream);  | 
213  | 0  |       fwrite(line + written, i - written, 1, stream);  | 
214  | 0  |       fputs(reset, stream);  | 
215  | 0  |     }  | 
216  | 0  |     written = i;  | 
217  | 0  |   }  | 
218  |  | 
  | 
219  | 0  |   if (stream) { | 
220  |  |     /*  | 
221  |  |      * Now the rest of the line starts at "written".  | 
222  |  |      * The non-highlighted part ends at "trailing_whitespace".  | 
223  |  |      */  | 
224  |  |  | 
225  |  |     /* Emit non-highlighted (middle) segment. */  | 
226  | 0  |     if (trailing_whitespace - written > 0) { | 
227  | 0  |       fputs(set, stream);  | 
228  | 0  |       fwrite(line + written,  | 
229  | 0  |           trailing_whitespace - written, 1, stream);  | 
230  | 0  |       fputs(reset, stream);  | 
231  | 0  |     }  | 
232  |  |  | 
233  |  |     /* Highlight errors in trailing whitespace. */  | 
234  | 0  |     if (trailing_whitespace != len) { | 
235  | 0  |       fputs(ws, stream);  | 
236  | 0  |       fwrite(line + trailing_whitespace,  | 
237  | 0  |           len - trailing_whitespace, 1, stream);  | 
238  | 0  |       fputs(reset, stream);  | 
239  | 0  |     }  | 
240  | 0  |     if (trailing_carriage_return)  | 
241  | 0  |       fputc('\r', stream); | 
242  | 0  |     if (trailing_newline)  | 
243  | 0  |       fputc('\n', stream); | 
244  | 0  |   }  | 
245  | 0  |   return result;  | 
246  | 0  | }  | 
247  |  |  | 
/*
 * Check LINE against WS_RULE and emit it to STREAM with the given color
 * strings; the set of detected errors is discarded.
 */
void ws_check_emit(const char *line, int len, unsigned ws_rule,
		   FILE *stream, const char *set,
		   const char *reset, const char *ws)
{
	ws_check_emit_1(line, len, ws_rule, stream, set, reset, ws);
}
254  |  |  | 
/*
 * Check LINE against WS_RULE without emitting anything; returns the WS_*
 * bits of the problems found.
 */
unsigned ws_check(const char *line, int len, unsigned ws_rule)
{
	unsigned found;

	/* No stream: the checker only reports, so no color strings are needed. */
	found = ws_check_emit_1(line, len, ws_rule, NULL, NULL, NULL, NULL);
	return found;
}
259  |  |  | 
/*
 * Return 1 when the first LEN bytes of LINE are all whitespace (which
 * includes the case of LEN <= 0), and 0 otherwise.
 */
int ws_blank_line(const char *line, int len)
{
	int k;

	/*
	 * CR is not treated differently from any other whitespace
	 * character here, even though a WS_CR_AT_EOL rule might argue
	 * for it; this deliberately stays the simple-minded definition.
	 */
	for (k = 0; k < len; k++)
		if (!isspace(line[k]))
			return 0;
	return 1;
}
274  |  |  | 
275  |  | /* Copy the line onto the end of the strbuf while fixing whitespaces */  | 
276  |  | void ws_fix_copy(struct strbuf *dst, const char *src, int len, unsigned ws_rule, int *error_count)  | 
277  | 0  | { | 
278  |  |   /*  | 
279  |  |    * len is number of bytes to be copied from src, starting  | 
280  |  |    * at src.  Typically src[len-1] is '\n', unless this is  | 
281  |  |    * the incomplete last line.  | 
282  |  |    */  | 
283  | 0  |   int i;  | 
284  | 0  |   int add_nl_to_tail = 0;  | 
285  | 0  |   int add_cr_to_tail = 0;  | 
286  | 0  |   int fixed = 0;  | 
287  | 0  |   int last_tab_in_indent = -1;  | 
288  | 0  |   int last_space_in_indent = -1;  | 
289  | 0  |   int need_fix_leading_space = 0;  | 
290  |  |  | 
291  |  |   /*  | 
292  |  |    * Strip trailing whitespace  | 
293  |  |    */  | 
294  | 0  |   if (ws_rule & WS_BLANK_AT_EOL) { | 
295  | 0  |     if (0 < len && src[len - 1] == '\n') { | 
296  | 0  |       add_nl_to_tail = 1;  | 
297  | 0  |       len--;  | 
298  | 0  |       if (0 < len && src[len - 1] == '\r') { | 
299  | 0  |         add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL);  | 
300  | 0  |         len--;  | 
301  | 0  |       }  | 
302  | 0  |     }  | 
303  | 0  |     if (0 < len && isspace(src[len - 1])) { | 
304  | 0  |       while (0 < len && isspace(src[len-1]))  | 
305  | 0  |         len--;  | 
306  | 0  |       fixed = 1;  | 
307  | 0  |     }  | 
308  | 0  |   }  | 
309  |  |  | 
310  |  |   /*  | 
311  |  |    * Check leading whitespaces (indent)  | 
312  |  |    */  | 
313  | 0  |   for (i = 0; i < len; i++) { | 
314  | 0  |     char ch = src[i];  | 
315  | 0  |     if (ch == '\t') { | 
316  | 0  |       last_tab_in_indent = i;  | 
317  | 0  |       if ((ws_rule & WS_SPACE_BEFORE_TAB) &&  | 
318  | 0  |           0 <= last_space_in_indent)  | 
319  | 0  |           need_fix_leading_space = 1;  | 
320  | 0  |     } else if (ch == ' ') { | 
321  | 0  |       last_space_in_indent = i;  | 
322  | 0  |       if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&  | 
323  | 0  |           ws_tab_width(ws_rule) <= i - last_tab_in_indent)  | 
324  | 0  |         need_fix_leading_space = 1;  | 
325  | 0  |     } else  | 
326  | 0  |       break;  | 
327  | 0  |   }  | 
328  |  | 
  | 
329  | 0  |   if (need_fix_leading_space) { | 
330  |  |     /* Process indent ourselves */  | 
331  | 0  |     int consecutive_spaces = 0;  | 
332  | 0  |     int last = last_tab_in_indent + 1;  | 
333  |  | 
  | 
334  | 0  |     if (ws_rule & WS_INDENT_WITH_NON_TAB) { | 
335  |  |       /* have "last" point at one past the indent */  | 
336  | 0  |       if (last_tab_in_indent < last_space_in_indent)  | 
337  | 0  |         last = last_space_in_indent + 1;  | 
338  | 0  |       else  | 
339  | 0  |         last = last_tab_in_indent + 1;  | 
340  | 0  |     }  | 
341  |  |  | 
342  |  |     /*  | 
343  |  |      * between src[0..last-1], strip the funny spaces,  | 
344  |  |      * updating them to tab as needed.  | 
345  |  |      */  | 
346  | 0  |     for (i = 0; i < last; i++) { | 
347  | 0  |       char ch = src[i];  | 
348  | 0  |       if (ch != ' ') { | 
349  | 0  |         consecutive_spaces = 0;  | 
350  | 0  |         strbuf_addch(dst, ch);  | 
351  | 0  |       } else { | 
352  | 0  |         consecutive_spaces++;  | 
353  | 0  |         if (consecutive_spaces == ws_tab_width(ws_rule)) { | 
354  | 0  |           strbuf_addch(dst, '\t');  | 
355  | 0  |           consecutive_spaces = 0;  | 
356  | 0  |         }  | 
357  | 0  |       }  | 
358  | 0  |     }  | 
359  | 0  |     while (0 < consecutive_spaces--)  | 
360  | 0  |       strbuf_addch(dst, ' ');  | 
361  | 0  |     len -= last;  | 
362  | 0  |     src += last;  | 
363  | 0  |     fixed = 1;  | 
364  | 0  |   } else if ((ws_rule & WS_TAB_IN_INDENT) && last_tab_in_indent >= 0) { | 
365  |  |     /* Expand tabs into spaces */  | 
366  | 0  |     int start = dst->len;  | 
367  | 0  |     int last = last_tab_in_indent + 1;  | 
368  | 0  |     for (i = 0; i < last; i++) { | 
369  | 0  |       if (src[i] == '\t')  | 
370  | 0  |         do { | 
371  | 0  |           strbuf_addch(dst, ' ');  | 
372  | 0  |         } while ((dst->len - start) % ws_tab_width(ws_rule));  | 
373  | 0  |       else  | 
374  | 0  |         strbuf_addch(dst, src[i]);  | 
375  | 0  |     }  | 
376  | 0  |     len -= last;  | 
377  | 0  |     src += last;  | 
378  | 0  |     fixed = 1;  | 
379  | 0  |   }  | 
380  |  | 
  | 
381  | 0  |   strbuf_add(dst, src, len);  | 
382  | 0  |   if (add_cr_to_tail)  | 
383  | 0  |     strbuf_addch(dst, '\r');  | 
384  | 0  |   if (add_nl_to_tail)  | 
385  | 0  |     strbuf_addch(dst, '\n');  | 
386  | 0  |   if (fixed && error_count)  | 
387  | 0  |     (*error_count)++;  | 
388  | 0  | }  |