Coverage Report

Created: 2023-12-08 06:59

/src/aspell/modules/filter/markdown.cpp
Line
Count
Source (jump to first uncovered line)
1
#include "settings.h"
2
3
#include "config.hpp"
4
#include "indiv_filter.hpp"
5
#include "iostream.hpp"
6
#include "string_map.hpp"
7
#include "asc_ctype.hpp"
8
9
#include <typeinfo>
10
11
//#define DEBUG_FILTER
12
13
namespace {
14
15
using namespace acommon;
16
17
struct Iterator;
18
19
struct Block {
20
  Block * next;
21
9.85k
  Block() : next() {}
22
  enum KeepOpenState {NEVER, MAYBE, YES};
23
  virtual KeepOpenState proc_line(Iterator &) = 0;
24
  virtual void dump() const = 0;
25
  virtual bool leaf() const = 0;
26
9.85k
  virtual ~Block() {}
27
};
28
29
struct DocRoot : Block {
30
1.98M
  KeepOpenState proc_line(Iterator &) {return YES;}
31
0
  void dump() const {CERR.printf("DocRoot\n");}
32
25.1k
  bool leaf() const {return false;}
33
};
34
35
struct MultilineInlineState;
36
37
class MarkdownFilter : public IndividualFilter {
38
public:
39
172
  MarkdownFilter() : root(), back(&root), prev_blank(true), inline_state() {
40
172
    name_ = "markdown-filter";
41
172
    order_num_ = 0.30; // need to be before SGML filter
42
172
  }
43
  PosibErr<bool> setup(Config *);
44
  void reset();
45
  ~MarkdownFilter();
46
47
  void process(FilterChar * & start, FilterChar * & stop);
48
49
private:
50
0
  void dump() {
51
0
    CERR.printf(">>>blocks\n");
52
0
    for (Block * cur = &root; cur; cur = cur->next) {
53
0
      cur->dump();
54
0
    }
55
0
    CERR.printf("<<<blocks\n");
56
0
  }
57
58
  StringMap block_start_tags;
59
  StringMap raw_start_tags;
60
  
61
  DocRoot root;
62
  Block * back;
63
  bool prev_blank;
64
  MultilineInlineState * inline_state;
65
 
66
21.0k
  void kill(Block * blk) {
67
21.0k
    Block * cur = &root;
68
21.9k
    while (cur->next && cur->next != blk)
69
930
      cur = cur->next;
70
21.0k
    back = cur;
71
21.0k
    Block * next = cur->next;
72
21.0k
    cur->next = NULL;
73
21.0k
    cur = next;
74
30.6k
    while (cur) {
75
9.55k
      next = cur->next;
76
9.55k
      delete cur;
77
9.55k
      cur = next;
78
9.55k
    }
79
21.0k
  }
80
81
9.55k
  void add(Block * blk) {
82
9.55k
    back->next = blk;
83
9.55k
    back = blk;
84
9.55k
  }
85
86
  Block * start_block(Iterator & itr);
87
};
88
89
//
90
// Iterator class
91
//
92
93
684k
// Overwrite `chr` with a space unless it already is whitespace.
inline void blank(FilterChar & chr) {
  if (asc_isspace(chr))
    return;
  chr = ' ';
}
97
98
struct Iterator {
99
  FilterChar * line_start;
100
  FilterChar * i;
101
  FilterChar * end;
102
  int line_pos;
103
  int indent;
104
  Iterator(FilterChar * start, FilterChar * stop)
105
907
    : line_start(start), i(start), end(stop), line_pos(), indent() {}
106
5.09M
  void * pos() {return i;}
107
35.0M
  unsigned int operator[](int x) const {
108
35.0M
    if (x < 0) {
109
1.55M
      if (i + x >= line_start) return i[x];
110
50.7k
      else return '\0';
111
33.5M
    } else {
112
33.5M
      if (i + x >= end) return '\0';
113
33.5M
      if (*i == '\r' || *i == '\n') return '\0';
114
33.3M
      else return i[x];
115
33.5M
    }
116
35.0M
  }
117
10.5k
  bool prev_isspace() const {return i == line_start || asc_isspace(i[-1]);}
118
1.55M
  bool escaped() const {return operator[](-1) == '\\';}
119
21.2M
  unsigned int operator *() const {return operator[](0); }
120
14.2M
  bool eol() const {return operator*() == '\0';}
121
5.99M
  bool at_end() const {return i >= end;}
122
2.16M
  int width() const {
123
2.16M
    if (i == end) return 0;
124
2.16M
    if (*i == '\t') return 4  - (line_pos % 4);
125
2.13M
    return 1;
126
2.16M
  }
127
  // u_eq = not escaped and equal
128
1.55M
  bool u_eq(char chr) {
129
1.55M
    return !escaped() && operator*() == chr;
130
1.55M
  }
131
978k
  bool eq(const char * str) {
132
978k
    int i = 0;
133
995k
    while (str[i] != '\0' && operator[](i) == str[i])
134
16.4k
      ++i;
135
978k
    return str[i] == '\0';
136
978k
  }
137
2.14M
  void inc() {
138
2.14M
    indent = 0;
139
2.14M
    if (eol()) return;
140
2.14M
    line_pos += width();
141
2.14M
    ++i;
142
2.14M
  }
143
807k
  void adv(int width = 1) {
144
1.62M
    for (; width > 0; --width)
145
817k
      inc();
146
807k
    eat_space();
147
807k
  }
148
71.3k
  void blank_adv(int width = 1) {
149
684k
    for (; !eol() && width > 0; --width) {
150
612k
      blank(*i);
151
612k
      inc();
152
612k
    }
153
71.3k
    eat_space();
154
71.3k
  }
155
1.02M
  void blank_rest() {
156
1.07M
    while (!eol()) {
157
51.4k
      blank(*i);
158
51.4k
      inc();
159
51.4k
    }
160
1.02M
  }
161
  int eat_space();
162
  void next_line();
163
};
164
165
2.92M
int Iterator::eat_space() {
166
2.92M
  indent = 0;
167
2.98M
  while (!eol()) {
168
989k
    if (*i == ' ') {
169
41.3k
      ++i;
170
41.3k
      indent++;
171
41.3k
      line_pos++;
172
948k
    } else if (*i == '\t') {
173
20.2k
      int w = width();
174
20.2k
      ++i;
175
20.2k
      indent += w;
176
20.2k
      line_pos += w;
177
928k
    } else {
178
928k
      break;
179
928k
    }
180
989k
  }
181
2.92M
  return indent;
182
2.92M
}
183
184
1.99M
void Iterator::next_line() {
185
1.99M
  while (!eol())
186
0
    inc();
187
1.99M
  if (!at_end() && *i == '\r') {
188
22.2k
    ++i;
189
22.2k
    if (!at_end() && *i == '\n') {
190
640
      ++i;
191
640
    }
192
1.97M
  } else if (!at_end()) {
193
1.97M
    ++i;
194
1.97M
  }
195
1.99M
  line_pos = 0;
196
1.99M
  line_start = i;
197
1.99M
}
198
199
//
200
// Markdown blocks
201
// 
202
203
struct BlockQuote : Block {
204
33.6k
  static BlockQuote * start_block(Iterator & itr) {
205
33.6k
    if (*itr == '>') {
206
1.15k
      itr.blank_adv();
207
1.15k
      return new BlockQuote();
208
1.15k
    }
209
32.4k
    return NULL;
210
33.6k
  }
211
4.03k
  KeepOpenState proc_line(Iterator & itr) {
212
4.03k
    if (*itr == '>') {
213
1.52k
      itr.blank_adv();
214
1.52k
      return YES;
215
2.50k
    } else if (itr.eol()) {
216
695
      return NEVER;
217
695
    }
218
1.81k
    return MAYBE;
219
4.03k
  }
220
0
  void dump() const {CERR.printf("BlockQuote\n");}
221
1.75k
  bool leaf() const {return false;}
222
};
223
224
struct ListItem : Block {
225
  char marker; // '-' '+' or '*' for bullet lists; '.' or ')' for ordered lists
226
  int indent; // indention required in order to be considered part of
227
              // the same list item
228
  ListItem(char m, int i)
229
2.42k
    : marker(m), indent(i) {}
230
32.4k
  static ListItem * start_block(Iterator & itr) {
231
32.4k
    char marker = '\0';
232
32.4k
    int width = 0;
233
32.4k
    if (*itr == '-' || *itr == '+' || *itr == '*') {
234
2.42k
      marker = *itr;
235
2.42k
      width = 1;
236
30.0k
    } else if (asc_isdigit(*itr)) {
237
405
      width = 1;
238
501
      while (asc_isdigit(itr[width]))
239
96
        width += 1;
240
405
      if (itr[width] == '.' || itr[width] == ')') {
241
0
        width += 1;
242
0
        marker = *itr;
243
0
      }
244
405
    }
245
32.4k
    if (marker != '\0') {
246
2.42k
      itr.adv(width);
247
2.42k
      if (itr.indent <= 4) {
248
2.09k
        int indent = width + itr.indent;
249
2.09k
        itr.indent = 0;
250
2.09k
        return new ListItem(marker, indent);
251
2.09k
      } else {
252
333
        int indent = 1 + itr.indent;
253
333
        itr.indent -= 1;
254
333
        return new ListItem(marker, indent);
255
333
      }
256
2.42k
    }
257
30.0k
    return NULL;
258
32.4k
  }
259
8.76k
  KeepOpenState proc_line(Iterator & itr) {
260
8.76k
    if (!itr.eol() && itr.indent >= indent) {
261
59
      itr.indent -= indent;
262
59
      return YES;
263
59
    }
264
8.70k
    return MAYBE;
265
8.76k
  }
266
0
  void dump() const {CERR.printf("ListItem: '%c' %d\n", marker, indent);}
267
2.45k
  bool leaf() const {return false;}
268
};
269
270
struct IndentedCodeBlock : Block {
271
34.5k
  static IndentedCodeBlock * start_block(bool prev_blank, Iterator & itr) {
272
34.5k
    if (prev_blank && !itr.eol() && itr.indent >= 4) {
273
747
      itr.blank_rest();
274
747
      return new IndentedCodeBlock();
275
747
    }
276
33.8k
    return NULL;
277
34.5k
  }
278
1.85k
  KeepOpenState proc_line(Iterator & itr) {
279
1.85k
    if (itr.indent >= 4) {
280
447
      itr.blank_rest();
281
447
      return YES;
282
1.40k
    } else if (itr.eol()) {
283
895
      return YES;
284
895
    }
285
514
    return NEVER;
286
1.85k
  }
287
0
  void dump() const {CERR.printf("IndentedCodeBlock\n");}
288
747
  bool leaf() const {return true;}
289
};
290
291
struct FencedCodeBlock : Block {
292
  char delem;
293
  int  delem_len;
294
162
  FencedCodeBlock(char d, int l) : delem(d), delem_len(l) {}
295
33.8k
  static FencedCodeBlock * start_block(Iterator & itr) {
296
33.8k
    if (*itr == '`' || *itr == '~') {
297
257
      char delem = *itr;
298
257
      int i = 1;
299
174k
      while (itr[i] == delem)
300
174k
        ++i;
301
257
      if (i < 3) return NULL;
302
162
      itr.blank_adv(i);
303
162
      itr.blank_rest(); // blank info string
304
162
      return new FencedCodeBlock(delem, i);
305
257
    }
306
33.5k
    return NULL;
307
33.8k
  }
308
1.02M
  KeepOpenState proc_line(Iterator & itr) {
309
1.02M
    if (*itr == '`' || *itr == '~') {
310
164
      char delem = *itr;
311
164
      int i = 1;
312
342k
      while (itr[i] == delem)
313
342k
        ++i;
314
164
      itr.blank_adv(i);
315
164
      if (i >= delem_len && itr.eol()) {
316
149
        return NEVER;
317
149
      }
318
164
    }
319
1.02M
    itr.blank_rest();
320
1.02M
    return YES;
321
1.02M
  }
322
0
  bool blank_rest() const {
323
0
    return true;
324
0
  }
325
0
  void dump() const {CERR.printf("FencedCodeBlock: `%c` %d\n", delem, delem_len);}
326
162
  bool leaf() const {return true;}
327
};
328
329
struct SingleLineBlock : Block {
330
28.5k
  static SingleLineBlock * start_block(Iterator & itr) {
331
28.5k
    unsigned int chr = *itr;
332
28.5k
    switch (chr) {
333
1.01k
    case '-': case '_': case '*': {
334
1.01k
      Iterator i = itr;
335
1.01k
      i.adv();
336
2.43k
      while (*i == *itr)
337
1.41k
        i.adv();
338
1.01k
      if (i.eol()) {
339
358
        itr = i; 
340
358
        return new SingleLineBlock();
341
358
      }
342
661
      if (chr != '-') // fall though on '-' case
343
661
        break;
344
661
    }
345
1.51k
    case '=': {
346
1.51k
      Iterator i = itr;
347
1.51k
      i.inc();
348
1.51k
      while (*i == *itr)
349
0
        i.inc();
350
1.51k
      i.eat_space();
351
1.51k
      if (i.eol()) {
352
599
        itr = i;
353
599
        return new SingleLineBlock();
354
599
      }
355
914
      break;
356
1.51k
    }
357
914
    case '#':
358
170
      return new SingleLineBlock();
359
0
      break;
360
361
28.5k
    }
362
27.4k
    return NULL;
363
28.5k
  }
364
1.12k
  KeepOpenState proc_line(Iterator & itr) {return NEVER;}
365
1.12k
  bool leaf() const {return true;}
366
0
  void dump() const {CERR.printf("SingleLineBlock\n");}
367
};
368
369
//
370
// MultilineInline 
371
// 
372
373
struct MultilineInline {
374
  virtual MultilineInline * close(Iterator & itr) = 0;
375
2.86k
  virtual ~MultilineInline() {}
376
};
377
378
struct InlineCode : MultilineInline {
379
  int marker_len;
380
711k
  MultilineInline * open(Iterator & itr) {
381
711k
    if (itr.u_eq('`')) {
382
464
      int i = 1;
383
23.1k
      while (itr[i] == '`')
384
22.7k
        ++i;
385
464
      itr.blank_adv(i);
386
464
      marker_len = i;
387
464
      return close(itr);
388
464
    }
389
710k
    return NULL;
390
711k
  }
391
1.02k
  MultilineInline * close(Iterator & itr) {
392
61.1k
    while (!itr.eol()) {
393
60.5k
      if (*itr == '`') {
394
14.8k
        int i = 1;
395
10.7M
        while (i < marker_len && itr[i] == '`')
396
10.7M
          ++i;
397
14.8k
        if (i == marker_len) {
398
376
          itr.blank_adv(i);
399
376
          return NULL;
400
376
        }
401
14.8k
      }
402
60.1k
      itr.blank_adv();
403
60.1k
    }
404
653
    return this;
405
1.02k
  }
406
};
407
408
//
409
// Html handling
410
//
411
412
struct HtmlComment : MultilineInline {
413
710k
  MultilineInline * open(Iterator & itr) {
414
710k
    if (itr.eq("<!--")) {
415
745
      itr.adv(4);
416
745
      return close(itr);
417
745
    }
418
710k
    return NULL;
419
710k
  }
420
1.37k
  MultilineInline * close(Iterator & itr) {
421
268k
    while (!itr.eol()) {
422
267k
      if (itr.eq("-->")) {
423
311
        itr.adv(3);
424
311
        return NULL;
425
311
      }
426
267k
      itr.inc();
427
267k
    }
428
1.06k
    return this;
429
1.37k
  }
430
};
431
432
19.5k
// Consume ">" or "/>" (plus following whitespace) at the current
// position.  Returns false and leaves `itr` untouched if neither is there.
bool parse_tag_close(Iterator & itr) {
  int len = 0;
  if (*itr == '>')
    len = 1;
  else if (*itr == '/' && itr[1] == '>')
    len = 2;

  if (len == 0)
    return false;
  itr.adv(len);
  return true;
}
442
443
// note: does _not_ eat trailing whitespace
444
17.0k
// Append the lower-cased tag name found at the current position to `tag`.
// A name is a letter followed by letters, digits or '-'.  Returns false,
// consuming nothing, if the current character is not a letter.
// Whitespace after the name is not skipped.
bool parse_tag_name(Iterator & itr, String & tag) {
  if (!asc_isalpha(*itr))
    return false;
  do {
    tag += asc_tolower(*itr);
    itr.inc();
  } while (asc_isalpha(*itr) || asc_isdigit(*itr) || *itr == '-');
  return true;
}
456
457
struct HtmlTag : MultilineInline {
458
735
  HtmlTag(bool mlt) : start_pos(NULL), last(NULL,NULL), multi_line(mlt) {}
459
  void * start_pos; // used for caching
460
  Iterator last;    // ditto
461
  String name;
462
  bool closing;
463
  enum State {Invalid,Between,AfterName,AfterEq,InSingleQ,InDoubleQ,BeforeClose,Valid};
464
  State state;
465
  bool multi_line;
466
798k
  void clear_cache() {
467
798k
    start_pos = NULL;
468
798k
  }
469
798k
  void reset() {
470
798k
    clear_cache();
471
798k
    name.clear();
472
798k
    closing = false;
473
798k
    state = Invalid;
474
798k
  }
475
783k
  MultilineInline * open(const Iterator & itr0, Iterator & itr) {
476
783k
    if (itr.pos() == start_pos) {
477
23.1k
      itr = last;
478
23.1k
      if (state != Invalid && state != Valid)
479
1.51k
        return this;
480
21.6k
      return NULL;
481
23.1k
    }
482
760k
    reset();
483
760k
    start_pos = itr.pos();
484
760k
    if (*itr == '<') {
485
17.0k
      itr.inc();
486
17.0k
      if (*itr == '/') {
487
2.23k
        itr.inc();
488
2.23k
        closing = true;
489
2.23k
      }
490
17.0k
      if (!parse_tag_name(itr, name))
491
5.19k
        return invalid(itr0, itr);
492
11.8k
      state = Between;
493
11.8k
      if (itr.eol()) {
494
2.83k
        return incomplete(itr0, itr);
495
9.01k
      } else if (parse_tag_close(itr)) {
496
2.41k
        return valid(itr0, itr);
497
6.60k
      } else if (asc_isspace(*itr)) {
498
4.18k
        return close(itr0, itr);
499
4.18k
      } else {
500
2.42k
        return invalid(itr0, itr);
501
2.42k
      }
502
11.8k
    }
503
743k
    return invalid(itr0, itr);
504
760k
  }
505
756k
  MultilineInline * open(Iterator & itr) {
506
756k
    Iterator itr0 = itr;
507
756k
    return open(itr0, itr);
508
756k
  }
509
11.5k
  MultilineInline * close(const Iterator & itr0, Iterator & itr) {
510
21.3k
    while (!itr.eol()) {
511
14.8k
      if (state == Between || state == BeforeClose) {
512
10.5k
        itr.eat_space();
513
10.5k
        bool leading_space = itr.prev_isspace();
514
        
515
10.5k
        if (parse_tag_close(itr))
516
317
          return valid(itr0, itr);
517
518
10.2k
        if ((state == BeforeClose && !itr.eol())
519
10.2k
            || (itr.line_pos != 0 && !leading_space))
520
2.68k
          return invalid(itr0, itr);
521
10.2k
      }
522
523
11.8k
      state = parse_attribute(itr, state);
524
11.8k
      if (state == Invalid)
525
2.09k
        return invalid(itr0, itr);
526
11.8k
    }
527
6.50k
    return incomplete(itr0, itr);
528
11.5k
  }
529
7.41k
  MultilineInline * close(Iterator & itr) {
530
7.41k
    Iterator itr0 = itr;
531
7.41k
    return close(itr0, itr);
532
7.41k
  }
533
534
2.72k
  MultilineInline * valid(const Iterator & itr0, Iterator & itr) {
535
2.72k
    state = Valid;
536
2.72k
    last = itr;
537
2.72k
    return NULL;
538
2.72k
  }
539
756k
  MultilineInline * invalid(const Iterator & itr0, Iterator & itr) {
540
756k
    state = Invalid;
541
756k
    itr = itr0;
542
756k
    last = itr;
543
756k
    return NULL;
544
756k
  }
545
9.33k
  MultilineInline * incomplete(const Iterator & itr0, Iterator & itr) {
546
9.33k
    last = itr;
547
9.33k
    if (multi_line)
548
9.00k
      return this;
549
332
    return invalid(itr0, itr);
550
9.33k
  }
551
552
  // note: does _not_ eat trialing whitespace
553
11.8k
  static State parse_attribute(Iterator & itr, State state) {
554
11.8k
    switch (state) {
555
      // note: this switch is being used as a computed goto to make
556
      //   restoring state straightforward without restructuring the code
557
7.57k
     case Between:
558
7.57k
      if (asc_isalpha(*itr) || *itr == '_' || *itr == ':') {
559
5.97k
        itr.inc();
560
10.3k
        while (asc_isalpha(*itr) || asc_isdigit(*itr)
561
10.3k
               || *itr == '_' || *itr == ':' || *itr == '.' || *itr == '-')
562
4.38k
          itr.inc();
563
7.70k
       case AfterName:
564
7.70k
        itr.eat_space();
565
7.70k
        if (itr.eol()) return AfterName;
566
5.48k
        if (*itr != '=') return Invalid;
567
4.63k
        itr.inc();
568
5.96k
       case AfterEq:
569
5.96k
        itr.eat_space();
570
5.96k
        if (itr.eol()) return AfterEq;
571
4.36k
        if (*itr == '\'') {
572
135
          itr.inc();
573
1.12k
         case InSingleQ:
574
5.26k
          while (!itr.eol() && *itr != '\'')
575
4.14k
            itr.inc();
576
1.12k
          if (itr.eol()) return InSingleQ;
577
129
          if (*itr != '\'') return Invalid;
578
129
          itr.inc();
579
4.22k
        } else if (*itr == '"') {
580
425
          itr.inc();
581
640
         case InDoubleQ:
582
3.62k
          while (!itr.eol() && *itr != '"')
583
2.98k
            itr.inc();
584
640
          if (itr.eol()) return InDoubleQ;
585
174
          if (*itr != '"') return Invalid;
586
174
          itr.inc();
587
3.80k
        } else {
588
3.80k
          void * pos = itr.pos();
589
20.8k
          while (!itr.eol() && !asc_isspace(*itr)
590
20.8k
                 && *itr != '"' && *itr != '\'' && *itr != '='
591
20.8k
                 && *itr != '<' && *itr != '>' && *itr != '`')
592
17.0k
            itr.inc();
593
3.80k
          if (pos == itr.pos()) return Invalid;
594
3.80k
        }
595
2.85k
        return Between;
596
4.36k
      }
597
1.60k
     case BeforeClose:
598
1.60k
      return BeforeClose;
599
0
     default: //case Valid: case Invalid:
600
      // should not happen
601
0
      break;
602
11.8k
    }
603
    // should not be here
604
0
    abort();
605
11.8k
  }
606
};
607
608
struct HtmlBlock : Block {
609
1.88k
  HtmlBlock(Iterator & itr) {
610
1.88k
    proc_line(itr);
611
1.88k
  }
612
5.16k
  KeepOpenState proc_line(Iterator & itr) {
613
5.16k
    if (itr.eol()) return NEVER;
614
8.65k
    while (!itr.eol()) itr.inc();
615
1.96k
    return YES;
616
5.16k
  }
617
0
  void dump() const {CERR.printf("HtmlBlock\n");}
618
1.88k
  bool leaf() const {return true;}
619
};
620
621
struct RawHtmlBlock : Block {
622
563
  RawHtmlBlock(Iterator & itr, ParmStr tn) : done(false), tag(false), tag_name(tn) {
623
563
    proc_line(itr);
624
563
  }
625
  bool done;
626
  HtmlTag tag;
627
  String tag_name;
628
3.35k
  KeepOpenState proc_line(Iterator & itr) {
629
3.35k
    tag.reset();
630
3.35k
    if (done) return NEVER;
631
49.0k
    while (!itr.eol()) {
632
46.3k
      tag.open(itr);
633
46.3k
      if (tag.state == HtmlTag::Valid && tag.closing && tag.name == tag_name) {
634
349
        done = true;
635
4.18k
        while (!itr.eol()) itr.inc();
636
349
        return NEVER;
637
349
      }
638
45.9k
      itr.adv();
639
45.9k
    }
640
2.71k
    return YES;
641
3.06k
  }
642
0
  void dump() const {CERR.printf("RawHtmlBlock: %s\n", tag_name.c_str());}
643
563
  bool leaf() const {return true;}
644
};
645
646
// Try to start an HTML block at the current position.
//   - an opening tag listed in `raw_tags` starts a RawHtmlBlock;
//   - a complete tag that is alone on its line, or any tag listed in
//     `start_tags`, starts an HtmlBlock.
// Returns NULL and restores `itr` if neither applies.
Block * start_html_block(Iterator & itr, HtmlTag & tag,
                         const StringMap & start_tags,
                         const StringMap & raw_tags) {
  const Iterator saved = itr;
  tag.open(saved, itr);

  if (!tag.closing && raw_tags.have(tag.name))
    return new RawHtmlBlock(itr,tag.name);

  const bool lone_tag = (tag.state == HtmlTag::Valid && itr.eol());
  if (lone_tag || start_tags.have(tag.name))
    return new HtmlBlock(itr);

  itr = saved;
  return NULL;
}
660
661
//
662
// Link handling
663
// 
664
665
struct Link : MultilineInline {
666
1.78k
  Link(bool s) : skip_ref_labels(s) {reset();}
667
  enum State {Invalid, BeforeUrl, AfterUrl, InSingleQ, InDoubleQ, InParanQ, AfterQuote, Valid};
668
  State state;
669
  bool skip_ref_labels;
670
  struct LineState {
671
    Iterator itr0;
672
    FilterChar * blank_start;
673
    FilterChar * blank_stop;
674
    LineState(const Iterator & itr0)
675
46.3k
      : itr0(itr0), blank_start(NULL), blank_stop(NULL) {}
676
  };
677
706k
  void reset() {
678
706k
    state = Invalid;
679
706k
  }
680
704k
  MultilineInline * open(Iterator & itr) {
681
704k
    reset();
682
704k
    if (itr.u_eq(']')) {
683
      // no space allowed between ']' and '(' or '[';
684
9.70k
      if (itr[1] == '(') {
685
5.19k
        itr.adv(2);
686
5.19k
        return close(itr);
687
5.19k
      } else if (skip_ref_labels && itr[1] == '[') {
688
2.29k
        LineState st(itr);
689
2.29k
        itr.adv(2);
690
2.29k
        st.blank_start = itr.i;
691
9.00k
        while (!itr.eol() && !itr.u_eq(']'))
692
6.71k
          itr.adv();
693
2.29k
        st.blank_stop = itr.i;
694
2.29k
        if (!itr.eol())
695
1.01k
          return valid(st,itr);
696
1.28k
        else
697
1.28k
          return invalid(st, itr);
698
2.29k
      }
699
9.70k
    }
700
697k
    state = Invalid;
701
697k
    return NULL;
702
704k
  }
703
  
704
7.62k
  static State parse_url(LineState & st, Iterator & itr, char close) {
705
7.62k
    if (itr.eol())
706
872
      return BeforeUrl;
707
6.75k
    if (itr.u_eq('<')) {
708
534
      st.blank_start = itr.i;
709
534
      itr.adv();
710
18.4k
      while (!itr.eol() && !itr.u_eq('>'))
711
17.8k
        itr.adv();
712
534
      if (itr.eol())
713
237
        return Invalid;
714
297
      itr.adv();
715
297
      st.blank_stop = itr.i;
716
6.21k
    } else {
717
6.21k
      st.blank_start = itr.i;
718
35.7k
      while (!itr.eol() && !itr.u_eq(close) && !asc_isspace(*itr))
719
29.4k
        itr.inc();
720
6.21k
      st.blank_stop = itr.i;
721
6.21k
      itr.eat_space();
722
6.21k
    }
723
6.51k
    return AfterUrl;
724
6.75k
  }
725
6.64k
  static State parse_label(State state, Iterator & itr) {
726
6.64k
    switch (state) {
727
5.38k
    default:
728
5.38k
      if (itr.u_eq('\'')) {
729
191
        itr.inc();
730
191
        state = InSingleQ;
731
191
      case InSingleQ:
732
961
        while (!itr.eol() && !itr.u_eq('\''))
733
770
          itr.inc();
734
191
        if (itr.eol())
735
2
          return state;
736
189
        itr.adv();
737
189
        state = AfterQuote;
738
5.19k
      } else if (itr.u_eq('\"')) {
739
1.06k
        itr.inc();
740
1.06k
        state = InDoubleQ;
741
2.32k
      case InDoubleQ:
742
21.3k
        while (!itr.eol() && !itr.u_eq('"'))
743
19.0k
          itr.inc();
744
2.32k
        if (itr.eol())
745
1.27k
          return state;
746
1.05k
        itr.adv();
747
1.05k
        state = AfterQuote;
748
4.12k
      } else if (itr.u_eq('(')) {
749
3.20k
        state = InParanQ;
750
3.20k
      case InParanQ:
751
9.86k
        while (!itr.eol() && !itr.u_eq(')'))
752
6.65k
          itr.inc();
753
3.20k
        if (itr.eol())
754
216
          return state;
755
2.98k
        itr.adv();
756
2.98k
        state = AfterQuote;
757
2.98k
      }
758
6.64k
    }
759
5.15k
    return state;
760
6.64k
  }
761
13.9k
  Link * parse_url_label(Iterator & itr, char close) {
762
13.9k
    LineState st(itr);
763
13.9k
    itr.eat_space();
764
13.9k
    switch (state) {
765
7.62k
    default:
766
7.62k
      state = parse_url(st,itr,close);
767
7.62k
      if (state == Invalid) return invalid(st, itr);
768
7.38k
      if (itr.eol()) return incomplete(st, itr);
769
6.33k
    case AfterUrl:
770
6.33k
      if (close != '\0' ? itr.u_eq(close) : itr.eol())
771
951
        return valid(st, itr);
772
6.64k
    case InSingleQ: case InDoubleQ:
773
6.64k
      state = parse_label(state, itr);
774
6.64k
      if (state == Invalid) return invalid(st, itr);
775
6.64k
      if (itr.eol()) return incomplete(st, itr);
776
5.15k
    case AfterQuote:
777
5.15k
      if (close != '\0' ? itr.u_eq(close) : itr.eol())
778
3.05k
        return valid(st, itr);
779
2.09k
      return invalid(st, itr);
780
13.9k
    }    
781
13.9k
  }
782
8.48k
  MultilineInline * close(Iterator & itr) {
783
8.48k
    return parse_url_label(itr, ')');
784
8.48k
  }
785
14.1k
  static void blank(const LineState & st) {
786
34.0k
    for (FilterChar * i = st.blank_start; i != st.blank_stop; ++i) {
787
19.8k
      ::blank(*i);
788
19.8k
    }
789
14.1k
  }
790
5.02k
  Link * valid(const LineState & st, Iterator & itr) {
791
5.02k
    itr.adv(); // skip over closing tag
792
5.02k
    blank(st);
793
5.02k
    state = Valid;
794
5.02k
    return NULL;
795
5.02k
  }
796
3.61k
  Link * invalid(const LineState & st, Iterator & itr) {
797
3.61k
    state = Invalid;
798
3.61k
    itr = st.itr0;
799
3.61k
    return NULL;
800
3.61k
  }
801
7.61k
  Link * incomplete(const LineState & st, Iterator & itr) {
802
7.61k
    blank(st);
803
7.61k
    return this;
804
7.61k
  }
805
};
806
807
struct LinkRefDefinition : Block {
808
  Link link;
809
  Link * multiline;
810
1.61k
  LinkRefDefinition() : link(false) {}
811
30.0k
  static LinkRefDefinition * start_block(Iterator & itr, bool skip_ref_labels) {
812
30.0k
    Link::LineState st(itr);
813
30.0k
    if (*itr == '[') {
814
3.19k
      itr.adv();
815
3.19k
      st.blank_start = itr.i;
816
3.19k
      if (*itr == ']') goto fail;
817
17.1k
      while (!itr.eol() && !itr.u_eq(']')) {
818
14.4k
        itr.adv();
819
14.4k
      }
820
2.74k
      st.blank_stop = itr.i;
821
2.74k
      itr.inc();
822
2.74k
      if (*itr != ':') goto fail;
823
1.61k
      itr.adv();
824
1.61k
      LinkRefDefinition * obj = new LinkRefDefinition();
825
1.61k
      obj->multiline = obj->link.parse_url_label(itr, '\0');
826
1.61k
      if (obj->link.state == Link::Invalid) {
827
130
        delete obj;
828
130
        goto fail;
829
130
      }
830
1.48k
      if (skip_ref_labels)
831
1.48k
        Link::blank(st);
832
1.48k
      return obj;
833
1.61k
    }
834
28.5k
  fail:
835
28.5k
    itr = st.itr0;
836
28.5k
    return NULL;
837
30.0k
  }
838
3.85k
  KeepOpenState proc_line(Iterator & itr) {
839
3.85k
    if (!multiline)
840
0
      return NEVER;
841
3.85k
    void * pos = itr.pos();
842
3.85k
    multiline = multiline->parse_url_label(itr, '\0');
843
3.85k
    if (multiline)
844
2.37k
      return MAYBE;
845
1.47k
    return NEVER;
846
3.85k
  }
847
0
  void dump() const {CERR.printf("LinkRefDefination\n");}
848
1.48k
  bool leaf() const {return true;}
849
};
850
851
//
852
//
853
//
854
855
struct MultilineInlineState {
856
172
  MultilineInlineState(bool mlt, bool s) : ptr(), tag(mlt), link(s) {}
857
  MultilineInline * ptr;
858
  InlineCode inline_code;
859
  HtmlComment comment;
860
  HtmlTag tag;
861
  Link link;
862
172
  void clear_cache() {
863
172
    tag.clear_cache();
864
172
  }
865
0
  void reset() {
866
0
    tag.reset();
867
0
    link.reset();
868
0
  }
869
};
870
871
//
872
// MarkdownFilter implementation
873
//
874
875
172
// Read the filter options from `cfg` and (re)create the inline parser
// state.  Always returns true.
PosibErr<bool> MarkdownFilter::setup(Config * cfg) {
  bool skip_labels = cfg->retrieve_bool("f-markdown-skip-ref-labels");
  bool multi_line = cfg->retrieve_bool("f-markdown-multiline-tags");

  // replace any state left over from an earlier setup()
  delete inline_state;
  inline_state = new MultilineInlineState(multi_line, skip_labels);

  raw_start_tags.clear();
  cfg->retrieve_list("f-markdown-raw-start-tags",  &raw_start_tags);

  block_start_tags.clear();
  cfg->retrieve_list("f-markdown-block-start-tags", &block_start_tags);

  return true;
}
887
888
0
void MarkdownFilter::reset() {
889
0
  kill(root.next);
890
0
  prev_blank = true;
891
0
  inline_state->reset();
892
0
}
893
894
895
172
// Free every block still open as well as the inline parser state.
MarkdownFilter::~MarkdownFilter()
{
  kill(root.next);
  delete inline_state;
}
899
900
172
// Process the text in [start, stop) one line at a time.  For each line
// the open blocks are first offered the line and new blocks are started,
// then the remainder of the line is scanned for inline constructs.
void MarkdownFilter::process(FilterChar * & start, FilterChar * & stop) {
  inline_state->clear_cache();
  Iterator itr(start,stop);
  bool blank_line = false;
  while (!itr.at_end()) {
    itr.eat_space();
    if (inline_state->ptr) {
      // an inline construct is still open from the previous line; an
      // empty line abandons it
      if (itr.eol())
        inline_state->ptr = NULL;
      else
        inline_state->ptr = inline_state->ptr->close(itr);
    } else {
      // offer the line to each open block, outermost first, until one
      // does not definitely continue
      Block * blk = &root;
      Block::KeepOpenState keep_open;
      while (blk) {
        keep_open = blk->proc_line(itr);
        if (keep_open != Block::YES)
          break;
        blk = blk->next;
      }

      blank_line = itr.eol();

      // a new block can not start on a blank line, nor inside a leaf
      // block that is still open
      Block * nblk = NULL;
      if (!blank_line && !(keep_open == Block::YES && back->leaf()))
        nblk = start_block(itr);

      if (nblk || keep_open == Block::NEVER || (prev_blank && !blank_line)) {
#ifdef DEBUG_FILTER
        CERR.printf("*** kill\n");
#endif
        kill(blk);
      } else {
        // lazily keep the remaining blocks open unless one of them
        // refuses the line outright
        for (; blk; blk = blk->next) {
          keep_open = blk->proc_line(itr);
          if (keep_open == Block::NEVER) {
#ifdef DEBUG_FILTER
            CERR.printf("***** kill\n");
#endif
            kill(blk);
            break;
          }
        }
      }

      if (nblk) {
#ifdef DEBUG_FILTER
        CERR.printf("*** new block\n");
#endif
        add(nblk);
        prev_blank = true;
      }

      // container blocks may have further blocks starting on the same line
      while (nblk && !nblk->leaf()) {
        nblk = start_block(itr);
        if (nblk) {
#ifdef DEBUG_FILTER
          CERR.printf("*** new block\n");
#endif
          add(nblk);
        }
      }

#ifdef DEBUG_FILTER
      dump();
#endif
    }
    // now process line, mainly blank inline code and handle html tags

    while (!itr.eol()) {
      void * pos = itr.pos();
      // Try one inline parser: stop scanning the line if it needs the
      // next line, restart the scan if it consumed anything.
#define TRY(what) \
  inline_state->ptr = inline_state->what.open(itr);  \
  if (inline_state->ptr) break; \
  if (itr.pos() != pos) continue
      TRY(inline_code);
      TRY(comment);
      TRY(tag);
      TRY(link);
#undef TRY
      // stray angle brackets are blanked, everything else is kept
      if (*itr == '<' || *itr == '>')
        itr.blank_adv();
      else
        itr.adv();
    }

    itr.next_line();

    prev_blank = blank_line;
  }
}
989
990
34.5k
// Try each kind of block in turn at the current position and return the
// first one that starts here, or NULL if none does.
Block * MarkdownFilter::start_block(Iterator & itr) {
  inline_state->tag.reset();

  if (Block * b = IndentedCodeBlock::start_block(prev_blank, itr))
    return b;
  if (Block * b = FencedCodeBlock::start_block(itr))
    return b;
  if (Block * b = BlockQuote::start_block(itr))
    return b;
  if (Block * b = ListItem::start_block(itr))
    return b;
  if (Block * b = LinkRefDefinition::start_block(itr, inline_state->link.skip_ref_labels))
    return b;
  if (Block * b = SingleLineBlock::start_block(itr))
    return b;
  return start_html_block(itr, inline_state->tag, block_start_tags, raw_start_tags);
}
1002
1003
} // anon namespace
1004
1005
// Exported factory: returns a newly allocated MarkdownFilter.
C_EXPORT IndividualFilter * new_aspell_markdown_filter() {
  IndividualFilter * filter = new MarkdownFilter();
  return filter;
}
1009