Coverage Report

Created: 2026-04-04 08:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/binutils-gdb/gas/app.c
Line
Count
Source
1
/* This is the Assembler Pre-Processor
2
   Copyright (C) 1987-2026 Free Software Foundation, Inc.
3
4
   This file is part of GAS, the GNU Assembler.
5
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
#if (__STDC__ != 1)
30
#ifndef const
31
#define const  /* empty */
32
#endif
33
#endif
34
35
#ifdef H_TICK_HEX
36
int enable_h_tick_hex = 0;
37
#endif
38
39
#ifdef TC_M68K
40
/* Whether we are scrubbing in m68k MRI mode.  This is different from
41
   flag_m68k_mri, because the two flags will be affected by the .mri
42
   pseudo-op at different times.  */
43
static int scrub_m68k_mri;
44
45
/* The pseudo-op which switches in and out of MRI mode.  See the
46
   comment in do_scrub_chars.  */
47
static const char mri_pseudo[] = ".mri 0";
48
static const char *mri_state;
49
static char mri_last_ch;
50
#else
51
1.84M
#define scrub_m68k_mri 0
52
#endif
53
54
#if defined TC_ARM && defined OBJ_ELF
55
/* The pseudo-op for which we need to special-case `@' characters.
56
   See the comment in do_scrub_chars.  */
57
static const char   symver_pseudo[] = ".symver";
58
static const char * symver_state;
59
#endif
60
61
/* The pseudo-op (without leading dot) at which we want to (perhaps just
62
   temporarily) stop processing.  See the comments in do_scrub_chars().  */
63
static const char   end_pseudo[] = "end ";
64
static const char * end_state;
65
66
/* Whether, considering the state at start of assembly, NO_PSEUDO_DOT is
67
   active.  */
68
static bool no_pseudo_dot;
69
70
static char last_char;
71
72
44.2M
/* Lexical classes stored in the lex[] table below.  Bytes that are never
   given a class keep the value 0.  */
#define LEX_IS_SYMBOL_COMPONENT     1
#define LEX_IS_WHITESPACE           2
#define LEX_IS_LINE_SEPARATOR       3
#define LEX_IS_COMMENT_START        4
#define LEX_IS_LINE_COMMENT_START   5
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
#define LEX_IS_STRINGQUOTE          8
#define LEX_IS_COLON                9
#define LEX_IS_NEWLINE              10
#define LEX_IS_ONECHAR_QUOTE        11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST       12
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST        13
#endif
#define LEX_IS_PARALLEL_SEPARATOR   14
#ifdef H_TICK_HEX
#define LEX_IS_H                    15
#endif

/* Class predicates.  C is used directly as an index into lex[].  */
#define IS_SYMBOL_COMPONENT(c)      (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)            (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)        (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)    (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)               (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)          (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_TWOCHAR_COMMENT_1ST(c)   (lex[c] == LEX_IS_TWOCHAR_COMMENT_1ST)
#define IS_NEWLINE(c)               (lex[c] == LEX_IS_NEWLINE)

/* Per-byte classification table.  The static defaults below are refined
   at run time by do_scrub_begin.  The LEX_SYMn helpers mark N consecutive
   byte values, starting at their argument, as symbol components; they
   exist only while the initializer is being written.  */
static char lex[256] = {
#define LEX_SYM1(c)    [c] = LEX_IS_SYMBOL_COMPONENT
#define LEX_SYM2(c)    LEX_SYM1 (c),  LEX_SYM1 ((c) + 1)
#define LEX_SYM4(c)    LEX_SYM2 (c),  LEX_SYM2 ((c) + 2)
#define LEX_SYM8(c)    LEX_SYM4 (c),  LEX_SYM4 ((c) + 4)
#define LEX_SYM16(c)   LEX_SYM8 (c),  LEX_SYM8 ((c) + 8)
#define LEX_SYM32(c)   LEX_SYM16 (c), LEX_SYM16 ((c) + 16)
#define LEX_SYM64(c)   LEX_SYM32 (c), LEX_SYM32 ((c) + 32)
#define LEX_SYM128(c)  LEX_SYM64 (c), LEX_SYM64 ((c) + 64)

  /* Blanks and line structure.  */
  [' ']  = LEX_IS_WHITESPACE,
  ['\t'] = LEX_IS_WHITESPACE,
#ifdef CR_EOL
  ['\r'] = LEX_IS_LINE_SEPARATOR,
#else
  ['\r'] = LEX_IS_WHITESPACE,
#endif
  ['\n'] = LEX_IS_NEWLINE,
  [':']  = LEX_IS_COLON,

  /* Punctuation that may appear in symbol names.  */
  LEX_SYM1 ('$'), LEX_SYM1 ('.'), LEX_SYM1 ('_'),

  /* Upper-case letters.  */
  LEX_SYM1 ('A'), LEX_SYM1 ('B'), LEX_SYM1 ('C'), LEX_SYM1 ('D'),
  LEX_SYM1 ('E'), LEX_SYM1 ('F'), LEX_SYM1 ('G'), LEX_SYM1 ('H'),
  LEX_SYM1 ('I'), LEX_SYM1 ('J'), LEX_SYM1 ('K'), LEX_SYM1 ('L'),
  LEX_SYM1 ('M'), LEX_SYM1 ('N'), LEX_SYM1 ('O'), LEX_SYM1 ('P'),
  LEX_SYM1 ('Q'), LEX_SYM1 ('R'), LEX_SYM1 ('S'), LEX_SYM1 ('T'),
  LEX_SYM1 ('U'), LEX_SYM1 ('V'), LEX_SYM1 ('W'), LEX_SYM1 ('X'),
  LEX_SYM1 ('Y'), LEX_SYM1 ('Z'),

  /* Lower-case letters.  */
  LEX_SYM1 ('a'), LEX_SYM1 ('b'), LEX_SYM1 ('c'), LEX_SYM1 ('d'),
  LEX_SYM1 ('e'), LEX_SYM1 ('f'), LEX_SYM1 ('g'), LEX_SYM1 ('h'),
  LEX_SYM1 ('i'), LEX_SYM1 ('j'), LEX_SYM1 ('k'), LEX_SYM1 ('l'),
  LEX_SYM1 ('m'), LEX_SYM1 ('n'), LEX_SYM1 ('o'), LEX_SYM1 ('p'),
  LEX_SYM1 ('q'), LEX_SYM1 ('r'), LEX_SYM1 ('s'), LEX_SYM1 ('t'),
  LEX_SYM1 ('u'), LEX_SYM1 ('v'), LEX_SYM1 ('w'), LEX_SYM1 ('x'),
  LEX_SYM1 ('y'), LEX_SYM1 ('z'),

  /* Decimal digits.  */
  LEX_SYM1 ('0'), LEX_SYM1 ('1'), LEX_SYM1 ('2'), LEX_SYM1 ('3'),
  LEX_SYM1 ('4'), LEX_SYM1 ('5'), LEX_SYM1 ('6'), LEX_SYM1 ('7'),
  LEX_SYM1 ('8'), LEX_SYM1 ('9'),

  /* Every byte value from 128 through 255.  */
  LEX_SYM128 (128),

#undef LEX_SYM128
#undef LEX_SYM64
#undef LEX_SYM32
#undef LEX_SYM16
#undef LEX_SYM8
#undef LEX_SYM4
#undef LEX_SYM2
#undef LEX_SYM1
};
167
168
void
169
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
170
567
{
171
567
  const char *p;
172
173
  /* Latch this once at start.  xtensa uses a hook function, yet context isn't
174
     meaningful for scrubbing (or else we'd need to sync scrubber behavior as
175
     state changes).  */
176
567
  if (lex['/'] == 0)
177
1
    no_pseudo_dot = NO_PSEUDO_DOT;
178
179
#ifdef TC_M68K
180
  scrub_m68k_mri = m68k_mri;
181
182
  if (! m68k_mri)
183
#endif
184
567
    {
185
567
      lex['"'] = LEX_IS_STRINGQUOTE;
186
187
567
#if ! defined (TC_HPPA)
188
567
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
189
567
#endif
190
191
#ifdef SINGLE_QUOTE_STRINGS
192
      lex['\''] = LEX_IS_STRINGQUOTE;
193
#endif
194
567
    }
195
196
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
197
     in state 5 of do_scrub_chars must be changed.  */
198
199
  /* Note that these override the previous defaults, e.g. if ';' is a
200
     comment char, then it isn't a line separator.  */
201
202
567
#ifdef tc_symbol_chars
203
  /* This macro permits the processor to specify all characters which
204
     may appears in an operand.  This will prevent the scrubber from
205
     discarding meaningful whitespace in certain cases.  The i386
206
     backend uses this to support prefixes, which can confuse the
207
     scrubber as to whether it is parsing operands or opcodes.  */
208
3.40k
  for (p = tc_symbol_chars; *p; ++p)
209
2.83k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
210
567
#endif
211
212
  /* The m68k backend wants to be able to change comment_chars.  */
213
#ifndef tc_comment_chars
214
#define tc_comment_chars comment_chars
215
#endif
216
1.13k
  for (p = tc_comment_chars; *p; p++)
217
567
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
218
219
  /* While counter intuitive to have more special purpose line comment chars
220
     override more general purpose ordinary ones, logic in do_scrub_chars()
221
     depends on this ordering.   */
222
1.70k
  for (p = line_comment_chars; *p; p++)
223
1.13k
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
224
225
567
#ifndef tc_line_separator_chars
226
567
#define tc_line_separator_chars line_separator_chars
227
567
#endif
228
1.13k
  for (p = tc_line_separator_chars; *p; p++)
229
567
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
230
231
#ifdef tc_parallel_separator_chars
232
  /* This macro permits the processor to specify all characters which
233
     separate parallel insns on the same line.  */
234
  for (p = tc_parallel_separator_chars; *p; p++)
235
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
236
#endif
237
238
  /* Only allow slash-star comments if slash is not in use.  Certain
239
     other cases are dealt with in LEX_IS_LINE_COMMENT_START handling.
240
     FIXME: This isn't right.  We should always permit them.  */
241
567
  if (lex['/'] == 0)
242
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
243
244
#ifdef TC_M68K
245
  if (m68k_mri)
246
    {
247
      lex['\''] = LEX_IS_STRINGQUOTE;
248
      lex[';'] = LEX_IS_COMMENT_START;
249
      lex['*'] = LEX_IS_LINE_COMMENT_START;
250
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
251
   then it can't be used in an expression.  */
252
      lex['!'] = LEX_IS_LINE_COMMENT_START;
253
    }
254
#endif
255
256
#ifdef TC_V850
257
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
258
#endif
259
#ifdef DOUBLEBAR_PARALLEL
260
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
261
#endif
262
263
#ifdef H_TICK_HEX
264
  if (enable_h_tick_hex)
265
    {
266
      lex['h'] = LEX_IS_H;
267
      lex['H'] = LEX_IS_H;
268
    }
269
#endif
270
567
}
271
272
/* Scrubber state which persists between calls to do_scrub_chars.  */

/* Current state of the scrubbing state machine.  */
static int state;

/* State to go back to once a string, or the text in out_string, has
   been dealt with.  */
static int old_state;

/* Text that is emitted character by character while in state -1.  */
static const char *out_string;

/* NOTE(review): presumably the storage out_string points into; its
   users are outside this part of the file -- confirm.  */
static char out_buf[20];

/* Incremented when a string is continued across a line break.  */
static int add_newlines;

/* Input that has been read but not yet consumed, and its length.
   SAVED_INPUT is NULL when there is none.  */
static char *saved_input;
static size_t saved_input_len;

/* Buffer handed to the input callback of do_scrub_chars.  */
static char input_buffer[32 * 1024];

/* Snapshot of everything above, plus the pseudo-op recognizers, used to
   preserve the state of app across #include's.  app is called
   asynchronously to the parsing of the .include's, so the state at the
   time .include is interpreted is completely unrelated to the included
   file.  That is why all of it has to be saved.  */

struct app_save
{
  int state;
  int old_state;
  const char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *saved_input;            /* Private copy, or NULL.  */
  size_t saved_input_len;       /* Only set when saved_input != NULL.  */
  const char *end_state;
#ifdef TC_M68K
  int scrub_m68k_mri;
  const char *mri_state;
  char mri_last_ch;
#endif
#if defined TC_ARM && defined OBJ_ELF
  const char *symver_state;
#endif
  char last_char;
};
307
308
char *
309
app_push (void)
310
1.77k
{
311
1.77k
  struct app_save *saved;
312
313
1.77k
  saved = XNEW (struct app_save);
314
1.77k
  saved->state = state;
315
1.77k
  saved->old_state = old_state;
316
1.77k
  saved->out_string = out_string;
317
1.77k
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
318
1.77k
  saved->add_newlines = add_newlines;
319
1.77k
  if (saved_input == NULL)
320
1.72k
    saved->saved_input = NULL;
321
43
  else
322
43
    {
323
43
      saved->saved_input = XNEWVEC (char, saved_input_len);
324
43
      memcpy (saved->saved_input, saved_input, saved_input_len);
325
43
      saved->saved_input_len = saved_input_len;
326
43
    }
327
1.77k
  saved->end_state = end_state;
328
#ifdef TC_M68K
329
  saved->scrub_m68k_mri = scrub_m68k_mri;
330
  saved->mri_state = mri_state;
331
  saved->mri_last_ch = mri_last_ch;
332
#endif
333
#if defined TC_ARM && defined OBJ_ELF
334
  saved->symver_state = symver_state;
335
#endif
336
1.77k
  saved->last_char = last_char;
337
338
  /* do_scrub_begin() is not useful, just wastes time.  */
339
340
1.77k
  state = 0;
341
1.77k
  saved_input = NULL;
342
1.77k
  add_newlines = 0;
343
344
1.77k
  return (char *) saved;
345
1.77k
}
346
347
void
348
app_pop (char *arg)
349
1.77k
{
350
1.77k
  struct app_save *saved = (struct app_save *) arg;
351
352
  /* There is no do_scrub_end ().  */
353
1.77k
  state = saved->state;
354
1.77k
  old_state = saved->old_state;
355
1.77k
  out_string = saved->out_string;
356
1.77k
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
357
1.77k
  add_newlines = saved->add_newlines;
358
1.77k
  if (saved->saved_input == NULL)
359
1.72k
    saved_input = NULL;
360
43
  else
361
43
    {
362
43
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
363
43
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
364
43
      saved_input = input_buffer;
365
43
      saved_input_len = saved->saved_input_len;
366
43
      free (saved->saved_input);
367
43
    }
368
1.77k
  end_state = saved->end_state;
369
#ifdef TC_M68K
370
  scrub_m68k_mri = saved->scrub_m68k_mri;
371
  mri_state = saved->mri_state;
372
  mri_last_ch = saved->mri_last_ch;
373
#endif
374
#if defined TC_ARM && defined OBJ_ELF
375
  symver_state = saved->symver_state;
376
#endif
377
1.77k
  last_char = saved->last_char;
378
379
1.77k
  free (arg);
380
1.77k
}
381
382
/* Map the character CH, which followed a backslash, to the character
   the escape sequence stands for.  Only b, f, n, r, t, the single
   quote and the double quote are recognized; any other value is
   returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */

static int
process_escape (int ch)
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';
  if (ch == '\'')
    return '\'';
  if (ch == '"')
    return '\"';

  /* Not an escape we know about: hand it back as is.  */
  return ch;
}
408
409
0
/* Number of multibyte character warnings issued before any further
   ones are suppressed, and the count of those issued so far.  */
#define MULTIBYTE_WARN_COUNT_LIMIT 10
static unsigned int multibyte_warn_count = 0;

/* Look for bytes above 0x7f in the range [START, END).

   If WARN is false, return true as soon as such a byte is found and
   false if there is none; nothing is reported.

   If WARN is true, a warning is issued for every such byte, naming the
   file and line reported by as_where when they are available, and the
   result says whether any was found.  Once
   MULTIBYTE_WARN_COUNT_LIMIT warnings have been issued a final
   "suppressed" warning is emitted and scanning stops; every later call
   with WARN set returns false without scanning.

   An empty or inverted range yields false.  */

bool
scan_for_multibyte_characters (const unsigned char *  start,
                               const unsigned char *  end,
                               bool                   warn)
{
  if (end <= start)
    return false;

  /* The loop below stops when the counter becomes equal to the limit,
     so "equal" must already count as suppressed here.  Testing with >
     let the next call warn about every multibyte character in its
     buffer after the user had been told that warnings were
     suppressed.  */
  if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
    return false;

  bool found = false;

  while (start < end)
    {
      unsigned char c;

      if ((c = * start++) <= 0x7f)
        continue;

      /* Without warnings the first hit settles the answer.  */
      if (!warn)
        return true;

      found = true;

      const char * filename;
      unsigned int lineno;

      filename = as_where (& lineno);
      if (filename == NULL)
        as_warn (_("multibyte character (%#x) encountered in input"), c);
      else if (lineno == 0)
        as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
      else
        as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);

      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
        {
          as_warn (_("further multibyte character warnings suppressed"));
          break;
        }
    }

  return found;
}
457
458
/* This function is called to process input characters.  The GET
459
   parameter is used to retrieve more input characters.  GET should
460
   set its parameter to point to a buffer, and return the length of
461
   the buffer; it should return 0 at end of file.  The scrubbed output
462
   characters are put into the buffer starting at TOSTART; the TOSTART
463
   buffer is TOLEN bytes in length.  The function returns the number
464
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
465
   end of file was seen.  This function is arranged as a state
466
   machine, and saves its state so that it may return at any point.
467
   This is the way the old code used to work.  */
468
469
size_t
470
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
471
    bool check_multibyte)
472
2.95k
{
473
2.95k
  char *to = tostart;
474
2.95k
  char *toend = tostart + tolen;
475
2.95k
  char *from;
476
2.95k
  char *fromend;
477
2.95k
  size_t fromlen;
478
2.95k
  int ch, ch2 = 0;
479
  /* Character that started the string we're working on.  */
480
2.95k
  static char quotechar;
481
482
  /*State 0: beginning of normal line
483
    1: After first whitespace on line (flush more white)
484
    2: After first non-white (opcode) on line (keep 1white)
485
    3: after second white on line (into operands) (flush white)
486
    4: after putting out a .linefile, put out digits
487
    5: parsing a string, then go to old-state
488
    6: putting out \ escape in a "d string.
489
    7: no longer used
490
    8: no longer used
491
    9: After seeing symbol char in state 3 (keep 1white after symchar)
492
   10: After seeing whitespace in state 9 (keep white before symchar)
493
   11: After seeing a symbol character in state 0 (eg a label definition)
494
   -1: output string in out_string and go to the state in old_state
495
   12: no longer used
496
#ifdef DOUBLEBAR_PARALLEL
497
   13: After seeing a vertical bar, looking for a second
498
       vertical bar as a parallel expression separator.
499
#endif
500
#ifdef TC_PREDICATE_START_CHAR
501
   14: After seeing a predicate start character at state 0, looking
502
       for a predicate end character as predicate.
503
   15: After seeing a predicate start character at state 1, looking
504
       for a predicate end character as predicate.
505
#endif
506
#ifdef TC_Z80
507
   16: After seeing an 'a' or an 'A' at the start of a symbol
508
   17: After seeing an 'f' or an 'F' in state 16
509
#endif
510
    */
511
512
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
513
     constructs like ``.loc 1 20''.  This was turning into ``.loc
514
     120''.  States 9 and 10 ensure that a space is never dropped in
515
     between characters which could appear in an identifier.  Ian
516
     Taylor, ian@cygnus.com.
517
518
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
519
     correctly on the PA (and any other target where colons are optional).
520
     Jeff Law, law@cs.utah.edu.
521
522
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
523
     get squashed into "cmp r1,r2||trap#1", with the all important space
524
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
525
526
  /* This macro gets the next input character.  */
527
528
2.95k
#define GET()             \
529
48.2M
  (from < fromend            \
530
48.2M
   ? * (unsigned char *) (from++)        \
531
48.2M
   : (saved_input = NULL,          \
532
5.17k
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
533
5.17k
      from = input_buffer,          \
534
5.17k
      fromend = from + fromlen,         \
535
5.17k
      (fromlen == 0            \
536
5.17k
       ? EOF              \
537
5.17k
       : * (unsigned char *) (from++))))
538
539
  /* This macro pushes a character back on the input stream.  */
540
541
4.51M
#define UNGET(uch) (*--from = (uch))
542
543
  /* This macro puts a character into the output buffer.  If this
544
     character fills the output buffer, this macro jumps to the label
545
     TOFULL.  We use this rather ugly approach because we need to
546
     handle two different termination conditions: EOF on the input
547
     stream, and a full output buffer.  It would be simpler if we
548
     always read in the entire input stream before processing it, but
549
     I don't want to make such a significant change to the assembler's
550
     memory usage.  */
551
552
2.95k
#define PUT(pch)        \
553
43.5M
  do            \
554
43.5M
    {           \
555
43.5M
      *to++ = (pch);        \
556
43.5M
      if (to >= toend)       \
557
43.5M
  goto tofull;       \
558
43.5M
    }           \
559
43.5M
  while (0)
560
561
2.95k
  if (saved_input != NULL)
562
529
    {
563
529
      from = saved_input;
564
529
      fromend = from + saved_input_len;
565
529
    }
566
2.43k
  else
567
2.43k
    {
568
2.43k
      fromlen = (*get) (input_buffer, sizeof input_buffer);
569
2.43k
      if (fromlen == 0)
570
562
  return 0;
571
1.86k
      from = input_buffer;
572
1.86k
      fromend = from + fromlen;
573
574
1.86k
      if (check_multibyte)
575
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
576
0
                (const unsigned char *) fromend,
577
0
                true /* Generate warnings.  */);
578
1.86k
    }
579
580
39.8M
  while (1)
581
39.8M
    {
582
      /* The cases in this switch end with continue, in order to
583
   branch back to the top of this while loop and generate the
584
   next output character in the appropriate state.  */
585
39.8M
      switch (state)
586
39.8M
  {
587
5.24M
  case -1:
588
5.24M
    ch = *out_string++;
589
5.24M
    if (*out_string == '\0')
590
3.33M
      {
591
3.33M
        state = old_state;
592
3.33M
        old_state = 3;
593
3.33M
      }
594
5.24M
    PUT (ch);
595
5.24M
    continue;
596
597
5.24M
  case 4:
598
37.1k
    ch = GET ();
599
37.1k
    if (ch == EOF)
600
0
      goto fromeof;
601
37.1k
    else if (ch >= '0' && ch <= '9')
602
18.8k
      PUT (ch);
603
18.2k
    else
604
18.2k
      {
605
18.2k
        while (ch != EOF && IS_WHITESPACE (ch))
606
10
    ch = GET ();
607
18.2k
        if (ch == '"')
608
17.8k
    {
609
17.8k
      quotechar = ch;
610
17.8k
      state = 5;
611
17.8k
      old_state = 3;
612
17.8k
      PUT (ch);
613
17.8k
    }
614
396
        else
615
396
    {
616
8.90k
      while (ch != EOF && ch != '\n')
617
8.50k
        ch = GET ();
618
396
      state = 0;
619
396
      PUT (ch);
620
396
    }
621
18.2k
      }
622
37.1k
    continue;
623
624
8.38M
  case 5:
625
    /* We are going to copy everything up to a quote character,
626
       with special handling for a backslash.  We try to
627
       optimize the copying in the simple case without using the
628
       GET and PUT macros.  */
629
8.38M
    {
630
8.38M
      char *s;
631
8.38M
      ptrdiff_t len;
632
633
42.4M
      for (s = from; s < fromend; s++)
634
42.4M
        {
635
42.4M
    ch = *s;
636
42.4M
    if (ch == '\\'
637
35.3M
        || ch == quotechar
638
34.7M
        || ch == '\n')
639
8.38M
      break;
640
42.4M
        }
641
8.38M
      len = s - from;
642
8.38M
      if (len > toend - to)
643
3
        len = toend - to;
644
8.38M
      if (len > 0)
645
4.98M
        {
646
4.98M
    memcpy (to, from, len);
647
4.98M
    to += len;
648
4.98M
    from += len;
649
4.98M
    if (to >= toend)
650
3
      goto tofull;
651
4.98M
        }
652
8.38M
    }
653
654
8.38M
    ch = GET ();
655
8.38M
    if (ch == EOF)
656
483
      {
657
        /* This buffer is here specifically so
658
     that the UNGET below will work.  */
659
483
        static char one_char_buf[1];
660
661
483
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
662
483
        state = old_state;
663
483
        from = fromend = one_char_buf + 1;
664
483
        fromlen = 1;
665
483
        UNGET ('\n');
666
483
        PUT (quotechar);
667
483
      }
668
8.38M
    else if (ch == quotechar)
669
620k
      {
670
620k
        state = old_state;
671
620k
        PUT (ch);
672
620k
      }
673
7.76M
    else if (TC_STRING_ESCAPES && ch == '\\')
674
7.00M
      {
675
7.00M
        state = 6;
676
7.00M
        PUT (ch);
677
7.00M
      }
678
761k
    else if (scrub_m68k_mri && ch == '\n')
679
0
      {
680
        /* Just quietly terminate the string.  This permits lines like
681
       bne  label loop if we haven't reach end yet.  */
682
0
        state = old_state;
683
0
        UNGET (ch);
684
0
        PUT ('\'');
685
0
      }
686
761k
    else
687
761k
      {
688
761k
        PUT (ch);
689
761k
      }
690
8.38M
    continue;
691
692
8.38M
  case 6:
693
7.00M
    state = 5;
694
7.00M
    ch = GET ();
695
7.00M
    switch (ch)
696
7.00M
      {
697
        /* Handle strings broken across lines, by turning '\n' into
698
     '\\' and 'n'.  */
699
23
      case '\n':
700
23
        UNGET ('n');
701
23
        add_newlines++;
702
23
        PUT ('\\');
703
23
        continue;
704
705
23
      case EOF:
706
3
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
707
3
        PUT (quotechar);
708
3
        continue;
709
710
        /* These two are used inside macros.  */
711
3
      case '@':
712
4
      case '+':
713
4
        break;
714
715
2
      case '"':
716
1.86M
      case '\\':
717
1.86M
      case 'b':
718
1.86M
      case 'f':
719
1.86M
      case 'n':
720
1.86M
      case 'r':
721
1.86M
      case 't':
722
1.86M
      case 'v':
723
1.86M
      case 'x':
724
1.86M
      case 'X':
725
1.86M
      case '0':
726
1.86M
      case '1':
727
1.86M
      case '2':
728
1.86M
      case '3':
729
1.86M
      case '4':
730
1.86M
      case '5':
731
1.86M
      case '6':
732
1.86M
      case '7':
733
1.86M
        break;
734
735
5.14M
      default:
736
#ifdef ONLY_STANDARD_ESCAPES
737
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
738
#endif
739
5.14M
        break;
740
7.00M
      }
741
7.00M
    PUT (ch);
742
7.00M
    continue;
743
744
#ifdef DOUBLEBAR_PARALLEL
745
  case 13:
746
    ch = GET ();
747
    if (ch != '|')
748
      abort ();
749
750
    /* Reset back to state 1 and pretend that we are parsing a
751
       line from just after the first white space.  */
752
    state = 1;
753
    PUT ('|');
754
    continue;
755
#endif
756
#ifdef TC_Z80
757
  case 16:
758
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
759
    ch = GET ();
760
    if (ch == 'f' || ch == 'F')
761
      {
762
        state = 17;
763
        PUT (ch);
764
      }
765
    else
766
      {
767
        if (ch != EOF)
768
    UNGET (ch);
769
        state = 9;
770
        break;
771
      }
772
    /* Fall through.  */
773
  case 17:
774
    /* We have seen "af" at the start of a symbol,
775
       a ' here is a part of that symbol.  */
776
    ch = GET ();
777
    state = 9;
778
    if (ch == '\'')
779
      /* Change to avoid warning about unclosed string.  */
780
      PUT ('`');
781
    else if (ch != EOF)
782
      UNGET (ch);
783
    break;
784
#endif
785
39.8M
  }
786
787
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
788
789
      /* flushchar: */
790
19.1M
      ch = GET ();
791
792
#ifdef TC_PREDICATE_START_CHAR
793
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
794
  {
795
    state += 14;
796
    PUT (ch);
797
    continue;
798
  }
799
      else if (state == 14 || state == 15)
800
  {
801
    if (ch == TC_PREDICATE_END_CHAR)
802
      {
803
        state -= 14;
804
        PUT (ch);
805
        ch = GET ();
806
      }
807
    else
808
      {
809
        PUT (ch);
810
        continue;
811
      }
812
  }
813
#endif
814
815
19.6M
    recycle:
816
817
      /* We need to watch out for .end directives: We should in particular not
818
   issue diagnostics for anything after an active one.  */
819
19.6M
      if (ch == EOF)
820
1.81k
  end_state = NULL;
821
19.6M
      else if (end_state == NULL)
822
19.0M
  {
823
19.0M
    if ((state == 0 || state == 1)
824
1.41M
        && (ch == '.'
825
925k
      || (no_pseudo_dot && ch == end_pseudo[0])))
826
494k
      end_state = end_pseudo + (ch != '.');
827
19.0M
  }
828
628k
      else if (ch != '\0'
829
628k
         && (*end_state == ch
830
       /* Avoid triggering on directives like .endif or .endr.  */
831
496k
       || (*end_state == ' ' && !IS_SYMBOL_COMPONENT (ch))))
832
132k
  {
833
132k
    if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
834
7
      goto end_end;
835
132k
    ++end_state;
836
132k
  }
837
495k
      else if (*end_state != '\0')
838
  /* We did not get the expected character, or we didn't
839
     get a valid terminating character after seeing the
840
     entire pseudo-op, so we must go back to the beginning.  */
841
493k
  end_state = NULL;
842
2.10k
      else if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
843
368
  {
844
375
  end_end:
845
    /* We've read the entire pseudo-op.  If this is the end of the line,
846
       bail out now by (ab)using the output-full path.  This allows the
847
       caller to process input up to here and terminate processing if this
848
       directive is actually active (not on the false branch of a
849
       conditional and not in a macro definition).  */
850
375
    end_state = NULL;
851
375
    state = 0;
852
375
    PUT (ch);
853
375
    goto tofull;
854
375
  }
855
856
#if defined TC_ARM && defined OBJ_ELF
857
      /* We need to watch out for .symver directives.  See the comment later
858
   in this function.  */
859
      if (ch == EOF)
860
  symver_state = NULL;
861
      else if (symver_state == NULL)
862
  {
863
    if ((state == 0 || state == 1)
864
        && strchr (tc_comment_chars, '@') != NULL
865
        && ch == symver_pseudo[0])
866
      symver_state = symver_pseudo + 1;
867
  }
868
      else
869
  {
870
    /* We advance to the next state if we find the right
871
       character.  */
872
    if (ch != '\0' && (*symver_state == ch))
873
      ++symver_state;
874
    else if (*symver_state != '\0')
875
      /* We did not get the expected character, or we didn't
876
         get a valid terminating character after seeing the
877
         entire pseudo-op, so we must go back to the beginning.  */
878
      symver_state = NULL;
879
    else
880
      {
881
        /* We've read the entire pseudo-op.  If this is the end
882
     of the line, go back to the beginning.  */
883
        if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
884
    symver_state = NULL;
885
      }
886
  }
887
#endif /* TC_ARM && OBJ_ELF */
888
889
#ifdef TC_M68K
890
      /* We want to have pseudo-ops which control whether we are in
891
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
892
   the scrubber, that means that we need a special purpose
893
   recognizer here.  */
894
      if (ch == EOF)
895
  mri_state = NULL;
896
      else if (mri_state == NULL)
897
  {
898
    if ((state == 0 || state == 1)
899
        && ch == mri_pseudo[0])
900
      mri_state = mri_pseudo + 1;
901
  }
902
      else
903
  {
904
    /* We advance to the next state if we find the right
905
       character, or if we need a space character and we get any
906
       whitespace character, or if we need a '0' and we get a
907
       '1' (this is so that we only need one state to handle
908
       ``.mri 0'' and ``.mri 1'').  */
909
    if (ch != '\0'
910
        && (*mri_state == ch
911
      || (*mri_state == ' '
912
          && IS_WHITESPACE (ch))
913
      || (*mri_state == '0'
914
          && ch == '1')))
915
      {
916
        mri_last_ch = ch;
917
        ++mri_state;
918
      }
919
    else if (*mri_state != '\0'
920
       || (!IS_WHITESPACE (ch)
921
           && !IS_LINE_SEPARATOR (ch)
922
           && !IS_NEWLINE (ch)))
923
      {
924
        /* We did not get the expected character, or we didn't
925
     get a valid terminating character after seeing the
926
     entire pseudo-op, so we must go back to the
927
     beginning.  */
928
        mri_state = NULL;
929
      }
930
    else
931
      {
932
        /* We've read the entire pseudo-op.  mri_last_ch is
933
     either '0' or '1' indicating whether to enter or
934
     leave MRI mode.  */
935
        do_scrub_begin (mri_last_ch == '1');
936
        mri_state = NULL;
937
938
        /* We continue handling the character as usual.  The
939
     main gas reader must also handle the .mri pseudo-op
940
     to control expression parsing and the like.  */
941
      }
942
  }
943
#endif
944
945
19.6M
      if (ch == EOF)
946
1.81k
  {
947
1.81k
    if (state != 0)
948
324
      {
949
324
        as_warn (_("end of file not at end of a line; newline inserted"));
950
324
        state = 0;
951
324
        PUT ('\n');
952
324
      }
953
1.81k
    goto fromeof;
954
1.81k
  }
955
956
19.6M
      switch (lex[ch])
957
19.6M
  {
958
1.01M
  case LEX_IS_WHITESPACE:
959
1.01M
    do
960
1.04M
      {
961
1.04M
        ch = GET ();
962
1.04M
      }
963
1.04M
    while (ch != EOF && IS_WHITESPACE (ch));
964
1.01M
    if (ch == EOF)
965
18
      goto fromeof;
966
967
1.01M
    if (state == 0)
968
268k
      {
969
        /* Preserve a single whitespace character at the
970
     beginning of a line.  */
971
268k
        state = 1;
972
268k
        UNGET (ch);
973
268k
        PUT (' ');
974
268k
        break;
975
268k
      }
976
977
#ifdef KEEP_WHITE_AROUND_COLON
978
    if (lex[ch] == LEX_IS_COLON)
979
      {
980
        /* Only keep this white if there's no white *after* the
981
     colon.  */
982
        ch2 = GET ();
983
        if (ch2 != EOF)
984
    UNGET (ch2);
985
        if (!IS_WHITESPACE (ch2))
986
    {
987
      state = 9;
988
      UNGET (ch);
989
      PUT (' ');
990
      break;
991
    }
992
      }
993
#endif
994
995
    /* Prune trailing whitespace.  */
996
742k
    if (IS_COMMENT (ch)
997
742k
        || (IS_LINE_COMMENT (ch)
998
23
            && (state < 1 || strchr (tc_comment_chars, ch)))
999
742k
        || IS_NEWLINE (ch)
1000
741k
        || IS_LINE_SEPARATOR (ch)
1001
723k
        || IS_PARALLEL_SEPARATOR (ch))
1002
19.1k
      {
1003
19.1k
        if (scrub_m68k_mri)
1004
0
    {
1005
      /* In MRI mode, we keep these spaces.  */
1006
0
      UNGET (ch);
1007
0
      PUT (' ');
1008
0
      break;
1009
0
    }
1010
19.1k
        goto recycle;
1011
19.1k
      }
1012
#ifdef DOUBLESLASH_LINE_COMMENTS
1013
    if (IS_TWOCHAR_COMMENT_1ST (ch))
1014
      {
1015
        ch2 = GET ();
1016
        if (ch2 != EOF)
1017
          UNGET (ch2);
1018
        if (ch2 == '/')
1019
    goto recycle;
1020
      }
1021
#endif
1022
1023
    /* If we're in state 2 or 11, we've seen a non-white
1024
       character followed by whitespace.  If the next character
1025
       is ':', this is whitespace after a label name which we
1026
       normally must ignore.  In MRI mode, though, spaces are
1027
       not permitted between the label and the colon.  */
1028
723k
    if ((state == 2 || state == 11)
1029
444k
        && lex[ch] == LEX_IS_COLON
1030
0
        && ! scrub_m68k_mri)
1031
279
      {
1032
279
        state = 1;
1033
279
        PUT (ch);
1034
279
        break;
1035
279
      }
1036
1037
723k
    switch (state)
1038
723k
      {
1039
2
      case 1:
1040
        /* We can arrive here if we leave a leading whitespace
1041
     character at the beginning of a line.  */
1042
2
        goto recycle;
1043
221k
      case 2:
1044
221k
        state = 3;
1045
221k
        if (to + 1 < toend)
1046
221k
    {
1047
      /* Optimize common case by skipping UNGET/GET.  */
1048
221k
      PUT (' '); /* Sp after opco */
1049
221k
      goto recycle;
1050
221k
    }
1051
0
        UNGET (ch);
1052
0
        PUT (' ');
1053
0
        break;
1054
3.11k
      case 3:
1055
3.11k
#ifndef TC_KEEP_OPERAND_SPACES
1056
        /* For TI C6X, we keep these spaces as they may separate
1057
     functional unit specifiers from operands.  */
1058
3.11k
        if (scrub_m68k_mri)
1059
0
#endif
1060
0
    {
1061
      /* In MRI mode, we keep these spaces.  */
1062
0
      UNGET (ch);
1063
0
      PUT (' ');
1064
0
      break;
1065
0
    }
1066
3.11k
        goto recycle; /* Sp in operands */
1067
276k
      case 9:
1068
276k
      case 10:
1069
276k
#ifndef TC_KEEP_OPERAND_SPACES
1070
276k
        if (scrub_m68k_mri)
1071
0
#endif
1072
0
    {
1073
      /* In MRI mode, we keep these spaces.  */
1074
0
      state = 3;
1075
0
      UNGET (ch);
1076
0
      PUT (' ');
1077
0
      break;
1078
0
    }
1079
276k
        state = 10; /* Sp after symbol char */
1080
276k
        goto recycle;
1081
222k
      case 11:
1082
222k
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1083
0
    state = 1;
1084
222k
        else
1085
222k
    {
1086
      /* We know that ch is not ':', since we tested that
1087
         case above.  Therefore this is not a label, so it
1088
         must be the opcode, and we've just seen the
1089
         whitespace after it.  */
1090
222k
      state = 3;
1091
222k
    }
1092
222k
        UNGET (ch);
1093
222k
        PUT (' '); /* Sp after label definition.  */
1094
222k
        break;
1095
222k
      default:
1096
0
        BAD_CASE (state);
1097
723k
      }
1098
222k
    break;
1099
1100
222k
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1101
0
    ch2 = GET ();
1102
0
    if (ch2 == '*')
1103
0
      {
1104
12
  twochar_comment:
1105
12
        for (;;)
1106
32
    {
1107
32
      do
1108
1.39k
        {
1109
1.39k
          ch2 = GET ();
1110
1.39k
          if (ch2 != EOF && IS_NEWLINE (ch2))
1111
245
      add_newlines++;
1112
1.39k
        }
1113
1.39k
      while (ch2 != EOF && ch2 != '*');
1114
1115
87
      while (ch2 == '*')
1116
55
        ch2 = GET ();
1117
1118
32
      if (ch2 == EOF || ch2 == '/')
1119
12
        break;
1120
1121
      /* This UNGET will ensure that we count newlines
1122
         correctly.  */
1123
20
      UNGET (ch2);
1124
20
    }
1125
1126
12
        if (ch2 == EOF)
1127
7
    as_warn (_("end of file in multiline comment"));
1128
1129
12
        ch = ' ';
1130
12
        goto recycle;
1131
0
      }
1132
#ifdef DOUBLESLASH_LINE_COMMENTS
1133
    else if (ch2 == '/')
1134
      {
1135
        do
1136
    {
1137
      ch = GET ();
1138
    }
1139
        while (ch != EOF && !IS_NEWLINE (ch));
1140
        if (ch == EOF)
1141
    as_warn ("end of file in comment; newline inserted");
1142
        state = 0;
1143
        PUT ('\n');
1144
        break;
1145
      }
1146
#endif
1147
0
    else
1148
0
      {
1149
0
        if (ch2 != EOF)
1150
0
    UNGET (ch2);
1151
0
        if (state == 9 || state == 10)
1152
0
    state = 3;
1153
0
        PUT (ch);
1154
0
      }
1155
0
    break;
1156
1157
603k
  case LEX_IS_STRINGQUOTE:
1158
603k
    quotechar = ch;
1159
603k
    if (state == 10)
1160
123k
      {
1161
        /* Preserve the whitespace in foo "bar".  */
1162
123k
        UNGET (ch);
1163
123k
        state = 3;
1164
123k
        PUT (' ');
1165
1166
        /* PUT didn't jump out.  We could just break, but we
1167
     know what will happen, so optimize a bit.  */
1168
123k
        ch = GET ();
1169
123k
        old_state = 9;
1170
123k
      }
1171
480k
    else if (state == 3)
1172
74.6k
      old_state = 9;
1173
405k
    else if (state == 0)
1174
1.07k
      old_state = 11; /* Now seeing label definition.  */
1175
404k
    else
1176
404k
      old_state = state;
1177
603k
    state = 5;
1178
603k
    PUT (ch);
1179
603k
    break;
1180
1181
3.35M
  case LEX_IS_ONECHAR_QUOTE:
1182
#ifdef H_TICK_HEX
1183
    if (state == 9 && enable_h_tick_hex)
1184
      {
1185
        char c;
1186
1187
        c = GET ();
1188
        as_warn ("'%c found after symbol", c);
1189
        UNGET (c);
1190
      }
1191
#endif
1192
3.35M
    if (state == 10)
1193
160
      {
1194
        /* Preserve the whitespace in foo 'b'.  */
1195
160
        UNGET (ch);
1196
160
        state = 3;
1197
160
        PUT (' ');
1198
160
        break;
1199
160
      }
1200
3.35M
    ch = GET ();
1201
3.35M
    if (ch == EOF)
1202
1
      {
1203
1
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1204
1
        ch = 0;
1205
1
      }
1206
3.35M
    if (ch == '\\')
1207
1.63M
      {
1208
1.63M
        ch = GET ();
1209
1.63M
        if (ch == EOF)
1210
0
    {
1211
0
      as_warn (_("end of file in escape character"));
1212
0
      ch = '\\';
1213
0
    }
1214
1.63M
        else
1215
1.63M
    ch = process_escape (ch);
1216
1.63M
      }
1217
3.35M
    sprintf (out_buf, "%d", ch & 0xff);
1218
1219
    /* None of these 'x constants for us.  We want 'x'.  */
1220
3.35M
    if ((ch = GET ()) != '\'')
1221
3.31M
      {
1222
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1223
        as_warn (_("missing close quote; (assumed)"));
1224
#else
1225
3.31M
        if (ch != EOF)
1226
3.31M
    UNGET (ch);
1227
3.31M
#endif
1228
3.31M
      }
1229
3.35M
    if (strlen (out_buf) == 1)
1230
32.8k
      {
1231
32.8k
        PUT (out_buf[0]);
1232
32.8k
        break;
1233
32.8k
      }
1234
3.32M
    if (state == 9)
1235
2.88M
      old_state = 3;
1236
433k
    else
1237
433k
      old_state = state;
1238
3.32M
    state = -1;
1239
3.32M
    out_string = out_buf;
1240
3.32M
    PUT (*out_string++);
1241
3.32M
    break;
1242
1243
3.32M
  case LEX_IS_COLON:
1244
#ifdef KEEP_WHITE_AROUND_COLON
1245
    state = 9;
1246
#else
1247
40.8k
    if (state == 9 || state == 10)
1248
4.60k
      state = 3;
1249
36.2k
    else if (state != 3)
1250
35.5k
      state = 1;
1251
40.8k
#endif
1252
40.8k
    PUT (ch);
1253
40.8k
    break;
1254
1255
943k
  case LEX_IS_NEWLINE:
1256
    /* Roll out a bunch of newlines from inside comments, etc.  */
1257
943k
    if (add_newlines)
1258
24
      {
1259
24
        --add_newlines;
1260
24
        UNGET (ch);
1261
24
      }
1262
    /* Fall through.  */
1263
1264
1.08M
  case LEX_IS_LINE_SEPARATOR:
1265
1.08M
    state = 0;
1266
1.08M
    PUT (ch);
1267
1.08M
    break;
1268
1269
1.08M
  case LEX_IS_PARALLEL_SEPARATOR:
1270
0
    state = 1;
1271
0
    PUT (ch);
1272
0
    break;
1273
1274
#ifdef TC_V850
1275
  case LEX_IS_DOUBLEDASH_1ST:
1276
    ch2 = GET ();
1277
    if (ch2 != '-')
1278
      {
1279
        if (ch2 != EOF)
1280
    UNGET (ch2);
1281
        goto de_fault;
1282
      }
1283
    /* Read and skip to end of line.  */
1284
    do
1285
      {
1286
        ch = GET ();
1287
      }
1288
    while (ch != EOF && ch != '\n');
1289
1290
    if (ch == EOF)
1291
      as_warn (_("end of file in comment; newline inserted"));
1292
1293
    state = 0;
1294
    PUT ('\n');
1295
    break;
1296
#endif
1297
#ifdef DOUBLEBAR_PARALLEL
1298
  case LEX_IS_DOUBLEBAR_1ST:
1299
    ch2 = GET ();
1300
    if (ch2 != EOF)
1301
      UNGET (ch2);
1302
    if (ch2 != '|')
1303
      goto de_fault;
1304
1305
    /* Handle '||' in two states as invoking PUT twice might
1306
       result in the first one jumping out of this loop.  We'd
1307
       then lose track of the state and one '|' char.  */
1308
    state = 13;
1309
    PUT ('|');
1310
    break;
1311
#endif
1312
456k
  case LEX_IS_LINE_COMMENT_START:
1313
    /* FIXME-someday: The two character comment stuff was badly
1314
       thought out.  On i386, we want '/' as line comment start
1315
       AND we want C style comments.  hence this hack.  The
1316
       whole lexical process should be reworked.  xoxorich.  */
1317
456k
    if (ch == '/')
1318
428k
      {
1319
428k
        ch2 = GET ();
1320
428k
        if (ch2 == '*')
1321
12
    goto twochar_comment;
1322
428k
        if (ch2 != EOF)
1323
428k
    UNGET (ch2);
1324
428k
      }
1325
1326
456k
    if (state == 0 || state == 1)  /* Only comment at start of line.  */
1327
43.8k
      {
1328
43.8k
        int startch;
1329
1330
43.8k
        startch = ch;
1331
1332
43.8k
        do
1333
43.8k
    {
1334
43.8k
      ch = GET ();
1335
43.8k
    }
1336
43.8k
        while (ch != EOF && IS_WHITESPACE (ch));
1337
1338
43.8k
        if (ch == EOF)
1339
0
    {
1340
0
      as_warn (_("end of file in comment; newline inserted"));
1341
0
      PUT ('\n');
1342
0
      break;
1343
0
    }
1344
1345
43.8k
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1346
25.6k
    {
1347
      /* Not a cpp line.  */
1348
333k
      while (ch != EOF && !IS_NEWLINE (ch))
1349
308k
        ch = GET ();
1350
25.6k
      if (ch == EOF)
1351
4
        {
1352
4
          as_warn (_("end of file in comment; newline inserted"));
1353
4
          PUT ('\n');
1354
4
        }
1355
25.6k
      else /* IS_NEWLINE (ch) */
1356
25.6k
        {
1357
          /* To process non-zero add_newlines.  */
1358
25.6k
          UNGET (ch);
1359
25.6k
        }
1360
25.6k
      state = 0;
1361
25.6k
      break;
1362
25.6k
    }
1363
        /* Looks like `# 123 "filename"' from cpp.  */
1364
18.2k
        UNGET (ch);
1365
18.2k
        old_state = 4;
1366
18.2k
        state = -1;
1367
18.2k
        if (scrub_m68k_mri)
1368
0
    out_string = "\tlinefile ";
1369
18.2k
        else
1370
18.2k
    out_string = "\t.linefile ";
1371
18.2k
        PUT (*out_string++);
1372
18.2k
        break;
1373
18.2k
      }
1374
1375
#ifdef TC_D10V
1376
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1377
       Trap is the only short insn that has a first operand that is
1378
       neither register nor label.
1379
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1380
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1381
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1382
       only character in line_comment_chars for d10v, hence we
1383
       can recognize it as such.  */
1384
    /* An alternative approach would be to reset the state to 1 when
1385
       we see '||', '<'- or '->', but that seems to be overkill.  */
1386
    if (state == 10)
1387
      PUT (' ');
1388
#endif
1389
    /* We have a line comment character which is not at the
1390
       start of a line.  If this is also a normal comment
1391
       character, fall through.  Otherwise treat it as a default
1392
       character.  */
1393
412k
    if (strchr (tc_comment_chars, ch) == NULL)
1394
411k
      goto de_fault;
1395
1.46k
    if (scrub_m68k_mri
1396
0
        && (ch == '!' || ch == '*' || ch == '#'))
1397
0
      goto de_fault;
1398
    /* Fall through.  */
1399
1.46k
  case LEX_IS_COMMENT_START:
1400
#if defined TC_ARM && defined OBJ_ELF
1401
    /* On the ARM, `@' is the comment character.
1402
       Unfortunately this is also a special character in ELF .symver
1403
       directives (and .type, though we deal with those another way).
1404
       So we check if this line is such a directive, and treat
1405
       the character as default if so.  This is a hack.  */
1406
    if ((symver_state != NULL) && (*symver_state == 0))
1407
      goto de_fault;
1408
#endif
1409
1410
    /* Care is needed not to damage occurrences of \<comment-char>
1411
       by stripping the <comment-char> onwards.  Yuck.  */
1412
1.46k
    if ((to > tostart ? to[-1] : last_char) == '\\')
1413
      /* Do not treat the <comment-char> as a start-of-comment.  */
1414
1
      goto de_fault;
1415
1416
#ifdef WARN_COMMENTS
1417
    if (!found_comment)
1418
      found_comment_file = as_where (&found_comment);
1419
#endif
1420
1.45k
    do
1421
33.5k
      {
1422
33.5k
        ch = GET ();
1423
33.5k
      }
1424
33.5k
    while (ch != EOF && !IS_NEWLINE (ch));
1425
1.45k
    if (ch == EOF)
1426
13
      as_warn (_("end of file in comment; newline inserted"));
1427
1.45k
    state = 0;
1428
1.45k
    PUT ('\n');
1429
1.45k
    break;
1430
1431
#ifdef H_TICK_HEX
1432
  case LEX_IS_H:
1433
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1434
       the H' with 0x to make them gas-style hex characters.  */
1435
    if (enable_h_tick_hex)
1436
      {
1437
        char quot;
1438
1439
        quot = GET ();
1440
        if (quot == '\'')
1441
    {
1442
      UNGET ('x');
1443
      ch = '0';
1444
    }
1445
        else
1446
    UNGET (quot);
1447
      }
1448
#endif
1449
    /* Fall through.  */
1450
1451
7.79M
  case LEX_IS_SYMBOL_COMPONENT:
1452
7.79M
    if (state == 10)
1453
115k
      {
1454
        /* This is a symbol character following another symbol
1455
     character, with whitespace in between.  We skipped
1456
     the whitespace earlier, so output it now.  */
1457
115k
        UNGET (ch);
1458
115k
        state = 3;
1459
115k
        PUT (' ');
1460
115k
        break;
1461
115k
      }
1462
1463
#ifdef TC_Z80
1464
    /* "af'" is a symbol containing '\''.  */
1465
    if (state == 3 && (ch == 'a' || ch == 'A'))
1466
      {
1467
        state = 16;
1468
        PUT (ch);
1469
        ch = GET ();
1470
        if (ch == 'f' || ch == 'F')
1471
    {
1472
      state = 17;
1473
      PUT (ch);
1474
      break;
1475
    }
1476
        else
1477
    {
1478
      state = 9;
1479
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1480
        {
1481
          if (ch != EOF)
1482
      UNGET (ch);
1483
          break;
1484
        }
1485
    }
1486
      }
1487
#endif
1488
7.67M
    if (state == 3)
1489
3.73M
      state = 9;
1490
1491
    /* This is a common case.  Quickly copy CH and all the
1492
       following symbol component or normal characters.  */
1493
7.67M
    if (to + 1 < toend
1494
#ifdef TC_M68K
1495
        && mri_state == NULL
1496
#endif
1497
#if defined TC_ARM && defined OBJ_ELF
1498
        && symver_state == NULL
1499
#endif
1500
7.67M
        && end_state == NULL)
1501
7.04M
      {
1502
7.04M
        char *s;
1503
7.04M
        ptrdiff_t len;
1504
1505
35.3M
        for (s = from; s < fromend; s++)
1506
35.3M
    {
1507
35.3M
      int type;
1508
1509
35.3M
      ch2 = *(unsigned char *) s;
1510
35.3M
      type = lex[ch2];
1511
35.3M
      if (type != 0
1512
30.8M
          && type != LEX_IS_SYMBOL_COMPONENT)
1513
7.04M
        break;
1514
35.3M
    }
1515
1516
7.04M
        if (s > from)
1517
    /* Handle the last character normally, for
1518
       simplicity.  */
1519
5.08M
    --s;
1520
1521
7.04M
        len = s - from;
1522
1523
7.04M
        if (len > (toend - to) - 1)
1524
1
    len = (toend - to) - 1;
1525
1526
7.04M
        if (len > 0)
1527
3.39M
    {
1528
3.39M
      PUT (ch);
1529
3.39M
      memcpy (to, from, len);
1530
3.39M
      to += len;
1531
3.39M
      from += len;
1532
3.39M
      if (to >= toend)
1533
2
        goto tofull;
1534
3.39M
      ch = GET ();
1535
3.39M
    }
1536
7.04M
      }
1537
1538
    /* Fall through.  */
1539
12.9M
  default:
1540
13.3M
  de_fault:
1541
    /* Some relatively `normal' character.  */
1542
13.3M
    if (state == 0)
1543
689k
      {
1544
689k
        state = 11; /* Now seeing label definition.  */
1545
689k
      }
1546
12.7M
    else if (state == 1)
1547
258k
      {
1548
258k
        state = 2;  /* Ditto.  */
1549
258k
      }
1550
12.4M
    else if (state == 9)
1551
5.57M
      {
1552
5.57M
        if (!IS_SYMBOL_COMPONENT (ch))
1553
337k
    state = 3;
1554
5.57M
      }
1555
6.87M
    else if (state == 10)
1556
36.5k
      {
1557
36.5k
        if (ch == '\\')
1558
18
    {
1559
      /* Special handling for backslash: a backslash may
1560
         be the beginning of a formal parameter (of a
1561
         macro) following another symbol character, with
1562
         whitespace in between.  If that is the case, we
1563
         output a space before the parameter.  Strictly
1564
         speaking, correct handling depends upon what the
1565
         macro parameter expands into; if the parameter
1566
         expands into something which does not start with
1567
         an operand character, then we don't want to keep
1568
         the space.  We don't have enough information to
1569
         make the right choice, so here we are making the
1570
         choice which is more likely to be correct.  */
1571
18
      if (to + 1 >= toend)
1572
0
        {
1573
          /* If we're near the end of the buffer, save the
1574
             character for the next time round.  Otherwise
1575
             we'll lose our state.  */
1576
0
          UNGET (ch);
1577
0
          goto tofull;
1578
0
        }
1579
18
      *to++ = ' ';
1580
18
    }
1581
1582
36.5k
        state = 3;
1583
36.5k
      }
1584
13.3M
    PUT (ch);
1585
13.3M
    break;
1586
19.6M
  }
1587
19.6M
    }
1588
1589
  /*NOTREACHED*/
1590
1591
1.83k
 fromeof:
1592
  /* We have reached the end of the input.  */
1593
1.83k
  if (to > tostart)
1594
1.83k
    last_char = to[-1];
1595
1.83k
  return to - tostart;
1596
1597
564
 tofull:
1598
  /* The output buffer is full.  Save any input we have not yet
1599
     processed.  */
1600
564
  if (fromend > from)
1601
529
    {
1602
529
      saved_input = from;
1603
529
      saved_input_len = fromend - from;
1604
529
    }
1605
35
  else
1606
35
    saved_input = NULL;
1607
1608
564
  if (to > tostart)
1609
564
    last_char = to[-1];
1610
564
  return to - tostart;
1611
2.39k
}
1612
1613
/* Return amount of pending input.  */
1614
1615
size_t
1616
do_scrub_pending (void)
1617
3.35k
{
1618
3.35k
  size_t len = 0;
1619
3.35k
  if (saved_input)
1620
380
    len += saved_input_len;
1621
3.35k
  if (state == -1)
1622
19
    len += strlen (out_string);
1623
3.35k
  return len;
1624
3.35k
}