Coverage Report

Created: 2026-05-11 07:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/binutils-gdb/gas/app.c
Line
Count
Source
1
/* This is the Assembler Pre-Processor
2
   Copyright (C) 1987-2026 Free Software Foundation, Inc.
3
4
   This file is part of GAS, the GNU Assembler.
5
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
#if (__STDC__ != 1)
30
#ifndef const
31
#define const  /* empty */
32
#endif
33
#endif
34
35
#ifdef H_TICK_HEX
36
int enable_h_tick_hex = 0;
37
#endif
38
39
#ifdef TC_M68K
40
/* Whether we are scrubbing in m68k MRI mode.  This is different from
41
   flag_m68k_mri, because the two flags will be affected by the .mri
42
   pseudo-op at different times.  */
43
static int scrub_m68k_mri;
44
45
/* The pseudo-op which switches in and out of MRI mode.  See the
46
   comment in do_scrub_chars.  */
47
static const char mri_pseudo[] = ".mri 0";
48
static const char *mri_state;
49
static char mri_last_ch;
50
#else
51
983k
#define scrub_m68k_mri 0
52
#endif
53
54
#if defined TC_ARM && defined OBJ_ELF
55
/* The pseudo-op for which we need to special-case `@' characters.
56
   See the comment in do_scrub_chars.  */
57
static const char   symver_pseudo[] = ".symver";
58
static const char * symver_state;
59
#endif
60
61
/* The pseudo-op (without leading dot) at which we want to (perhaps just
62
   temporarily) stop processing.  See the comments in do_scrub_chars().  */
63
static const char   end_pseudo[] = "end ";
64
static const char * end_state;
65
66
/* Whether, considering the state at start of assembly, NO_PSEUDO_DOT is
67
   active.  */
68
static bool no_pseudo_dot;
69
70
static char last_char;
71
72
38.1M
#define LEX_IS_SYMBOL_COMPONENT   1
73
1.84M
#define LEX_IS_WHITESPACE   2
74
1.34M
#define LEX_IS_LINE_SEPARATOR   3
75
591k
#define LEX_IS_COMMENT_START    4
76
932k
#define LEX_IS_LINE_COMMENT_START 5
77
0
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
78
327k
#define LEX_IS_STRINGQUOTE    8
79
920k
#define LEX_IS_COLON      9
80
1.43M
#define LEX_IS_NEWLINE      10
81
3.34M
#define LEX_IS_ONECHAR_QUOTE    11
82
#ifdef TC_V850
83
#define LEX_IS_DOUBLEDASH_1ST   12
84
#endif
85
#ifdef DOUBLEBAR_PARALLEL
86
#define LEX_IS_DOUBLEBAR_1ST    13
87
#endif
88
587k
#define LEX_IS_PARALLEL_SEPARATOR 14
89
#ifdef H_TICK_HEX
90
#define LEX_IS_H      15
91
#endif
92
5.06M
#define IS_SYMBOL_COMPONENT(c)    (lex[c] == LEX_IS_SYMBOL_COMPONENT)
93
986k
#define IS_WHITESPACE(c)    (lex[c] == LEX_IS_WHITESPACE)
94
1.20M
#define IS_LINE_SEPARATOR(c)    (lex[c] == LEX_IS_LINE_SEPARATOR)
95
587k
#define IS_PARALLEL_SEPARATOR(c)  (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
96
1.18M
#define IS_COMMENT(c)     (lex[c] == LEX_IS_COMMENT_START)
97
1.18M
#define IS_LINE_COMMENT(c)    (lex[c] == LEX_IS_LINE_COMMENT_START)
98
#define IS_TWOCHAR_COMMENT_1ST(c) (lex[c] == LEX_IS_TWOCHAR_COMMENT_1ST)
99
1.44M
#define IS_NEWLINE(c)     (lex[c] == LEX_IS_NEWLINE)
100
101
/* Lexical class of every possible input byte, indexed by the byte's
   value as an unsigned char.  An entry of 0 means the byte has no
   special class.  The initializer below classifies blanks, newline,
   colon, '$', '.', '_', the ASCII letters and digits, and (through
   INIT128 (128)) every byte in the range 128..255 as a symbol
   component.  do_scrub_begin () assigns further classes at run time
   and may override the entries given here.  */
static char lex[256] = {
102
  [' ']  = LEX_IS_WHITESPACE,
103
  ['\t'] = LEX_IS_WHITESPACE,
104
#ifdef CR_EOL
105
  ['\r'] = LEX_IS_LINE_SEPARATOR,
106
#else
107
  ['\r'] = LEX_IS_WHITESPACE,
108
#endif
109
  ['\n'] = LEX_IS_NEWLINE,
110
  [':'] = LEX_IS_COLON,
111
  ['$'] = LEX_IS_SYMBOL_COMPONENT,
112
  ['.'] = LEX_IS_SYMBOL_COMPONENT,
113
  ['_'] = LEX_IS_SYMBOL_COMPONENT,
114
  ['A'] = LEX_IS_SYMBOL_COMPONENT, ['a'] = LEX_IS_SYMBOL_COMPONENT,
115
  ['B'] = LEX_IS_SYMBOL_COMPONENT, ['b'] = LEX_IS_SYMBOL_COMPONENT,
116
  ['C'] = LEX_IS_SYMBOL_COMPONENT, ['c'] = LEX_IS_SYMBOL_COMPONENT,
117
  ['D'] = LEX_IS_SYMBOL_COMPONENT, ['d'] = LEX_IS_SYMBOL_COMPONENT,
118
  ['E'] = LEX_IS_SYMBOL_COMPONENT, ['e'] = LEX_IS_SYMBOL_COMPONENT,
119
  ['F'] = LEX_IS_SYMBOL_COMPONENT, ['f'] = LEX_IS_SYMBOL_COMPONENT,
120
  ['G'] = LEX_IS_SYMBOL_COMPONENT, ['g'] = LEX_IS_SYMBOL_COMPONENT,
121
  ['H'] = LEX_IS_SYMBOL_COMPONENT, ['h'] = LEX_IS_SYMBOL_COMPONENT,
122
  ['I'] = LEX_IS_SYMBOL_COMPONENT, ['i'] = LEX_IS_SYMBOL_COMPONENT,
123
  ['J'] = LEX_IS_SYMBOL_COMPONENT, ['j'] = LEX_IS_SYMBOL_COMPONENT,
124
  ['K'] = LEX_IS_SYMBOL_COMPONENT, ['k'] = LEX_IS_SYMBOL_COMPONENT,
125
  ['L'] = LEX_IS_SYMBOL_COMPONENT, ['l'] = LEX_IS_SYMBOL_COMPONENT,
126
  ['M'] = LEX_IS_SYMBOL_COMPONENT, ['m'] = LEX_IS_SYMBOL_COMPONENT,
127
  ['N'] = LEX_IS_SYMBOL_COMPONENT, ['n'] = LEX_IS_SYMBOL_COMPONENT,
128
  ['O'] = LEX_IS_SYMBOL_COMPONENT, ['o'] = LEX_IS_SYMBOL_COMPONENT,
129
  ['P'] = LEX_IS_SYMBOL_COMPONENT, ['p'] = LEX_IS_SYMBOL_COMPONENT,
130
  ['Q'] = LEX_IS_SYMBOL_COMPONENT, ['q'] = LEX_IS_SYMBOL_COMPONENT,
131
  ['R'] = LEX_IS_SYMBOL_COMPONENT, ['r'] = LEX_IS_SYMBOL_COMPONENT,
132
  ['S'] = LEX_IS_SYMBOL_COMPONENT, ['s'] = LEX_IS_SYMBOL_COMPONENT,
133
  ['T'] = LEX_IS_SYMBOL_COMPONENT, ['t'] = LEX_IS_SYMBOL_COMPONENT,
134
  ['U'] = LEX_IS_SYMBOL_COMPONENT, ['u'] = LEX_IS_SYMBOL_COMPONENT,
135
  ['V'] = LEX_IS_SYMBOL_COMPONENT, ['v'] = LEX_IS_SYMBOL_COMPONENT,
136
  ['W'] = LEX_IS_SYMBOL_COMPONENT, ['w'] = LEX_IS_SYMBOL_COMPONENT,
137
  ['X'] = LEX_IS_SYMBOL_COMPONENT, ['x'] = LEX_IS_SYMBOL_COMPONENT,
138
  ['Y'] = LEX_IS_SYMBOL_COMPONENT, ['y'] = LEX_IS_SYMBOL_COMPONENT,
139
  ['Z'] = LEX_IS_SYMBOL_COMPONENT, ['z'] = LEX_IS_SYMBOL_COMPONENT,
140
  ['0'] = LEX_IS_SYMBOL_COMPONENT,
141
  ['1'] = LEX_IS_SYMBOL_COMPONENT,
142
  ['2'] = LEX_IS_SYMBOL_COMPONENT,
143
  ['3'] = LEX_IS_SYMBOL_COMPONENT,
144
  ['4'] = LEX_IS_SYMBOL_COMPONENT,
145
  ['5'] = LEX_IS_SYMBOL_COMPONENT,
146
  ['6'] = LEX_IS_SYMBOL_COMPONENT,
147
  ['7'] = LEX_IS_SYMBOL_COMPONENT,
148
  ['8'] = LEX_IS_SYMBOL_COMPONENT,
149
  ['9'] = LEX_IS_SYMBOL_COMPONENT,
150
#define INIT2(n) [n] = LEX_IS_SYMBOL_COMPONENT, \
151
     [(n) + 1] = LEX_IS_SYMBOL_COMPONENT
152
#define INIT4(n)    INIT2 (n),  INIT2 ((n) +  2)
153
#define INIT8(n)    INIT4 (n),  INIT4 ((n) +  4)
154
#define INIT16(n)   INIT8 (n),  INIT8 ((n) +  8)
155
#define INIT32(n)  INIT16 (n), INIT16 ((n) + 16)
156
#define INIT64(n)  INIT32 (n), INIT32 ((n) + 32)
157
#define INIT128(n) INIT64 (n), INIT64 ((n) + 64)
158
  INIT128 (128),
159
#undef INIT128
160
#undef INIT64
161
#undef INIT32
162
#undef INIT16
163
#undef INIT8
164
#undef INIT4
165
#undef INIT2
166
};
167
168
void
169
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
170
478
{
171
478
  const char *p;
172
173
  /* Latch this once at start.  xtensa uses a hook function, yet context isn't
174
     meaningful for scrubbing (or else we'd need to sync scrubber behavior as
175
     state changes).  */
176
478
  if (lex['/'] == 0)
177
1
    no_pseudo_dot = NO_PSEUDO_DOT;
178
179
#ifdef TC_M68K
180
  scrub_m68k_mri = m68k_mri;
181
182
  if (! m68k_mri)
183
#endif
184
478
    {
185
478
      lex['"'] = LEX_IS_STRINGQUOTE;
186
187
478
#if ! defined (TC_HPPA)
188
478
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
189
478
#endif
190
191
#ifdef SINGLE_QUOTE_STRINGS
192
      lex['\''] = LEX_IS_STRINGQUOTE;
193
#endif
194
478
    }
195
196
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
197
     in state 5 of do_scrub_chars must be changed.  */
198
199
  /* Note that these override the previous defaults, e.g. if ';' is a
200
     comment char, then it isn't a line separator.  */
201
202
478
#ifdef tc_symbol_chars
203
  /* This macro permits the processor to specify all characters which
204
     may appears in an operand.  This will prevent the scrubber from
205
     discarding meaningful whitespace in certain cases.  The i386
206
     backend uses this to support prefixes, which can confuse the
207
     scrubber as to whether it is parsing operands or opcodes.  */
208
2.86k
  for (p = tc_symbol_chars; *p; ++p)
209
2.39k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
210
478
#endif
211
212
  /* The m68k backend wants to be able to change comment_chars.  */
213
#ifndef tc_comment_chars
214
#define tc_comment_chars comment_chars
215
#endif
216
956
  for (p = tc_comment_chars; *p; p++)
217
478
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
218
219
  /* While counter intuitive to have more special purpose line comment chars
220
     override more general purpose ordinary ones, logic in do_scrub_chars()
221
     depends on this ordering.   */
222
1.43k
  for (p = line_comment_chars; *p; p++)
223
956
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
224
225
478
#ifndef tc_line_separator_chars
226
478
#define tc_line_separator_chars line_separator_chars
227
478
#endif
228
956
  for (p = tc_line_separator_chars; *p; p++)
229
478
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
230
231
#ifdef tc_parallel_separator_chars
232
  /* This macro permits the processor to specify all characters which
233
     separate parallel insns on the same line.  */
234
  for (p = tc_parallel_separator_chars; *p; p++)
235
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
236
#endif
237
238
  /* Only allow slash-star comments if slash is not in use.  Certain
239
     other cases are dealt with in LEX_IS_LINE_COMMENT_START handling.
240
     FIXME: This isn't right.  We should always permit them.  */
241
478
  if (lex['/'] == 0)
242
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
243
244
#ifdef TC_M68K
245
  if (m68k_mri)
246
    {
247
      lex['\''] = LEX_IS_STRINGQUOTE;
248
      lex[';'] = LEX_IS_COMMENT_START;
249
      lex['*'] = LEX_IS_LINE_COMMENT_START;
250
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
251
   then it can't be used in an expression.  */
252
      lex['!'] = LEX_IS_LINE_COMMENT_START;
253
    }
254
#endif
255
256
#ifdef TC_V850
257
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
258
#endif
259
#ifdef DOUBLEBAR_PARALLEL
260
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
261
#endif
262
263
#ifdef H_TICK_HEX
264
  if (enable_h_tick_hex)
265
    {
266
      lex['h'] = LEX_IS_H;
267
      lex['H'] = LEX_IS_H;
268
    }
269
#endif
270
478
}
271
272
/* Saved state of the scrubber.  */
273
static int state;  /* Current state of the do_scrub_chars state machine.  */
274
static int old_state;  /* State do_scrub_chars returns to once a string or out_string has been output.  */
275
static const char *out_string;  /* Pending text that state -1 of do_scrub_chars emits one character at a time.  */
276
static char out_buf[20];  /* NOTE(review): users are outside this chunk; presumably backing store for out_string - confirm.  */
277
static int add_newlines;  /* Bumped in state 6 of do_scrub_chars for each newline swallowed inside a string.  */
278
static char *saved_input;  /* Input left over from a previous do_scrub_chars call, or NULL.  */
279
static size_t saved_input_len;  /* Number of bytes at saved_input.  */
280
static char input_buffer[32 * 1024];  /* Buffer that do_scrub_chars asks GET to fill.  */
281
282
/* Data structure for saving the state of app across #include's.  Note that
283
   app is called asynchronously to the parsing of the .include's, so our
284
   state at the time .include is interpreted is completely unrelated.
285
   That's why we have to save it all.  */
286
287
/* Each member holds the value of the file-scope variable of the same
   name; app_push fills the structure in and app_pop copies it back.  */
struct app_save
288
{
289
  int          state;
290
  int          old_state;
291
  const char * out_string;
292
  char         out_buf[sizeof (out_buf)];
293
  int          add_newlines;
294
  char *       saved_input;  /* Heap copy made by app_push, freed by app_pop; NULL if none.  */
295
  size_t       saved_input_len;  /* Only set by app_push when saved_input is not NULL.  */
296
  const char * end_state;
297
#ifdef TC_M68K
298
  int          scrub_m68k_mri;
299
  const char * mri_state;
300
  char         mri_last_ch;
301
#endif
302
#if defined TC_ARM && defined OBJ_ELF
303
  const char * symver_state;
304
#endif
305
  char         last_char;
306
};
307
308
char *
309
app_push (void)
310
1.44k
{
311
1.44k
  struct app_save *saved;
312
313
1.44k
  saved = XNEW (struct app_save);
314
1.44k
  saved->state = state;
315
1.44k
  saved->old_state = old_state;
316
1.44k
  saved->out_string = out_string;
317
1.44k
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
318
1.44k
  saved->add_newlines = add_newlines;
319
1.44k
  if (saved_input == NULL)
320
1.44k
    saved->saved_input = NULL;
321
1
  else
322
1
    {
323
1
      saved->saved_input = XNEWVEC (char, saved_input_len);
324
1
      memcpy (saved->saved_input, saved_input, saved_input_len);
325
1
      saved->saved_input_len = saved_input_len;
326
1
    }
327
1.44k
  saved->end_state = end_state;
328
#ifdef TC_M68K
329
  saved->scrub_m68k_mri = scrub_m68k_mri;
330
  saved->mri_state = mri_state;
331
  saved->mri_last_ch = mri_last_ch;
332
#endif
333
#if defined TC_ARM && defined OBJ_ELF
334
  saved->symver_state = symver_state;
335
#endif
336
1.44k
  saved->last_char = last_char;
337
338
  /* do_scrub_begin() is not useful, just wastes time.  */
339
340
1.44k
  state = 0;
341
1.44k
  saved_input = NULL;
342
1.44k
  add_newlines = 0;
343
344
1.44k
  return (char *) saved;
345
1.44k
}
346
347
void
348
app_pop (char *arg)
349
1.44k
{
350
1.44k
  struct app_save *saved = (struct app_save *) arg;
351
352
  /* There is no do_scrub_end ().  */
353
1.44k
  state = saved->state;
354
1.44k
  old_state = saved->old_state;
355
1.44k
  out_string = saved->out_string;
356
1.44k
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
357
1.44k
  add_newlines = saved->add_newlines;
358
1.44k
  if (saved->saved_input == NULL)
359
1.44k
    saved_input = NULL;
360
1
  else
361
1
    {
362
1
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
363
1
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
364
1
      saved_input = input_buffer;
365
1
      saved_input_len = saved->saved_input_len;
366
1
      free (saved->saved_input);
367
1
    }
368
1.44k
  end_state = saved->end_state;
369
#ifdef TC_M68K
370
  scrub_m68k_mri = saved->scrub_m68k_mri;
371
  mri_state = saved->mri_state;
372
  mri_last_ch = saved->mri_last_ch;
373
#endif
374
#if defined TC_ARM && defined OBJ_ELF
375
  symver_state = saved->symver_state;
376
#endif
377
1.44k
  last_char = saved->last_char;
378
379
1.44k
  free (arg);
380
1.44k
}
381
382
/* @@ This assumes that \n &c are the same on host and target.  This is not
383
   necessarily true.  */
384
385
/* Translate CH, the character following a backslash, into the
   character the escape sequence stands for.  The letters b, f, n, r
   and t yield the corresponding control character; a single or double
   quote yields itself, as does any other value of CH.  */

static int
process_escape (int ch)
{
  static const struct
  {
    char letter;
    char value;
  } escapes[] =
    {
      { 'b', '\b' },
      { 'f', '\f' },
      { 'n', '\n' },
      { 'r', '\r' },
      { 't', '\t' },
      { '\'', '\'' },
      { '"', '\"' },
    };
  size_t i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (ch == escapes[i].letter)
      return escapes[i].value;

  return ch;
}
408
409
0
#define MULTIBYTE_WARN_COUNT_LIMIT 10
410
static unsigned int multibyte_warn_count = 0;
411
412
bool
413
scan_for_multibyte_characters (const unsigned char *  start,
414
             const unsigned char *  end,
415
             bool                   warn)
416
0
{
417
0
  if (end <= start)
418
0
    return false;
419
420
0
  if (warn && multibyte_warn_count > MULTIBYTE_WARN_COUNT_LIMIT)
421
0
    return false;
422
423
0
  bool found = false;
424
425
0
  while (start < end)
426
0
    {
427
0
      unsigned char c;
428
429
0
      if ((c = * start++) <= 0x7f)
430
0
  continue;
431
432
0
      if (!warn)
433
0
  return true;
434
435
0
      found = true;
436
437
0
      const char * filename;
438
0
      unsigned int lineno;
439
440
0
      filename = as_where (& lineno);
441
0
      if (filename == NULL)
442
0
  as_warn (_("multibyte character (%#x) encountered in input"), c);
443
0
      else if (lineno == 0)
444
0
  as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
445
0
      else
446
0
  as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
447
448
0
      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
449
0
  {
450
0
    as_warn (_("further multibyte character warnings suppressed"));
451
0
    break;
452
0
  }
453
0
    }
454
455
0
  return found;
456
0
}
457
458
/* This function is called to process input characters.  The GET
459
   parameter is used to retrieve more input characters.  GET should
460
   set its parameter to point to a buffer, and return the length of
461
   the buffer; it should return 0 at end of file.  The scrubbed output
462
   characters are put into the buffer starting at TOSTART; the TOSTART
463
   buffer is TOLEN bytes in length.  The function returns the number
464
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
465
   end of file was seen.  This function is arranged as a state
466
   machine, and saves its state so that it may return at any point.
467
   This is the way the old code used to work.  */
468
469
size_t
470
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
471
    bool check_multibyte)
472
2.23k
{
473
2.23k
  char *to = tostart;
474
2.23k
  char *toend = tostart + tolen;
475
2.23k
  char *from;
476
2.23k
  char *fromend;
477
2.23k
  size_t fromlen;
478
2.23k
  int ch, ch2 = 0;
479
  /* Character that started the string we're working on.  */
480
2.23k
  static char quotechar;
481
482
  /*State 0: beginning of normal line
483
    1: After first whitespace on line (flush more white)
484
    2: After first non-white (opcode) on line (keep 1white)
485
    3: after second white on line (into operands) (flush white)
486
    4: after putting out a .linefile, put out digits
487
    5: parsing a string, then go to old-state
488
    6: putting out \ escape in a "d string.
489
    7: no longer used
490
    8: no longer used
491
    9: After seeing symbol char in state 3 (keep 1white after symchar)
492
   10: After seeing whitespace in state 9 (keep white before symchar)
493
   11: After seeing a symbol character in state 0 (eg a label definition)
494
   -1: output string in out_string and go to the state in old_state
495
   12: no longer used
496
#ifdef DOUBLEBAR_PARALLEL
497
   13: After seeing a vertical bar, looking for a second
498
       vertical bar as a parallel expression separator.
499
#endif
500
#ifdef TC_PREDICATE_START_CHAR
501
   14: After seeing a predicate start character at state 0, looking
502
       for a predicate end character as predicate.
503
   15: After seeing a predicate start character at state 1, looking
504
       for a predicate end character as predicate.
505
#endif
506
#ifdef TC_Z80
507
   16: After seeing an 'a' or an 'A' at the start of a symbol
508
   17: After seeing an 'f' or an 'F' in state 16
509
#endif
510
    */
511
512
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
513
     constructs like ``.loc 1 20''.  This was turning into ``.loc
514
     120''.  States 9 and 10 ensure that a space is never dropped in
515
     between characters which could appear in an identifier.  Ian
516
     Taylor, ian@cygnus.com.
517
518
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
519
     correctly on the PA (and any other target where colons are optional).
520
     Jeff Law, law@cs.utah.edu.
521
522
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
523
     get squashed into "cmp r1,r2||trap#1", with the all important space
524
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
525
526
  /* This macro gets the next input character.  */
527
528
2.23k
#define GET()             \
529
44.5M
  (from < fromend            \
530
44.5M
   ? * (unsigned char *) (from++)        \
531
44.5M
   : (saved_input = NULL,          \
532
4.04k
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
533
4.04k
      from = input_buffer,          \
534
4.04k
      fromend = from + fromlen,         \
535
4.04k
      (fromlen == 0            \
536
4.04k
       ? EOF              \
537
4.04k
       : * (unsigned char *) (from++))))
538
539
  /* This macro pushes a character back on the input stream.  */
540
541
4.27M
#define UNGET(uch) (*--from = (uch))
542
543
  /* This macro puts a character into the output buffer.  If this
544
     character fills the output buffer, this macro jumps to the label
545
     TOFULL.  We use this rather ugly approach because we need to
546
     handle two different termination conditions: EOF on the input
547
     stream, and a full output buffer.  It would be simpler if we
548
     always read in the entire input stream before processing it, but
549
     I don't want to make such a significant change to the assembler's
550
     memory usage.  */
551
552
2.23k
#define PUT(pch)        \
553
40.3M
  do            \
554
40.3M
    {           \
555
40.3M
      *to++ = (pch);        \
556
40.3M
      if (to >= toend)       \
557
40.3M
  goto tofull;       \
558
40.3M
    }           \
559
40.3M
  while (0)
560
561
2.23k
  if (saved_input != NULL)
562
317
    {
563
317
      from = saved_input;
564
317
      fromend = from + saved_input_len;
565
317
    }
566
1.91k
  else
567
1.91k
    {
568
1.91k
      fromlen = (*get) (input_buffer, sizeof input_buffer);
569
1.91k
      if (fromlen == 0)
570
486
  return 0;
571
1.43k
      from = input_buffer;
572
1.43k
      fromend = from + fromlen;
573
574
1.43k
      if (check_multibyte)
575
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
576
0
                (const unsigned char *) fromend,
577
0
                true /* Generate warnings.  */);
578
1.43k
    }
579
580
37.2M
  while (1)
581
37.2M
    {
582
      /* The cases in this switch end with continue, in order to
583
   branch back to the top of this while loop and generate the
584
   next output character in the appropriate state.  */
585
37.2M
      switch (state)
586
37.2M
  {
587
5.41M
  case -1:
588
5.41M
    ch = *out_string++;
589
5.41M
    if (*out_string == '\0')
590
3.34M
      {
591
3.34M
        state = old_state;
592
3.34M
        old_state = 3;
593
3.34M
      }
594
5.41M
    PUT (ch);
595
5.41M
    continue;
596
597
5.41M
  case 4:
598
407k
    ch = GET ();
599
407k
    if (ch == EOF)
600
0
      goto fromeof;
601
407k
    else if (ch >= '0' && ch <= '9')
602
370k
      PUT (ch);
603
36.1k
    else
604
36.1k
      {
605
36.1k
        while (ch != EOF && IS_WHITESPACE (ch))
606
0
    ch = GET ();
607
36.1k
        if (ch == '"')
608
35.6k
    {
609
35.6k
      quotechar = ch;
610
35.6k
      state = 5;
611
35.6k
      old_state = 3;
612
35.6k
      PUT (ch);
613
35.6k
    }
614
510
        else
615
510
    {
616
6.25k
      while (ch != EOF && ch != '\n')
617
5.74k
        ch = GET ();
618
510
      state = 0;
619
510
      PUT (ch);
620
510
    }
621
36.1k
      }
622
406k
    continue;
623
624
7.54M
  case 5:
625
    /* We are going to copy everything up to a quote character,
626
       with special handling for a backslash.  We try to
627
       optimize the copying in the simple case without using the
628
       GET and PUT macros.  */
629
7.54M
    {
630
7.54M
      char *s;
631
7.54M
      ptrdiff_t len;
632
633
33.4M
      for (s = from; s < fromend; s++)
634
33.4M
        {
635
33.4M
    ch = *s;
636
33.4M
    if (ch == '\\'
637
26.5M
        || ch == quotechar
638
26.1M
        || ch == '\n')
639
7.53M
      break;
640
33.4M
        }
641
7.54M
      len = s - from;
642
7.54M
      if (len > toend - to)
643
1
        len = toend - to;
644
7.54M
      if (len > 0)
645
4.30M
        {
646
4.30M
    memcpy (to, from, len);
647
4.30M
    to += len;
648
4.30M
    from += len;
649
4.30M
    if (to >= toend)
650
1
      goto tofull;
651
4.30M
        }
652
7.54M
    }
653
654
7.54M
    ch = GET ();
655
7.54M
    if (ch == EOF)
656
356
      {
657
        /* This buffer is here specifically so
658
     that the UNGET below will work.  */
659
356
        static char one_char_buf[1];
660
661
356
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
662
356
        state = old_state;
663
356
        from = fromend = one_char_buf + 1;
664
356
        fromlen = 1;
665
356
        UNGET ('\n');
666
356
        PUT (quotechar);
667
356
      }
668
7.54M
    else if (ch == quotechar)
669
362k
      {
670
362k
        state = old_state;
671
362k
        PUT (ch);
672
362k
      }
673
7.17M
    else if (TC_STRING_ESCAPES && ch == '\\')
674
6.85M
      {
675
6.85M
        state = 6;
676
6.85M
        PUT (ch);
677
6.85M
      }
678
327k
    else if (scrub_m68k_mri && ch == '\n')
679
0
      {
680
        /* Just quietly terminate the string.  This permits lines like
681
       bne  label loop if we haven't reach end yet.  */
682
0
        state = old_state;
683
0
        UNGET (ch);
684
0
        PUT ('\'');
685
0
      }
686
327k
    else
687
327k
      {
688
327k
        PUT (ch);
689
327k
      }
690
7.54M
    continue;
691
692
7.54M
  case 6:
693
6.85M
    state = 5;
694
6.85M
    ch = GET ();
695
6.85M
    switch (ch)
696
6.85M
      {
697
        /* Handle strings broken across lines, by turning '\n' into
698
     '\\' and 'n'.  */
699
1
      case '\n':
700
1
        UNGET ('n');
701
1
        add_newlines++;
702
1
        PUT ('\\');
703
1
        continue;
704
705
4
      case EOF:
706
4
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
707
4
        PUT (quotechar);
708
4
        continue;
709
710
        /* These two are used inside macros.  */
711
4
      case '@':
712
3
      case '+':
713
3
        break;
714
715
6
      case '"':
716
1.77M
      case '\\':
717
1.77M
      case 'b':
718
1.77M
      case 'f':
719
1.77M
      case 'n':
720
1.77M
      case 'r':
721
1.77M
      case 't':
722
1.77M
      case 'v':
723
1.77M
      case 'x':
724
1.77M
      case 'X':
725
1.77M
      case '0':
726
1.77M
      case '1':
727
1.77M
      case '2':
728
1.77M
      case '3':
729
1.77M
      case '4':
730
1.77M
      case '5':
731
1.77M
      case '6':
732
1.77M
      case '7':
733
1.77M
        break;
734
735
5.07M
      default:
736
#ifdef ONLY_STANDARD_ESCAPES
737
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
738
#endif
739
5.07M
        break;
740
6.85M
      }
741
6.85M
    PUT (ch);
742
6.85M
    continue;
743
744
#ifdef DOUBLEBAR_PARALLEL
745
  case 13:
746
    ch = GET ();
747
    if (ch != '|')
748
      abort ();
749
750
    /* Reset back to state 1 and pretend that we are parsing a
751
       line from just after the first white space.  */
752
    state = 1;
753
    PUT ('|');
754
    continue;
755
#endif
756
#ifdef TC_Z80
757
  case 16:
758
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
759
    ch = GET ();
760
    if (ch == 'f' || ch == 'F')
761
      {
762
        state = 17;
763
        PUT (ch);
764
      }
765
    else
766
      {
767
        if (ch != EOF)
768
    UNGET (ch);
769
        state = 9;
770
        break;
771
      }
772
    /* Fall through.  */
773
  case 17:
774
    /* We have seen "af" at the start of a symbol,
775
       a ' here is a part of that symbol.  */
776
    ch = GET ();
777
    state = 9;
778
    if (ch == '\'')
779
      /* Change to avoid warning about unclosed string.  */
780
      PUT ('`');
781
    else if (ch != EOF)
782
      UNGET (ch);
783
    break;
784
#endif
785
37.2M
  }
786
787
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
788
789
      /* flushchar: */
790
17.0M
      ch = GET ();
791
792
#ifdef TC_PREDICATE_START_CHAR
793
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
794
  {
795
    state += 14;
796
    PUT (ch);
797
    continue;
798
  }
799
      else if (state == 14 || state == 15)
800
  {
801
    if (ch == TC_PREDICATE_END_CHAR)
802
      {
803
        state -= 14;
804
        PUT (ch);
805
        ch = GET ();
806
      }
807
    else
808
      {
809
        PUT (ch);
810
        continue;
811
      }
812
  }
813
#endif
814
815
17.5M
    recycle:
816
817
      /* We need to watch out for .end directives: We should in particular not
818
   issue diagnostics for anything after an active one.  */
819
17.5M
      if (ch == EOF)
820
1.41k
  end_state = NULL;
821
17.5M
      else if (end_state == NULL)
822
17.1M
  {
823
17.1M
    if ((state == 0 || state == 1)
824
1.02M
        && (ch == '.'
825
689k
      || (no_pseudo_dot && ch == end_pseudo[0])))
826
339k
      end_state = end_pseudo + (ch != '.');
827
17.1M
  }
828
369k
      else if (ch != '\0'
829
369k
         && (*end_state == ch
830
       /* Avoid triggering on directives like .endif or .endr.  */
831
339k
       || (*end_state == ' ' && !IS_SYMBOL_COMPONENT (ch))))
832
29.2k
  {
833
29.2k
    if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
834
14
      goto end_end;
835
29.2k
    ++end_state;
836
29.2k
  }
837
339k
      else if (*end_state != '\0')
838
  /* We did not get the expected character, or we didn't
839
     get a valid terminating character after seeing the
840
     entire pseudo-op, so we must go back to the beginning.  */
841
339k
  end_state = NULL;
842
323
      else if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
843
8
  {
844
22
  end_end:
845
    /* We've read the entire pseudo-op.  If this is the end of the line,
846
       bail out now by (ab)using the output-full path.  This allows the
847
       caller to process input up to here and terminate processing if this
848
       directive is actually active (not on the false branch of a
849
       conditional and not in a macro definition).  */
850
22
    end_state = NULL;
851
22
    state = 0;
852
22
    PUT (ch);
853
22
    goto tofull;
854
22
  }
855
856
#if defined TC_ARM && defined OBJ_ELF
857
      /* We need to watch out for .symver directives.  See the comment later
858
   in this function.  */
859
      if (ch == EOF)
860
  symver_state = NULL;
861
      else if (symver_state == NULL)
862
  {
863
    if ((state == 0 || state == 1)
864
        && strchr (tc_comment_chars, '@') != NULL
865
        && ch == symver_pseudo[0])
866
      symver_state = symver_pseudo + 1;
867
  }
868
      else
869
  {
870
    /* We advance to the next state if we find the right
871
       character.  */
872
    if (ch != '\0' && (*symver_state == ch))
873
      ++symver_state;
874
    else if (*symver_state != '\0')
875
      /* We did not get the expected character, or we didn't
876
         get a valid terminating character after seeing the
877
         entire pseudo-op, so we must go back to the beginning.  */
878
      symver_state = NULL;
879
    else
880
      {
881
        /* We've read the entire pseudo-op.  If this is the end
882
     of the line, go back to the beginning.  */
883
        if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
884
    symver_state = NULL;
885
      }
886
  }
887
#endif /* TC_ARM && OBJ_ELF */
888
889
#ifdef TC_M68K
890
      /* We want to have pseudo-ops which control whether we are in
891
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
892
   the scrubber, that means that we need a special purpose
893
   recognizer here.  */
894
      if (ch == EOF)
895
  mri_state = NULL;
896
      else if (mri_state == NULL)
897
  {
898
    if ((state == 0 || state == 1)
899
        && ch == mri_pseudo[0])
900
      mri_state = mri_pseudo + 1;
901
  }
902
      else
903
  {
904
    /* We advance to the next state if we find the right
905
       character, or if we need a space character and we get any
906
       whitespace character, or if we need a '0' and we get a
907
       '1' (this is so that we only need one state to handle
908
       ``.mri 0'' and ``.mri 1'').  */
909
    if (ch != '\0'
910
        && (*mri_state == ch
911
      || (*mri_state == ' '
912
          && IS_WHITESPACE (ch))
913
      || (*mri_state == '0'
914
          && ch == '1')))
915
      {
916
        mri_last_ch = ch;
917
        ++mri_state;
918
      }
919
    else if (*mri_state != '\0'
920
       || (!IS_WHITESPACE (ch)
921
           && !IS_LINE_SEPARATOR (ch)
922
           && !IS_NEWLINE (ch)))
923
      {
924
        /* We did not get the expected character, or we didn't
925
     get a valid terminating character after seeing the
926
     entire pseudo-op, so we must go back to the
927
     beginning.  */
928
        mri_state = NULL;
929
      }
930
    else
931
      {
932
        /* We've read the entire pseudo-op.  mri_last_ch is
933
     either '0' or '1' indicating whether to enter or
934
     leave MRI mode.  */
935
        do_scrub_begin (mri_last_ch == '1');
936
        mri_state = NULL;
937
938
        /* We continue handling the character as usual.  The
939
     main gas reader must also handle the .mri pseudo-op
940
     to control expression parsing and the like.  */
941
      }
942
  }
943
#endif
944
945
17.5M
      if (ch == EOF)
946
1.41k
  {
947
1.41k
    if (state != 0)
948
252
      {
949
252
        as_warn (_("end of file not at end of a line; newline inserted"));
950
252
        state = 0;
951
252
        PUT ('\n');
952
252
      }
953
1.41k
    goto fromeof;
954
1.41k
  }
955
956
17.5M
      switch (lex[ch])
957
17.5M
  {
958
862k
  case LEX_IS_WHITESPACE:
959
862k
    do
960
907k
      {
961
907k
        ch = GET ();
962
907k
      }
963
907k
    while (ch != EOF && IS_WHITESPACE (ch));
964
862k
    if (ch == EOF)
965
17
      goto fromeof;
966
967
862k
    if (state == 0)
968
272k
      {
969
        /* Preserve a single whitespace character at the
970
     beginning of a line.  */
971
272k
        state = 1;
972
272k
        UNGET (ch);
973
272k
        PUT (' ');
974
272k
        break;
975
272k
      }
976
977
#ifdef KEEP_WHITE_AROUND_COLON
978
    if (lex[ch] == LEX_IS_COLON)
979
      {
980
        /* Only keep this white if there's no white *after* the
981
     colon.  */
982
        ch2 = GET ();
983
        if (ch2 != EOF)
984
    UNGET (ch2);
985
        if (!IS_WHITESPACE (ch2))
986
    {
987
      state = 9;
988
      UNGET (ch);
989
      PUT (' ');
990
      break;
991
    }
992
      }
993
#endif
994
995
    /* Prune trailing whitespace.  */
996
590k
    if (IS_COMMENT (ch)
997
590k
        || (IS_LINE_COMMENT (ch)
998
239
            && (state < 1 || strchr (tc_comment_chars, ch)))
999
590k
        || IS_NEWLINE (ch)
1000
588k
        || IS_LINE_SEPARATOR (ch)
1001
587k
        || IS_PARALLEL_SEPARATOR (ch))
1002
2.83k
      {
1003
2.83k
        if (scrub_m68k_mri)
1004
0
    {
1005
      /* In MRI mode, we keep these spaces.  */
1006
0
      UNGET (ch);
1007
0
      PUT (' ');
1008
0
      break;
1009
0
    }
1010
2.83k
        goto recycle;
1011
2.83k
      }
1012
#ifdef DOUBLESLASH_LINE_COMMENTS
1013
    if (IS_TWOCHAR_COMMENT_1ST (ch))
1014
      {
1015
        ch2 = GET ();
1016
        if (ch2 != EOF)
1017
          UNGET (ch2);
1018
        if (ch2 == '/')
1019
    goto recycle;
1020
      }
1021
#endif
1022
1023
    /* If we're in state 2 or 11, we've seen a non-white
1024
       character followed by whitespace.  If the next character
1025
       is ':', this is whitespace after a label name which we
1026
       normally must ignore.  In MRI mode, though, spaces are
1027
       not permitted between the label and the colon.  */
1028
587k
    if ((state == 2 || state == 11)
1029
298k
        && lex[ch] == LEX_IS_COLON
1030
0
        && ! scrub_m68k_mri)
1031
105
      {
1032
105
        state = 1;
1033
105
        PUT (ch);
1034
105
        break;
1035
105
      }
1036
1037
587k
    switch (state)
1038
587k
      {
1039
8
      case 1:
1040
        /* We can arrive here if we leave a leading whitespace
1041
     character at the beginning of a line.  */
1042
8
        goto recycle;
1043
197k
      case 2:
1044
197k
        state = 3;
1045
197k
        if (to + 1 < toend)
1046
197k
    {
1047
      /* Optimize common case by skipping UNGET/GET.  */
1048
197k
      PUT (' '); /* Sp after opco */
1049
197k
      goto recycle;
1050
197k
    }
1051
133
        UNGET (ch);
1052
133
        PUT (' ');
1053
0
        break;
1054
2.95k
      case 3:
1055
2.95k
#ifndef TC_KEEP_OPERAND_SPACES
1056
        /* For TI C6X, we keep these spaces as they may separate
1057
     functional unit specifiers from operands.  */
1058
2.95k
        if (scrub_m68k_mri)
1059
0
#endif
1060
0
    {
1061
      /* In MRI mode, we keep these spaces.  */
1062
0
      UNGET (ch);
1063
0
      PUT (' ');
1064
0
      break;
1065
0
    }
1066
2.95k
        goto recycle; /* Sp in operands */
1067
285k
      case 9:
1068
285k
      case 10:
1069
285k
#ifndef TC_KEEP_OPERAND_SPACES
1070
285k
        if (scrub_m68k_mri)
1071
0
#endif
1072
0
    {
1073
      /* In MRI mode, we keep these spaces.  */
1074
0
      state = 3;
1075
0
      UNGET (ch);
1076
0
      PUT (' ');
1077
0
      break;
1078
0
    }
1079
285k
        state = 10; /* Sp after symbol char */
1080
285k
        goto recycle;
1081
101k
      case 11:
1082
101k
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1083
0
    state = 1;
1084
101k
        else
1085
101k
    {
1086
      /* We know that ch is not ':', since we tested that
1087
         case above.  Therefore this is not a label, so it
1088
         must be the opcode, and we've just seen the
1089
         whitespace after it.  */
1090
101k
      state = 3;
1091
101k
    }
1092
101k
        UNGET (ch);
1093
101k
        PUT (' '); /* Sp after label definition.  */
1094
101k
        break;
1095
101k
      default:
1096
0
        BAD_CASE (state);
1097
587k
      }
1098
101k
    break;
1099
1100
101k
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1101
0
    ch2 = GET ();
1102
0
    if (ch2 == '*')
1103
0
      {
1104
7
  twochar_comment:
1105
7
        for (;;)
1106
29
    {
1107
29
      do
1108
1.61k
        {
1109
1.61k
          ch2 = GET ();
1110
1.61k
          if (ch2 != EOF && IS_NEWLINE (ch2))
1111
245
      add_newlines++;
1112
1.61k
        }
1113
1.61k
      while (ch2 != EOF && ch2 != '*');
1114
1115
81
      while (ch2 == '*')
1116
52
        ch2 = GET ();
1117
1118
29
      if (ch2 == EOF || ch2 == '/')
1119
7
        break;
1120
1121
      /* This UNGET will ensure that we count newlines
1122
         correctly.  */
1123
22
      UNGET (ch2);
1124
22
    }
1125
1126
7
        if (ch2 == EOF)
1127
7
    as_warn (_("end of file in multiline comment"));
1128
1129
7
        ch = ' ';
1130
7
        goto recycle;
1131
0
      }
1132
#ifdef DOUBLESLASH_LINE_COMMENTS
1133
    else if (ch2 == '/')
1134
      {
1135
        do
1136
    {
1137
      ch = GET ();
1138
    }
1139
        while (ch != EOF && !IS_NEWLINE (ch));
1140
        if (ch == EOF)
1141
    as_warn ("end of file in comment; newline inserted");
1142
        state = 0;
1143
        PUT ('\n');
1144
        break;
1145
      }
1146
#endif
1147
0
    else
1148
0
      {
1149
0
        if (ch2 != EOF)
1150
0
    UNGET (ch2);
1151
0
        if (state == 9 || state == 10)
1152
0
    state = 3;
1153
0
        PUT (ch);
1154
0
      }
1155
0
    break;
1156
1157
326k
  case LEX_IS_STRINGQUOTE:
1158
326k
    quotechar = ch;
1159
326k
    if (state == 10)
1160
111k
      {
1161
        /* Preserve the whitespace in foo "bar".  */
1162
111k
        UNGET (ch);
1163
111k
        state = 3;
1164
111k
        PUT (' ');
1165
1166
        /* PUT didn't jump out.  We could just break, but we
1167
     know what will happen, so optimize a bit.  */
1168
111k
        ch = GET ();
1169
111k
        old_state = 9;
1170
111k
      }
1171
214k
    else if (state == 3)
1172
41.6k
      old_state = 9;
1173
173k
    else if (state == 0)
1174
1.15k
      old_state = 11; /* Now seeing label definition.  */
1175
172k
    else
1176
172k
      old_state = state;
1177
326k
    state = 5;
1178
326k
    PUT (ch);
1179
326k
    break;
1180
1181
3.34M
  case LEX_IS_ONECHAR_QUOTE:
1182
#ifdef H_TICK_HEX
1183
    if (state == 9 && enable_h_tick_hex)
1184
      {
1185
        char c;
1186
1187
        c = GET ();
1188
        as_warn ("'%c found after symbol", c);
1189
        UNGET (c);
1190
      }
1191
#endif
1192
3.34M
    if (state == 10)
1193
5
      {
1194
        /* Preserve the whitespace in foo 'b'.  */
1195
5
        UNGET (ch);
1196
5
        state = 3;
1197
5
        PUT (' ');
1198
5
        break;
1199
5
      }
1200
3.34M
    ch = GET ();
1201
3.34M
    if (ch == EOF)
1202
0
      {
1203
0
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1204
0
        ch = 0;
1205
0
      }
1206
3.34M
    if (ch == '\\')
1207
1.63M
      {
1208
1.63M
        ch = GET ();
1209
1.63M
        if (ch == EOF)
1210
0
    {
1211
0
      as_warn (_("end of file in escape character"));
1212
0
      ch = '\\';
1213
0
    }
1214
1.63M
        else
1215
1.63M
    ch = process_escape (ch);
1216
1.63M
      }
1217
3.34M
    sprintf (out_buf, "%d", ch & 0xff);
1218
1219
    /* None of these 'x constants for us.  We want 'x'.  */
1220
3.34M
    if ((ch = GET ()) != '\'')
1221
3.31M
      {
1222
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1223
        as_warn (_("missing close quote; (assumed)"));
1224
#else
1225
3.31M
        if (ch != EOF)
1226
3.31M
    UNGET (ch);
1227
3.31M
#endif
1228
3.31M
      }
1229
3.34M
    if (strlen (out_buf) == 1)
1230
32.8k
      {
1231
32.8k
        PUT (out_buf[0]);
1232
32.8k
        break;
1233
32.8k
      }
1234
3.31M
    if (state == 9)
1235
2.88M
      old_state = 3;
1236
427k
    else
1237
427k
      old_state = state;
1238
3.31M
    state = -1;
1239
3.31M
    out_string = out_buf;
1240
3.31M
    PUT (*out_string++);
1241
3.31M
    break;
1242
1243
3.31M
  case LEX_IS_COLON:
1244
#ifdef KEEP_WHITE_AROUND_COLON
1245
    state = 9;
1246
#else
1247
34.6k
    if (state == 9 || state == 10)
1248
11.0k
      state = 3;
1249
23.5k
    else if (state != 3)
1250
23.2k
      state = 1;
1251
34.6k
#endif
1252
34.6k
    PUT (ch);
1253
34.6k
    break;
1254
1255
607k
  case LEX_IS_NEWLINE:
1256
    /* Roll out a bunch of newlines from inside comments, etc.  */
1257
607k
    if (add_newlines)
1258
2
      {
1259
2
        --add_newlines;
1260
2
        UNGET (ch);
1261
2
      }
1262
    /* Fall through.  */
1263
1264
725k
  case LEX_IS_LINE_SEPARATOR:
1265
725k
    state = 0;
1266
725k
    PUT (ch);
1267
725k
    break;
1268
1269
725k
  case LEX_IS_PARALLEL_SEPARATOR:
1270
0
    state = 1;
1271
0
    PUT (ch);
1272
0
    break;
1273
1274
#ifdef TC_V850
1275
  case LEX_IS_DOUBLEDASH_1ST:
1276
    ch2 = GET ();
1277
    if (ch2 != '-')
1278
      {
1279
        if (ch2 != EOF)
1280
    UNGET (ch2);
1281
        goto de_fault;
1282
      }
1283
    /* Read and skip to end of line.  */
1284
    do
1285
      {
1286
        ch = GET ();
1287
      }
1288
    while (ch != EOF && ch != '\n');
1289
1290
    if (ch == EOF)
1291
      as_warn (_("end of file in comment; newline inserted"));
1292
1293
    state = 0;
1294
    PUT ('\n');
1295
    break;
1296
#endif
1297
#ifdef DOUBLEBAR_PARALLEL
1298
  case LEX_IS_DOUBLEBAR_1ST:
1299
    ch2 = GET ();
1300
    if (ch2 != EOF)
1301
      UNGET (ch2);
1302
    if (ch2 != '|')
1303
      goto de_fault;
1304
1305
    /* Handle '||' in two states as invoking PUT twice might
1306
       result in the first one jumping out of this loop.  We'd
1307
       then lose track of the state and one '|' char.  */
1308
    state = 13;
1309
    PUT ('|');
1310
    break;
1311
#endif
1312
341k
  case LEX_IS_LINE_COMMENT_START:
1313
    /* FIXME-someday: The two character comment stuff was badly
1314
       thought out.  On i386, we want '/' as line comment start
1315
       AND we want C style comments.  hence this hack.  The
1316
       whole lexical process should be reworked.  xoxorich.  */
1317
341k
    if (ch == '/')
1318
298k
      {
1319
298k
        ch2 = GET ();
1320
298k
        if (ch2 == '*')
1321
7
    goto twochar_comment;
1322
298k
        if (ch2 != EOF)
1323
298k
    UNGET (ch2);
1324
298k
      }
1325
1326
341k
    if (state == 0 || state == 1)  /* Only comment at start of line.  */
1327
42.2k
      {
1328
42.2k
        int startch;
1329
1330
42.2k
        startch = ch;
1331
1332
42.2k
        do
1333
42.2k
    {
1334
42.2k
      ch = GET ();
1335
42.2k
    }
1336
42.2k
        while (ch != EOF && IS_WHITESPACE (ch));
1337
1338
42.2k
        if (ch == EOF)
1339
0
    {
1340
0
      as_warn (_("end of file in comment; newline inserted"));
1341
0
      PUT ('\n');
1342
0
      break;
1343
0
    }
1344
1345
42.2k
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1346
6.09k
    {
1347
      /* Not a cpp line.  */
1348
185k
      while (ch != EOF && !IS_NEWLINE (ch))
1349
179k
        ch = GET ();
1350
6.09k
      if (ch == EOF)
1351
4
        {
1352
4
          as_warn (_("end of file in comment; newline inserted"));
1353
4
          PUT ('\n');
1354
4
        }
1355
6.09k
      else /* IS_NEWLINE (ch) */
1356
6.09k
        {
1357
          /* To process non-zero add_newlines.  */
1358
6.09k
          UNGET (ch);
1359
6.09k
        }
1360
6.09k
      state = 0;
1361
6.09k
      break;
1362
6.09k
    }
1363
        /* Looks like `# 123 "filename"' from cpp.  */
1364
36.1k
        UNGET (ch);
1365
36.1k
        old_state = 4;
1366
36.1k
        state = -1;
1367
36.1k
        if (scrub_m68k_mri)
1368
0
    out_string = "\tlinefile ";
1369
36.1k
        else
1370
36.1k
    out_string = "\t.linefile ";
1371
36.1k
        PUT (*out_string++);
1372
36.1k
        break;
1373
36.1k
      }
1374
1375
#ifdef TC_D10V
1376
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1377
       Trap is the only short insn that has a first operand that is
1378
       neither register nor label.
1379
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1380
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1381
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1382
       only character in line_comment_chars for d10v, hence we
1383
       can recognize it as such.  */
1384
    /* An alternative approach would be to reset the state to 1 when
1385
       we see '||', '<-' or '->', but that seems to be overkill.  */
1386
    if (state == 10)
1387
      PUT (' ');
1388
#endif
1389
    /* We have a line comment character which is not at the
1390
       start of a line.  If this is also a normal comment
1391
       character, fall through.  Otherwise treat it as a default
1392
       character.  */
1393
299k
    if (strchr (tc_comment_chars, ch) == NULL)
1394
298k
      goto de_fault;
1395
854
    if (scrub_m68k_mri
1396
0
        && (ch == '!' || ch == '*' || ch == '#'))
1397
0
      goto de_fault;
1398
    /* Fall through.  */
1399
854
  case LEX_IS_COMMENT_START:
1400
#if defined TC_ARM && defined OBJ_ELF
1401
    /* On the ARM, `@' is the comment character.
1402
       Unfortunately this is also a special character in ELF .symver
1403
       directives (and .type, though we deal with those another way).
1404
       So we check if this line is such a directive, and treat
1405
       the character as default if so.  This is a hack.  */
1406
    if ((symver_state != NULL) && (*symver_state == 0))
1407
      goto de_fault;
1408
#endif
1409
1410
    /* Care is needed not to damage occurrences of \<comment-char>
1411
       by stripping the <comment-char> onwards.  Yuck.  */
1412
854
    if ((to > tostart ? to[-1] : last_char) == '\\')
1413
      /* Do not treat the <comment-char> as a start-of-comment.  */
1414
12
      goto de_fault;
1415
1416
#ifdef WARN_COMMENTS
1417
    if (!found_comment)
1418
      found_comment_file = as_where (&found_comment);
1419
#endif
1420
842
    do
1421
21.9k
      {
1422
21.9k
        ch = GET ();
1423
21.9k
      }
1424
21.9k
    while (ch != EOF && !IS_NEWLINE (ch));
1425
842
    if (ch == EOF)
1426
9
      as_warn (_("end of file in comment; newline inserted"));
1427
842
    state = 0;
1428
842
    PUT ('\n');
1429
842
    break;
1430
1431
#ifdef H_TICK_HEX
1432
  case LEX_IS_H:
1433
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1434
       the H' with 0x to make them gas-style hex characters.  */
1435
    if (enable_h_tick_hex)
1436
      {
1437
        char quot;
1438
1439
        quot = GET ();
1440
        if (quot == '\'')
1441
    {
1442
      UNGET ('x');
1443
      ch = '0';
1444
    }
1445
        else
1446
    UNGET (quot);
1447
      }
1448
#endif
1449
    /* Fall through.  */
1450
1451
6.67M
  case LEX_IS_SYMBOL_COMPONENT:
1452
6.67M
    if (state == 10)
1453
139k
      {
1454
        /* This is a symbol character following another symbol
1455
     character, with whitespace in between.  We skipped
1456
     the whitespace earlier, so output it now.  */
1457
139k
        UNGET (ch);
1458
139k
        state = 3;
1459
139k
        PUT (' ');
1460
139k
        break;
1461
139k
      }
1462
1463
#ifdef TC_Z80
1464
    /* "af'" is a symbol containing '\''.  */
1465
    if (state == 3 && (ch == 'a' || ch == 'A'))
1466
      {
1467
        state = 16;
1468
        PUT (ch);
1469
        ch = GET ();
1470
        if (ch == 'f' || ch == 'F')
1471
    {
1472
      state = 17;
1473
      PUT (ch);
1474
      break;
1475
    }
1476
        else
1477
    {
1478
      state = 9;
1479
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1480
        {
1481
          if (ch != EOF)
1482
      UNGET (ch);
1483
          break;
1484
        }
1485
    }
1486
      }
1487
#endif
1488
6.53M
    if (state == 3)
1489
3.52M
      state = 9;
1490
1491
    /* This is a common case.  Quickly copy CH and all the
1492
       following symbol component or normal characters.  */
1493
6.53M
    if (to + 1 < toend
1494
#ifdef TC_M68K
1495
        && mri_state == NULL
1496
#endif
1497
#if defined TC_ARM && defined OBJ_ELF
1498
        && symver_state == NULL
1499
#endif
1500
6.53M
        && end_state == NULL)
1501
6.16M
      {
1502
6.16M
        char *s;
1503
6.16M
        ptrdiff_t len;
1504
1505
29.0M
        for (s = from; s < fromend; s++)
1506
29.0M
    {
1507
29.0M
      int type;
1508
1509
29.0M
      ch2 = *(unsigned char *) s;
1510
29.0M
      type = lex[ch2];
1511
29.0M
      if (type != 0
1512
26.4M
          && type != LEX_IS_SYMBOL_COMPONENT)
1513
6.16M
        break;
1514
29.0M
    }
1515
1516
6.16M
        if (s > from)
1517
    /* Handle the last character normally, for
1518
       simplicity.  */
1519
4.30M
    --s;
1520
1521
6.16M
        len = s - from;
1522
1523
6.16M
        if (len > (toend - to) - 1)
1524
12
    len = (toend - to) - 1;
1525
1526
6.16M
        if (len > 0)
1527
2.77M
    {
1528
2.77M
      PUT (ch);
1529
2.77M
      memcpy (to, from, len);
1530
2.77M
      to += len;
1531
2.77M
      from += len;
1532
2.77M
      if (to >= toend)
1533
12
        goto tofull;
1534
2.77M
      ch = GET ();
1535
2.77M
    }
1536
6.16M
      }
1537
1538
    /* Fall through.  */
1539
11.7M
  default:
1540
12.0M
  de_fault:
1541
    /* Some relatively `normal' character.  */
1542
12.0M
    if (state == 0)
1543
339k
      {
1544
339k
        state = 11; /* Now seeing label definition.  */
1545
339k
      }
1546
11.7M
    else if (state == 1)
1547
286k
      {
1548
286k
        state = 2;  /* Ditto.  */
1549
286k
      }
1550
11.4M
    else if (state == 9)
1551
5.05M
      {
1552
5.05M
        if (!IS_SYMBOL_COMPONENT (ch))
1553
186k
    state = 3;
1554
5.05M
      }
1555
6.36M
    else if (state == 10)
1556
34.3k
      {
1557
34.3k
        if (ch == '\\')
1558
43
    {
1559
      /* Special handling for backslash: a backslash may
1560
         be the beginning of a formal parameter (of a
1561
         macro) following another symbol character, with
1562
         whitespace in between.  If that is the case, we
1563
         output a space before the parameter.  Strictly
1564
         speaking, correct handling depends upon what the
1565
         macro parameter expands into; if the parameter
1566
         expands into something which does not start with
1567
         an operand character, then we don't want to keep
1568
         the space.  We don't have enough information to
1569
         make the right choice, so here we are making the
1570
         choice which is more likely to be correct.  */
1571
43
      if (to + 1 >= toend)
1572
0
        {
1573
          /* If we're near the end of the buffer, save the
1574
             character for the next time round.  Otherwise
1575
             we'll lose our state.  */
1576
0
          UNGET (ch);
1577
0
          goto tofull;
1578
0
        }
1579
43
      *to++ = ' ';
1580
43
    }
1581
1582
34.3k
        state = 3;
1583
34.3k
      }
1584
12.0M
    PUT (ch);
1585
12.0M
    break;
1586
17.5M
  }
1587
17.5M
    }
1588
1589
  /*NOTREACHED*/
1590
1591
1.42k
 fromeof:
1592
  /* We have reached the end of the input.  */
1593
1.42k
  if (to > tostart)
1594
1.42k
    last_char = to[-1];
1595
1.42k
  return to - tostart;
1596
1597
318
 tofull:
1598
  /* The output buffer is full.  Save any input we have not yet
1599
     processed.  */
1600
318
  if (fromend > from)
1601
317
    {
1602
317
      saved_input = from;
1603
317
      saved_input_len = fromend - from;
1604
317
    }
1605
1
  else
1606
1
    saved_input = NULL;
1607
1608
318
  if (to > tostart)
1609
318
    last_char = to[-1];
1610
318
  return to - tostart;
1611
1.74k
}
1612
1613
/* Return amount of pending input.  */
1614
1615
size_t
1616
do_scrub_pending (void)
1617
2.55k
{
1618
2.55k
  size_t len = 0;
1619
2.55k
  if (saved_input)
1620
306
    len += saved_input_len;
1621
2.55k
  if (state == -1)
1622
2
    len += strlen (out_string);
1623
2.55k
  return len;
1624
2.55k
}