Coverage Report

Created: 2026-03-10 08:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/binutils-gdb/gas/app.c
Line
Count
Source
1
/* This is the Assembler Pre-Processor
2
   Copyright (C) 1987-2026 Free Software Foundation, Inc.
3
4
   This file is part of GAS, the GNU Assembler.
5
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
#if (__STDC__ != 1)
30
#ifndef const
31
#define const  /* empty */
32
#endif
33
#endif
34
35
#ifdef H_TICK_HEX
36
int enable_h_tick_hex = 0;
37
#endif
38
39
#ifdef TC_M68K
40
/* Whether we are scrubbing in m68k MRI mode.  This is different from
41
   flag_m68k_mri, because the two flags will be affected by the .mri
42
   pseudo-op at different times.  */
43
static int scrub_m68k_mri;
44
45
/* The pseudo-op which switches in and out of MRI mode.  See the
46
   comment in do_scrub_chars.  */
47
static const char mri_pseudo[] = ".mri 0";
48
static const char *mri_state;
49
static char mri_last_ch;
50
#else
51
80.4k
#define scrub_m68k_mri 0
52
#endif
53
54
#if defined TC_ARM && defined OBJ_ELF
55
/* The pseudo-op for which we need to special-case `@' characters.
56
   See the comment in do_scrub_chars.  */
57
static const char   symver_pseudo[] = ".symver";
58
static const char * symver_state;
59
#endif
60
61
/* The pseudo-op (without leading dot) at which we want to (perhaps just
62
   temporarily) stop processing.  See the comments in do_scrub_chars().  */
63
static const char   end_pseudo[] = "end ";
64
static const char * end_state;
65
66
/* Whether, considering the state at start of assembly, NO_PSEUDO_DOT is
67
   active.  */
68
static bool no_pseudo_dot;
69
70
static char last_char;
71
72
1.29M
#define LEX_IS_SYMBOL_COMPONENT   1
73
157k
#define LEX_IS_WHITESPACE   2
74
123k
#define LEX_IS_LINE_SEPARATOR   3
75
61.0k
#define LEX_IS_COMMENT_START    4
76
65.2k
#define LEX_IS_LINE_COMMENT_START 5
77
0
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
78
23.4k
#define LEX_IS_STRINGQUOTE    8
79
98.3k
#define LEX_IS_COLON      9
80
136k
#define LEX_IS_NEWLINE      10
81
1.05k
#define LEX_IS_ONECHAR_QUOTE    11
82
#ifdef TC_V850
83
#define LEX_IS_DOUBLEDASH_1ST   12
84
#endif
85
#ifdef DOUBLEBAR_PARALLEL
86
#define LEX_IS_DOUBLEBAR_1ST    13
87
#endif
88
58.9k
#define LEX_IS_PARALLEL_SEPARATOR 14
89
#ifdef H_TICK_HEX
90
#define LEX_IS_H      15
91
#endif
92
69.8k
#define IS_SYMBOL_COMPONENT(c)    (lex[c] == LEX_IS_SYMBOL_COMPONENT)
93
81.8k
#define IS_WHITESPACE(c)    (lex[c] == LEX_IS_WHITESPACE)
94
120k
#define IS_LINE_SEPARATOR(c)    (lex[c] == LEX_IS_LINE_SEPARATOR)
95
58.9k
#define IS_PARALLEL_SEPARATOR(c)  (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
96
120k
#define IS_COMMENT(c)     (lex[c] == LEX_IS_COMMENT_START)
97
120k
#define IS_LINE_COMMENT(c)    (lex[c] == LEX_IS_LINE_COMMENT_START)
98
#define IS_TWOCHAR_COMMENT_1ST(c) (lex[c] == LEX_IS_TWOCHAR_COMMENT_1ST)
99
147k
#define IS_NEWLINE(c)     (lex[c] == LEX_IS_NEWLINE)
100
101
static char lex[256] = {
102
  [' ']  = LEX_IS_WHITESPACE,
103
  ['\t'] = LEX_IS_WHITESPACE,
104
#ifdef CR_EOL
105
  ['\r'] = LEX_IS_LINE_SEPARATOR,
106
#else
107
  ['\r'] = LEX_IS_WHITESPACE,
108
#endif
109
  ['\n'] = LEX_IS_NEWLINE,
110
  [':'] = LEX_IS_COLON,
111
  ['$'] = LEX_IS_SYMBOL_COMPONENT,
112
  ['.'] = LEX_IS_SYMBOL_COMPONENT,
113
  ['_'] = LEX_IS_SYMBOL_COMPONENT,
114
  ['A'] = LEX_IS_SYMBOL_COMPONENT, ['a'] = LEX_IS_SYMBOL_COMPONENT,
115
  ['B'] = LEX_IS_SYMBOL_COMPONENT, ['b'] = LEX_IS_SYMBOL_COMPONENT,
116
  ['C'] = LEX_IS_SYMBOL_COMPONENT, ['c'] = LEX_IS_SYMBOL_COMPONENT,
117
  ['D'] = LEX_IS_SYMBOL_COMPONENT, ['d'] = LEX_IS_SYMBOL_COMPONENT,
118
  ['E'] = LEX_IS_SYMBOL_COMPONENT, ['e'] = LEX_IS_SYMBOL_COMPONENT,
119
  ['F'] = LEX_IS_SYMBOL_COMPONENT, ['f'] = LEX_IS_SYMBOL_COMPONENT,
120
  ['G'] = LEX_IS_SYMBOL_COMPONENT, ['g'] = LEX_IS_SYMBOL_COMPONENT,
121
  ['H'] = LEX_IS_SYMBOL_COMPONENT, ['h'] = LEX_IS_SYMBOL_COMPONENT,
122
  ['I'] = LEX_IS_SYMBOL_COMPONENT, ['i'] = LEX_IS_SYMBOL_COMPONENT,
123
  ['J'] = LEX_IS_SYMBOL_COMPONENT, ['j'] = LEX_IS_SYMBOL_COMPONENT,
124
  ['K'] = LEX_IS_SYMBOL_COMPONENT, ['k'] = LEX_IS_SYMBOL_COMPONENT,
125
  ['L'] = LEX_IS_SYMBOL_COMPONENT, ['l'] = LEX_IS_SYMBOL_COMPONENT,
126
  ['M'] = LEX_IS_SYMBOL_COMPONENT, ['m'] = LEX_IS_SYMBOL_COMPONENT,
127
  ['N'] = LEX_IS_SYMBOL_COMPONENT, ['n'] = LEX_IS_SYMBOL_COMPONENT,
128
  ['O'] = LEX_IS_SYMBOL_COMPONENT, ['o'] = LEX_IS_SYMBOL_COMPONENT,
129
  ['P'] = LEX_IS_SYMBOL_COMPONENT, ['p'] = LEX_IS_SYMBOL_COMPONENT,
130
  ['Q'] = LEX_IS_SYMBOL_COMPONENT, ['q'] = LEX_IS_SYMBOL_COMPONENT,
131
  ['R'] = LEX_IS_SYMBOL_COMPONENT, ['r'] = LEX_IS_SYMBOL_COMPONENT,
132
  ['S'] = LEX_IS_SYMBOL_COMPONENT, ['s'] = LEX_IS_SYMBOL_COMPONENT,
133
  ['T'] = LEX_IS_SYMBOL_COMPONENT, ['t'] = LEX_IS_SYMBOL_COMPONENT,
134
  ['U'] = LEX_IS_SYMBOL_COMPONENT, ['u'] = LEX_IS_SYMBOL_COMPONENT,
135
  ['V'] = LEX_IS_SYMBOL_COMPONENT, ['v'] = LEX_IS_SYMBOL_COMPONENT,
136
  ['W'] = LEX_IS_SYMBOL_COMPONENT, ['w'] = LEX_IS_SYMBOL_COMPONENT,
137
  ['X'] = LEX_IS_SYMBOL_COMPONENT, ['x'] = LEX_IS_SYMBOL_COMPONENT,
138
  ['Y'] = LEX_IS_SYMBOL_COMPONENT, ['y'] = LEX_IS_SYMBOL_COMPONENT,
139
  ['Z'] = LEX_IS_SYMBOL_COMPONENT, ['z'] = LEX_IS_SYMBOL_COMPONENT,
140
  ['0'] = LEX_IS_SYMBOL_COMPONENT,
141
  ['1'] = LEX_IS_SYMBOL_COMPONENT,
142
  ['2'] = LEX_IS_SYMBOL_COMPONENT,
143
  ['3'] = LEX_IS_SYMBOL_COMPONENT,
144
  ['4'] = LEX_IS_SYMBOL_COMPONENT,
145
  ['5'] = LEX_IS_SYMBOL_COMPONENT,
146
  ['6'] = LEX_IS_SYMBOL_COMPONENT,
147
  ['7'] = LEX_IS_SYMBOL_COMPONENT,
148
  ['8'] = LEX_IS_SYMBOL_COMPONENT,
149
  ['9'] = LEX_IS_SYMBOL_COMPONENT,
150
#define INIT2(n) [n] = LEX_IS_SYMBOL_COMPONENT, \
151
     [(n) + 1] = LEX_IS_SYMBOL_COMPONENT
152
#define INIT4(n)    INIT2 (n),  INIT2 ((n) +  2)
153
#define INIT8(n)    INIT4 (n),  INIT4 ((n) +  4)
154
#define INIT16(n)   INIT8 (n),  INIT8 ((n) +  8)
155
#define INIT32(n)  INIT16 (n), INIT16 ((n) + 16)
156
#define INIT64(n)  INIT32 (n), INIT32 ((n) + 32)
157
#define INIT128(n) INIT64 (n), INIT64 ((n) + 64)
158
  INIT128 (128),
159
#undef INIT128
160
#undef INIT64
161
#undef INIT32
162
#undef INIT16
163
#undef INIT8
164
#undef INIT4
165
#undef INIT2
166
};
167
168
void
169
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
170
207
{
171
207
  const char *p;
172
173
  /* Latch this once at start.  xtensa uses a hook function, yet context isn't
174
     meaningful for scrubbing (or else we'd need to sync scrubber behavior as
175
     state changes).  */
176
207
  if (lex['/'] == 0)
177
1
    no_pseudo_dot = NO_PSEUDO_DOT;
178
179
#ifdef TC_M68K
180
  scrub_m68k_mri = m68k_mri;
181
182
  if (! m68k_mri)
183
#endif
184
207
    {
185
207
      lex['"'] = LEX_IS_STRINGQUOTE;
186
187
207
#if ! defined (TC_HPPA)
188
207
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
189
207
#endif
190
191
#ifdef SINGLE_QUOTE_STRINGS
192
      lex['\''] = LEX_IS_STRINGQUOTE;
193
#endif
194
207
    }
195
196
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
197
     in state 5 of do_scrub_chars must be changed.  */
198
199
  /* Note that these override the previous defaults, e.g. if ';' is a
200
     comment char, then it isn't a line separator.  */
201
202
207
#ifdef tc_symbol_chars
203
  /* This macro permits the processor to specify all characters which
204
     may appears in an operand.  This will prevent the scrubber from
205
     discarding meaningful whitespace in certain cases.  The i386
206
     backend uses this to support prefixes, which can confuse the
207
     scrubber as to whether it is parsing operands or opcodes.  */
208
1.24k
  for (p = tc_symbol_chars; *p; ++p)
209
1.03k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
210
207
#endif
211
212
  /* The m68k backend wants to be able to change comment_chars.  */
213
#ifndef tc_comment_chars
214
#define tc_comment_chars comment_chars
215
#endif
216
414
  for (p = tc_comment_chars; *p; p++)
217
207
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
218
219
  /* While counter intuitive to have more special purpose line comment chars
220
     override more general purpose ordinary ones, logic in do_scrub_chars()
221
     depends on this ordering.   */
222
621
  for (p = line_comment_chars; *p; p++)
223
414
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
224
225
207
#ifndef tc_line_separator_chars
226
207
#define tc_line_separator_chars line_separator_chars
227
207
#endif
228
414
  for (p = tc_line_separator_chars; *p; p++)
229
207
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
230
231
#ifdef tc_parallel_separator_chars
232
  /* This macro permits the processor to specify all characters which
233
     separate parallel insns on the same line.  */
234
  for (p = tc_parallel_separator_chars; *p; p++)
235
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
236
#endif
237
238
  /* Only allow slash-star comments if slash is not in use.  Certain
239
     other cases are dealt with in LEX_IS_LINE_COMMENT_START handling.
240
     FIXME: This isn't right.  We should always permit them.  */
241
207
  if (lex['/'] == 0)
242
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
243
244
#ifdef TC_M68K
245
  if (m68k_mri)
246
    {
247
      lex['\''] = LEX_IS_STRINGQUOTE;
248
      lex[';'] = LEX_IS_COMMENT_START;
249
      lex['*'] = LEX_IS_LINE_COMMENT_START;
250
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
251
   then it can't be used in an expression.  */
252
      lex['!'] = LEX_IS_LINE_COMMENT_START;
253
    }
254
#endif
255
256
#ifdef TC_V850
257
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
258
#endif
259
#ifdef DOUBLEBAR_PARALLEL
260
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
261
#endif
262
263
#ifdef H_TICK_HEX
264
  if (enable_h_tick_hex)
265
    {
266
      lex['h'] = LEX_IS_H;
267
      lex['H'] = LEX_IS_H;
268
    }
269
#endif
270
207
}
271
272
/* Saved state of the scrubber.  */
273
static int state;
274
static int old_state;
275
static const char *out_string;
276
static char out_buf[20];
277
static int add_newlines;
278
static char *saved_input;
279
static size_t saved_input_len;
280
static char input_buffer[32 * 1024];
281
282
/* Data structure for saving the state of app across #include's.  Note that
283
   app is called asynchronously to the parsing of the .include's, so our
284
   state at the time .include is interpreted is completely unrelated.
285
   That's why we have to save it all.  */
286
287
struct app_save
288
{
289
  int          state;
290
  int          old_state;
291
  const char * out_string;
292
  char         out_buf[sizeof (out_buf)];
293
  int          add_newlines;
294
  char *       saved_input;
295
  size_t       saved_input_len;
296
  const char * end_state;
297
#ifdef TC_M68K
298
  int          scrub_m68k_mri;
299
  const char * mri_state;
300
  char         mri_last_ch;
301
#endif
302
#if defined TC_ARM && defined OBJ_ELF
303
  const char * symver_state;
304
#endif
305
  char         last_char;
306
};
307
308
char *
309
app_push (void)
310
113
{
311
113
  struct app_save *saved;
312
313
113
  saved = XNEW (struct app_save);
314
113
  saved->state = state;
315
113
  saved->old_state = old_state;
316
113
  saved->out_string = out_string;
317
113
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
318
113
  saved->add_newlines = add_newlines;
319
113
  if (saved_input == NULL)
320
113
    saved->saved_input = NULL;
321
0
  else
322
0
    {
323
0
      saved->saved_input = XNEWVEC (char, saved_input_len);
324
0
      memcpy (saved->saved_input, saved_input, saved_input_len);
325
0
      saved->saved_input_len = saved_input_len;
326
0
    }
327
113
  saved->end_state = end_state;
328
#ifdef TC_M68K
329
  saved->scrub_m68k_mri = scrub_m68k_mri;
330
  saved->mri_state = mri_state;
331
  saved->mri_last_ch = mri_last_ch;
332
#endif
333
#if defined TC_ARM && defined OBJ_ELF
334
  saved->symver_state = symver_state;
335
#endif
336
113
  saved->last_char = last_char;
337
338
  /* do_scrub_begin() is not useful, just wastes time.  */
339
340
113
  state = 0;
341
113
  saved_input = NULL;
342
113
  add_newlines = 0;
343
344
113
  return (char *) saved;
345
113
}
346
347
void
348
app_pop (char *arg)
349
113
{
350
113
  struct app_save *saved = (struct app_save *) arg;
351
352
  /* There is no do_scrub_end ().  */
353
113
  state = saved->state;
354
113
  old_state = saved->old_state;
355
113
  out_string = saved->out_string;
356
113
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
357
113
  add_newlines = saved->add_newlines;
358
113
  if (saved->saved_input == NULL)
359
113
    saved_input = NULL;
360
0
  else
361
0
    {
362
0
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
363
0
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
364
0
      saved_input = input_buffer;
365
0
      saved_input_len = saved->saved_input_len;
366
0
      free (saved->saved_input);
367
0
    }
368
113
  end_state = saved->end_state;
369
#ifdef TC_M68K
370
  scrub_m68k_mri = saved->scrub_m68k_mri;
371
  mri_state = saved->mri_state;
372
  mri_last_ch = saved->mri_last_ch;
373
#endif
374
#if defined TC_ARM && defined OBJ_ELF
375
  symver_state = saved->symver_state;
376
#endif
377
113
  last_char = saved->last_char;
378
379
113
  free (arg);
380
113
}
381
382
/* @@ This assumes that \n &c are the same on host and target.  This is not
383
   necessarily true.  */
384
385
/* Return the character denoted by the escape letter CH, i.e. the
   character which followed a backslash.  Only b, f, n, r and t are
   translated; anything else, quote characters included, is returned
   unchanged.  */
static int
process_escape (int ch)
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* An escaped quote stands for itself, as does every character
     without a special meaning.  */
  return ch;
}
408
409
0
#define MULTIBYTE_WARN_COUNT_LIMIT 10
410
static unsigned int multibyte_warn_count = 0;
411
412
bool
413
scan_for_multibyte_characters (const unsigned char *  start,
414
             const unsigned char *  end,
415
             bool                   warn)
416
0
{
417
0
  if (end <= start)
418
0
    return false;
419
420
0
  if (warn && multibyte_warn_count > MULTIBYTE_WARN_COUNT_LIMIT)
421
0
    return false;
422
423
0
  bool found = false;
424
425
0
  while (start < end)
426
0
    {
427
0
      unsigned char c;
428
429
0
      if ((c = * start++) <= 0x7f)
430
0
  continue;
431
432
0
      if (!warn)
433
0
  return true;
434
435
0
      found = true;
436
437
0
      const char * filename;
438
0
      unsigned int lineno;
439
440
0
      filename = as_where (& lineno);
441
0
      if (filename == NULL)
442
0
  as_warn (_("multibyte character (%#x) encountered in input"), c);
443
0
      else if (lineno == 0)
444
0
  as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
445
0
      else
446
0
  as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
447
448
0
      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
449
0
  {
450
0
    as_warn (_("further multibyte character warnings suppressed"));
451
0
    break;
452
0
  }
453
0
    }
454
455
0
  return found;
456
0
}
457
458
/* This function is called to process input characters.  The GET
459
   parameter is used to retrieve more input characters.  GET should
460
   set its parameter to point to a buffer, and return the length of
461
   the buffer; it should return 0 at end of file.  The scrubbed output
462
   characters are put into the buffer starting at TOSTART; the TOSTART
463
   buffer is TOLEN bytes in length.  The function returns the number
464
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
465
   end of file was seen.  This function is arranged as a state
466
   machine, and saves its state so that it may return at any point.
467
   This is the way the old code used to work.  */
468
469
size_t
470
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
471
    bool check_multibyte)
472
525
{
473
525
  char *to = tostart;
474
525
  char *toend = tostart + tolen;
475
525
  char *from;
476
525
  char *fromend;
477
525
  size_t fromlen;
478
525
  int ch, ch2 = 0;
479
  /* Character that started the string we're working on.  */
480
525
  static char quotechar;
481
482
  /*State 0: beginning of normal line
483
    1: After first whitespace on line (flush more white)
484
    2: After first non-white (opcode) on line (keep 1white)
485
    3: after second white on line (into operands) (flush white)
486
    4: after putting out a .linefile, put out digits
487
    5: parsing a string, then go to old-state
488
    6: putting out \ escape in a "d string.
489
    7: no longer used
490
    8: no longer used
491
    9: After seeing symbol char in state 3 (keep 1white after symchar)
492
   10: After seeing whitespace in state 9 (keep white before symchar)
493
   11: After seeing a symbol character in state 0 (eg a label definition)
494
   -1: output string in out_string and go to the state in old_state
495
   12: no longer used
496
#ifdef DOUBLEBAR_PARALLEL
497
   13: After seeing a vertical bar, looking for a second
498
       vertical bar as a parallel expression separator.
499
#endif
500
#ifdef TC_PREDICATE_START_CHAR
501
   14: After seeing a predicate start character at state 0, looking
502
       for a predicate end character as predicate.
503
   15: After seeing a predicate start character at state 1, looking
504
       for a predicate end character as predicate.
505
#endif
506
#ifdef TC_Z80
507
   16: After seeing an 'a' or an 'A' at the start of a symbol
508
   17: After seeing an 'f' or an 'F' in state 16
509
#endif
510
    */
511
512
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
513
     constructs like ``.loc 1 20''.  This was turning into ``.loc
514
     120''.  States 9 and 10 ensure that a space is never dropped in
515
     between characters which could appear in an identifier.  Ian
516
     Taylor, ian@cygnus.com.
517
518
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
519
     correctly on the PA (and any other target where colons are optional).
520
     Jeff Law, law@cs.utah.edu.
521
522
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
523
     get squashed into "cmp r1,r2||trap#1", with the all important space
524
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
525
526
  /* This macro gets the next input character.  */
527
528
525
#define GET()             \
529
597k
  (from < fromend            \
530
597k
   ? * (unsigned char *) (from++)        \
531
597k
   : (saved_input = NULL,          \
532
436
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
533
436
      from = input_buffer,          \
534
436
      fromend = from + fromlen,         \
535
436
      (fromlen == 0            \
536
436
       ? EOF              \
537
436
       : * (unsigned char *) (from++))))
538
539
  /* This macro pushes a character back on the input stream.  */
540
541
65.1k
#define UNGET(uch) (*--from = (uch))
542
543
  /* This macro puts a character into the output buffer.  If this
544
     character fills the output buffer, this macro jumps to the label
545
     TOFULL.  We use this rather ugly approach because we need to
546
     handle two different termination conditions: EOF on the input
547
     stream, and a full output buffer.  It would be simpler if we
548
     always read in the entire input stream before processing it, but
549
     I don't want to make such a significant change to the assembler's
550
     memory usage.  */
551
552
525
#define PUT(pch)        \
553
506k
  do            \
554
506k
    {           \
555
506k
      *to++ = (pch);        \
556
506k
      if (to >= toend)       \
557
506k
  goto tofull;       \
558
506k
    }           \
559
506k
  while (0)
560
561
525
  if (saved_input != NULL)
562
33
    {
563
33
      from = saved_input;
564
33
      fromend = from + saved_input_len;
565
33
    }
566
492
  else
567
492
    {
568
492
      fromlen = (*get) (input_buffer, sizeof input_buffer);
569
492
      if (fromlen == 0)
570
207
  return 0;
571
285
      from = input_buffer;
572
285
      fromend = from + fromlen;
573
574
285
      if (check_multibyte)
575
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
576
0
                (const unsigned char *) fromend,
577
0
                true /* Generate warnings.  */);
578
285
    }
579
580
394k
  while (1)
581
394k
    {
582
      /* The cases in this switch end with continue, in order to
583
   branch back to the top of this while loop and generate the
584
   next output character in the appropriate state.  */
585
394k
      switch (state)
586
394k
  {
587
7.35k
  case -1:
588
7.35k
    ch = *out_string++;
589
7.35k
    if (*out_string == '\0')
590
1.45k
      {
591
1.45k
        state = old_state;
592
1.45k
        old_state = 3;
593
1.45k
      }
594
7.35k
    PUT (ch);
595
7.35k
    continue;
596
597
7.35k
  case 4:
598
1.49k
    ch = GET ();
599
1.49k
    if (ch == EOF)
600
0
      goto fromeof;
601
1.49k
    else if (ch >= '0' && ch <= '9')
602
868
      PUT (ch);
603
624
    else
604
624
      {
605
624
        while (ch != EOF && IS_WHITESPACE (ch))
606
0
    ch = GET ();
607
624
        if (ch == '"')
608
314
    {
609
314
      quotechar = ch;
610
314
      state = 5;
611
314
      old_state = 3;
612
314
      PUT (ch);
613
314
    }
614
310
        else
615
310
    {
616
1.09k
      while (ch != EOF && ch != '\n')
617
780
        ch = GET ();
618
310
      state = 0;
619
310
      PUT (ch);
620
310
    }
621
624
      }
622
1.49k
    continue;
623
624
51.3k
  case 5:
625
    /* We are going to copy everything up to a quote character,
626
       with special handling for a backslash.  We try to
627
       optimize the copying in the simple case without using the
628
       GET and PUT macros.  */
629
51.3k
    {
630
51.3k
      char *s;
631
51.3k
      ptrdiff_t len;
632
633
1.40M
      for (s = from; s < fromend; s++)
634
1.40M
        {
635
1.40M
    ch = *s;
636
1.40M
    if (ch == '\\'
637
1.40M
        || ch == quotechar
638
1.38M
        || ch == '\n')
639
51.2k
      break;
640
1.40M
        }
641
51.3k
      len = s - from;
642
51.3k
      if (len > toend - to)
643
2
        len = toend - to;
644
51.3k
      if (len > 0)
645
42.9k
        {
646
42.9k
    memcpy (to, from, len);
647
42.9k
    to += len;
648
42.9k
    from += len;
649
42.9k
    if (to >= toend)
650
2
      goto tofull;
651
42.9k
        }
652
51.3k
    }
653
654
51.3k
    ch = GET ();
655
51.3k
    if (ch == EOF)
656
84
      {
657
        /* This buffer is here specifically so
658
     that the UNGET below will work.  */
659
84
        static char one_char_buf[1];
660
661
84
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
662
84
        state = old_state;
663
84
        from = fromend = one_char_buf + 1;
664
84
        fromlen = 1;
665
84
        UNGET ('\n');
666
84
        PUT (quotechar);
667
84
      }
668
51.3k
    else if (ch == quotechar)
669
23.4k
      {
670
23.4k
        state = old_state;
671
23.4k
        PUT (ch);
672
23.4k
      }
673
27.8k
    else if (TC_STRING_ESCAPES && ch == '\\')
674
694
      {
675
694
        state = 6;
676
694
        PUT (ch);
677
694
      }
678
27.1k
    else if (scrub_m68k_mri && ch == '\n')
679
0
      {
680
        /* Just quietly terminate the string.  This permits lines like
681
       bne  label loop if we haven't reach end yet.  */
682
0
        state = old_state;
683
0
        UNGET (ch);
684
0
        PUT ('\'');
685
0
      }
686
27.1k
    else
687
27.1k
      {
688
27.1k
        PUT (ch);
689
27.1k
      }
690
51.3k
    continue;
691
692
51.3k
  case 6:
693
694
    state = 5;
694
694
    ch = GET ();
695
694
    switch (ch)
696
694
      {
697
        /* Handle strings broken across lines, by turning '\n' into
698
     '\\' and 'n'.  */
699
1
      case '\n':
700
1
        UNGET ('n');
701
1
        add_newlines++;
702
1
        PUT ('\\');
703
1
        continue;
704
705
1
      case EOF:
706
0
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
707
0
        PUT (quotechar);
708
0
        continue;
709
710
        /* These two are used inside macros.  */
711
0
      case '@':
712
0
      case '+':
713
0
        break;
714
715
1
      case '"':
716
33
      case '\\':
717
45
      case 'b':
718
48
      case 'f':
719
50
      case 'n':
720
123
      case 'r':
721
126
      case 't':
722
126
      case 'v':
723
126
      case 'x':
724
126
      case 'X':
725
181
      case '0':
726
183
      case '1':
727
184
      case '2':
728
187
      case '3':
729
189
      case '4':
730
191
      case '5':
731
195
      case '6':
732
195
      case '7':
733
195
        break;
734
735
498
      default:
736
#ifdef ONLY_STANDARD_ESCAPES
737
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
738
#endif
739
498
        break;
740
694
      }
741
693
    PUT (ch);
742
693
    continue;
743
744
#ifdef DOUBLEBAR_PARALLEL
745
  case 13:
746
    ch = GET ();
747
    if (ch != '|')
748
      abort ();
749
750
    /* Reset back to state 1 and pretend that we are parsing a
751
       line from just after the first white space.  */
752
    state = 1;
753
    PUT ('|');
754
    continue;
755
#endif
756
#ifdef TC_Z80
757
  case 16:
758
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
759
    ch = GET ();
760
    if (ch == 'f' || ch == 'F')
761
      {
762
        state = 17;
763
        PUT (ch);
764
      }
765
    else
766
      {
767
        if (ch != EOF)
768
    UNGET (ch);
769
        state = 9;
770
        break;
771
      }
772
    /* Fall through.  */
773
  case 17:
774
    /* We have seen "af" at the start of a symbol,
775
       a ' here is a part of that symbol.  */
776
    ch = GET ();
777
    state = 9;
778
    if (ch == '\'')
779
      /* Change to avoid warning about unclosed string.  */
780
      PUT ('`');
781
    else if (ch != EOF)
782
      UNGET (ch);
783
    break;
784
#endif
785
394k
  }
786
787
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
788
789
      /* flushchar: */
790
333k
      ch = GET ();
791
792
#ifdef TC_PREDICATE_START_CHAR
793
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
794
  {
795
    state += 14;
796
    PUT (ch);
797
    continue;
798
  }
799
      else if (state == 14 || state == 15)
800
  {
801
    if (ch == TC_PREDICATE_END_CHAR)
802
      {
803
        state -= 14;
804
        PUT (ch);
805
        ch = GET ();
806
      }
807
    else
808
      {
809
        PUT (ch);
810
        continue;
811
      }
812
  }
813
#endif
814
815
371k
    recycle:
816
817
      /* We need to watch out for .end directives: We should in particular not
818
   issue diagnostics for anything after an active one.  */
819
371k
      if (ch == EOF)
820
272
  end_state = NULL;
821
370k
      else if (end_state == NULL)
822
338k
  {
823
338k
    if ((state == 0 || state == 1)
824
82.9k
        && (ch == '.'
825
51.4k
      || (no_pseudo_dot && ch == end_pseudo[0])))
826
31.5k
      end_state = end_pseudo + (ch != '.');
827
338k
  }
828
32.7k
      else if (ch != '\0'
829
32.7k
         && (*end_state == ch
830
       /* Avoid triggering on directives like .endif or .endr.  */
831
31.6k
       || (*end_state == ' ' && !IS_SYMBOL_COMPONENT (ch))))
832
1.05k
  {
833
1.05k
    if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
834
1
      goto end_end;
835
1.05k
    ++end_state;
836
1.05k
  }
837
31.6k
      else if (*end_state != '\0')
838
  /* We did not get the expected character, or we didn't
839
     get a valid terminating character after seeing the
840
     entire pseudo-op, so we must go back to the beginning.  */
841
31.5k
  end_state = NULL;
842
144
      else if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
843
18
  {
844
19
  end_end:
845
    /* We've read the entire pseudo-op.  If this is the end of the line,
846
       bail out now by (ab)using the output-full path.  This allows the
847
       caller to process input up to here and terminate processing if this
848
       directive is actually active (not on the false branch of a
849
       conditional and not in a macro definition).  */
850
19
    end_state = NULL;
851
19
    state = 0;
852
19
    PUT (ch);
853
19
    goto tofull;
854
19
  }
855
856
#if defined TC_ARM && defined OBJ_ELF
857
      /* We need to watch out for .symver directives.  See the comment later
858
   in this function.  */
859
      if (ch == EOF)
860
  symver_state = NULL;
861
      else if (symver_state == NULL)
862
  {
863
    if ((state == 0 || state == 1)
864
        && strchr (tc_comment_chars, '@') != NULL
865
        && ch == symver_pseudo[0])
866
      symver_state = symver_pseudo + 1;
867
  }
868
      else
869
  {
870
    /* We advance to the next state if we find the right
871
       character.  */
872
    if (ch != '\0' && (*symver_state == ch))
873
      ++symver_state;
874
    else if (*symver_state != '\0')
875
      /* We did not get the expected character, or we didn't
876
         get a valid terminating character after seeing the
877
         entire pseudo-op, so we must go back to the beginning.  */
878
      symver_state = NULL;
879
    else
880
      {
881
        /* We've read the entire pseudo-op.  If this is the end
882
     of the line, go back to the beginning.  */
883
        if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
884
    symver_state = NULL;
885
      }
886
  }
887
#endif /* TC_ARM && OBJ_ELF */
888
889
#ifdef TC_M68K
890
      /* We want to have pseudo-ops which control whether we are in
891
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
892
   the scrubber, that means that we need a special purpose
893
   recognizer here.  */
894
      if (ch == EOF)
895
  mri_state = NULL;
896
      else if (mri_state == NULL)
897
  {
898
    if ((state == 0 || state == 1)
899
        && ch == mri_pseudo[0])
900
      mri_state = mri_pseudo + 1;
901
  }
902
      else
903
  {
904
    /* We advance to the next state if we find the right
905
       character, or if we need a space character and we get any
906
       whitespace character, or if we need a '0' and we get a
907
       '1' (this is so that we only need one state to handle
908
       ``.mri 0'' and ``.mri 1'').  */
909
    if (ch != '\0'
910
        && (*mri_state == ch
911
      || (*mri_state == ' '
912
          && IS_WHITESPACE (ch))
913
      || (*mri_state == '0'
914
          && ch == '1')))
915
      {
916
        mri_last_ch = ch;
917
        ++mri_state;
918
      }
919
    else if (*mri_state != '\0'
920
       || (!IS_WHITESPACE (ch)
921
           && !IS_LINE_SEPARATOR (ch)
922
           && !IS_NEWLINE (ch)))
923
      {
924
        /* We did not get the expected character, or we didn't
925
     get a valid terminating character after seeing the
926
     entire pseudo-op, so we must go back to the
927
     beginning.  */
928
        mri_state = NULL;
929
      }
930
    else
931
      {
932
        /* We've read the entire pseudo-op.  mri_last_ch is
933
     either '0' or '1' indicating whether to enter or
934
     leave MRI mode.  */
935
        do_scrub_begin (mri_last_ch == '1');
936
        mri_state = NULL;
937
938
        /* We continue handling the character as usual.  The
939
     main gas reader must also handle the .mri pseudo-op
940
     to control expression parsing and the like.  */
941
      }
942
  }
943
#endif
944
945
371k
      if (ch == EOF)
946
272
  {
947
272
    if (state != 0)
948
113
      {
949
113
        as_warn (_("end of file not at end of a line; newline inserted"));
950
113
        state = 0;
951
113
        PUT ('\n');
952
113
      }
953
272
    goto fromeof;
954
272
  }
955
956
370k
      switch (lex[ch])
957
370k
  {
958
75.9k
  case LEX_IS_WHITESPACE:
959
75.9k
    do
960
79.8k
      {
961
79.8k
        ch = GET ();
962
79.8k
      }
963
79.8k
    while (ch != EOF && IS_WHITESPACE (ch));
964
75.9k
    if (ch == EOF)
965
12
      goto fromeof;
966
967
75.9k
    if (state == 0)
968
15.8k
      {
969
        /* Preserve a single whitespace character at the
970
     beginning of a line.  */
971
15.8k
        state = 1;
972
15.8k
        UNGET (ch);
973
15.8k
        PUT (' ');
974
15.8k
        break;
975
15.8k
      }
976
977
#ifdef KEEP_WHITE_AROUND_COLON
978
    if (lex[ch] == LEX_IS_COLON)
979
      {
980
        /* Only keep this white if there's no white *after* the
981
     colon.  */
982
        ch2 = GET ();
983
        if (ch2 != EOF)
984
    UNGET (ch2);
985
        if (!IS_WHITESPACE (ch2))
986
    {
987
      state = 9;
988
      UNGET (ch);
989
      PUT (' ');
990
      break;
991
    }
992
      }
993
#endif
994
995
    /* Prune trailing whitespace.  */
996
60.0k
    if (IS_COMMENT (ch)
997
60.0k
        || (IS_LINE_COMMENT (ch)
998
237
            && (state < 1 || strchr (tc_comment_chars, ch)))
999
59.8k
        || IS_NEWLINE (ch)
1000
59.2k
        || IS_LINE_SEPARATOR (ch)
1001
58.9k
        || IS_PARALLEL_SEPARATOR (ch))
1002
1.16k
      {
1003
1.16k
        if (scrub_m68k_mri)
1004
0
    {
1005
      /* In MRI mode, we keep these spaces.  */
1006
0
      UNGET (ch);
1007
0
      PUT (' ');
1008
0
      break;
1009
0
    }
1010
1.16k
        goto recycle;
1011
1.16k
      }
1012
#ifdef DOUBLESLASH_LINE_COMMENTS
1013
    if (IS_TWOCHAR_COMMENT_1ST (ch))
1014
      {
1015
        ch2 = GET ();
1016
        if (ch2 != EOF)
1017
          UNGET (ch2);
1018
        if (ch2 == '/')
1019
    goto recycle;
1020
      }
1021
#endif
1022
1023
    /* If we're in state 2 or 11, we've seen a non-white
1024
       character followed by whitespace.  If the next character
1025
       is ':', this is whitespace after a label name which we
1026
       normally must ignore.  In MRI mode, though, spaces are
1027
       not permitted between the label and the colon.  */
1028
58.9k
    if ((state == 2 || state == 11)
1029
36.0k
        && lex[ch] == LEX_IS_COLON
1030
0
        && ! scrub_m68k_mri)
1031
12
      {
1032
12
        state = 1;
1033
12
        PUT (ch);
1034
12
        break;
1035
12
      }
1036
1037
58.9k
    switch (state)
1038
58.9k
      {
1039
8
      case 1:
1040
        /* We can arrive here if we leave a leading whitespace
1041
     character at the beginning of a line.  */
1042
8
        goto recycle;
1043
13.6k
      case 2:
1044
13.6k
        state = 3;
1045
13.6k
        if (to + 1 < toend)
1046
13.6k
    {
1047
      /* Optimize common case by skipping UNGET/GET.  */
1048
13.6k
      PUT (' '); /* Sp after opco */
1049
13.6k
      goto recycle;
1050
13.6k
    }
1051
0
        UNGET (ch);
1052
0
        PUT (' ');
1053
0
        break;
1054
427
      case 3:
1055
427
#ifndef TC_KEEP_OPERAND_SPACES
1056
        /* For TI C6X, we keep these spaces as they may separate
1057
     functional unit specifiers from operands.  */
1058
427
        if (scrub_m68k_mri)
1059
0
#endif
1060
0
    {
1061
      /* In MRI mode, we keep these spaces.  */
1062
0
      UNGET (ch);
1063
0
      PUT (' ');
1064
0
      break;
1065
0
    }
1066
427
        goto recycle; /* Sp in operands */
1067
22.4k
      case 9:
1068
22.4k
      case 10:
1069
22.4k
#ifndef TC_KEEP_OPERAND_SPACES
1070
22.4k
        if (scrub_m68k_mri)
1071
0
#endif
1072
0
    {
1073
      /* In MRI mode, we keep these spaces.  */
1074
0
      state = 3;
1075
0
      UNGET (ch);
1076
0
      PUT (' ');
1077
0
      break;
1078
0
    }
1079
22.4k
        state = 10; /* Sp after symbol char */
1080
22.4k
        goto recycle;
1081
22.3k
      case 11:
1082
22.3k
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1083
0
    state = 1;
1084
22.3k
        else
1085
22.3k
    {
1086
      /* We know that ch is not ':', since we tested that
1087
         case above.  Therefore this is not a label, so it
1088
         must be the opcode, and we've just seen the
1089
         whitespace after it.  */
1090
22.3k
      state = 3;
1091
22.3k
    }
1092
22.3k
        UNGET (ch);
1093
22.3k
        PUT (' '); /* Sp after label definition.  */
1094
22.3k
        break;
1095
22.3k
      default:
1096
0
        BAD_CASE (state);
1097
58.9k
      }
1098
22.3k
    break;
1099
1100
22.3k
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1101
0
    ch2 = GET ();
1102
0
    if (ch2 == '*')
1103
0
      {
1104
0
  twochar_comment:
1105
0
        for (;;)
1106
0
    {
1107
0
      do
1108
0
        {
1109
0
          ch2 = GET ();
1110
0
          if (ch2 != EOF && IS_NEWLINE (ch2))
1111
0
      add_newlines++;
1112
0
        }
1113
0
      while (ch2 != EOF && ch2 != '*');
1114
1115
0
      while (ch2 == '*')
1116
0
        ch2 = GET ();
1117
1118
0
      if (ch2 == EOF || ch2 == '/')
1119
0
        break;
1120
1121
      /* This UNGET will ensure that we count newlines
1122
         correctly.  */
1123
0
      UNGET (ch2);
1124
0
    }
1125
1126
0
        if (ch2 == EOF)
1127
0
    as_warn (_("end of file in multiline comment"));
1128
1129
0
        ch = ' ';
1130
0
        goto recycle;
1131
0
      }
1132
#ifdef DOUBLESLASH_LINE_COMMENTS
1133
    else if (ch2 == '/')
1134
      {
1135
        do
1136
    {
1137
      ch = GET ();
1138
    }
1139
        while (ch != EOF && !IS_NEWLINE (ch));
1140
        if (ch == EOF)
1141
    as_warn ("end of file in comment; newline inserted");
1142
        state = 0;
1143
        PUT ('\n');
1144
        break;
1145
      }
1146
#endif
1147
0
    else
1148
0
      {
1149
0
        if (ch2 != EOF)
1150
0
    UNGET (ch2);
1151
0
        if (state == 9 || state == 10)
1152
0
    state = 3;
1153
0
        PUT (ch);
1154
0
      }
1155
0
    break;
1156
1157
23.2k
  case LEX_IS_STRINGQUOTE:
1158
23.2k
    quotechar = ch;
1159
23.2k
    if (state == 10)
1160
7.40k
      {
1161
        /* Preserve the whitespace in foo "bar".  */
1162
7.40k
        UNGET (ch);
1163
7.40k
        state = 3;
1164
7.40k
        PUT (' ');
1165
1166
        /* PUT didn't jump out.  We could just break, but we
1167
     know what will happen, so optimize a bit.  */
1168
7.40k
        ch = GET ();
1169
7.40k
        old_state = 9;
1170
7.40k
      }
1171
15.8k
    else if (state == 3)
1172
408
      old_state = 9;
1173
15.4k
    else if (state == 0)
1174
28
      old_state = 11; /* Now seeing label definition.  */
1175
15.4k
    else
1176
15.4k
      old_state = state;
1177
23.2k
    state = 5;
1178
23.2k
    PUT (ch);
1179
23.2k
    break;
1180
1181
23.2k
  case LEX_IS_ONECHAR_QUOTE:
1182
#ifdef H_TICK_HEX
1183
    if (state == 9 && enable_h_tick_hex)
1184
      {
1185
        char c;
1186
1187
        c = GET ();
1188
        as_warn ("'%c found after symbol", c);
1189
        UNGET (c);
1190
      }
1191
#endif
1192
850
    if (state == 10)
1193
3
      {
1194
        /* Preserve the whitespace in foo 'b'.  */
1195
3
        UNGET (ch);
1196
3
        state = 3;
1197
3
        PUT (' ');
1198
3
        break;
1199
3
      }
1200
847
    ch = GET ();
1201
847
    if (ch == EOF)
1202
1
      {
1203
1
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1204
1
        ch = 0;
1205
1
      }
1206
847
    if (ch == '\\')
1207
2
      {
1208
2
        ch = GET ();
1209
2
        if (ch == EOF)
1210
0
    {
1211
0
      as_warn (_("end of file in escape character"));
1212
0
      ch = '\\';
1213
0
    }
1214
2
        else
1215
2
    ch = process_escape (ch);
1216
2
      }
1217
847
    sprintf (out_buf, "%d", ch & 0xff);
1218
1219
    /* None of these 'x constants for us.  We want 'x'.  */
1220
847
    if ((ch = GET ()) != '\'')
1221
791
      {
1222
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1223
        as_warn (_("missing close quote; (assumed)"));
1224
#else
1225
791
        if (ch != EOF)
1226
790
    UNGET (ch);
1227
791
#endif
1228
791
      }
1229
847
    if (strlen (out_buf) == 1)
1230
14
      {
1231
14
        PUT (out_buf[0]);
1232
14
        break;
1233
14
      }
1234
833
    if (state == 9)
1235
548
      old_state = 3;
1236
285
    else
1237
285
      old_state = state;
1238
833
    state = -1;
1239
833
    out_string = out_buf;
1240
833
    PUT (*out_string++);
1241
833
    break;
1242
1243
3.37k
  case LEX_IS_COLON:
1244
#ifdef KEEP_WHITE_AROUND_COLON
1245
    state = 9;
1246
#else
1247
3.37k
    if (state == 9 || state == 10)
1248
927
      state = 3;
1249
2.44k
    else if (state != 3)
1250
2.34k
      state = 1;
1251
3.37k
#endif
1252
3.37k
    PUT (ch);
1253
3.37k
    break;
1254
1255
50.1k
  case LEX_IS_NEWLINE:
1256
    /* Roll out a bunch of newlines from inside comments, etc.  */
1257
50.1k
    if (add_newlines)
1258
1
      {
1259
1
        --add_newlines;
1260
1
        UNGET (ch);
1261
1
      }
1262
    /* Fall through.  */
1263
1264
62.6k
  case LEX_IS_LINE_SEPARATOR:
1265
62.6k
    state = 0;
1266
62.6k
    PUT (ch);
1267
62.6k
    break;
1268
1269
62.6k
  case LEX_IS_PARALLEL_SEPARATOR:
1270
0
    state = 1;
1271
0
    PUT (ch);
1272
0
    break;
1273
1274
#ifdef TC_V850
1275
  case LEX_IS_DOUBLEDASH_1ST:
1276
    ch2 = GET ();
1277
    if (ch2 != '-')
1278
      {
1279
        if (ch2 != EOF)
1280
    UNGET (ch2);
1281
        goto de_fault;
1282
      }
1283
    /* Read and skip to end of line.  */
1284
    do
1285
      {
1286
        ch = GET ();
1287
      }
1288
    while (ch != EOF && ch != '\n');
1289
1290
    if (ch == EOF)
1291
      as_warn (_("end of file in comment; newline inserted"));
1292
1293
    state = 0;
1294
    PUT ('\n');
1295
    break;
1296
#endif
1297
#ifdef DOUBLEBAR_PARALLEL
1298
  case LEX_IS_DOUBLEBAR_1ST:
1299
    ch2 = GET ();
1300
    if (ch2 != EOF)
1301
      UNGET (ch2);
1302
    if (ch2 != '|')
1303
      goto de_fault;
1304
1305
    /* Handle '||' in two states as invoking PUT twice might
1306
       result in the first one jumping out of this loop.  We'd
1307
       then lose track of the state and one '|' char.  */
1308
    state = 13;
1309
    PUT ('|');
1310
    break;
1311
#endif
1312
4.72k
  case LEX_IS_LINE_COMMENT_START:
1313
    /* FIXME-someday: The two character comment stuff was badly
1314
       thought out.  On i386, we want '/' as line comment start
1315
       AND we want C style comments.  hence this hack.  The
1316
       whole lexical process should be reworked.  xoxorich.  */
1317
4.72k
    if (ch == '/')
1318
2.57k
      {
1319
2.57k
        ch2 = GET ();
1320
2.57k
        if (ch2 == '*')
1321
0
    goto twochar_comment;
1322
2.57k
        if (ch2 != EOF)
1323
2.57k
    UNGET (ch2);
1324
2.57k
      }
1325
1326
4.72k
    if (state == 0 || state == 1)  /* Only comment at start of line.  */
1327
1.38k
      {
1328
1.38k
        int startch;
1329
1330
1.38k
        startch = ch;
1331
1332
1.38k
        do
1333
1.39k
    {
1334
1.39k
      ch = GET ();
1335
1.39k
    }
1336
1.39k
        while (ch != EOF && IS_WHITESPACE (ch));
1337
1338
1.38k
        if (ch == EOF)
1339
0
    {
1340
0
      as_warn (_("end of file in comment; newline inserted"));
1341
0
      PUT ('\n');
1342
0
      break;
1343
0
    }
1344
1345
1.38k
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1346
764
    {
1347
      /* Not a cpp line.  */
1348
2.43k
      while (ch != EOF && !IS_NEWLINE (ch))
1349
1.66k
        ch = GET ();
1350
764
      if (ch == EOF)
1351
2
        {
1352
2
          as_warn (_("end of file in comment; newline inserted"));
1353
2
          PUT ('\n');
1354
2
        }
1355
762
      else /* IS_NEWLINE (ch) */
1356
762
        {
1357
          /* To process non-zero add_newlines.  */
1358
762
          UNGET (ch);
1359
762
        }
1360
764
      state = 0;
1361
764
      break;
1362
764
    }
1363
        /* Looks like `# 123 "filename"' from cpp.  */
1364
624
        UNGET (ch);
1365
624
        old_state = 4;
1366
624
        state = -1;
1367
624
        if (scrub_m68k_mri)
1368
0
    out_string = "\tlinefile ";
1369
624
        else
1370
624
    out_string = "\t.linefile ";
1371
624
        PUT (*out_string++);
1372
624
        break;
1373
624
      }
1374
1375
#ifdef TC_D10V
1376
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1377
       Trap is the only short insn that has a first operand that is
1378
       neither register nor label.
1379
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1380
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1381
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1382
       only character in line_comment_chars for d10v, hence we
1383
       can recognize it as such.  */
1384
    /* An alternative approach would be to reset the state to 1 when
1385
       we see '||', '<'- or '->', but that seems to be overkill.  */
1386
    if (state == 10)
1387
      PUT (' ');
1388
#endif
1389
    /* We have a line comment character which is not at the
1390
       start of a line.  If this is also a normal comment
1391
       character, fall through.  Otherwise treat it as a default
1392
       character.  */
1393
3.33k
    if (strchr (tc_comment_chars, ch) == NULL)
1394
2.57k
      goto de_fault;
1395
764
    if (scrub_m68k_mri
1396
0
        && (ch == '!' || ch == '*' || ch == '#'))
1397
0
      goto de_fault;
1398
    /* Fall through.  */
1399
764
  case LEX_IS_COMMENT_START:
1400
#if defined TC_ARM && defined OBJ_ELF
1401
    /* On the ARM, `@' is the comment character.
1402
       Unfortunately this is also a special character in ELF .symver
1403
       directives (and .type, though we deal with those another way).
1404
       So we check if this line is such a directive, and treat
1405
       the character as default if so.  This is a hack.  */
1406
    if ((symver_state != NULL) && (*symver_state == 0))
1407
      goto de_fault;
1408
#endif
1409
1410
    /* Care is needed not to damage occurrences of \<comment-char>
1411
       by stripping the <comment-char> onwards.  Yuck.  */
1412
764
    if ((to > tostart ? to[-1] : last_char) == '\\')
1413
      /* Do not treat the <comment-char> as a start-of-comment.  */
1414
0
      goto de_fault;
1415
1416
#ifdef WARN_COMMENTS
1417
    if (!found_comment)
1418
      found_comment_file = as_where (&found_comment);
1419
#endif
1420
764
    do
1421
22.7k
      {
1422
22.7k
        ch = GET ();
1423
22.7k
      }
1424
22.7k
    while (ch != EOF && !IS_NEWLINE (ch));
1425
764
    if (ch == EOF)
1426
6
      as_warn (_("end of file in comment; newline inserted"));
1427
764
    state = 0;
1428
764
    PUT ('\n');
1429
764
    break;
1430
1431
#ifdef H_TICK_HEX
1432
  case LEX_IS_H:
1433
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1434
       the H' with 0x to make them gas-style hex characters.  */
1435
    if (enable_h_tick_hex)
1436
      {
1437
        char quot;
1438
1439
        quot = GET ();
1440
        if (quot == '\'')
1441
    {
1442
      UNGET ('x');
1443
      ch = '0';
1444
    }
1445
        else
1446
    UNGET (quot);
1447
      }
1448
#endif
1449
    /* Fall through.  */
1450
1451
176k
  case LEX_IS_SYMBOL_COMPONENT:
1452
176k
    if (state == 10)
1453
14.7k
      {
1454
        /* This is a symbol character following another symbol
1455
     character, with whitespace in between.  We skipped
1456
     the whitespace earlier, so output it now.  */
1457
14.7k
        UNGET (ch);
1458
14.7k
        state = 3;
1459
14.7k
        PUT (' ');
1460
14.7k
        break;
1461
14.7k
      }
1462
1463
#ifdef TC_Z80
1464
    /* "af'" is a symbol containing '\''.  */
1465
    if (state == 3 && (ch == 'a' || ch == 'A'))
1466
      {
1467
        state = 16;
1468
        PUT (ch);
1469
        ch = GET ();
1470
        if (ch == 'f' || ch == 'F')
1471
    {
1472
      state = 17;
1473
      PUT (ch);
1474
      break;
1475
    }
1476
        else
1477
    {
1478
      state = 9;
1479
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1480
        {
1481
          if (ch != EOF)
1482
      UNGET (ch);
1483
          break;
1484
        }
1485
    }
1486
      }
1487
#endif
1488
161k
    if (state == 3)
1489
56.8k
      state = 9;
1490
1491
    /* This is a common case.  Quickly copy CH and all the
1492
       following symbol component or normal characters.  */
1493
161k
    if (to + 1 < toend
1494
#ifdef TC_M68K
1495
        && mri_state == NULL
1496
#endif
1497
#if defined TC_ARM && defined OBJ_ELF
1498
        && symver_state == NULL
1499
#endif
1500
161k
        && end_state == NULL)
1501
129k
      {
1502
129k
        char *s;
1503
129k
        ptrdiff_t len;
1504
1505
1.13M
        for (s = from; s < fromend; s++)
1506
1.13M
    {
1507
1.13M
      int type;
1508
1509
1.13M
      ch2 = *(unsigned char *) s;
1510
1.13M
      type = lex[ch2];
1511
1.13M
      if (type != 0
1512
1.04M
          && type != LEX_IS_SYMBOL_COMPONENT)
1513
129k
        break;
1514
1.13M
    }
1515
1516
129k
        if (s > from)
1517
    /* Handle the last character normally, for
1518
       simplicity.  */
1519
102k
    --s;
1520
1521
129k
        len = s - from;
1522
1523
129k
        if (len > (toend - to) - 1)
1524
0
    len = (toend - to) - 1;
1525
1526
129k
        if (len > 0)
1527
92.5k
    {
1528
92.5k
      PUT (ch);
1529
92.5k
      memcpy (to, from, len);
1530
92.5k
      to += len;
1531
92.5k
      from += len;
1532
92.5k
      if (to >= toend)
1533
1
        goto tofull;
1534
92.5k
      ch = GET ();
1535
92.5k
    }
1536
129k
      }
1537
1538
    /* Fall through.  */
1539
185k
  default:
1540
187k
  de_fault:
1541
    /* Some relatively `normal' character.  */
1542
187k
    if (state == 0)
1543
40.9k
      {
1544
40.9k
        state = 11; /* Now seeing label definition.  */
1545
40.9k
      }
1546
146k
    else if (state == 1)
1547
18.0k
      {
1548
18.0k
        state = 2;  /* Ditto.  */
1549
18.0k
      }
1550
128k
    else if (state == 9)
1551
69.6k
      {
1552
69.6k
        if (!IS_SYMBOL_COMPONENT (ch))
1553
9.48k
    state = 3;
1554
69.6k
      }
1555
59.0k
    else if (state == 10)
1556
189
      {
1557
189
        if (ch == '\\')
1558
1
    {
1559
      /* Special handling for backslash: a backslash may
1560
         be the beginning of a formal parameter (of a
1561
         macro) following another symbol character, with
1562
         whitespace in between.  If that is the case, we
1563
         output a space before the parameter.  Strictly
1564
         speaking, correct handling depends upon what the
1565
         macro parameter expands into; if the parameter
1566
         expands into something which does not start with
1567
         an operand character, then we don't want to keep
1568
         the space.  We don't have enough information to
1569
         make the right choice, so here we are making the
1570
         choice which is more likely to be correct.  */
1571
1
      if (to + 1 >= toend)
1572
0
        {
1573
          /* If we're near the end of the buffer, save the
1574
             character for the next time round.  Otherwise
1575
             we'll lose our state.  */
1576
0
          UNGET (ch);
1577
0
          goto tofull;
1578
0
        }
1579
1
      *to++ = ' ';
1580
1
    }
1581
1582
189
        state = 3;
1583
189
      }
1584
187k
    PUT (ch);
1585
187k
    break;
1586
370k
  }
1587
370k
    }
1588
1589
  /*NOTREACHED*/
1590
1591
284
 fromeof:
1592
  /* We have reached the end of the input.  */
1593
284
  if (to > tostart)
1594
284
    last_char = to[-1];
1595
284
  return to - tostart;
1596
1597
34
 tofull:
1598
  /* The output buffer is full.  Save any input we have not yet
1599
     processed.  */
1600
34
  if (fromend > from)
1601
33
    {
1602
33
      saved_input = from;
1603
33
      saved_input_len = fromend - from;
1604
33
    }
1605
1
  else
1606
1
    saved_input = NULL;
1607
1608
34
  if (to > tostart)
1609
34
    last_char = to[-1];
1610
34
  return to - tostart;
1611
318
}
1612
1613
/* Return the amount of pending input, i.e. the number of characters
   the scrubber is still holding on to.  Two sources are counted:

   - saved_input_len, when saved_input is non-NULL.  do_scrub_chars
     sets this pair on its "output buffer is full" exit path to
     remember the input it has not yet processed.

   - the length of the remainder of out_string, when state is -1.
     do_scrub_chars switches to state -1 when it starts emitting
     out_string (for instance the decimal text of a character
     constant, or the ".linefile" prefix for a cpp line marker), and
     advances out_string past each character it has already put.

   Returns 0 when neither applies.  Takes no arguments and modifies
   nothing.  */
1614
1615
size_t
1616
do_scrub_pending (void)
1617
209
{
1618
209
  size_t len = 0;
1619
209
  /* Input left over from a call that ran out of output space.  */
  if (saved_input)
1620
30
    len += saved_input_len;
1621
209
  /* Part of out_string that has not been written out yet.  */
  if (state == -1)
1622
0
    len += strlen (out_string);
1623
209
  return len;
1624
209
}