Coverage Report

Created: 2025-06-24 06:45

/src/binutils-gdb/gas/app.c
Line
Count
Source (jump to first uncovered line)
1
/* This is the Assembler Pre-Processor
2
   Copyright (C) 1987-2025 Free Software Foundation, Inc.
3
4
   This file is part of GAS, the GNU Assembler.
5
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
#if (__STDC__ != 1)
30
#ifndef const
31
#define const  /* empty */
32
#endif
33
#endif
34
35
#ifdef H_TICK_HEX
36
int enable_h_tick_hex = 0;
37
#endif
38
39
#ifdef TC_M68K
40
/* Whether we are scrubbing in m68k MRI mode.  This is different from
41
   flag_m68k_mri, because the two flags will be affected by the .mri
42
   pseudo-op at different times.  */
43
static int scrub_m68k_mri;
44
45
/* The pseudo-op which switches in and out of MRI mode.  See the
46
   comment in do_scrub_chars.  */
47
static const char mri_pseudo[] = ".mri 0";
48
static const char *mri_state;
49
static char mri_last_ch;
50
#else
51
1.05M
#define scrub_m68k_mri 0
52
#endif
53
54
#if defined TC_ARM && defined OBJ_ELF
55
/* The pseudo-op for which we need to special-case `@' characters.
56
   See the comment in do_scrub_chars.  */
57
static const char   symver_pseudo[] = ".symver";
58
static const char * symver_state;
59
#endif
60
61
/* The pseudo-op (without leading dot) at which we want to (perhaps just
62
   temporarily) stop processing.  See the comments in do_scrub_chars().  */
63
static const char   end_pseudo[] = "end ";
64
static const char * end_state;
65
66
/* Whether, considering the state at start of assembly, NO_PSEUDO_DOT is
67
   active.  */
68
static bool no_pseudo_dot;
69
70
static char last_char;
71
72
16.3M
#define LEX_IS_SYMBOL_COMPONENT   1
73
961k
#define LEX_IS_WHITESPACE   2
74
1.08M
#define LEX_IS_LINE_SEPARATOR   3
75
389k
#define LEX_IS_COMMENT_START    4
76
420k
#define LEX_IS_LINE_COMMENT_START 5
77
0
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
78
451k
#define LEX_IS_STRINGQUOTE    8
79
584k
#define LEX_IS_COLON      9
80
6.36M
#define LEX_IS_NEWLINE      10
81
18.3k
#define LEX_IS_ONECHAR_QUOTE    11
82
#ifdef TC_V850
83
#define LEX_IS_DOUBLEDASH_1ST   12
84
#endif
85
#ifdef DOUBLEBAR_PARALLEL
86
#define LEX_IS_DOUBLEBAR_1ST    13
87
#endif
88
365k
#define LEX_IS_PARALLEL_SEPARATOR 14
89
#ifdef H_TICK_HEX
90
#define LEX_IS_H      15
91
#endif
92
447k
#define IS_SYMBOL_COMPONENT(c)    (lex[c] == LEX_IS_SYMBOL_COMPONENT)
93
503k
#define IS_WHITESPACE(c)    (lex[c] == LEX_IS_WHITESPACE)
94
765k
#define IS_LINE_SEPARATOR(c)    (lex[c] == LEX_IS_LINE_SEPARATOR)
95
365k
#define IS_PARALLEL_SEPARATOR(c)  (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
96
747k
#define IS_COMMENT(c)     (lex[c] == LEX_IS_COMMENT_START)
97
747k
#define IS_LINE_COMMENT(c)    (lex[c] == LEX_IS_LINE_COMMENT_START)
98
#define IS_TWOCHAR_COMMENT_1ST(c) (lex[c] == LEX_IS_TWOCHAR_COMMENT_1ST)
99
6.14M
#define IS_NEWLINE(c)     (lex[c] == LEX_IS_NEWLINE)
100
101
static char lex[256] = {
102
  [' ']  = LEX_IS_WHITESPACE,
103
  ['\t'] = LEX_IS_WHITESPACE,
104
#ifdef CR_EOL
105
  ['\r'] = LEX_IS_LINE_SEPARATOR,
106
#else
107
  ['\r'] = LEX_IS_WHITESPACE,
108
#endif
109
  ['\n'] = LEX_IS_NEWLINE,
110
  [':'] = LEX_IS_COLON,
111
  ['$'] = LEX_IS_SYMBOL_COMPONENT,
112
  ['.'] = LEX_IS_SYMBOL_COMPONENT,
113
  ['_'] = LEX_IS_SYMBOL_COMPONENT,
114
  ['A'] = LEX_IS_SYMBOL_COMPONENT, ['a'] = LEX_IS_SYMBOL_COMPONENT,
115
  ['B'] = LEX_IS_SYMBOL_COMPONENT, ['b'] = LEX_IS_SYMBOL_COMPONENT,
116
  ['C'] = LEX_IS_SYMBOL_COMPONENT, ['c'] = LEX_IS_SYMBOL_COMPONENT,
117
  ['D'] = LEX_IS_SYMBOL_COMPONENT, ['d'] = LEX_IS_SYMBOL_COMPONENT,
118
  ['E'] = LEX_IS_SYMBOL_COMPONENT, ['e'] = LEX_IS_SYMBOL_COMPONENT,
119
  ['F'] = LEX_IS_SYMBOL_COMPONENT, ['f'] = LEX_IS_SYMBOL_COMPONENT,
120
  ['G'] = LEX_IS_SYMBOL_COMPONENT, ['g'] = LEX_IS_SYMBOL_COMPONENT,
121
  ['H'] = LEX_IS_SYMBOL_COMPONENT, ['h'] = LEX_IS_SYMBOL_COMPONENT,
122
  ['I'] = LEX_IS_SYMBOL_COMPONENT, ['i'] = LEX_IS_SYMBOL_COMPONENT,
123
  ['J'] = LEX_IS_SYMBOL_COMPONENT, ['j'] = LEX_IS_SYMBOL_COMPONENT,
124
  ['K'] = LEX_IS_SYMBOL_COMPONENT, ['k'] = LEX_IS_SYMBOL_COMPONENT,
125
  ['L'] = LEX_IS_SYMBOL_COMPONENT, ['l'] = LEX_IS_SYMBOL_COMPONENT,
126
  ['M'] = LEX_IS_SYMBOL_COMPONENT, ['m'] = LEX_IS_SYMBOL_COMPONENT,
127
  ['N'] = LEX_IS_SYMBOL_COMPONENT, ['n'] = LEX_IS_SYMBOL_COMPONENT,
128
  ['O'] = LEX_IS_SYMBOL_COMPONENT, ['o'] = LEX_IS_SYMBOL_COMPONENT,
129
  ['P'] = LEX_IS_SYMBOL_COMPONENT, ['p'] = LEX_IS_SYMBOL_COMPONENT,
130
  ['Q'] = LEX_IS_SYMBOL_COMPONENT, ['q'] = LEX_IS_SYMBOL_COMPONENT,
131
  ['R'] = LEX_IS_SYMBOL_COMPONENT, ['r'] = LEX_IS_SYMBOL_COMPONENT,
132
  ['S'] = LEX_IS_SYMBOL_COMPONENT, ['s'] = LEX_IS_SYMBOL_COMPONENT,
133
  ['T'] = LEX_IS_SYMBOL_COMPONENT, ['t'] = LEX_IS_SYMBOL_COMPONENT,
134
  ['U'] = LEX_IS_SYMBOL_COMPONENT, ['u'] = LEX_IS_SYMBOL_COMPONENT,
135
  ['V'] = LEX_IS_SYMBOL_COMPONENT, ['v'] = LEX_IS_SYMBOL_COMPONENT,
136
  ['W'] = LEX_IS_SYMBOL_COMPONENT, ['w'] = LEX_IS_SYMBOL_COMPONENT,
137
  ['X'] = LEX_IS_SYMBOL_COMPONENT, ['x'] = LEX_IS_SYMBOL_COMPONENT,
138
  ['Y'] = LEX_IS_SYMBOL_COMPONENT, ['y'] = LEX_IS_SYMBOL_COMPONENT,
139
  ['Z'] = LEX_IS_SYMBOL_COMPONENT, ['z'] = LEX_IS_SYMBOL_COMPONENT,
140
  ['0'] = LEX_IS_SYMBOL_COMPONENT,
141
  ['1'] = LEX_IS_SYMBOL_COMPONENT,
142
  ['2'] = LEX_IS_SYMBOL_COMPONENT,
143
  ['3'] = LEX_IS_SYMBOL_COMPONENT,
144
  ['4'] = LEX_IS_SYMBOL_COMPONENT,
145
  ['5'] = LEX_IS_SYMBOL_COMPONENT,
146
  ['6'] = LEX_IS_SYMBOL_COMPONENT,
147
  ['7'] = LEX_IS_SYMBOL_COMPONENT,
148
  ['8'] = LEX_IS_SYMBOL_COMPONENT,
149
  ['9'] = LEX_IS_SYMBOL_COMPONENT,
150
#define INIT2(n) [n] = LEX_IS_SYMBOL_COMPONENT, \
151
     [(n) + 1] = LEX_IS_SYMBOL_COMPONENT
152
#define INIT4(n)    INIT2 (n),  INIT2 ((n) +  2)
153
#define INIT8(n)    INIT4 (n),  INIT4 ((n) +  4)
154
#define INIT16(n)   INIT8 (n),  INIT8 ((n) +  8)
155
#define INIT32(n)  INIT16 (n), INIT16 ((n) + 16)
156
#define INIT64(n)  INIT32 (n), INIT32 ((n) + 32)
157
#define INIT128(n) INIT64 (n), INIT64 ((n) + 64)
158
  INIT128 (128),
159
#undef INIT128
160
#undef INIT64
161
#undef INIT32
162
#undef INIT16
163
#undef INIT8
164
#undef INIT4
165
#undef INIT2
166
};
167
168
void
169
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
170
28
{
171
28
  const char *p;
172
173
  /* Latch this once at start.  xtensa uses a hook function, yet context isn't
174
     meaningful for scrubbing (or else we'd need to sync scrubber behavior as
175
     state changes).  */
176
28
  if (lex['/'] == 0)
177
1
    no_pseudo_dot = NO_PSEUDO_DOT;
178
179
#ifdef TC_M68K
180
  scrub_m68k_mri = m68k_mri;
181
182
  if (! m68k_mri)
183
#endif
184
28
    {
185
28
      lex['"'] = LEX_IS_STRINGQUOTE;
186
187
28
#if ! defined (TC_HPPA)
188
28
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
189
28
#endif
190
191
#ifdef SINGLE_QUOTE_STRINGS
192
      lex['\''] = LEX_IS_STRINGQUOTE;
193
#endif
194
28
    }
195
196
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
197
     in state 5 of do_scrub_chars must be changed.  */
198
199
  /* Note that these override the previous defaults, e.g. if ';' is a
200
     comment char, then it isn't a line separator.  */
201
202
28
#ifdef tc_symbol_chars
203
  /* This macro permits the processor to specify all characters which
204
     may appear in an operand.  This will prevent the scrubber from
205
     discarding meaningful whitespace in certain cases.  The i386
206
     backend uses this to support prefixes, which can confuse the
207
     scrubber as to whether it is parsing operands or opcodes.  */
208
168
  for (p = tc_symbol_chars; *p; ++p)
209
140
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
210
28
#endif
211
212
  /* The m68k backend wants to be able to change comment_chars.  */
213
#ifndef tc_comment_chars
214
#define tc_comment_chars comment_chars
215
#endif
216
56
  for (p = tc_comment_chars; *p; p++)
217
28
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
218
219
  /* While counter intuitive to have more special purpose line comment chars
220
     override more general purpose ordinary ones, logic in do_scrub_chars()
221
     depends on this ordering.   */
222
84
  for (p = line_comment_chars; *p; p++)
223
56
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
224
225
28
#ifndef tc_line_separator_chars
226
28
#define tc_line_separator_chars line_separator_chars
227
28
#endif
228
56
  for (p = tc_line_separator_chars; *p; p++)
229
28
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
230
231
#ifdef tc_parallel_separator_chars
232
  /* This macro permits the processor to specify all characters which
233
     separate parallel insns on the same line.  */
234
  for (p = tc_parallel_separator_chars; *p; p++)
235
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
236
#endif
237
238
  /* Only allow slash-star comments if slash is not in use.  Certain
239
     other cases are dealt with in LEX_IS_LINE_COMMENT_START handling.
240
     FIXME: This isn't right.  We should always permit them.  */
241
28
  if (lex['/'] == 0)
242
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
243
244
#ifdef TC_M68K
245
  if (m68k_mri)
246
    {
247
      lex['\''] = LEX_IS_STRINGQUOTE;
248
      lex[';'] = LEX_IS_COMMENT_START;
249
      lex['*'] = LEX_IS_LINE_COMMENT_START;
250
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
251
   then it can't be used in an expression.  */
252
      lex['!'] = LEX_IS_LINE_COMMENT_START;
253
    }
254
#endif
255
256
#ifdef TC_V850
257
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
258
#endif
259
#ifdef DOUBLEBAR_PARALLEL
260
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
261
#endif
262
#ifdef TC_D30V
263
  /* Must do this if we want VLIW instructions with "->" or "<-".  */
264
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
265
#endif
266
267
#ifdef H_TICK_HEX
268
  if (enable_h_tick_hex)
269
    {
270
      lex['h'] = LEX_IS_H;
271
      lex['H'] = LEX_IS_H;
272
    }
273
#endif
274
28
}
275
276
/* Saved state of the scrubber.  */
277
static int state;
278
static int old_state;
279
static const char *out_string;
280
static char out_buf[20];
281
static int add_newlines;
282
static char *saved_input;
283
static size_t saved_input_len;
284
static char input_buffer[32 * 1024];
285
286
/* Data structure for saving the state of app across #include's.  Note that
287
   app is called asynchronously to the parsing of the .include's, so our
288
   state at the time .include is interpreted is completely unrelated.
289
   That's why we have to save it all.  */
290
291
struct app_save
292
{
293
  int          state;
294
  int          old_state;
295
  const char * out_string;
296
  char         out_buf[sizeof (out_buf)];
297
  int          add_newlines;
298
  char *       saved_input;
299
  size_t       saved_input_len;
300
  const char * end_state;
301
#ifdef TC_M68K
302
  int          scrub_m68k_mri;
303
  const char * mri_state;
304
  char         mri_last_ch;
305
#endif
306
#if defined TC_ARM && defined OBJ_ELF
307
  const char * symver_state;
308
#endif
309
  char         last_char;
310
};
311
312
char *
313
app_push (void)
314
1.38k
{
315
1.38k
  struct app_save *saved;
316
317
1.38k
  saved = XNEW (struct app_save);
318
1.38k
  saved->state = state;
319
1.38k
  saved->old_state = old_state;
320
1.38k
  saved->out_string = out_string;
321
1.38k
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
322
1.38k
  saved->add_newlines = add_newlines;
323
1.38k
  if (saved_input == NULL)
324
1.04k
    saved->saved_input = NULL;
325
337
  else
326
337
    {
327
337
      saved->saved_input = XNEWVEC (char, saved_input_len);
328
337
      memcpy (saved->saved_input, saved_input, saved_input_len);
329
337
      saved->saved_input_len = saved_input_len;
330
337
    }
331
1.38k
  saved->end_state = end_state;
332
#ifdef TC_M68K
333
  saved->scrub_m68k_mri = scrub_m68k_mri;
334
  saved->mri_state = mri_state;
335
  saved->mri_last_ch = mri_last_ch;
336
#endif
337
#if defined TC_ARM && defined OBJ_ELF
338
  saved->symver_state = symver_state;
339
#endif
340
1.38k
  saved->last_char = last_char;
341
342
  /* do_scrub_begin() is not useful, just wastes time.  */
343
344
1.38k
  state = 0;
345
1.38k
  saved_input = NULL;
346
1.38k
  add_newlines = 0;
347
348
1.38k
  return (char *) saved;
349
1.38k
}
350
351
void
352
app_pop (char *arg)
353
1.38k
{
354
1.38k
  struct app_save *saved = (struct app_save *) arg;
355
356
  /* There is no do_scrub_end ().  */
357
1.38k
  state = saved->state;
358
1.38k
  old_state = saved->old_state;
359
1.38k
  out_string = saved->out_string;
360
1.38k
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
361
1.38k
  add_newlines = saved->add_newlines;
362
1.38k
  if (saved->saved_input == NULL)
363
1.04k
    saved_input = NULL;
364
337
  else
365
337
    {
366
337
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
367
337
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
368
337
      saved_input = input_buffer;
369
337
      saved_input_len = saved->saved_input_len;
370
337
      free (saved->saved_input);
371
337
    }
372
1.38k
  end_state = saved->end_state;
373
#ifdef TC_M68K
374
  scrub_m68k_mri = saved->scrub_m68k_mri;
375
  mri_state = saved->mri_state;
376
  mri_last_ch = saved->mri_last_ch;
377
#endif
378
#if defined TC_ARM && defined OBJ_ELF
379
  symver_state = saved->symver_state;
380
#endif
381
1.38k
  last_char = saved->last_char;
382
383
1.38k
  free (arg);
384
1.38k
}
385
386
/* @@ This assumes that \n &c are the same on host and target.  This is not
387
   necessarily true.  */
388
389
static int
390
process_escape (int ch)
391
127
{
392
127
  switch (ch)
393
127
    {
394
1
    case 'b':
395
1
      return '\b';
396
10
    case 'f':
397
10
      return '\f';
398
0
    case 'n':
399
0
      return '\n';
400
0
    case 'r':
401
0
      return '\r';
402
0
    case 't':
403
0
      return '\t';
404
82
    case '\'':
405
82
      return '\'';
406
0
    case '"':
407
0
      return '\"';
408
34
    default:
409
34
      return ch;
410
127
    }
411
127
}
412
413
0
#define MULTIBYTE_WARN_COUNT_LIMIT 10
414
static unsigned int multibyte_warn_count = 0;
415
416
bool
417
scan_for_multibyte_characters (const unsigned char *  start,
418
             const unsigned char *  end,
419
             bool                   warn)
420
0
{
421
0
  if (end <= start)
422
0
    return false;
423
424
0
  if (warn && multibyte_warn_count > MULTIBYTE_WARN_COUNT_LIMIT)
425
0
    return false;
426
427
0
  bool found = false;
428
429
0
  while (start < end)
430
0
    {
431
0
      unsigned char c;
432
433
0
      if ((c = * start++) <= 0x7f)
434
0
  continue;
435
436
0
      if (!warn)
437
0
  return true;
438
439
0
      found = true;
440
441
0
      const char * filename;
442
0
      unsigned int lineno;
443
444
0
      filename = as_where (& lineno);
445
0
      if (filename == NULL)
446
0
  as_warn (_("multibyte character (%#x) encountered in input"), c);
447
0
      else if (lineno == 0)
448
0
  as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
449
0
      else
450
0
  as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
451
452
0
      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
453
0
  {
454
0
    as_warn (_("further multibyte character warnings suppressed"));
455
0
    break;
456
0
  }
457
0
    }
458
459
0
  return found;
460
0
}
461
462
/* This function is called to process input characters.  The GET
463
   parameter is used to retrieve more input characters.  GET should
464
   set its parameter to point to a buffer, and return the length of
465
   the buffer; it should return 0 at end of file.  The scrubbed output
466
   characters are put into the buffer starting at TOSTART; the TOSTART
467
   buffer is TOLEN bytes in length.  The function returns the number
468
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
469
   end of file was seen.  This function is arranged as a state
470
   machine, and saves its state so that it may return at any point.
471
   This is the way the old code used to work.  */
472
473
size_t
474
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
475
    bool check_multibyte)
476
1.89k
{
477
1.89k
  char *to = tostart;
478
1.89k
  char *toend = tostart + tolen;
479
1.89k
  char *from;
480
1.89k
  char *fromend;
481
1.89k
  size_t fromlen;
482
1.89k
  int ch, ch2 = 0;
483
  /* Character that started the string we're working on.  */
484
1.89k
  static char quotechar;
485
486
  /*State 0: beginning of normal line
487
    1: After first whitespace on line (flush more white)
488
    2: After first non-white (opcode) on line (keep 1white)
489
    3: after second white on line (into operands) (flush white)
490
    4: after putting out a .linefile, put out digits
491
    5: parsing a string, then go to old-state
492
    6: putting out \ escape in a quoted string.
493
    7: no longer used
494
    8: no longer used
495
    9: After seeing symbol char in state 3 (keep 1white after symchar)
496
   10: After seeing whitespace in state 9 (keep white before symchar)
497
   11: After seeing a symbol character in state 0 (eg a label definition)
498
   -1: output string in out_string and go to the state in old_state
499
   12: no longer used
500
#ifdef DOUBLEBAR_PARALLEL
501
   13: After seeing a vertical bar, looking for a second
502
       vertical bar as a parallel expression separator.
503
#endif
504
#ifdef TC_PREDICATE_START_CHAR
505
   14: After seeing a predicate start character at state 0, looking
506
       for a predicate end character as predicate.
507
   15: After seeing a predicate start character at state 1, looking
508
       for a predicate end character as predicate.
509
#endif
510
#ifdef TC_Z80
511
   16: After seeing an 'a' or an 'A' at the start of a symbol
512
   17: After seeing an 'f' or an 'F' in state 16
513
#endif
514
    */
515
516
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
517
     constructs like ``.loc 1 20''.  This was turning into ``.loc
518
     120''.  States 9 and 10 ensure that a space is never dropped in
519
     between characters which could appear in an identifier.  Ian
520
     Taylor, ian@cygnus.com.
521
522
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
523
     correctly on the PA (and any other target where colons are optional).
524
     Jeff Law, law@cs.utah.edu.
525
526
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
527
     get squashed into "cmp r1,r2||trap#1", with the all important space
528
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
529
530
  /* This macro gets the next input character.  */
531
532
1.89k
#define GET()             \
533
12.6M
  (from < fromend            \
534
12.6M
   ? * (unsigned char *) (from++)        \
535
12.6M
   : (saved_input = NULL,          \
536
2.54k
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
537
2.54k
      from = input_buffer,          \
538
2.54k
      fromend = from + fromlen,         \
539
2.54k
      (fromlen == 0            \
540
2.54k
       ? EOF              \
541
2.54k
       : * (unsigned char *) (from++))))
542
543
  /* This macro pushes a character back on the input stream.  */
544
545
437k
#define UNGET(uch) (*--from = (uch))
546
547
  /* This macro puts a character into the output buffer.  If this
548
     character fills the output buffer, this macro jumps to the label
549
     TOFULL.  We use this rather ugly approach because we need to
550
     handle two different termination conditions: EOF on the input
551
     stream, and a full output buffer.  It would be simpler if we
552
     always read in the entire input stream before processing it, but
553
     I don't want to make such a significant change to the assembler's
554
     memory usage.  */
555
556
1.89k
#define PUT(pch)        \
557
6.80M
  do            \
558
6.80M
    {           \
559
6.80M
      *to++ = (pch);        \
560
6.80M
      if (to >= toend)       \
561
6.80M
  goto tofull;       \
562
6.80M
    }           \
563
6.80M
  while (0)
564
565
1.89k
  if (saved_input != NULL)
566
612
    {
567
612
      from = saved_input;
568
612
      fromend = from + saved_input_len;
569
612
    }
570
1.28k
  else
571
1.28k
    {
572
1.28k
      fromlen = (*get) (input_buffer, sizeof input_buffer);
573
1.28k
      if (fromlen == 0)
574
27
  return 0;
575
1.25k
      from = input_buffer;
576
1.25k
      fromend = from + fromlen;
577
578
1.25k
      if (check_multibyte)
579
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
580
0
                (const unsigned char* ) fromend,
581
0
                true /* Generate warnings.  */);
582
1.25k
    }
583
584
6.08M
  while (1)
585
6.08M
    {
586
      /* The cases in this switch end with continue, in order to
587
   branch back to the top of this while loop and generate the
588
   next output character in the appropriate state.  */
589
6.08M
      switch (state)
590
6.08M
  {
591
43.4k
  case -1:
592
43.4k
    ch = *out_string++;
593
43.4k
    if (*out_string == '\0')
594
19.2k
      {
595
19.2k
        state = old_state;
596
19.2k
        old_state = 3;
597
19.2k
      }
598
43.4k
    PUT (ch);
599
43.4k
    continue;
600
601
43.4k
  case 4:
602
6.19k
    ch = GET ();
603
6.19k
    if (ch == EOF)
604
0
      goto fromeof;
605
6.19k
    else if (ch >= '0' && ch <= '9')
606
4.10k
      PUT (ch);
607
2.09k
    else
608
2.09k
      {
609
2.17k
        while (ch != EOF && IS_WHITESPACE (ch))
610
87
    ch = GET ();
611
2.09k
        if (ch == '"')
612
1.66k
    {
613
1.66k
      quotechar = ch;
614
1.66k
      state = 5;
615
1.66k
      old_state = 3;
616
1.66k
      PUT (ch);
617
1.66k
    }
618
425
        else
619
425
    {
620
15.2k
      while (ch != EOF && ch != '\n')
621
14.8k
        ch = GET ();
622
425
      state = 0;
623
425
      PUT (ch);
624
425
    }
625
2.09k
      }
626
6.19k
    continue;
627
628
890k
  case 5:
629
    /* We are going to copy everything up to a quote character,
630
       with special handling for a backslash.  We try to
631
       optimize the copying in the simple case without using the
632
       GET and PUT macros.  */
633
890k
    {
634
890k
      char *s;
635
890k
      ptrdiff_t len;
636
637
14.6M
      for (s = from; s < fromend; s++)
638
14.6M
        {
639
14.6M
    ch = *s;
640
14.6M
    if (ch == '\\'
641
14.6M
        || ch == quotechar
642
14.6M
        || ch == '\n')
643
890k
      break;
644
14.6M
        }
645
890k
      len = s - from;
646
890k
      if (len > toend - to)
647
209
        len = toend - to;
648
890k
      if (len > 0)
649
221k
        {
650
221k
    memcpy (to, from, len);
651
221k
    to += len;
652
221k
    from += len;
653
221k
    if (to >= toend)
654
243
      goto tofull;
655
221k
        }
656
890k
    }
657
658
890k
    ch = GET ();
659
890k
    if (ch == EOF)
660
212
      {
661
        /* This buffer is here specifically so
662
     that the UNGET below will work.  */
663
212
        static char one_char_buf[1];
664
665
212
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
666
212
        state = old_state;
667
212
        from = fromend = one_char_buf + 1;
668
212
        fromlen = 1;
669
212
        UNGET ('\n');
670
212
        PUT (quotechar);
671
212
      }
672
890k
    else if (ch == quotechar)
673
452k
      {
674
452k
        state = old_state;
675
452k
        PUT (ch);
676
452k
      }
677
437k
    else if (TC_STRING_ESCAPES && ch == '\\')
678
22.3k
      {
679
22.3k
        state = 6;
680
22.3k
        PUT (ch);
681
22.3k
      }
682
415k
    else if (scrub_m68k_mri && ch == '\n')
683
0
      {
684
        /* Just quietly terminate the string.  This permits lines like
685
       bne  label loop if we haven't reached the end yet.  */
686
0
        state = old_state;
687
0
        UNGET (ch);
688
0
        PUT ('\'');
689
0
      }
690
415k
    else
691
415k
      {
692
415k
        PUT (ch);
693
415k
      }
694
890k
    continue;
695
696
890k
  case 6:
697
22.3k
    state = 5;
698
22.3k
    ch = GET ();
699
22.3k
    switch (ch)
700
22.3k
      {
701
        /* Handle strings broken across lines, by turning '\n' into
702
     '\\' and 'n'.  */
703
368
      case '\n':
704
368
        UNGET ('n');
705
368
        add_newlines++;
706
368
        PUT ('\\');
707
368
        continue;
708
709
368
      case EOF:
710
0
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
711
0
        PUT (quotechar);
712
0
        continue;
713
714
        /* These two are used inside macros.  */
715
31
      case '@':
716
82
      case '+':
717
82
        break;
718
719
167
      case '"':
720
3.11k
      case '\\':
721
3.27k
      case 'b':
722
3.40k
      case 'f':
723
4.07k
      case 'n':
724
4.32k
      case 'r':
725
4.40k
      case 't':
726
4.45k
      case 'v':
727
4.54k
      case 'x':
728
4.57k
      case 'X':
729
4.68k
      case '0':
730
4.73k
      case '1':
731
4.88k
      case '2':
732
5.49k
      case '3':
733
5.57k
      case '4':
734
5.60k
      case '5':
735
5.64k
      case '6':
736
5.69k
      case '7':
737
5.69k
        break;
738
739
16.2k
      default:
740
#ifdef ONLY_STANDARD_ESCAPES
741
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
742
#endif
743
16.2k
        break;
744
22.3k
      }
745
21.9k
    PUT (ch);
746
21.9k
    continue;
747
748
#ifdef DOUBLEBAR_PARALLEL
749
  case 13:
750
    ch = GET ();
751
    if (ch != '|')
752
      abort ();
753
754
    /* Reset back to state 1 and pretend that we are parsing a
755
       line from just after the first white space.  */
756
    state = 1;
757
    PUT ('|');
758
    continue;
759
#endif
760
#ifdef TC_Z80
761
  case 16:
762
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
763
    ch = GET ();
764
    if (ch == 'f' || ch == 'F')
765
      {
766
        state = 17;
767
        PUT (ch);
768
      }
769
    else
770
      {
771
        if (ch != EOF)
772
    UNGET (ch);
773
        state = 9;
774
        break;
775
      }
776
    /* Fall through.  */
777
  case 17:
778
    /* We have seen "af" at the start of a symbol,
779
       a ' here is a part of that symbol.  */
780
    ch = GET ();
781
    state = 9;
782
    if (ch == '\'')
783
      /* Change to avoid warning about unclosed string.  */
784
      PUT ('`');
785
    else if (ch != EOF)
786
      UNGET (ch);
787
    break;
788
#endif
789
6.08M
  }
790
791
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
792
793
      /* flushchar: */
794
5.12M
      ch = GET ();
795
796
#ifdef TC_PREDICATE_START_CHAR
797
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
798
  {
799
    state += 14;
800
    PUT (ch);
801
    continue;
802
  }
803
      else if (state == 14 || state == 15)
804
  {
805
    if (ch == TC_PREDICATE_END_CHAR)
806
      {
807
        state -= 14;
808
        PUT (ch);
809
        ch = GET ();
810
      }
811
    else
812
      {
813
        PUT (ch);
814
        continue;
815
      }
816
  }
817
#endif
818
819
5.36M
    recycle:
820
821
      /* We need to watch out for .end directives: We should in particular not
822
   issue diagnostics for anything after an active one.  */
823
5.36M
      if (ch == EOF)
824
1.15k
  end_state = NULL;
825
5.36M
      else if (end_state == NULL)
826
5.24M
  {
827
5.24M
    if ((state == 0 || state == 1)
828
5.24M
        && (ch == '.'
829
824k
      || (no_pseudo_dot && ch == end_pseudo[0])))
830
101k
      end_state = end_pseudo + (ch != '.');
831
5.24M
  }
832
122k
      else if (ch != '\0'
833
122k
         && (*end_state == ch
834
       /* Avoid triggering on directives like .endif or .endr.  */
835
121k
       || (*end_state == ' ' && !IS_SYMBOL_COMPONENT (ch))))
836
20.6k
  {
837
20.6k
    if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
838
0
      goto end_end;
839
20.6k
    ++end_state;
840
20.6k
  }
841
101k
      else if (*end_state != '\0')
842
  /* We did not get the expected character, or we didn't
843
     get a valid terminating character after seeing the
844
     entire pseudo-op, so we must go back to the beginning.  */
845
101k
  end_state = NULL;
846
53
      else if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
847
53
  {
848
53
  end_end:
849
    /* We've read the entire pseudo-op.  If this is the end of the line,
850
       bail out now by (ab)using the output-full path.  This allows the
851
       caller to process input up to here and terminate processing if this
852
       directive is actually active (not on the false branch of a
853
       conditional and not in a macro definition).  */
854
53
    end_state = NULL;
855
53
    state = 0;
856
53
    PUT (ch);
857
53
    goto tofull;
858
53
  }
859
860
#if defined TC_ARM && defined OBJ_ELF
861
      /* We need to watch out for .symver directives.  See the comment later
862
   in this function.  */
863
      if (ch == EOF)
864
  symver_state = NULL;
865
      else if (symver_state == NULL)
866
  {
867
    if ((state == 0 || state == 1)
868
        && strchr (tc_comment_chars, '@') != NULL
869
        && ch == symver_pseudo[0])
870
      symver_state = symver_pseudo + 1;
871
  }
872
      else
873
  {
874
    /* We advance to the next state if we find the right
875
       character.  */
876
    if (ch != '\0' && (*symver_state == ch))
877
      ++symver_state;
878
    else if (*symver_state != '\0')
879
      /* We did not get the expected character, or we didn't
880
         get a valid terminating character after seeing the
881
         entire pseudo-op, so we must go back to the beginning.  */
882
      symver_state = NULL;
883
    else
884
      {
885
        /* We've read the entire pseudo-op.  If this is the end
886
     of the line, go back to the beginning.  */
887
        if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
888
    symver_state = NULL;
889
      }
890
  }
891
#endif /* TC_ARM && OBJ_ELF */
892
893
#ifdef TC_M68K
894
      /* We want to have pseudo-ops which control whether we are in
895
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
896
   the scrubber, that means that we need a special purpose
897
   recognizer here.  */
898
      if (ch == EOF)
899
  mri_state = NULL;
900
      else if (mri_state == NULL)
901
  {
902
    if ((state == 0 || state == 1)
903
        && ch == mri_pseudo[0])
904
      mri_state = mri_pseudo + 1;
905
  }
906
      else
907
  {
908
    /* We advance to the next state if we find the right
909
       character, or if we need a space character and we get any
910
       whitespace character, or if we need a '0' and we get a
911
       '1' (this is so that we only need one state to handle
912
       ``.mri 0'' and ``.mri 1'').  */
913
    if (ch != '\0'
914
        && (*mri_state == ch
915
      || (*mri_state == ' '
916
          && IS_WHITESPACE (ch))
917
      || (*mri_state == '0'
918
          && ch == '1')))
919
      {
920
        mri_last_ch = ch;
921
        ++mri_state;
922
      }
923
    else if (*mri_state != '\0'
924
       || (!IS_WHITESPACE (ch)
925
           && !IS_LINE_SEPARATOR (ch)
926
           && !IS_NEWLINE (ch)))
927
      {
928
        /* We did not get the expected character, or we didn't
929
     get a valid terminating character after seeing the
930
     entire pseudo-op, so we must go back to the
931
     beginning.  */
932
        mri_state = NULL;
933
      }
934
    else
935
      {
936
        /* We've read the entire pseudo-op.  mri_last_ch is
937
     either '0' or '1' indicating whether to enter or
938
     leave MRI mode.  */
939
        do_scrub_begin (mri_last_ch == '1');
940
        mri_state = NULL;
941
942
        /* We continue handling the character as usual.  The
943
     main gas reader must also handle the .mri pseudo-op
944
     to control expression parsing and the like.  */
945
      }
946
  }
947
#endif
948
949
5.36M
      if (ch == EOF)
950
1.15k
  {
951
1.15k
    if (state != 0)
952
18
      {
953
18
        as_warn (_("end of file not at end of a line; newline inserted"));
954
18
        state = 0;
955
18
        PUT ('\n');
956
18
      }
957
1.15k
    goto fromeof;
958
1.15k
  }
959
960
5.36M
      switch (lex[ch])
961
5.36M
  {
962
457k
  case LEX_IS_WHITESPACE:
963
457k
    do
964
495k
      {
965
495k
        ch = GET ();
966
495k
      }
967
495k
    while (ch != EOF && IS_WHITESPACE (ch));
968
457k
    if (ch == EOF)
969
1
      goto fromeof;
970
971
457k
    if (state == 0)
972
83.4k
      {
973
        /* Preserve a single whitespace character at the
974
     beginning of a line.  */
975
83.4k
        state = 1;
976
83.4k
        UNGET (ch);
977
83.4k
        PUT (' ');
978
83.4k
        break;
979
83.4k
      }
980
981
#ifdef KEEP_WHITE_AROUND_COLON
982
    if (lex[ch] == LEX_IS_COLON)
983
      {
984
        /* Only keep this white if there's no white *after* the
985
     colon.  */
986
        ch2 = GET ();
987
        if (ch2 != EOF)
988
    UNGET (ch2);
989
        if (!IS_WHITESPACE (ch2))
990
    {
991
      state = 9;
992
      UNGET (ch);
993
      PUT (' ');
994
      break;
995
    }
996
      }
997
#endif
998
999
    /* Prune trailing whitespace.  */
1000
373k
    if (IS_COMMENT (ch)
1001
373k
        || (IS_LINE_COMMENT (ch)
1002
373k
            && (state < 1 || strchr (tc_comment_chars, ch)))
1003
373k
        || IS_NEWLINE (ch)
1004
373k
        || IS_LINE_SEPARATOR (ch)
1005
373k
        || IS_PARALLEL_SEPARATOR (ch))
1006
8.38k
      {
1007
8.38k
        if (scrub_m68k_mri)
1008
0
    {
1009
      /* In MRI mode, we keep these spaces.  */
1010
0
      UNGET (ch);
1011
0
      PUT (' ');
1012
0
      break;
1013
0
    }
1014
8.38k
        goto recycle;
1015
8.38k
      }
1016
#ifdef DOUBLESLASH_LINE_COMMENTS
1017
    if (IS_TWOCHAR_COMMENT_1ST (ch))
1018
      {
1019
        ch2 = GET ();
1020
        if (ch2 != EOF)
1021
          UNGET (ch2);
1022
        if (ch2 == '/')
1023
    goto recycle;
1024
      }
1025
#endif
1026
1027
    /* If we're in state 2 or 11, we've seen a non-white
1028
       character followed by whitespace.  If the next character
1029
       is ':', this is whitespace after a label name which we
1030
       normally must ignore.  In MRI mode, though, spaces are
1031
       not permitted between the label and the colon.  */
1032
365k
    if ((state == 2 || state == 11)
1033
365k
        && lex[ch] == LEX_IS_COLON
1034
365k
        && ! scrub_m68k_mri)
1035
131
      {
1036
131
        state = 1;
1037
131
        PUT (ch);
1038
131
        break;
1039
131
      }
1040
1041
365k
    switch (state)
1042
365k
      {
1043
1.10k
      case 1:
1044
        /* We can arrive here if we leave a leading whitespace
1045
     character at the beginning of a line.  */
1046
1.10k
        goto recycle;
1047
49.5k
      case 2:
1048
49.5k
        state = 3;
1049
49.5k
        if (to + 1 < toend)
1050
49.5k
    {
1051
      /* Optimize common case by skipping UNGET/GET.  */
1052
49.5k
      PUT (' '); /* Sp after opco */
1053
49.5k
      goto recycle;
1054
49.5k
    }
1055
0
        UNGET (ch);
1056
0
        PUT (' ');
1057
0
        break;
1058
14.6k
      case 3:
1059
14.6k
#ifndef TC_KEEP_OPERAND_SPACES
1060
        /* For TI C6X, we keep these spaces as they may separate
1061
     functional unit specifiers from operands.  */
1062
14.6k
        if (scrub_m68k_mri)
1063
0
#endif
1064
0
    {
1065
      /* In MRI mode, we keep these spaces.  */
1066
0
      UNGET (ch);
1067
0
      PUT (' ');
1068
0
      break;
1069
0
    }
1070
14.6k
        goto recycle; /* Sp in operands */
1071
165k
      case 9:
1072
165k
      case 10:
1073
165k
#ifndef TC_KEEP_OPERAND_SPACES
1074
165k
        if (scrub_m68k_mri)
1075
0
#endif
1076
0
    {
1077
      /* In MRI mode, we keep these spaces.  */
1078
0
      state = 3;
1079
0
      UNGET (ch);
1080
0
      PUT (' ');
1081
0
      break;
1082
0
    }
1083
165k
        state = 10; /* Sp after symbol char */
1084
165k
        goto recycle;
1085
134k
      case 11:
1086
134k
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1087
0
    state = 1;
1088
134k
        else
1089
134k
    {
1090
      /* We know that ch is not ':', since we tested that
1091
         case above.  Therefore this is not a label, so it
1092
         must be the opcode, and we've just seen the
1093
         whitespace after it.  */
1094
134k
      state = 3;
1095
134k
    }
1096
134k
        UNGET (ch);
1097
134k
        PUT (' '); /* Sp after label definition.  */
1098
134k
        break;
1099
134k
      default:
1100
0
        BAD_CASE (state);
1101
365k
      }
1102
134k
    break;
1103
1104
134k
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1105
0
    ch2 = GET ();
1106
0
    if (ch2 == '*')
1107
0
      {
1108
375
  twochar_comment:
1109
375
        for (;;)
1110
5.94k
    {
1111
5.94k
      do
1112
1.38M
        {
1113
1.38M
          ch2 = GET ();
1114
1.38M
          if (ch2 != EOF && IS_NEWLINE (ch2))
1115
6.86k
      add_newlines++;
1116
1.38M
        }
1117
1.38M
      while (ch2 != EOF && ch2 != '*');
1118
1119
11.9k
      while (ch2 == '*')
1120
5.99k
        ch2 = GET ();
1121
1122
5.94k
      if (ch2 == EOF || ch2 == '/')
1123
375
        break;
1124
1125
      /* This UNGET will ensure that we count newlines
1126
         correctly.  */
1127
5.56k
      UNGET (ch2);
1128
5.56k
    }
1129
1130
375
        if (ch2 == EOF)
1131
1
    as_warn (_("end of file in multiline comment"));
1132
1133
375
        ch = ' ';
1134
375
        goto recycle;
1135
0
      }
1136
#ifdef DOUBLESLASH_LINE_COMMENTS
1137
    else if (ch2 == '/')
1138
      {
1139
        do
1140
    {
1141
      ch = GET ();
1142
    }
1143
        while (ch != EOF && !IS_NEWLINE (ch));
1144
        if (ch == EOF)
1145
    as_warn ("end of file in comment; newline inserted");
1146
        state = 0;
1147
        PUT ('\n');
1148
        break;
1149
      }
1150
#endif
1151
0
    else
1152
0
      {
1153
0
        if (ch2 != EOF)
1154
0
    UNGET (ch2);
1155
0
        if (state == 9 || state == 10)
1156
0
    state = 3;
1157
0
        PUT (ch);
1158
0
      }
1159
0
    break;
1160
1161
451k
  case LEX_IS_STRINGQUOTE:
1162
451k
    quotechar = ch;
1163
451k
    if (state == 10)
1164
834
      {
1165
        /* Preserve the whitespace in foo "bar".  */
1166
834
        UNGET (ch);
1167
834
        state = 3;
1168
834
        PUT (' ');
1169
1170
        /* PUT didn't jump out.  We could just break, but we
1171
     know what will happen, so optimize a bit.  */
1172
834
        ch = GET ();
1173
834
        old_state = 9;
1174
834
      }
1175
450k
    else if (state == 3)
1176
6.96k
      old_state = 9;
1177
443k
    else if (state == 0)
1178
6.29k
      old_state = 11; /* Now seeing label definition.  */
1179
437k
    else
1180
437k
      old_state = state;
1181
451k
    state = 5;
1182
451k
    PUT (ch);
1183
451k
    break;
1184
1185
451k
  case LEX_IS_ONECHAR_QUOTE:
1186
#ifdef H_TICK_HEX
1187
    if (state == 9 && enable_h_tick_hex)
1188
      {
1189
        char c;
1190
1191
        c = GET ();
1192
        as_warn ("'%c found after symbol", c);
1193
        UNGET (c);
1194
      }
1195
#endif
1196
18.2k
    if (state == 10)
1197
132
      {
1198
        /* Preserve the whitespace in foo 'b'.  */
1199
132
        UNGET (ch);
1200
132
        state = 3;
1201
132
        PUT (' ');
1202
132
        break;
1203
132
      }
1204
18.1k
    ch = GET ();
1205
18.1k
    if (ch == EOF)
1206
1
      {
1207
1
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1208
1
        ch = 0;
1209
1
      }
1210
18.1k
    if (ch == '\\')
1211
127
      {
1212
127
        ch = GET ();
1213
127
        if (ch == EOF)
1214
0
    {
1215
0
      as_warn (_("end of file in escape character"));
1216
0
      ch = '\\';
1217
0
    }
1218
127
        else
1219
127
    ch = process_escape (ch);
1220
127
      }
1221
18.1k
    sprintf (out_buf, "%d", (int) (unsigned char) ch);
1222
1223
    /* None of these 'x constants for us.  We want 'x'.  */
1224
18.1k
    if ((ch = GET ()) != '\'')
1225
17.3k
      {
1226
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1227
        as_warn (_("missing close quote; (assumed)"));
1228
#else
1229
17.3k
        if (ch != EOF)
1230
17.3k
    UNGET (ch);
1231
17.3k
#endif
1232
17.3k
      }
1233
18.1k
    if (strlen (out_buf) == 1)
1234
1.01k
      {
1235
1.01k
        PUT (out_buf[0]);
1236
1.01k
        break;
1237
1.01k
      }
1238
17.1k
    if (state == 9)
1239
8.01k
      old_state = 3;
1240
9.10k
    else
1241
9.10k
      old_state = state;
1242
17.1k
    state = -1;
1243
17.1k
    out_string = out_buf;
1244
17.1k
    PUT (*out_string++);
1245
17.1k
    break;
1246
1247
35.3k
  case LEX_IS_COLON:
1248
#ifdef KEEP_WHITE_AROUND_COLON
1249
    state = 9;
1250
#else
1251
35.3k
    if (state == 9 || state == 10)
1252
8.93k
      state = 3;
1253
26.3k
    else if (state != 3)
1254
23.6k
      state = 1;
1255
35.3k
#endif
1256
35.3k
    PUT (ch);
1257
35.3k
    break;
1258
1259
617k
  case LEX_IS_NEWLINE:
1260
    /* Roll out a bunch of newlines from inside comments, etc.  */
1261
617k
    if (add_newlines)
1262
7.23k
      {
1263
7.23k
        --add_newlines;
1264
7.23k
        UNGET (ch);
1265
7.23k
      }
1266
    /* Fall through.  */
1267
1268
695k
  case LEX_IS_LINE_SEPARATOR:
1269
695k
    state = 0;
1270
695k
    PUT (ch);
1271
695k
    break;
1272
1273
695k
  case LEX_IS_PARALLEL_SEPARATOR:
1274
0
    state = 1;
1275
0
    PUT (ch);
1276
0
    break;
1277
1278
#ifdef TC_V850
1279
  case LEX_IS_DOUBLEDASH_1ST:
1280
    ch2 = GET ();
1281
    if (ch2 != '-')
1282
      {
1283
        if (ch2 != EOF)
1284
    UNGET (ch2);
1285
        goto de_fault;
1286
      }
1287
    /* Read and skip to end of line.  */
1288
    do
1289
      {
1290
        ch = GET ();
1291
      }
1292
    while (ch != EOF && ch != '\n');
1293
1294
    if (ch == EOF)
1295
      as_warn (_("end of file in comment; newline inserted"));
1296
1297
    state = 0;
1298
    PUT ('\n');
1299
    break;
1300
#endif
1301
#ifdef DOUBLEBAR_PARALLEL
1302
  case LEX_IS_DOUBLEBAR_1ST:
1303
    ch2 = GET ();
1304
    if (ch2 != EOF)
1305
      UNGET (ch2);
1306
    if (ch2 != '|')
1307
      goto de_fault;
1308
1309
    /* Handle '||' in two states as invoking PUT twice might
1310
       result in the first one jumping out of this loop.  We'd
1311
       then lose track of the state and one '|' char.  */
1312
    state = 13;
1313
    PUT ('|');
1314
    break;
1315
#endif
1316
46.9k
  case LEX_IS_LINE_COMMENT_START:
1317
    /* FIXME-someday: The two character comment stuff was badly
1318
       thought out.  On i386, we want '/' as line comment start
1319
       AND we want C style comments.  hence this hack.  The
1320
       whole lexical process should be reworked.  xoxorich.  */
1321
46.9k
    if (ch == '/')
1322
26.6k
      {
1323
26.6k
        ch2 = GET ();
1324
26.6k
        if (ch2 == '*')
1325
375
    goto twochar_comment;
1326
26.2k
        if (ch2 != EOF)
1327
26.2k
    UNGET (ch2);
1328
26.2k
      }
1329
1330
46.5k
    if (state == 0 || state == 1)  /* Only comment at start of line.  */
1331
5.92k
      {
1332
5.92k
        int startch;
1333
1334
5.92k
        startch = ch;
1335
1336
5.92k
        do
1337
5.93k
    {
1338
5.93k
      ch = GET ();
1339
5.93k
    }
1340
5.93k
        while (ch != EOF && IS_WHITESPACE (ch));
1341
1342
5.92k
        if (ch == EOF)
1343
0
    {
1344
0
      as_warn (_("end of file in comment; newline inserted"));
1345
0
      PUT ('\n');
1346
0
      break;
1347
0
    }
1348
1349
5.92k
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1350
3.83k
    {
1351
      /* Not a cpp line.  */
1352
283k
      while (ch != EOF && !IS_NEWLINE (ch))
1353
280k
        ch = GET ();
1354
3.83k
      if (ch == EOF)
1355
0
        {
1356
0
          as_warn (_("end of file in comment; newline inserted"));
1357
0
          PUT ('\n');
1358
0
        }
1359
3.83k
      else /* IS_NEWLINE (ch) */
1360
3.83k
        {
1361
          /* To process non-zero add_newlines.  */
1362
3.83k
          UNGET (ch);
1363
3.83k
        }
1364
3.83k
      state = 0;
1365
3.83k
      break;
1366
3.83k
    }
1367
        /* Looks like `# 123 "filename"' from cpp.  */
1368
2.09k
        UNGET (ch);
1369
2.09k
        old_state = 4;
1370
2.09k
        state = -1;
1371
2.09k
        if (scrub_m68k_mri)
1372
0
    out_string = "\tlinefile ";
1373
2.09k
        else
1374
2.09k
    out_string = "\t.linefile ";
1375
2.09k
        PUT (*out_string++);
1376
2.09k
        break;
1377
2.09k
      }
1378
1379
#ifdef TC_D10V
1380
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1381
       Trap is the only short insn that has a first operand that is
1382
       neither register nor label.
1383
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1384
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1385
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1386
       only character in line_comment_chars for d10v, hence we
1387
       can recognize it as such.  */
1388
    /* An alternative approach would be to reset the state to 1 when
1389
       we see '||', '<'- or '->', but that seems to be overkill.  */
1390
    if (state == 10)
1391
      PUT (' ');
1392
#endif
1393
    /* We have a line comment character which is not at the
1394
       start of a line.  If this is also a normal comment
1395
       character, fall through.  Otherwise treat it as a default
1396
       character.  */
1397
40.6k
    if (strchr (tc_comment_chars, ch) == NULL)
1398
24.7k
      goto de_fault;
1399
15.8k
    if (scrub_m68k_mri
1400
15.8k
        && (ch == '!' || ch == '*' || ch == '#'))
1401
0
      goto de_fault;
1402
    /* Fall through.  */
1403
15.8k
  case LEX_IS_COMMENT_START:
1404
#if defined TC_ARM && defined OBJ_ELF
1405
    /* On the ARM, `@' is the comment character.
1406
       Unfortunately this is also a special character in ELF .symver
1407
       directives (and .type, though we deal with those another way).
1408
       So we check if this line is such a directive, and treat
1409
       the character as default if so.  This is a hack.  */
1410
    if ((symver_state != NULL) && (*symver_state == 0))
1411
      goto de_fault;
1412
#endif
1413
1414
    /* Care is needed not to damage occurrences of \<comment-char>
1415
       by stripping the <comment-char> onwards.  Yuck.  */
1416
15.8k
    if ((to > tostart ? to[-1] : last_char) == '\\')
1417
      /* Do not treat the <comment-char> as a start-of-comment.  */
1418
55
      goto de_fault;
1419
1420
#ifdef WARN_COMMENTS
1421
    if (!found_comment)
1422
      found_comment_file = as_where (&found_comment);
1423
#endif
1424
15.8k
    do
1425
3.68M
      {
1426
3.68M
        ch = GET ();
1427
3.68M
      }
1428
3.68M
    while (ch != EOF && !IS_NEWLINE (ch));
1429
15.8k
    if (ch == EOF)
1430
3
      as_warn (_("end of file in comment; newline inserted"));
1431
15.8k
    state = 0;
1432
15.8k
    PUT ('\n');
1433
15.8k
    break;
1434
1435
#ifdef H_TICK_HEX
1436
  case LEX_IS_H:
1437
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1438
       the H' with 0x to make them gas-style hex characters.  */
1439
    if (enable_h_tick_hex)
1440
      {
1441
        char quot;
1442
1443
        quot = GET ();
1444
        if (quot == '\'')
1445
    {
1446
      UNGET ('x');
1447
      ch = '0';
1448
    }
1449
        else
1450
    UNGET (quot);
1451
      }
1452
#endif
1453
    /* Fall through.  */
1454
1455
1.16M
  case LEX_IS_SYMBOL_COMPONENT:
1456
1.16M
    if (state == 10)
1457
155k
      {
1458
        /* This is a symbol character following another symbol
1459
     character, with whitespace in between.  We skipped
1460
     the whitespace earlier, so output it now.  */
1461
155k
        UNGET (ch);
1462
155k
        state = 3;
1463
155k
        PUT (' ');
1464
155k
        break;
1465
155k
      }
1466
1467
#ifdef TC_Z80
1468
    /* "af'" is a symbol containing '\''.  */
1469
    if (state == 3 && (ch == 'a' || ch == 'A'))
1470
      {
1471
        state = 16;
1472
        PUT (ch);
1473
        ch = GET ();
1474
        if (ch == 'f' || ch == 'F')
1475
    {
1476
      state = 17;
1477
      PUT (ch);
1478
      break;
1479
    }
1480
        else
1481
    {
1482
      state = 9;
1483
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1484
        {
1485
          if (ch != EOF)
1486
      UNGET (ch);
1487
          break;
1488
        }
1489
    }
1490
      }
1491
#endif
1492
1.00M
    if (state == 3)
1493
396k
      state = 9;
1494
1495
    /* This is a common case.  Quickly copy CH and all the
1496
       following symbol component or normal characters.  */
1497
1.00M
    if (to + 1 < toend
1498
#ifdef TC_M68K
1499
        && mri_state == NULL
1500
#endif
1501
#if defined TC_ARM && defined OBJ_ELF
1502
        && symver_state == NULL
1503
#endif
1504
1.00M
        && end_state == NULL)
1505
883k
      {
1506
883k
        char *s;
1507
883k
        ptrdiff_t len;
1508
1509
19.8M
        for (s = from; s < fromend; s++)
1510
19.8M
    {
1511
19.8M
      int type;
1512
1513
19.8M
      ch2 = *(unsigned char *) s;
1514
19.8M
      type = lex[ch2];
1515
19.8M
      if (type != 0
1516
19.8M
          && type != LEX_IS_SYMBOL_COMPONENT)
1517
883k
        break;
1518
19.8M
    }
1519
1520
883k
        if (s > from)
1521
    /* Handle the last character normally, for
1522
       simplicity.  */
1523
742k
    --s;
1524
1525
883k
        len = s - from;
1526
1527
883k
        if (len > (toend - to) - 1)
1528
210
    len = (toend - to) - 1;
1529
1530
883k
        if (len > 0)
1531
673k
    {
1532
673k
      PUT (ch);
1533
673k
      memcpy (to, from, len);
1534
673k
      to += len;
1535
673k
      from += len;
1536
673k
      if (to >= toend)
1537
215
        goto tofull;
1538
672k
      ch = GET ();
1539
672k
    }
1540
883k
      }
1541
1542
    /* Fall through.  */
1543
3.50M
  default:
1544
3.52M
  de_fault:
1545
    /* Some relatively `normal' character.  */
1546
3.52M
    if (state == 0)
1547
376k
      {
1548
376k
        state = 11; /* Now seeing label definition.  */
1549
376k
      }
1550
3.15M
    else if (state == 1)
1551
67.2k
      {
1552
67.2k
        state = 2;  /* Ditto.  */
1553
67.2k
      }
1554
3.08M
    else if (state == 9)
1555
441k
      {
1556
441k
        if (!IS_SYMBOL_COMPONENT (ch))
1557
52.3k
    state = 3;
1558
441k
      }
1559
2.64M
    else if (state == 10)
1560
8.95k
      {
1561
8.95k
        if (ch == '\\')
1562
722
    {
1563
      /* Special handling for backslash: a backslash may
1564
         be the beginning of a formal parameter (of a
1565
         macro) following another symbol character, with
1566
         whitespace in between.  If that is the case, we
1567
         output a space before the parameter.  Strictly
1568
         speaking, correct handling depends upon what the
1569
         macro parameter expands into; if the parameter
1570
         expands into something which does not start with
1571
         an operand character, then we don't want to keep
1572
         the space.  We don't have enough information to
1573
         make the right choice, so here we are making the
1574
         choice which is more likely to be correct.  */
1575
722
      if (to + 1 >= toend)
1576
0
        {
1577
          /* If we're near the end of the buffer, save the
1578
             character for the next time round.  Otherwise
1579
             we'll lose our state.  */
1580
0
          UNGET (ch);
1581
0
          goto tofull;
1582
0
        }
1583
722
      *to++ = ' ';
1584
722
    }
1585
1586
8.95k
        state = 3;
1587
8.95k
      }
1588
3.52M
    PUT (ch);
1589
3.52M
    break;
1590
5.36M
  }
1591
5.36M
    }
1592
1593
  /*NOTREACHED*/
1594
1595
1.15k
 fromeof:
1596
  /* We have reached the end of the input.  */
1597
1.15k
  if (to > tostart)
1598
1.15k
    last_char = to[-1];
1599
1.15k
  return to - tostart;
1600
1601
707
 tofull:
1602
  /* The output buffer is full.  Save any input we have not yet
1603
     processed.  */
1604
707
  if (fromend > from)
1605
612
    {
1606
612
      saved_input = from;
1607
612
      saved_input_len = fromend - from;
1608
612
    }
1609
95
  else
1610
95
    saved_input = NULL;
1611
1612
707
  if (to > tostart)
1613
707
    last_char = to[-1];
1614
707
  return to - tostart;
1615
1.86k
}
1616
1617
/* Return amount of pending input.  */
1618
1619
size_t
1620
do_scrub_pending (void)
1621
2.56k
{
1622
2.56k
  size_t len = 0;
1623
2.56k
  if (saved_input)
1624
51
    len += saved_input_len;
1625
2.56k
  if (state == -1)
1626
0
    len += strlen (out_string);
1627
2.56k
  return len;
1628
2.56k
}