Coverage Report

Created: 2025-07-08 11:15

/src/binutils-gdb/gas/app.c
Line
Count
Source (jump to first uncovered line)
1
/* This is the Assembler Pre-Processor
   Copyright (C) 1987-2025 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
/* App, the assembler pre-processor.  This pre-processor strips out
   excess spaces, turns single-quoted characters into a decimal
   constant, and turns the # in # <number> <filename> <garbage> into a
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
/* On compilers that do not claim ISO C conformance, make `const'
   expand to nothing unless something has already defined it.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* When nonzero, do_scrub_begin classifies 'h' and 'H' as LEX_IS_H
   instead of as ordinary symbol components.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  mri_state points at the next character
   of mri_pseudo still to be matched (NULL when no match is in
   progress); mri_last_ch is the last character that matched.  */
static const char mri_pseudo[] = ".mri 0";
static const char *mri_state;
static char mri_last_ch;
#else
/* Without m68k support MRI mode can never be active; a constant lets
   the compiler discard the MRI-only branches.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  symver_state points at the next
   character of symver_pseudo still to be matched, or is NULL.  */
static const char   symver_pseudo[] = ".symver";
static const char * symver_state;
#endif

/* The pseudo-op (without leading dot) at which we want to (perhaps just
   temporarily) stop processing.  See the comments in do_scrub_chars().
   end_state points at the next character of end_pseudo still to be
   matched, or is NULL when no match is in progress.  */
static const char   end_pseudo[] = "end ";
static const char * end_state;

/* Whether, considering the state at start of assembly, NO_PSEUDO_DOT is
   active.  Latched by do_scrub_begin.  */
static bool no_pseudo_dot;

/* Saved and restored by app_push / app_pop along with the rest of the
   scrubber state.  */
static char last_char;
71
72
0
/* Character classes stored in the lex[] table.  A table entry of zero
   means the character has no special meaning to the scrubber.  The
   numeric values are arbitrary but must stay distinct.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif

/* Classification predicates.  The argument is converted to unsigned
   char before indexing so that a negative value (EOF, or a plain char
   with the high bit set on a signed-char host) can never index outside
   lex[].  For arguments already in the range 0..255 the result is the
   same as a direct lex[c] lookup.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_TWOCHAR_COMMENT_1ST(c) \
  (lex[(unsigned char) (c)] == LEX_IS_TWOCHAR_COMMENT_1ST)
#define IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
100
101
static char lex[256] = {
102
  [' ']  = LEX_IS_WHITESPACE,
103
  ['\t'] = LEX_IS_WHITESPACE,
104
#ifdef CR_EOL
105
  ['\r'] = LEX_IS_LINE_SEPARATOR,
106
#else
107
  ['\r'] = LEX_IS_WHITESPACE,
108
#endif
109
  ['\n'] = LEX_IS_NEWLINE,
110
  [':'] = LEX_IS_COLON,
111
  ['$'] = LEX_IS_SYMBOL_COMPONENT,
112
  ['.'] = LEX_IS_SYMBOL_COMPONENT,
113
  ['_'] = LEX_IS_SYMBOL_COMPONENT,
114
  ['A'] = LEX_IS_SYMBOL_COMPONENT, ['a'] = LEX_IS_SYMBOL_COMPONENT,
115
  ['B'] = LEX_IS_SYMBOL_COMPONENT, ['b'] = LEX_IS_SYMBOL_COMPONENT,
116
  ['C'] = LEX_IS_SYMBOL_COMPONENT, ['c'] = LEX_IS_SYMBOL_COMPONENT,
117
  ['D'] = LEX_IS_SYMBOL_COMPONENT, ['d'] = LEX_IS_SYMBOL_COMPONENT,
118
  ['E'] = LEX_IS_SYMBOL_COMPONENT, ['e'] = LEX_IS_SYMBOL_COMPONENT,
119
  ['F'] = LEX_IS_SYMBOL_COMPONENT, ['f'] = LEX_IS_SYMBOL_COMPONENT,
120
  ['G'] = LEX_IS_SYMBOL_COMPONENT, ['g'] = LEX_IS_SYMBOL_COMPONENT,
121
  ['H'] = LEX_IS_SYMBOL_COMPONENT, ['h'] = LEX_IS_SYMBOL_COMPONENT,
122
  ['I'] = LEX_IS_SYMBOL_COMPONENT, ['i'] = LEX_IS_SYMBOL_COMPONENT,
123
  ['J'] = LEX_IS_SYMBOL_COMPONENT, ['j'] = LEX_IS_SYMBOL_COMPONENT,
124
  ['K'] = LEX_IS_SYMBOL_COMPONENT, ['k'] = LEX_IS_SYMBOL_COMPONENT,
125
  ['L'] = LEX_IS_SYMBOL_COMPONENT, ['l'] = LEX_IS_SYMBOL_COMPONENT,
126
  ['M'] = LEX_IS_SYMBOL_COMPONENT, ['m'] = LEX_IS_SYMBOL_COMPONENT,
127
  ['N'] = LEX_IS_SYMBOL_COMPONENT, ['n'] = LEX_IS_SYMBOL_COMPONENT,
128
  ['O'] = LEX_IS_SYMBOL_COMPONENT, ['o'] = LEX_IS_SYMBOL_COMPONENT,
129
  ['P'] = LEX_IS_SYMBOL_COMPONENT, ['p'] = LEX_IS_SYMBOL_COMPONENT,
130
  ['Q'] = LEX_IS_SYMBOL_COMPONENT, ['q'] = LEX_IS_SYMBOL_COMPONENT,
131
  ['R'] = LEX_IS_SYMBOL_COMPONENT, ['r'] = LEX_IS_SYMBOL_COMPONENT,
132
  ['S'] = LEX_IS_SYMBOL_COMPONENT, ['s'] = LEX_IS_SYMBOL_COMPONENT,
133
  ['T'] = LEX_IS_SYMBOL_COMPONENT, ['t'] = LEX_IS_SYMBOL_COMPONENT,
134
  ['U'] = LEX_IS_SYMBOL_COMPONENT, ['u'] = LEX_IS_SYMBOL_COMPONENT,
135
  ['V'] = LEX_IS_SYMBOL_COMPONENT, ['v'] = LEX_IS_SYMBOL_COMPONENT,
136
  ['W'] = LEX_IS_SYMBOL_COMPONENT, ['w'] = LEX_IS_SYMBOL_COMPONENT,
137
  ['X'] = LEX_IS_SYMBOL_COMPONENT, ['x'] = LEX_IS_SYMBOL_COMPONENT,
138
  ['Y'] = LEX_IS_SYMBOL_COMPONENT, ['y'] = LEX_IS_SYMBOL_COMPONENT,
139
  ['Z'] = LEX_IS_SYMBOL_COMPONENT, ['z'] = LEX_IS_SYMBOL_COMPONENT,
140
  ['0'] = LEX_IS_SYMBOL_COMPONENT,
141
  ['1'] = LEX_IS_SYMBOL_COMPONENT,
142
  ['2'] = LEX_IS_SYMBOL_COMPONENT,
143
  ['3'] = LEX_IS_SYMBOL_COMPONENT,
144
  ['4'] = LEX_IS_SYMBOL_COMPONENT,
145
  ['5'] = LEX_IS_SYMBOL_COMPONENT,
146
  ['6'] = LEX_IS_SYMBOL_COMPONENT,
147
  ['7'] = LEX_IS_SYMBOL_COMPONENT,
148
  ['8'] = LEX_IS_SYMBOL_COMPONENT,
149
  ['9'] = LEX_IS_SYMBOL_COMPONENT,
150
#define INIT2(n) [n] = LEX_IS_SYMBOL_COMPONENT, \
151
     [(n) + 1] = LEX_IS_SYMBOL_COMPONENT
152
#define INIT4(n)    INIT2 (n),  INIT2 ((n) +  2)
153
#define INIT8(n)    INIT4 (n),  INIT4 ((n) +  4)
154
#define INIT16(n)   INIT8 (n),  INIT8 ((n) +  8)
155
#define INIT32(n)  INIT16 (n), INIT16 ((n) + 16)
156
#define INIT64(n)  INIT32 (n), INIT32 ((n) + 32)
157
#define INIT128(n) INIT64 (n), INIT64 ((n) + 64)
158
  INIT128 (128),
159
#undef INIT128
160
#undef INIT64
161
#undef INIT32
162
#undef INIT16
163
#undef INIT8
164
#undef INIT4
165
#undef INIT2
166
};
167
168
void
169
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
170
0
{
171
0
  const char *p;
172
173
  /* Latch this once at start.  xtensa uses a hook function, yet context isn't
174
     meaningful for scrubbing (or else we'd need to sync scrubber behavior as
175
     state changes).  */
176
0
  if (lex['/'] == 0)
177
0
    no_pseudo_dot = NO_PSEUDO_DOT;
178
179
#ifdef TC_M68K
180
  scrub_m68k_mri = m68k_mri;
181
182
  if (! m68k_mri)
183
#endif
184
0
    {
185
0
      lex['"'] = LEX_IS_STRINGQUOTE;
186
187
0
#if ! defined (TC_HPPA)
188
0
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
189
0
#endif
190
191
#ifdef SINGLE_QUOTE_STRINGS
192
      lex['\''] = LEX_IS_STRINGQUOTE;
193
#endif
194
0
    }
195
196
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
197
     in state 5 of do_scrub_chars must be changed.  */
198
199
  /* Note that these override the previous defaults, e.g. if ';' is a
200
     comment char, then it isn't a line separator.  */
201
202
0
#ifdef tc_symbol_chars
203
  /* This macro permits the processor to specify all characters which
204
     may appears in an operand.  This will prevent the scrubber from
205
     discarding meaningful whitespace in certain cases.  The i386
206
     backend uses this to support prefixes, which can confuse the
207
     scrubber as to whether it is parsing operands or opcodes.  */
208
0
  for (p = tc_symbol_chars; *p; ++p)
209
0
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
210
0
#endif
211
212
  /* The m68k backend wants to be able to change comment_chars.  */
213
#ifndef tc_comment_chars
214
#define tc_comment_chars comment_chars
215
#endif
216
0
  for (p = tc_comment_chars; *p; p++)
217
0
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
218
219
  /* While counter intuitive to have more special purpose line comment chars
220
     override more general purpose ordinary ones, logic in do_scrub_chars()
221
     depends on this ordering.   */
222
0
  for (p = line_comment_chars; *p; p++)
223
0
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
224
225
0
#ifndef tc_line_separator_chars
226
0
#define tc_line_separator_chars line_separator_chars
227
0
#endif
228
0
  for (p = tc_line_separator_chars; *p; p++)
229
0
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
230
231
#ifdef tc_parallel_separator_chars
232
  /* This macro permits the processor to specify all characters which
233
     separate parallel insns on the same line.  */
234
  for (p = tc_parallel_separator_chars; *p; p++)
235
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
236
#endif
237
238
  /* Only allow slash-star comments if slash is not in use.  Certain
239
     other cases are dealt with in LEX_IS_LINE_COMMENT_START handling.
240
     FIXME: This isn't right.  We should always permit them.  */
241
0
  if (lex['/'] == 0)
242
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
243
244
#ifdef TC_M68K
245
  if (m68k_mri)
246
    {
247
      lex['\''] = LEX_IS_STRINGQUOTE;
248
      lex[';'] = LEX_IS_COMMENT_START;
249
      lex['*'] = LEX_IS_LINE_COMMENT_START;
250
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
251
   then it can't be used in an expression.  */
252
      lex['!'] = LEX_IS_LINE_COMMENT_START;
253
    }
254
#endif
255
256
#ifdef TC_V850
257
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
258
#endif
259
#ifdef DOUBLEBAR_PARALLEL
260
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
261
#endif
262
#ifdef TC_D30V
263
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
264
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
265
#endif
266
267
#ifdef H_TICK_HEX
268
  if (enable_h_tick_hex)
269
    {
270
      lex['h'] = LEX_IS_H;
271
      lex['H'] = LEX_IS_H;
272
    }
273
#endif
274
0
}
275
276
/* Saved state of the scrubber.  do_scrub_chars may return at any point
   (output buffer full, or input exhausted), so everything it needs in
   order to resume lives here rather than in locals.  */

/* Current and previous state of the do_scrub_chars state machine.  */
static int state;
static int old_state;

/* While in state -1, the next character of the pending string to be
   copied to the output.  */
static const char *out_string;
static char out_buf[20];

/* Incremented when a backslash-newline inside a string is rewritten.  */
static int add_newlines;

/* When non-NULL, input that has been read but not yet consumed;
   do_scrub_chars resumes from here instead of calling its GET
   callback.  saved_input_len is the number of bytes available.  */
static char *saved_input;
static size_t saved_input_len;

/* Buffer handed to the GET callback to be filled with raw input.  */
static char input_buffer[32 * 1024];
285
286
/* Data structure for saving the state of app across #include's.  Note that
287
   app is called asynchronously to the parsing of the .include's, so our
288
   state at the time .include is interpreted is completely unrelated.
289
   That's why we have to save it all.  */
290
291
struct app_save
292
{
293
  int          state;
294
  int          old_state;
295
  const char * out_string;
296
  char         out_buf[sizeof (out_buf)];
297
  int          add_newlines;
298
  char *       saved_input;
299
  size_t       saved_input_len;
300
  const char * end_state;
301
#ifdef TC_M68K
302
  int          scrub_m68k_mri;
303
  const char * mri_state;
304
  char         mri_last_ch;
305
#endif
306
#if defined TC_ARM && defined OBJ_ELF
307
  const char * symver_state;
308
#endif
309
  char         last_char;
310
};
311
312
char *
313
app_push (void)
314
0
{
315
0
  struct app_save *saved;
316
317
0
  saved = XNEW (struct app_save);
318
0
  saved->state = state;
319
0
  saved->old_state = old_state;
320
0
  saved->out_string = out_string;
321
0
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
322
0
  saved->add_newlines = add_newlines;
323
0
  if (saved_input == NULL)
324
0
    saved->saved_input = NULL;
325
0
  else
326
0
    {
327
0
      saved->saved_input = XNEWVEC (char, saved_input_len);
328
0
      memcpy (saved->saved_input, saved_input, saved_input_len);
329
0
      saved->saved_input_len = saved_input_len;
330
0
    }
331
0
  saved->end_state = end_state;
332
#ifdef TC_M68K
333
  saved->scrub_m68k_mri = scrub_m68k_mri;
334
  saved->mri_state = mri_state;
335
  saved->mri_last_ch = mri_last_ch;
336
#endif
337
#if defined TC_ARM && defined OBJ_ELF
338
  saved->symver_state = symver_state;
339
#endif
340
0
  saved->last_char = last_char;
341
342
  /* do_scrub_begin() is not useful, just wastes time.  */
343
344
0
  state = 0;
345
0
  saved_input = NULL;
346
0
  add_newlines = 0;
347
348
0
  return (char *) saved;
349
0
}
350
351
void
352
app_pop (char *arg)
353
0
{
354
0
  struct app_save *saved = (struct app_save *) arg;
355
356
  /* There is no do_scrub_end ().  */
357
0
  state = saved->state;
358
0
  old_state = saved->old_state;
359
0
  out_string = saved->out_string;
360
0
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
361
0
  add_newlines = saved->add_newlines;
362
0
  if (saved->saved_input == NULL)
363
0
    saved_input = NULL;
364
0
  else
365
0
    {
366
0
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
367
0
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
368
0
      saved_input = input_buffer;
369
0
      saved_input_len = saved->saved_input_len;
370
0
      free (saved->saved_input);
371
0
    }
372
0
  end_state = saved->end_state;
373
#ifdef TC_M68K
374
  scrub_m68k_mri = saved->scrub_m68k_mri;
375
  mri_state = saved->mri_state;
376
  mri_last_ch = saved->mri_last_ch;
377
#endif
378
#if defined TC_ARM && defined OBJ_ELF
379
  symver_state = saved->symver_state;
380
#endif
381
0
  last_char = saved->last_char;
382
383
0
  free (arg);
384
0
}
385
386
/* Translate CH, the character following a backslash, into the
   character the escape sequence denotes.  Only \b, \f, \n, \r, \t,
   \' and \" are translated; any other character is returned
   unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
412
413
0
/* Upper bound on the number of per-character multibyte warnings issued
   over the whole run, and the number issued so far.  */
#define MULTIBYTE_WARN_COUNT_LIMIT 10
static unsigned int multibyte_warn_count = 0;

/* Look for bytes with the top bit set (values above 0x7f) in the range
   [START, END).

   If WARN is false, return true as soon as one such byte is found and
   false if there is none; nothing is reported.

   If WARN is true, issue a warning for each such byte, naming the
   current file and line when as_where can supply them, until
   MULTIBYTE_WARN_COUNT_LIMIT warnings have been issued in total; a
   final "suppressed" warning is then emitted.  Once the limit has been
   reached, later calls with WARN true return false without scanning.
   Otherwise the result is true if at least one such byte was seen.

   An empty or inverted range yields false.  */

bool
scan_for_multibyte_characters (const unsigned char *  start,
			       const unsigned char *  end,
			       bool                   warn)
{
  if (end <= start)
    return false;

  /* Use >= rather than >: suppression is announced when the counter
     becomes equal to the limit, so a call made after that point must
     not produce any further warnings.  */
  if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
    return false;

  bool found = false;

  while (start < end)
    {
      unsigned char c;

      if ((c = * start++) <= 0x7f)
	continue;

      if (!warn)
	return true;

      found = true;

      const char * filename;
      unsigned int lineno;

      filename = as_where (& lineno);
      if (filename == NULL)
	as_warn (_("multibyte character (%#x) encountered in input"), c);
      else if (lineno == 0)
	as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
      else
	as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);

      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
	{
	  as_warn (_("further multibyte character warnings suppressed"));
	  break;
	}
    }

  return found;
}
461
462
/* This function is called to process input characters.  The GET
463
   parameter is used to retrieve more input characters.  GET should
464
   set its parameter to point to a buffer, and return the length of
465
   the buffer; it should return 0 at end of file.  The scrubbed output
466
   characters are put into the buffer starting at TOSTART; the TOSTART
467
   buffer is TOLEN bytes in length.  The function returns the number
468
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
469
   end of file was seen.  This function is arranged as a state
470
   machine, and saves its state so that it may return at any point.
471
   This is the way the old code used to work.  */
472
473
size_t
474
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
475
    bool check_multibyte)
476
0
{
477
0
  char *to = tostart;
478
0
  char *toend = tostart + tolen;
479
0
  char *from;
480
0
  char *fromend;
481
0
  size_t fromlen;
482
0
  int ch, ch2 = 0;
483
  /* Character that started the string we're working on.  */
484
0
  static char quotechar;
485
486
  /*State 0: beginning of normal line
487
    1: After first whitespace on line (flush more white)
488
    2: After first non-white (opcode) on line (keep 1white)
489
    3: after second white on line (into operands) (flush white)
490
    4: after putting out a .linefile, put out digits
491
    5: parsing a string, then go to old-state
492
    6: putting out \ escape in a "d string.
493
    7: no longer used
494
    8: no longer used
495
    9: After seeing symbol char in state 3 (keep 1white after symchar)
496
   10: After seeing whitespace in state 9 (keep white before symchar)
497
   11: After seeing a symbol character in state 0 (eg a label definition)
498
   -1: output string in out_string and go to the state in old_state
499
   12: no longer used
500
#ifdef DOUBLEBAR_PARALLEL
501
   13: After seeing a vertical bar, looking for a second
502
       vertical bar as a parallel expression separator.
503
#endif
504
#ifdef TC_PREDICATE_START_CHAR
505
   14: After seeing a predicate start character at state 0, looking
506
       for a predicate end character as predicate.
507
   15: After seeing a predicate start character at state 1, looking
508
       for a predicate end character as predicate.
509
#endif
510
#ifdef TC_Z80
511
   16: After seeing an 'a' or an 'A' at the start of a symbol
512
   17: After seeing an 'f' or an 'F' in state 16
513
#endif
514
    */
515
516
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
517
     constructs like ``.loc 1 20''.  This was turning into ``.loc
518
     120''.  States 9 and 10 ensure that a space is never dropped in
519
     between characters which could appear in an identifier.  Ian
520
     Taylor, ian@cygnus.com.
521
522
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
523
     correctly on the PA (and any other target where colons are optional).
524
     Jeff Law, law@cs.utah.edu.
525
526
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
527
     get squashed into "cmp r1,r2||trap#1", with the all important space
528
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
529
530
  /* This macro gets the next input character.  */
531
532
0
#define GET()             \
533
0
  (from < fromend            \
534
0
   ? * (unsigned char *) (from++)        \
535
0
   : (saved_input = NULL,          \
536
0
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
537
0
      from = input_buffer,          \
538
0
      fromend = from + fromlen,         \
539
0
      (fromlen == 0            \
540
0
       ? EOF              \
541
0
       : * (unsigned char *) (from++))))
542
543
  /* This macro pushes a character back on the input stream.  */
544
545
0
#define UNGET(uch) (*--from = (uch))
546
547
  /* This macro puts a character into the output buffer.  If this
548
     character fills the output buffer, this macro jumps to the label
549
     TOFULL.  We use this rather ugly approach because we need to
550
     handle two different termination conditions: EOF on the input
551
     stream, and a full output buffer.  It would be simpler if we
552
     always read in the entire input stream before processing it, but
553
     I don't want to make such a significant change to the assembler's
554
     memory usage.  */
555
556
0
#define PUT(pch)        \
557
0
  do            \
558
0
    {           \
559
0
      *to++ = (pch);        \
560
0
      if (to >= toend)       \
561
0
  goto tofull;       \
562
0
    }           \
563
0
  while (0)
564
565
0
  if (saved_input != NULL)
566
0
    {
567
0
      from = saved_input;
568
0
      fromend = from + saved_input_len;
569
0
    }
570
0
  else
571
0
    {
572
0
      fromlen = (*get) (input_buffer, sizeof input_buffer);
573
0
      if (fromlen == 0)
574
0
  return 0;
575
0
      from = input_buffer;
576
0
      fromend = from + fromlen;
577
578
0
      if (check_multibyte)
579
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
580
0
                (const unsigned char* ) fromend,
581
0
                true /* Generate warnings.  */);
582
0
    }
583
584
0
  while (1)
585
0
    {
586
      /* The cases in this switch end with continue, in order to
587
   branch back to the top of this while loop and generate the
588
   next output character in the appropriate state.  */
589
0
      switch (state)
590
0
  {
591
0
  case -1:
592
0
    ch = *out_string++;
593
0
    if (*out_string == '\0')
594
0
      {
595
0
        state = old_state;
596
0
        old_state = 3;
597
0
      }
598
0
    PUT (ch);
599
0
    continue;
600
601
0
  case 4:
602
0
    ch = GET ();
603
0
    if (ch == EOF)
604
0
      goto fromeof;
605
0
    else if (ch >= '0' && ch <= '9')
606
0
      PUT (ch);
607
0
    else
608
0
      {
609
0
        while (ch != EOF && IS_WHITESPACE (ch))
610
0
    ch = GET ();
611
0
        if (ch == '"')
612
0
    {
613
0
      quotechar = ch;
614
0
      state = 5;
615
0
      old_state = 3;
616
0
      PUT (ch);
617
0
    }
618
0
        else
619
0
    {
620
0
      while (ch != EOF && ch != '\n')
621
0
        ch = GET ();
622
0
      state = 0;
623
0
      PUT (ch);
624
0
    }
625
0
      }
626
0
    continue;
627
628
0
  case 5:
629
    /* We are going to copy everything up to a quote character,
630
       with special handling for a backslash.  We try to
631
       optimize the copying in the simple case without using the
632
       GET and PUT macros.  */
633
0
    {
634
0
      char *s;
635
0
      ptrdiff_t len;
636
637
0
      for (s = from; s < fromend; s++)
638
0
        {
639
0
    ch = *s;
640
0
    if (ch == '\\'
641
0
        || ch == quotechar
642
0
        || ch == '\n')
643
0
      break;
644
0
        }
645
0
      len = s - from;
646
0
      if (len > toend - to)
647
0
        len = toend - to;
648
0
      if (len > 0)
649
0
        {
650
0
    memcpy (to, from, len);
651
0
    to += len;
652
0
    from += len;
653
0
    if (to >= toend)
654
0
      goto tofull;
655
0
        }
656
0
    }
657
658
0
    ch = GET ();
659
0
    if (ch == EOF)
660
0
      {
661
        /* This buffer is here specifically so
662
     that the UNGET below will work.  */
663
0
        static char one_char_buf[1];
664
665
0
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
666
0
        state = old_state;
667
0
        from = fromend = one_char_buf + 1;
668
0
        fromlen = 1;
669
0
        UNGET ('\n');
670
0
        PUT (quotechar);
671
0
      }
672
0
    else if (ch == quotechar)
673
0
      {
674
0
        state = old_state;
675
0
        PUT (ch);
676
0
      }
677
0
    else if (TC_STRING_ESCAPES && ch == '\\')
678
0
      {
679
0
        state = 6;
680
0
        PUT (ch);
681
0
      }
682
0
    else if (scrub_m68k_mri && ch == '\n')
683
0
      {
684
        /* Just quietly terminate the string.  This permits lines like
685
       bne  label loop if we haven't reach end yet.  */
686
0
        state = old_state;
687
0
        UNGET (ch);
688
0
        PUT ('\'');
689
0
      }
690
0
    else
691
0
      {
692
0
        PUT (ch);
693
0
      }
694
0
    continue;
695
696
0
  case 6:
697
0
    state = 5;
698
0
    ch = GET ();
699
0
    switch (ch)
700
0
      {
701
        /* Handle strings broken across lines, by turning '\n' into
702
     '\\' and 'n'.  */
703
0
      case '\n':
704
0
        UNGET ('n');
705
0
        add_newlines++;
706
0
        PUT ('\\');
707
0
        continue;
708
709
0
      case EOF:
710
0
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
711
0
        PUT (quotechar);
712
0
        continue;
713
714
        /* These two are used inside macros.  */
715
0
      case '@':
716
0
      case '+':
717
0
        break;
718
719
0
      case '"':
720
0
      case '\\':
721
0
      case 'b':
722
0
      case 'f':
723
0
      case 'n':
724
0
      case 'r':
725
0
      case 't':
726
0
      case 'v':
727
0
      case 'x':
728
0
      case 'X':
729
0
      case '0':
730
0
      case '1':
731
0
      case '2':
732
0
      case '3':
733
0
      case '4':
734
0
      case '5':
735
0
      case '6':
736
0
      case '7':
737
0
        break;
738
739
0
      default:
740
#ifdef ONLY_STANDARD_ESCAPES
741
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
742
#endif
743
0
        break;
744
0
      }
745
0
    PUT (ch);
746
0
    continue;
747
748
#ifdef DOUBLEBAR_PARALLEL
749
  case 13:
750
    ch = GET ();
751
    if (ch != '|')
752
      abort ();
753
754
    /* Reset back to state 1 and pretend that we are parsing a
755
       line from just after the first white space.  */
756
    state = 1;
757
    PUT ('|');
758
    continue;
759
#endif
760
#ifdef TC_Z80
761
  case 16:
762
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
763
    ch = GET ();
764
    if (ch == 'f' || ch == 'F')
765
      {
766
        state = 17;
767
        PUT (ch);
768
      }
769
    else
770
      {
771
        if (ch != EOF)
772
    UNGET (ch);
773
        state = 9;
774
        break;
775
      }
776
    /* Fall through.  */
777
  case 17:
778
    /* We have seen "af" at the start of a symbol,
779
       a ' here is a part of that symbol.  */
780
    ch = GET ();
781
    state = 9;
782
    if (ch == '\'')
783
      /* Change to avoid warning about unclosed string.  */
784
      PUT ('`');
785
    else if (ch != EOF)
786
      UNGET (ch);
787
    break;
788
#endif
789
0
  }
790
791
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
792
793
      /* flushchar: */
794
0
      ch = GET ();
795
796
#ifdef TC_PREDICATE_START_CHAR
797
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
798
  {
799
    state += 14;
800
    PUT (ch);
801
    continue;
802
  }
803
      else if (state == 14 || state == 15)
804
  {
805
    if (ch == TC_PREDICATE_END_CHAR)
806
      {
807
        state -= 14;
808
        PUT (ch);
809
        ch = GET ();
810
      }
811
    else
812
      {
813
        PUT (ch);
814
        continue;
815
      }
816
  }
817
#endif
818
819
0
    recycle:
820
821
      /* We need to watch out for .end directives: We should in particular not
822
   issue diagnostics for anything after an active one.  */
823
0
      if (ch == EOF)
824
0
  end_state = NULL;
825
0
      else if (end_state == NULL)
826
0
  {
827
0
    if ((state == 0 || state == 1)
828
0
        && (ch == '.'
829
0
      || (no_pseudo_dot && ch == end_pseudo[0])))
830
0
      end_state = end_pseudo + (ch != '.');
831
0
  }
832
0
      else if (ch != '\0'
833
0
         && (*end_state == ch
834
       /* Avoid triggering on directives like .endif or .endr.  */
835
0
       || (*end_state == ' ' && !IS_SYMBOL_COMPONENT (ch))))
836
0
  {
837
0
    if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
838
0
      goto end_end;
839
0
    ++end_state;
840
0
  }
841
0
      else if (*end_state != '\0')
842
  /* We did not get the expected character, or we didn't
843
     get a valid terminating character after seeing the
844
     entire pseudo-op, so we must go back to the beginning.  */
845
0
  end_state = NULL;
846
0
      else if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
847
0
  {
848
0
  end_end:
849
    /* We've read the entire pseudo-op.  If this is the end of the line,
850
       bail out now by (ab)using the output-full path.  This allows the
851
       caller to process input up to here and terminate processing if this
852
       directive is actually active (not on the false branch of a
853
       conditional and not in a macro definition).  */
854
0
    end_state = NULL;
855
0
    state = 0;
856
0
    PUT (ch);
857
0
    goto tofull;
858
0
  }
859
860
#if defined TC_ARM && defined OBJ_ELF
861
      /* We need to watch out for .symver directives.  See the comment later
862
   in this function.  */
863
      if (ch == EOF)
864
  symver_state = NULL;
865
      else if (symver_state == NULL)
866
  {
867
    if ((state == 0 || state == 1)
868
        && strchr (tc_comment_chars, '@') != NULL
869
        && ch == symver_pseudo[0])
870
      symver_state = symver_pseudo + 1;
871
  }
872
      else
873
  {
874
    /* We advance to the next state if we find the right
875
       character.  */
876
    if (ch != '\0' && (*symver_state == ch))
877
      ++symver_state;
878
    else if (*symver_state != '\0')
879
      /* We did not get the expected character, or we didn't
880
         get a valid terminating character after seeing the
881
         entire pseudo-op, so we must go back to the beginning.  */
882
      symver_state = NULL;
883
    else
884
      {
885
        /* We've read the entire pseudo-op.  If this is the end
886
     of the line, go back to the beginning.  */
887
        if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
888
    symver_state = NULL;
889
      }
890
  }
891
#endif /* TC_ARM && OBJ_ELF */
892
893
#ifdef TC_M68K
894
      /* We want to have pseudo-ops which control whether we are in
895
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
896
   the scrubber, that means that we need a special purpose
897
   recognizer here.  */
898
      if (ch == EOF)
899
  mri_state = NULL;
900
      else if (mri_state == NULL)
901
  {
902
    if ((state == 0 || state == 1)
903
        && ch == mri_pseudo[0])
904
      mri_state = mri_pseudo + 1;
905
  }
906
      else
907
  {
908
    /* We advance to the next state if we find the right
909
       character, or if we need a space character and we get any
910
       whitespace character, or if we need a '0' and we get a
911
       '1' (this is so that we only need one state to handle
912
       ``.mri 0'' and ``.mri 1'').  */
913
    if (ch != '\0'
914
        && (*mri_state == ch
915
      || (*mri_state == ' '
916
          && IS_WHITESPACE (ch))
917
      || (*mri_state == '0'
918
          && ch == '1')))
919
      {
920
        mri_last_ch = ch;
921
        ++mri_state;
922
      }
923
    else if (*mri_state != '\0'
924
       || (!IS_WHITESPACE (ch)
925
           && !IS_LINE_SEPARATOR (ch)
926
           && !IS_NEWLINE (ch)))
927
      {
928
        /* We did not get the expected character, or we didn't
929
     get a valid terminating character after seeing the
930
     entire pseudo-op, so we must go back to the
931
     beginning.  */
932
        mri_state = NULL;
933
      }
934
    else
935
      {
936
        /* We've read the entire pseudo-op.  mri_last_ch is
937
     either '0' or '1' indicating whether to enter or
938
     leave MRI mode.  */
939
        do_scrub_begin (mri_last_ch == '1');
940
        mri_state = NULL;
941
942
        /* We continue handling the character as usual.  The
943
     main gas reader must also handle the .mri pseudo-op
944
     to control expression parsing and the like.  */
945
      }
946
  }
947
#endif
948
949
0
      if (ch == EOF)
950
0
  {
951
0
    if (state != 0)
952
0
      {
953
0
        as_warn (_("end of file not at end of a line; newline inserted"));
954
0
        state = 0;
955
0
        PUT ('\n');
956
0
      }
957
0
    goto fromeof;
958
0
  }
959
960
0
      switch (lex[ch])
961
0
  {
962
0
  case LEX_IS_WHITESPACE:
963
0
    do
964
0
      {
965
0
        ch = GET ();
966
0
      }
967
0
    while (ch != EOF && IS_WHITESPACE (ch));
968
0
    if (ch == EOF)
969
0
      goto fromeof;
970
971
0
    if (state == 0)
972
0
      {
973
        /* Preserve a single whitespace character at the
974
     beginning of a line.  */
975
0
        state = 1;
976
0
        UNGET (ch);
977
0
        PUT (' ');
978
0
        break;
979
0
      }
980
981
#ifdef KEEP_WHITE_AROUND_COLON
982
    if (lex[ch] == LEX_IS_COLON)
983
      {
984
        /* Only keep this white if there's no white *after* the
985
     colon.  */
986
        ch2 = GET ();
987
        if (ch2 != EOF)
988
    UNGET (ch2);
989
        if (!IS_WHITESPACE (ch2))
990
    {
991
      state = 9;
992
      UNGET (ch);
993
      PUT (' ');
994
      break;
995
    }
996
      }
997
#endif
998
999
    /* Prune trailing whitespace.  */
1000
0
    if (IS_COMMENT (ch)
1001
0
        || (IS_LINE_COMMENT (ch)
1002
0
            && (state < 1 || strchr (tc_comment_chars, ch)))
1003
0
        || IS_NEWLINE (ch)
1004
0
        || IS_LINE_SEPARATOR (ch)
1005
0
        || IS_PARALLEL_SEPARATOR (ch))
1006
0
      {
1007
0
        if (scrub_m68k_mri)
1008
0
    {
1009
      /* In MRI mode, we keep these spaces.  */
1010
0
      UNGET (ch);
1011
0
      PUT (' ');
1012
0
      break;
1013
0
    }
1014
0
        goto recycle;
1015
0
      }
1016
#ifdef DOUBLESLASH_LINE_COMMENTS
1017
    if (IS_TWOCHAR_COMMENT_1ST (ch))
1018
      {
1019
        ch2 = GET ();
1020
        if (ch2 != EOF)
1021
          UNGET (ch2);
1022
        if (ch2 == '/')
1023
    goto recycle;
1024
      }
1025
#endif
1026
1027
    /* If we're in state 2 or 11, we've seen a non-white
1028
       character followed by whitespace.  If the next character
1029
       is ':', this is whitespace after a label name which we
1030
       normally must ignore.  In MRI mode, though, spaces are
1031
       not permitted between the label and the colon.  */
1032
0
    if ((state == 2 || state == 11)
1033
0
        && lex[ch] == LEX_IS_COLON
1034
0
        && ! scrub_m68k_mri)
1035
0
      {
1036
0
        state = 1;
1037
0
        PUT (ch);
1038
0
        break;
1039
0
      }
1040
1041
0
    switch (state)
1042
0
      {
1043
0
      case 1:
1044
        /* We can arrive here if we leave a leading whitespace
1045
     character at the beginning of a line.  */
1046
0
        goto recycle;
1047
0
      case 2:
1048
0
        state = 3;
1049
0
        if (to + 1 < toend)
1050
0
    {
1051
      /* Optimize common case by skipping UNGET/GET.  */
1052
0
      PUT (' '); /* Sp after opco */
1053
0
      goto recycle;
1054
0
    }
1055
0
        UNGET (ch);
1056
0
        PUT (' ');
1057
0
        break;
1058
0
      case 3:
1059
0
#ifndef TC_KEEP_OPERAND_SPACES
1060
        /* For TI C6X, we keep these spaces as they may separate
1061
     functional unit specifiers from operands.  */
1062
0
        if (scrub_m68k_mri)
1063
0
#endif
1064
0
    {
1065
      /* In MRI mode, we keep these spaces.  */
1066
0
      UNGET (ch);
1067
0
      PUT (' ');
1068
0
      break;
1069
0
    }
1070
0
        goto recycle; /* Sp in operands */
1071
0
      case 9:
1072
0
      case 10:
1073
0
#ifndef TC_KEEP_OPERAND_SPACES
1074
0
        if (scrub_m68k_mri)
1075
0
#endif
1076
0
    {
1077
      /* In MRI mode, we keep these spaces.  */
1078
0
      state = 3;
1079
0
      UNGET (ch);
1080
0
      PUT (' ');
1081
0
      break;
1082
0
    }
1083
0
        state = 10; /* Sp after symbol char */
1084
0
        goto recycle;
1085
0
      case 11:
1086
0
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1087
0
    state = 1;
1088
0
        else
1089
0
    {
1090
      /* We know that ch is not ':', since we tested that
1091
         case above.  Therefore this is not a label, so it
1092
         must be the opcode, and we've just seen the
1093
         whitespace after it.  */
1094
0
      state = 3;
1095
0
    }
1096
0
        UNGET (ch);
1097
0
        PUT (' '); /* Sp after label definition.  */
1098
0
        break;
1099
0
      default:
1100
0
        BAD_CASE (state);
1101
0
      }
1102
0
    break;
1103
1104
0
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1105
0
    ch2 = GET ();
1106
0
    if (ch2 == '*')
1107
0
      {
1108
0
  twochar_comment:
1109
0
        for (;;)
1110
0
    {
1111
0
      do
1112
0
        {
1113
0
          ch2 = GET ();
1114
0
          if (ch2 != EOF && IS_NEWLINE (ch2))
1115
0
      add_newlines++;
1116
0
        }
1117
0
      while (ch2 != EOF && ch2 != '*');
1118
1119
0
      while (ch2 == '*')
1120
0
        ch2 = GET ();
1121
1122
0
      if (ch2 == EOF || ch2 == '/')
1123
0
        break;
1124
1125
      /* This UNGET will ensure that we count newlines
1126
         correctly.  */
1127
0
      UNGET (ch2);
1128
0
    }
1129
1130
0
        if (ch2 == EOF)
1131
0
    as_warn (_("end of file in multiline comment"));
1132
1133
0
        ch = ' ';
1134
0
        goto recycle;
1135
0
      }
1136
#ifdef DOUBLESLASH_LINE_COMMENTS
1137
    else if (ch2 == '/')
1138
      {
1139
        do
1140
    {
1141
      ch = GET ();
1142
    }
1143
        while (ch != EOF && !IS_NEWLINE (ch));
1144
        if (ch == EOF)
1145
    as_warn ("end of file in comment; newline inserted");
1146
        state = 0;
1147
        PUT ('\n');
1148
        break;
1149
      }
1150
#endif
1151
0
    else
1152
0
      {
1153
0
        if (ch2 != EOF)
1154
0
    UNGET (ch2);
1155
0
        if (state == 9 || state == 10)
1156
0
    state = 3;
1157
0
        PUT (ch);
1158
0
      }
1159
0
    break;
1160
1161
0
  case LEX_IS_STRINGQUOTE:
1162
0
    quotechar = ch;
1163
0
    if (state == 10)
1164
0
      {
1165
        /* Preserve the whitespace in foo "bar".  */
1166
0
        UNGET (ch);
1167
0
        state = 3;
1168
0
        PUT (' ');
1169
1170
        /* PUT didn't jump out.  We could just break, but we
1171
     know what will happen, so optimize a bit.  */
1172
0
        ch = GET ();
1173
0
        old_state = 9;
1174
0
      }
1175
0
    else if (state == 3)
1176
0
      old_state = 9;
1177
0
    else if (state == 0)
1178
0
      old_state = 11; /* Now seeing label definition.  */
1179
0
    else
1180
0
      old_state = state;
1181
0
    state = 5;
1182
0
    PUT (ch);
1183
0
    break;
1184
1185
0
  case LEX_IS_ONECHAR_QUOTE:
1186
#ifdef H_TICK_HEX
1187
    if (state == 9 && enable_h_tick_hex)
1188
      {
1189
        char c;
1190
1191
        c = GET ();
1192
        as_warn ("'%c found after symbol", c);
1193
        UNGET (c);
1194
      }
1195
#endif
1196
0
    if (state == 10)
1197
0
      {
1198
        /* Preserve the whitespace in foo 'b'.  */
1199
0
        UNGET (ch);
1200
0
        state = 3;
1201
0
        PUT (' ');
1202
0
        break;
1203
0
      }
1204
0
    ch = GET ();
1205
0
    if (ch == EOF)
1206
0
      {
1207
0
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1208
0
        ch = 0;
1209
0
      }
1210
0
    if (ch == '\\')
1211
0
      {
1212
0
        ch = GET ();
1213
0
        if (ch == EOF)
1214
0
    {
1215
0
      as_warn (_("end of file in escape character"));
1216
0
      ch = '\\';
1217
0
    }
1218
0
        else
1219
0
    ch = process_escape (ch);
1220
0
      }
1221
0
    sprintf (out_buf, "%d", (int) (unsigned char) ch);
1222
1223
    /* None of these 'x constants for us.  We want 'x'.  */
1224
0
    if ((ch = GET ()) != '\'')
1225
0
      {
1226
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1227
        as_warn (_("missing close quote; (assumed)"));
1228
#else
1229
0
        if (ch != EOF)
1230
0
    UNGET (ch);
1231
0
#endif
1232
0
      }
1233
0
    if (strlen (out_buf) == 1)
1234
0
      {
1235
0
        PUT (out_buf[0]);
1236
0
        break;
1237
0
      }
1238
0
    if (state == 9)
1239
0
      old_state = 3;
1240
0
    else
1241
0
      old_state = state;
1242
0
    state = -1;
1243
0
    out_string = out_buf;
1244
0
    PUT (*out_string++);
1245
0
    break;
1246
1247
0
  case LEX_IS_COLON:
1248
#ifdef KEEP_WHITE_AROUND_COLON
1249
    state = 9;
1250
#else
1251
0
    if (state == 9 || state == 10)
1252
0
      state = 3;
1253
0
    else if (state != 3)
1254
0
      state = 1;
1255
0
#endif
1256
0
    PUT (ch);
1257
0
    break;
1258
1259
0
  case LEX_IS_NEWLINE:
1260
    /* Roll out a bunch of newlines from inside comments, etc.  */
1261
0
    if (add_newlines)
1262
0
      {
1263
0
        --add_newlines;
1264
0
        UNGET (ch);
1265
0
      }
1266
    /* Fall through.  */
1267
1268
0
  case LEX_IS_LINE_SEPARATOR:
1269
0
    state = 0;
1270
0
    PUT (ch);
1271
0
    break;
1272
1273
0
  case LEX_IS_PARALLEL_SEPARATOR:
1274
0
    state = 1;
1275
0
    PUT (ch);
1276
0
    break;
1277
1278
#ifdef TC_V850
1279
  case LEX_IS_DOUBLEDASH_1ST:
1280
    ch2 = GET ();
1281
    if (ch2 != '-')
1282
      {
1283
        if (ch2 != EOF)
1284
    UNGET (ch2);
1285
        goto de_fault;
1286
      }
1287
    /* Read and skip to end of line.  */
1288
    do
1289
      {
1290
        ch = GET ();
1291
      }
1292
    while (ch != EOF && ch != '\n');
1293
1294
    if (ch == EOF)
1295
      as_warn (_("end of file in comment; newline inserted"));
1296
1297
    state = 0;
1298
    PUT ('\n');
1299
    break;
1300
#endif
1301
#ifdef DOUBLEBAR_PARALLEL
1302
  case LEX_IS_DOUBLEBAR_1ST:
1303
    ch2 = GET ();
1304
    if (ch2 != EOF)
1305
      UNGET (ch2);
1306
    if (ch2 != '|')
1307
      goto de_fault;
1308
1309
    /* Handle '||' in two states as invoking PUT twice might
1310
       result in the first one jumping out of this loop.  We'd
1311
       then lose track of the state and one '|' char.  */
1312
    state = 13;
1313
    PUT ('|');
1314
    break;
1315
#endif
1316
0
  case LEX_IS_LINE_COMMENT_START:
1317
    /* FIXME-someday: The two character comment stuff was badly
1318
       thought out.  On i386, we want '/' as line comment start
1319
       AND we want C style comments.  hence this hack.  The
1320
       whole lexical process should be reworked.  xoxorich.  */
1321
0
    if (ch == '/')
1322
0
      {
1323
0
        ch2 = GET ();
1324
0
        if (ch2 == '*')
1325
0
    goto twochar_comment;
1326
0
        if (ch2 != EOF)
1327
0
    UNGET (ch2);
1328
0
      }
1329
1330
0
    if (state == 0 || state == 1) /* Only comment at start of line.  */
1331
0
      {
1332
0
        int startch;
1333
1334
0
        startch = ch;
1335
1336
0
        do
1337
0
    {
1338
0
      ch = GET ();
1339
0
    }
1340
0
        while (ch != EOF && IS_WHITESPACE (ch));
1341
1342
0
        if (ch == EOF)
1343
0
    {
1344
0
      as_warn (_("end of file in comment; newline inserted"));
1345
0
      PUT ('\n');
1346
0
      break;
1347
0
    }
1348
1349
0
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1350
0
    {
1351
      /* Not a cpp line.  */
1352
0
      while (ch != EOF && !IS_NEWLINE (ch))
1353
0
        ch = GET ();
1354
0
      if (ch == EOF)
1355
0
        {
1356
0
          as_warn (_("end of file in comment; newline inserted"));
1357
0
          PUT ('\n');
1358
0
        }
1359
0
      else /* IS_NEWLINE (ch) */
1360
0
        {
1361
          /* To process non-zero add_newlines.  */
1362
0
          UNGET (ch);
1363
0
        }
1364
0
      state = 0;
1365
0
      break;
1366
0
    }
1367
        /* Looks like `# 123 "filename"' from cpp.  */
1368
0
        UNGET (ch);
1369
0
        old_state = 4;
1370
0
        state = -1;
1371
0
        if (scrub_m68k_mri)
1372
0
    out_string = "\tlinefile ";
1373
0
        else
1374
0
    out_string = "\t.linefile ";
1375
0
        PUT (*out_string++);
1376
0
        break;
1377
0
      }
1378
1379
#ifdef TC_D10V
1380
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1381
       Trap is the only short insn that has a first operand that is
1382
       neither register nor label.
1383
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1384
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1385
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1386
       only character in line_comment_chars for d10v, hence we
1387
       can recognize it as such.  */
1388
    /* An alternative approach would be to reset the state to 1 when
1389
       we see '||', '<'- or '->', but that seems to be overkill.  */
1390
    if (state == 10)
1391
      PUT (' ');
1392
#endif
1393
    /* We have a line comment character which is not at the
1394
       start of a line.  If this is also a normal comment
1395
       character, fall through.  Otherwise treat it as a default
1396
       character.  */
1397
0
    if (strchr (tc_comment_chars, ch) == NULL)
1398
0
      goto de_fault;
1399
0
    if (scrub_m68k_mri
1400
0
        && (ch == '!' || ch == '*' || ch == '#'))
1401
0
      goto de_fault;
1402
    /* Fall through.  */
1403
0
  case LEX_IS_COMMENT_START:
1404
#if defined TC_ARM && defined OBJ_ELF
1405
    /* On the ARM, `@' is the comment character.
1406
       Unfortunately this is also a special character in ELF .symver
1407
       directives (and .type, though we deal with those another way).
1408
       So we check if this line is such a directive, and treat
1409
       the character as default if so.  This is a hack.  */
1410
    if ((symver_state != NULL) && (*symver_state == 0))
1411
      goto de_fault;
1412
#endif
1413
1414
    /* Care is needed not to damage occurrences of \<comment-char>
1415
       by stripping the <comment-char> onwards.  Yuck.  */
1416
0
    if ((to > tostart ? to[-1] : last_char) == '\\')
1417
      /* Do not treat the <comment-char> as a start-of-comment.  */
1418
0
      goto de_fault;
1419
1420
#ifdef WARN_COMMENTS
1421
    if (!found_comment)
1422
      found_comment_file = as_where (&found_comment);
1423
#endif
1424
0
    do
1425
0
      {
1426
0
        ch = GET ();
1427
0
      }
1428
0
    while (ch != EOF && !IS_NEWLINE (ch));
1429
0
    if (ch == EOF)
1430
0
      as_warn (_("end of file in comment; newline inserted"));
1431
0
    state = 0;
1432
0
    PUT ('\n');
1433
0
    break;
1434
1435
#ifdef H_TICK_HEX
1436
  case LEX_IS_H:
1437
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1438
       the H' with 0x to make them gas-style hex characters.  */
1439
    if (enable_h_tick_hex)
1440
      {
1441
        char quot;
1442
1443
        quot = GET ();
1444
        if (quot == '\'')
1445
    {
1446
      UNGET ('x');
1447
      ch = '0';
1448
    }
1449
        else
1450
    UNGET (quot);
1451
      }
1452
#endif
1453
    /* Fall through.  */
1454
1455
0
  case LEX_IS_SYMBOL_COMPONENT:
1456
0
    if (state == 10)
1457
0
      {
1458
        /* This is a symbol character following another symbol
1459
     character, with whitespace in between.  We skipped
1460
     the whitespace earlier, so output it now.  */
1461
0
        UNGET (ch);
1462
0
        state = 3;
1463
0
        PUT (' ');
1464
0
        break;
1465
0
      }
1466
1467
#ifdef TC_Z80
1468
    /* "af'" is a symbol containing '\''.  */
1469
    if (state == 3 && (ch == 'a' || ch == 'A'))
1470
      {
1471
        state = 16;
1472
        PUT (ch);
1473
        ch = GET ();
1474
        if (ch == 'f' || ch == 'F')
1475
    {
1476
      state = 17;
1477
      PUT (ch);
1478
      break;
1479
    }
1480
        else
1481
    {
1482
      state = 9;
1483
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1484
        {
1485
          if (ch != EOF)
1486
      UNGET (ch);
1487
          break;
1488
        }
1489
    }
1490
      }
1491
#endif
1492
0
    if (state == 3)
1493
0
      state = 9;
1494
1495
    /* This is a common case.  Quickly copy CH and all the
1496
       following symbol component or normal characters.  */
1497
0
    if (to + 1 < toend
1498
#ifdef TC_M68K
1499
        && mri_state == NULL
1500
#endif
1501
#if defined TC_ARM && defined OBJ_ELF
1502
        && symver_state == NULL
1503
#endif
1504
0
        && end_state == NULL)
1505
0
      {
1506
0
        char *s;
1507
0
        ptrdiff_t len;
1508
1509
0
        for (s = from; s < fromend; s++)
1510
0
    {
1511
0
      int type;
1512
1513
0
      ch2 = *(unsigned char *) s;
1514
0
      type = lex[ch2];
1515
0
      if (type != 0
1516
0
          && type != LEX_IS_SYMBOL_COMPONENT)
1517
0
        break;
1518
0
    }
1519
1520
0
        if (s > from)
1521
    /* Handle the last character normally, for
1522
       simplicity.  */
1523
0
    --s;
1524
1525
0
        len = s - from;
1526
1527
0
        if (len > (toend - to) - 1)
1528
0
    len = (toend - to) - 1;
1529
1530
0
        if (len > 0)
1531
0
    {
1532
0
      PUT (ch);
1533
0
      memcpy (to, from, len);
1534
0
      to += len;
1535
0
      from += len;
1536
0
      if (to >= toend)
1537
0
        goto tofull;
1538
0
      ch = GET ();
1539
0
    }
1540
0
      }
1541
1542
    /* Fall through.  */
1543
0
  default:
1544
0
  de_fault:
1545
    /* Some relatively `normal' character.  */
1546
0
    if (state == 0)
1547
0
      {
1548
0
        state = 11; /* Now seeing label definition.  */
1549
0
      }
1550
0
    else if (state == 1)
1551
0
      {
1552
0
        state = 2;  /* Ditto.  */
1553
0
      }
1554
0
    else if (state == 9)
1555
0
      {
1556
0
        if (!IS_SYMBOL_COMPONENT (ch))
1557
0
    state = 3;
1558
0
      }
1559
0
    else if (state == 10)
1560
0
      {
1561
0
        if (ch == '\\')
1562
0
    {
1563
      /* Special handling for backslash: a backslash may
1564
         be the beginning of a formal parameter (of a
1565
         macro) following another symbol character, with
1566
         whitespace in between.  If that is the case, we
1567
         output a space before the parameter.  Strictly
1568
         speaking, correct handling depends upon what the
1569
         macro parameter expands into; if the parameter
1570
         expands into something which does not start with
1571
         an operand character, then we don't want to keep
1572
         the space.  We don't have enough information to
1573
         make the right choice, so here we are making the
1574
         choice which is more likely to be correct.  */
1575
0
      if (to + 1 >= toend)
1576
0
        {
1577
          /* If we're near the end of the buffer, save the
1578
             character for the next time round.  Otherwise
1579
             we'll lose our state.  */
1580
0
          UNGET (ch);
1581
0
          goto tofull;
1582
0
        }
1583
0
      *to++ = ' ';
1584
0
    }
1585
1586
0
        state = 3;
1587
0
      }
1588
0
    PUT (ch);
1589
0
    break;
1590
0
  }
1591
0
    }
1592
1593
  /*NOTREACHED*/
1594
1595
0
 fromeof:
1596
  /* We have reached the end of the input.  */
1597
0
  if (to > tostart)
1598
0
    last_char = to[-1];
1599
0
  return to - tostart;
1600
1601
0
 tofull:
1602
  /* The output buffer is full.  Save any input we have not yet
1603
     processed.  */
1604
0
  if (fromend > from)
1605
0
    {
1606
0
      saved_input = from;
1607
0
      saved_input_len = fromend - from;
1608
0
    }
1609
0
  else
1610
0
    saved_input = NULL;
1611
1612
0
  if (to > tostart)
1613
0
    last_char = to[-1];
1614
0
  return to - tostart;
1615
0
}
1616
1617
/* Return amount of pending input.  */
1618
1619
size_t
1620
do_scrub_pending (void)
1621
0
{
1622
0
  size_t len = 0;
1623
0
  if (saved_input)
1624
0
    len += saved_input_len;
1625
0
  if (state == -1)
1626
0
    len += strlen (out_string);
1627
0
  return len;
1628
0
}