Coverage Report

Created: 2024-05-21 06:29

/src/binutils-gdb/gas/app.c
Line
Count
Source (jump to first uncovered line)
1
/* This is the Assembler Pre-Processor
2
   Copyright (C) 1987-2024 Free Software Foundation, Inc.
3
4
   This file is part of GAS, the GNU Assembler.
5
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
/* When the compiler does not report strict ISO C conformance
   (__STDC__ != 1), make `const' expand to nothing, unless something
   has already defined it as a macro.  */
#if (__STDC__ != 1)
30
#ifndef const
31
#define const  /* empty */
32
#endif
33
#endif
34
35
/* Only present for targets that define H_TICK_HEX.  When non-zero,
   do_scrub_begin classifies 'h' and 'H' as LEX_IS_H.  Has external
   linkage; where it gets set is not visible in this file.  */
#ifdef H_TICK_HEX
36
int enable_h_tick_hex = 0;
37
#endif
38
39
#ifdef TC_M68K
40
/* Whether we are scrubbing in m68k MRI mode.  This is different from
41
   flag_m68k_mri, because the two flags will be affected by the .mri
42
   pseudo-op at different times.  Set by do_scrub_begin and saved and
   restored by app_push/app_pop.  */
43
static int scrub_m68k_mri;
44
45
/* The pseudo-op which switches in and out of MRI mode.  See the
46
   comment in do_scrub_chars.  The recognizer there also accepts any
   whitespace for the ' ' and a '1' for the '0', so this one string
   covers both ".mri 0" and ".mri 1".  */
47
static const char mri_pseudo[] = ".mri 0";
48
#else
/* Without TC_M68K the flag is the constant 0, so every test of
   scrub_m68k_mri in this file is constant-false.  */
49
4.00M
#define scrub_m68k_mri 0
50
#endif
51
52
#if defined TC_ARM && defined OBJ_ELF
53
/* The pseudo-op for which we need to special-case `@' characters.
54
   See the comment in do_scrub_chars.  */
55
static const char   symver_pseudo[] = ".symver";
56
/* Progress of the .symver recognizer in do_scrub_chars: NULL while no
   match is in progress, otherwise a pointer to the next character of
   symver_pseudo that still has to be matched.  */
static const char * symver_state;
57
#endif
58
59
/* Part of the scrubber state that app_push saves and app_pop restores.
   NOTE(review): its other uses lie outside the code visible here.  */
static char last_char;
60
61
/* Character class table, indexed by character value and filled in by
   do_scrub_begin with the LEX_IS_* values below.  An entry of 0 means
   that no class has been assigned to that character.  */
static char lex[256];
62
/* Characters that do_scrub_begin marks as LEX_IS_SYMBOL_COMPONENT by
   default (it additionally marks every value from 128 to 255).  */
static const char symbol_chars[] =
63
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
64
65
51.0M
/* Character classes stored in lex[].  0 is deliberately not used as a
   class (it means "unclassified"), and the value 7 is not assigned.
   Several classes exist only for particular targets.  */
#define LEX_IS_SYMBOL_COMPONENT   1
66
5.64M
#define LEX_IS_WHITESPACE   2
67
8.28M
#define LEX_IS_LINE_SEPARATOR   3
68
1.79M
#define LEX_IS_COMMENT_START    4
69
357k
#define LEX_IS_LINE_COMMENT_START 5
70
0
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
71
715k
#define LEX_IS_STRINGQUOTE    8
72
2.82M
#define LEX_IS_COLON      9
73
20.0M
#define LEX_IS_NEWLINE      10
74
225k
#define LEX_IS_ONECHAR_QUOTE    11
75
#ifdef TC_V850
76
#define LEX_IS_DOUBLEDASH_1ST   12
77
#endif
78
/* The M32R target always enables the "||" parallel separator.  */
#ifdef TC_M32R
79
#define DOUBLEBAR_PARALLEL
80
#endif
81
#ifdef DOUBLEBAR_PARALLEL
82
#define LEX_IS_DOUBLEBAR_1ST    13
83
#endif
84
1.68M
#define LEX_IS_PARALLEL_SEPARATOR 14
85
#ifdef H_TICK_HEX
86
#define LEX_IS_H      15
87
#endif
88
/* Class tests.  C is used directly as the index into lex[], so it must
   be a valid index (0..255); the visible callers test for EOF before
   applying one of these to a character.  */
1.91M
#define IS_SYMBOL_COMPONENT(c)    (lex[c] == LEX_IS_SYMBOL_COMPONENT)
89
3.65M
#define IS_WHITESPACE(c)    (lex[c] == LEX_IS_WHITESPACE)
90
3.40M
#define IS_LINE_SEPARATOR(c)    (lex[c] == LEX_IS_LINE_SEPARATOR)
91
1.68M
#define IS_PARALLEL_SEPARATOR(c)  (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
92
3.40M
#define IS_COMMENT(c)     (lex[c] == LEX_IS_COMMENT_START)
93
#define IS_LINE_COMMENT(c)    (lex[c] == LEX_IS_LINE_COMMENT_START)
94
14.4M
#define IS_NEWLINE(c)     (lex[c] == LEX_IS_NEWLINE)
95
96
/* Forward declaration; the definition appears later in this file.  */
static int process_escape (int);
97
98
/* FIXME-soon: The entire lexer/parser thingy should be
99
   built statically at compile time rather than dynamically
100
   each and every time the assembler is run.  xoxorich.  */
101
102
void
103
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
104
736
{
105
736
  const char *p;
106
736
  int c;
107
108
736
  lex[' '] = LEX_IS_WHITESPACE;
109
736
  lex['\t'] = LEX_IS_WHITESPACE;
110
736
  lex['\r'] = LEX_IS_WHITESPACE;
111
736
  lex['\n'] = LEX_IS_NEWLINE;
112
736
  lex[':'] = LEX_IS_COLON;
113
114
#ifdef TC_M68K
115
  scrub_m68k_mri = m68k_mri;
116
117
  if (! m68k_mri)
118
#endif
119
736
    {
120
736
      lex['"'] = LEX_IS_STRINGQUOTE;
121
122
736
#if ! defined (TC_HPPA)
123
736
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
124
736
#endif
125
126
#ifdef SINGLE_QUOTE_STRINGS
127
      lex['\''] = LEX_IS_STRINGQUOTE;
128
#endif
129
736
    }
130
131
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
132
     in state 5 of do_scrub_chars must be changed.  */
133
134
  /* Note that these override the previous defaults, e.g. if ';' is a
135
     comment char, then it isn't a line separator.  */
136
48.5k
  for (p = symbol_chars; *p; ++p)
137
47.8k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
138
139
94.9k
  for (c = 128; c < 256; ++c)
140
94.2k
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
141
142
736
#ifdef tc_symbol_chars
143
  /* This macro permits the processor to specify all characters which
144
     may appears in an operand.  This will prevent the scrubber from
145
     discarding meaningful whitespace in certain cases.  The i386
146
     backend uses this to support prefixes, which can confuse the
147
     scrubber as to whether it is parsing operands or opcodes.  */
148
5.88k
  for (p = tc_symbol_chars; *p; ++p)
149
5.15k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
150
736
#endif
151
152
  /* The m68k backend wants to be able to change comment_chars.  */
153
#ifndef tc_comment_chars
154
#define tc_comment_chars comment_chars
155
#endif
156
1.47k
  for (p = tc_comment_chars; *p; p++)
157
736
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
158
159
2.20k
  for (p = line_comment_chars; *p; p++)
160
1.47k
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
161
162
736
#ifndef tc_line_separator_chars
163
736
#define tc_line_separator_chars line_separator_chars
164
736
#endif
165
1.47k
  for (p = tc_line_separator_chars; *p; p++)
166
736
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
167
168
#ifdef tc_parallel_separator_chars
169
  /* This macro permits the processor to specify all characters which
170
     separate parallel insns on the same line.  */
171
  for (p = tc_parallel_separator_chars; *p; p++)
172
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
173
#endif
174
175
  /* Only allow slash-star comments if slash is not in use.
176
     FIXME: This isn't right.  We should always permit them.  */
177
736
  if (lex['/'] == 0)
178
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
179
180
#ifdef TC_M68K
181
  if (m68k_mri)
182
    {
183
      lex['\''] = LEX_IS_STRINGQUOTE;
184
      lex[';'] = LEX_IS_COMMENT_START;
185
      lex['*'] = LEX_IS_LINE_COMMENT_START;
186
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
187
   then it can't be used in an expression.  */
188
      lex['!'] = LEX_IS_LINE_COMMENT_START;
189
    }
190
#endif
191
192
#ifdef TC_V850
193
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
194
#endif
195
#ifdef DOUBLEBAR_PARALLEL
196
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
197
#endif
198
#ifdef TC_D30V
199
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
200
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
201
#endif
202
203
#ifdef H_TICK_HEX
204
  if (enable_h_tick_hex)
205
    {
206
      lex['h'] = LEX_IS_H;
207
      lex['H'] = LEX_IS_H;
208
    }
209
#endif
210
736
}
211
212
/* Saved state of the scrubber.  All of these persist between calls to
   do_scrub_chars, and are snapshotted by app_push and restored by
   app_pop.  */
213
/* Current state of the do_scrub_chars state machine.  */
static int state;
214
/* State to return to once one of the temporary states (-1, -2, 5)
   has finished.  */
static int old_state;
215
/* Next character to emit while in state -1.  */
static const char *out_string;
216
/* Saved and restored with the rest of the state.  NOTE(review):
   presumably the storage that out_string points into; its use is
   outside the code visible here, confirm.  */
static char out_buf[20];
217
/* Count of newlines consumed inside continued strings and multi-line
   comments.  NOTE(review): where it is consumed is not visible here.  */
static int add_newlines;
218
/* Input left unconsumed by the previous do_scrub_chars call, or NULL
   if there is none, and its length.  */
static char *saved_input;
219
static size_t saved_input_len;
220
/* Buffer that the GET callback of do_scrub_chars fills with raw
   input.  */
static char input_buffer[32 * 1024];
221
/* Progress of the `.mri' recognizer in do_scrub_chars: NULL while no
   match is in progress, otherwise the next character of mri_pseudo
   still to be matched.  */
static const char *mri_state;
222
/* Last character accepted by the `.mri' recognizer; after a full match
   it is the '0' or '1' operand.  */
static char mri_last_ch;
223
224
/* Data structure for saving the state of app across #include's.  Note that
225
   app is called asynchronously to the parsing of the .include's, so our
226
   state at the time .include is interpreted is completely unrelated.
227
   That's why we have to save it all.  */
228
229
/* Snapshot of the file-scope scrubber variables.  app_push allocates
   and fills one; app_pop copies it back and frees it.  Each member
   mirrors the file-scope variable of the same name.  */
struct app_save
230
{
231
  int          state;
232
  int          old_state;
233
  const char * out_string;
234
  char         out_buf[sizeof (out_buf)];
235
  int          add_newlines;
236
  /* Heap copy of the pending input made by app_push, or NULL if there
     was none.  Freed by app_pop.  */
  char *       saved_input;
237
  /* Only meaningful when saved_input is not NULL.  */
  size_t       saved_input_len;
238
#ifdef TC_M68K
239
  int          scrub_m68k_mri;
240
#endif
241
  const char * mri_state;
242
  char         mri_last_ch;
243
#if defined TC_ARM && defined OBJ_ELF
244
  const char * symver_state;
245
#endif
246
  char         last_char;
247
};
248
249
char *
250
app_push (void)
251
17.8k
{
252
17.8k
  struct app_save *saved;
253
254
17.8k
  saved = XNEW (struct app_save);
255
17.8k
  saved->state = state;
256
17.8k
  saved->old_state = old_state;
257
17.8k
  saved->out_string = out_string;
258
17.8k
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
259
17.8k
  saved->add_newlines = add_newlines;
260
17.8k
  if (saved_input == NULL)
261
13.4k
    saved->saved_input = NULL;
262
4.46k
  else
263
4.46k
    {
264
4.46k
      saved->saved_input = XNEWVEC (char, saved_input_len);
265
4.46k
      memcpy (saved->saved_input, saved_input, saved_input_len);
266
4.46k
      saved->saved_input_len = saved_input_len;
267
4.46k
    }
268
#ifdef TC_M68K
269
  saved->scrub_m68k_mri = scrub_m68k_mri;
270
#endif
271
17.8k
  saved->mri_state = mri_state;
272
17.8k
  saved->mri_last_ch = mri_last_ch;
273
#if defined TC_ARM && defined OBJ_ELF
274
  saved->symver_state = symver_state;
275
#endif
276
17.8k
  saved->last_char = last_char;
277
278
  /* do_scrub_begin() is not useful, just wastes time.  */
279
280
17.8k
  state = 0;
281
17.8k
  saved_input = NULL;
282
17.8k
  add_newlines = 0;
283
284
17.8k
  return (char *) saved;
285
17.8k
}
286
287
void
288
app_pop (char *arg)
289
17.8k
{
290
17.8k
  struct app_save *saved = (struct app_save *) arg;
291
292
  /* There is no do_scrub_end ().  */
293
17.8k
  state = saved->state;
294
17.8k
  old_state = saved->old_state;
295
17.8k
  out_string = saved->out_string;
296
17.8k
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
297
17.8k
  add_newlines = saved->add_newlines;
298
17.8k
  if (saved->saved_input == NULL)
299
13.4k
    saved_input = NULL;
300
4.46k
  else
301
4.46k
    {
302
4.46k
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
303
4.46k
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
304
4.46k
      saved_input = input_buffer;
305
4.46k
      saved_input_len = saved->saved_input_len;
306
4.46k
      free (saved->saved_input);
307
4.46k
    }
308
#ifdef TC_M68K
309
  scrub_m68k_mri = saved->scrub_m68k_mri;
310
#endif
311
17.8k
  mri_state = saved->mri_state;
312
17.8k
  mri_last_ch = saved->mri_last_ch;
313
#if defined TC_ARM && defined OBJ_ELF
314
  symver_state = saved->symver_state;
315
#endif
316
17.8k
  last_char = saved->last_char;
317
318
17.8k
  free (arg);
319
17.8k
}
320
321
/* Translate CH, the character following a backslash, into the
   character the escape stands for.  The letters b, f, n, r and t map
   to the corresponding control characters; a single or double quote
   maps to itself, as does every other value.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */

static int
process_escape (int ch)
{
  static const struct
  {
    char letter;
    char value;
  }
  escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' },
    { '\'', '\'' },
    { '"', '\"' }
  };
  size_t idx;

  for (idx = 0; idx < sizeof (escapes) / sizeof (escapes[0]); idx++)
    if (ch == escapes[idx].letter)
      return escapes[idx].value;

  /* Not a recognised escape letter: hand the character back as is.  */
  return ch;
}
347
348
0
/* Total number of multibyte character warnings that may be issued.  */
#define MULTIBYTE_WARN_COUNT_LIMIT 10
/* Number of multibyte character warnings issued so far.  */
static unsigned int multibyte_warn_count = 0;

/* Scan the bytes in [START, END) for values above 0x7f.

   If WARN is false, nothing is reported: the function returns true as
   soon as such a byte is seen, and false if there is none.

   If WARN is true, a warning naming the byte, and the current file
   and line when as_where supplies them, is issued for each such byte.
   When the total number of warnings reaches
   MULTIBYTE_WARN_COUNT_LIMIT, a final notice is issued and scanning
   stops.  The return value is true if this call reported at least one
   byte.  Once the limit has been reached, every later call with WARN
   true returns false without scanning.

   Returns false when the range is empty (END <= START).  */

bool
scan_for_multibyte_characters (const unsigned char *  start,
			       const unsigned char *  end,
			       bool                   warn)
{
  if (end <= start)
    return false;

  /* Use >=, not >.  The counter stops at exactly the limit when the
     "suppressed" notice is issued; with > the next call would get past
     this test and, since the counter then moves beyond the limit and
     never equals it again, would warn about every remaining byte of
     that buffer.  */
  if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
    return false;

  bool found = false;

  while (start < end)
    {
      unsigned char c = *start++;

      if (c <= 0x7f)
	continue;

      if (!warn)
	return true;

      found = true;

      const char * filename;
      unsigned int lineno;

      filename = as_where (& lineno);
      if (filename == NULL)
	as_warn (_("multibyte character (%#x) encountered in input"), c);
      else if (lineno == 0)
	as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
      else
	as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);

      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
	{
	  as_warn (_("further multibyte character warnings suppressed"));
	  break;
	}
    }

  return found;
}
396
397
/* This function is called to process input characters.  The GET
398
   parameter is used to retrieve more input characters.  GET should
399
   set its parameter to point to a buffer, and return the length of
400
   the buffer; it should return 0 at end of file.  The scrubbed output
401
   characters are put into the buffer starting at TOSTART; the TOSTART
402
   buffer is TOLEN bytes in length.  The function returns the number
403
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
404
   end of file was seen.  This function is arranged as a state
405
   machine, and saves its state so that it may return at any point.
406
   This is the way the old code used to work.  */
407
408
size_t
409
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
410
11.2k
{
411
11.2k
  char *to = tostart;
412
11.2k
  char *toend = tostart + tolen;
413
11.2k
  char *from;
414
11.2k
  char *fromend;
415
11.2k
  size_t fromlen;
416
11.2k
  int ch, ch2 = 0;
417
  /* Character that started the string we're working on.  */
418
11.2k
  static char quotechar;
419
420
  /*State 0: beginning of normal line
421
    1: After first whitespace on line (flush more white)
422
    2: After first non-white (opcode) on line (keep 1white)
423
    3: after second white on line (into operands) (flush white)
424
    4: after putting out a .linefile, put out digits
425
    5: parsing a string, then go to old-state
426
    6: putting out \ escape in a "d string.
427
    7: no longer used
428
    8: no longer used
429
    9: After seeing symbol char in state 3 (keep 1white after symchar)
430
   10: After seeing whitespace in state 9 (keep white before symchar)
431
   11: After seeing a symbol character in state 0 (eg a label definition)
432
   -1: output string in out_string and go to the state in old_state
433
   -2: flush text until a '*' '/' is seen, then go to state old_state
434
#ifdef TC_V850
435
   12: After seeing a dash, looking for a second dash as a start
436
       of comment.
437
#endif
438
#ifdef DOUBLEBAR_PARALLEL
439
   13: After seeing a vertical bar, looking for a second
440
       vertical bar as a parallel expression separator.
441
#endif
442
#ifdef TC_PREDICATE_START_CHAR
443
   14: After seeing a predicate start character at state 0, looking
444
       for a predicate end character as predicate.
445
   15: After seeing a predicate start character at state 1, looking
446
       for a predicate end character as predicate.
447
#endif
448
#ifdef TC_Z80
449
   16: After seeing an 'a' or an 'A' at the start of a symbol
450
   17: After seeing an 'f' or an 'F' in state 16
451
#endif
452
    */
453
454
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
455
     constructs like ``.loc 1 20''.  This was turning into ``.loc
456
     120''.  States 9 and 10 ensure that a space is never dropped in
457
     between characters which could appear in an identifier.  Ian
458
     Taylor, ian@cygnus.com.
459
460
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
461
     correctly on the PA (and any other target where colons are optional).
462
     Jeff Law, law@cs.utah.edu.
463
464
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
465
     get squashed into "cmp r1,r2||trap#1", with the all important space
466
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
467
468
  /* This macro gets the next input character.  */
469
470
11.2k
#define GET()             \
471
58.2M
  (from < fromend            \
472
58.2M
   ? * (unsigned char *) (from++)        \
473
58.2M
   : (saved_input = NULL,          \
474
12.9k
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
475
12.9k
      from = input_buffer,          \
476
12.9k
      fromend = from + fromlen,         \
477
12.9k
      (fromlen == 0            \
478
12.9k
       ? EOF              \
479
12.9k
       : * (unsigned char *) (from++))))
480
481
  /* This macro pushes a character back on the input stream.  */
482
483
2.05M
#define UNGET(uch) (*--from = (uch))
484
485
  /* This macro puts a character into the output buffer.  If this
486
     character fills the output buffer, this macro jumps to the label
487
     TOFULL.  We use this rather ugly approach because we need to
488
     handle two different termination conditions: EOF on the input
489
     stream, and a full output buffer.  It would be simpler if we
490
     always read in the entire input stream before processing it, but
491
     I don't want to make such a significant change to the assembler's
492
     memory usage.  */
493
494
11.2k
#define PUT(pch)        \
495
23.3M
  do            \
496
23.3M
    {           \
497
23.3M
      *to++ = (pch);        \
498
23.3M
      if (to >= toend)       \
499
23.3M
  goto tofull;       \
500
23.3M
    }           \
501
23.3M
  while (0)
502
503
11.2k
  if (saved_input != NULL)
504
1.82k
    {
505
1.82k
      from = saved_input;
506
1.82k
      fromend = from + saved_input_len;
507
1.82k
    }
508
9.43k
  else
509
9.43k
    {
510
9.43k
      fromlen = (*get) (input_buffer, sizeof input_buffer);
511
9.43k
      if (fromlen == 0)
512
693
  return 0;
513
8.74k
      from = input_buffer;
514
8.74k
      fromend = from + fromlen;
515
516
8.74k
      if (multibyte_handling == multibyte_warn)
517
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
518
0
                (const unsigned char* ) fromend,
519
0
                true /* Generate warnings.  */);
520
8.74k
    }
521
522
20.3M
  while (1)
523
20.3M
    {
524
      /* The cases in this switch end with continue, in order to
525
   branch back to the top of this while loop and generate the
526
   next output character in the appropriate state.  */
527
20.3M
      switch (state)
528
20.3M
  {
529
588k
  case -1:
530
588k
    ch = *out_string++;
531
588k
    if (*out_string == '\0')
532
229k
      {
533
229k
        state = old_state;
534
229k
        old_state = 3;
535
229k
      }
536
588k
    PUT (ch);
537
588k
    continue;
538
539
588k
  case -2:
540
13.8k
    for (;;)
541
37.4k
      {
542
37.4k
        do
543
15.9M
    {
544
15.9M
      ch = GET ();
545
546
15.9M
      if (ch == EOF)
547
21
        {
548
21
          as_warn (_("end of file in comment"));
549
21
          goto fromeof;
550
21
        }
551
552
15.9M
      if (ch == '\n')
553
492k
        PUT ('\n');
554
15.9M
    }
555
15.9M
        while (ch != '*');
556
557
47.3k
        while ((ch = GET ()) == '*')
558
9.98k
    ;
559
560
37.3k
        if (ch == EOF)
561
1
    {
562
1
      as_warn (_("end of file in comment"));
563
1
      goto fromeof;
564
1
    }
565
566
37.3k
        if (ch == '/')
567
13.8k
    break;
568
569
23.5k
        UNGET (ch);
570
23.5k
      }
571
572
13.8k
    state = old_state;
573
13.8k
    UNGET (' ');
574
13.8k
    continue;
575
576
65.1k
  case 4:
577
65.1k
    ch = GET ();
578
65.1k
    if (ch == EOF)
579
30
      goto fromeof;
580
65.0k
    else if (ch >= '0' && ch <= '9')
581
42.9k
      PUT (ch);
582
22.1k
    else
583
22.1k
      {
584
22.1k
        while (ch != EOF && IS_WHITESPACE (ch))
585
10
    ch = GET ();
586
22.1k
        if (ch == '"')
587
16.8k
    {
588
16.8k
      quotechar = ch;
589
16.8k
      state = 5;
590
16.8k
      old_state = 3;
591
16.8k
      PUT (ch);
592
16.8k
    }
593
5.30k
        else
594
5.30k
    {
595
1.52M
      while (ch != EOF && ch != '\n')
596
1.52M
        ch = GET ();
597
5.30k
      state = 0;
598
5.30k
      PUT (ch);
599
5.30k
    }
600
22.1k
      }
601
65.0k
    continue;
602
603
2.86M
  case 5:
604
    /* We are going to copy everything up to a quote character,
605
       with special handling for a backslash.  We try to
606
       optimize the copying in the simple case without using the
607
       GET and PUT macros.  */
608
2.86M
    {
609
2.86M
      char *s;
610
2.86M
      ptrdiff_t len;
611
612
62.2M
      for (s = from; s < fromend; s++)
613
62.2M
        {
614
62.2M
    ch = *s;
615
62.2M
    if (ch == '\\'
616
62.2M
        || ch == quotechar
617
62.2M
        || ch == '\n')
618
2.86M
      break;
619
62.2M
        }
620
2.86M
      len = s - from;
621
2.86M
      if (len > toend - to)
622
733
        len = toend - to;
623
2.86M
      if (len > 0)
624
2.08M
        {
625
2.08M
    memcpy (to, from, len);
626
2.08M
    to += len;
627
2.08M
    from += len;
628
2.08M
    if (to >= toend)
629
872
      goto tofull;
630
2.08M
        }
631
2.86M
    }
632
633
2.86M
    ch = GET ();
634
2.86M
    if (ch == EOF)
635
370
      {
636
        /* This buffer is here specifically so
637
     that the UNGET below will work.  */
638
370
        static char one_char_buf[1];
639
640
370
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
641
370
        state = old_state;
642
370
        from = fromend = one_char_buf + 1;
643
370
        fromlen = 1;
644
370
        UNGET ('\n');
645
370
        PUT (quotechar);
646
370
      }
647
2.86M
    else if (ch == quotechar)
648
731k
      {
649
731k
        state = old_state;
650
731k
        PUT (ch);
651
731k
      }
652
2.13M
    else if (TC_STRING_ESCAPES && ch == '\\')
653
856k
      {
654
856k
        state = 6;
655
856k
        PUT (ch);
656
856k
      }
657
1.27M
    else if (scrub_m68k_mri && ch == '\n')
658
0
      {
659
        /* Just quietly terminate the string.  This permits lines like
660
       bne  label loop if we haven't reach end yet.  */
661
0
        state = old_state;
662
0
        UNGET (ch);
663
0
        PUT ('\'');
664
0
      }
665
1.27M
    else
666
1.27M
      {
667
1.27M
        PUT (ch);
668
1.27M
      }
669
2.86M
    continue;
670
671
2.86M
  case 6:
672
856k
    state = 5;
673
856k
    ch = GET ();
674
856k
    switch (ch)
675
856k
      {
676
        /* Handle strings broken across lines, by turning '\n' into
677
     '\\' and 'n'.  */
678
1.56k
      case '\n':
679
1.56k
        UNGET ('n');
680
1.56k
        add_newlines++;
681
1.56k
        PUT ('\\');
682
1.56k
        continue;
683
684
1.56k
      case EOF:
685
2
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
686
2
        PUT (quotechar);
687
1
        continue;
688
689
        /* These two are used inside macros.  */
690
40
      case '@':
691
77
      case '+':
692
77
        break;
693
694
6.14k
      case '"':
695
47.2k
      case '\\':
696
47.2k
      case 'b':
697
60.3k
      case 'f':
698
66.0k
      case 'n':
699
83.4k
      case 'r':
700
233k
      case 't':
701
233k
      case 'v':
702
236k
      case 'x':
703
239k
      case 'X':
704
246k
      case '0':
705
248k
      case '1':
706
248k
      case '2':
707
249k
      case '3':
708
249k
      case '4':
709
249k
      case '5':
710
250k
      case '6':
711
250k
      case '7':
712
250k
        break;
713
714
604k
      default:
715
#ifdef ONLY_STANDARD_ESCAPES
716
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
717
#endif
718
604k
        break;
719
856k
      }
720
854k
    PUT (ch);
721
854k
    continue;
722
723
#ifdef DOUBLEBAR_PARALLEL
724
  case 13:
725
    ch = GET ();
726
    if (ch != '|')
727
      abort ();
728
729
    /* Reset back to state 1 and pretend that we are parsing a
730
       line from just after the first white space.  */
731
    state = 1;
732
    PUT ('|');
733
#ifdef TC_TIC6X
734
    /* "||^" is used for SPMASKed instructions.  */
735
    ch = GET ();
736
    if (ch == EOF)
737
      goto fromeof;
738
    else if (ch == '^')
739
      PUT ('^');
740
    else
741
      UNGET (ch);
742
#endif
743
    continue;
744
#endif
745
#ifdef TC_Z80
746
  case 16:
747
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
748
    ch = GET ();
749
    if (ch == 'f' || ch == 'F')
750
      {
751
        state = 17;
752
        PUT (ch);
753
      }
754
    else
755
      {
756
        if (ch != EOF)
757
    UNGET (ch);
758
        state = 9;
759
        break;
760
      }
761
    /* Fall through.  */
762
  case 17:
763
    /* We have seen "af" at the start of a symbol,
764
       a ' here is a part of that symbol.  */
765
    ch = GET ();
766
    state = 9;
767
    if (ch == '\'')
768
      /* Change to avoid warning about unclosed string.  */
769
      PUT ('`');
770
    else if (ch != EOF)
771
      UNGET (ch);
772
    break;
773
#endif
774
20.3M
  }
775
776
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
777
778
      /* flushchar: */
779
15.9M
      ch = GET ();
780
781
#ifdef TC_PREDICATE_START_CHAR
782
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
783
  {
784
    state += 14;
785
    PUT (ch);
786
    continue;
787
  }
788
      else if (state == 14 || state == 15)
789
  {
790
    if (ch == TC_PREDICATE_END_CHAR)
791
      {
792
        state -= 14;
793
        PUT (ch);
794
        ch = GET ();
795
      }
796
    else
797
      {
798
        PUT (ch);
799
        continue;
800
      }
801
  }
802
#endif
803
804
17.2M
    recycle:
805
806
#if defined TC_ARM && defined OBJ_ELF
807
      /* We need to watch out for .symver directives.  See the comment later
808
   in this function.  */
809
      if (symver_state == NULL)
810
  {
811
    if ((state == 0 || state == 1) && ch == symver_pseudo[0])
812
      symver_state = symver_pseudo + 1;
813
  }
814
      else
815
  {
816
    /* We advance to the next state if we find the right
817
       character.  */
818
    if (ch != '\0' && (*symver_state == ch))
819
      ++symver_state;
820
    else if (*symver_state != '\0')
821
      /* We did not get the expected character, or we didn't
822
         get a valid terminating character after seeing the
823
         entire pseudo-op, so we must go back to the beginning.  */
824
      symver_state = NULL;
825
    else
826
      {
827
        /* We've read the entire pseudo-op.  If this is the end
828
     of the line, go back to the beginning.  */
829
        if (IS_NEWLINE (ch))
830
    symver_state = NULL;
831
      }
832
  }
833
#endif /* TC_ARM && OBJ_ELF */
834
835
#ifdef TC_M68K
836
      /* We want to have pseudo-ops which control whether we are in
837
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
838
   the scrubber, that means that we need a special purpose
839
   recognizer here.  */
840
      if (mri_state == NULL)
841
  {
842
    if ((state == 0 || state == 1)
843
        && ch == mri_pseudo[0])
844
      mri_state = mri_pseudo + 1;
845
  }
846
      else
847
  {
848
    /* We advance to the next state if we find the right
849
       character, or if we need a space character and we get any
850
       whitespace character, or if we need a '0' and we get a
851
       '1' (this is so that we only need one state to handle
852
       ``.mri 0'' and ``.mri 1'').  */
853
    if (ch != '\0'
854
        && (*mri_state == ch
855
      || (*mri_state == ' '
856
          && lex[ch] == LEX_IS_WHITESPACE)
857
      || (*mri_state == '0'
858
          && ch == '1')))
859
      {
860
        mri_last_ch = ch;
861
        ++mri_state;
862
      }
863
    else if (*mri_state != '\0'
864
       || (lex[ch] != LEX_IS_WHITESPACE
865
           && lex[ch] != LEX_IS_NEWLINE))
866
      {
867
        /* We did not get the expected character, or we didn't
868
     get a valid terminating character after seeing the
869
     entire pseudo-op, so we must go back to the
870
     beginning.  */
871
        mri_state = NULL;
872
      }
873
    else
874
      {
875
        /* We've read the entire pseudo-op.  mri_last_ch is
876
     either '0' or '1' indicating whether to enter or
877
     leave MRI mode.  */
878
        do_scrub_begin (mri_last_ch == '1');
879
        mri_state = NULL;
880
881
        /* We continue handling the character as usual.  The
882
     main gas reader must also handle the .mri pseudo-op
883
     to control expression parsing and the like.  */
884
      }
885
  }
886
#endif
887
888
17.2M
      if (ch == EOF)
889
8.40k
  {
890
8.40k
    if (state != 0)
891
556
      {
892
556
        as_warn (_("end of file not at end of a line; newline inserted"));
893
556
        state = 0;
894
556
        PUT ('\n');
895
556
      }
896
8.40k
    goto fromeof;
897
8.40k
  }
898
899
17.2M
      switch (lex[ch])
900
17.2M
  {
901
1.98M
  case LEX_IS_WHITESPACE:
902
1.98M
    do
903
3.49M
      {
904
3.49M
        ch = GET ();
905
3.49M
      }
906
3.49M
    while (ch != EOF && IS_WHITESPACE (ch));
907
1.98M
    if (ch == EOF)
908
22
      goto fromeof;
909
910
1.98M
    if (state == 0)
911
282k
      {
912
        /* Preserve a single whitespace character at the
913
     beginning of a line.  */
914
282k
        state = 1;
915
282k
        UNGET (ch);
916
282k
        PUT (' ');
917
282k
        break;
918
282k
      }
919
920
#ifdef KEEP_WHITE_AROUND_COLON
921
    if (lex[ch] == LEX_IS_COLON)
922
      {
923
        /* Only keep this white if there's no white *after* the
924
     colon.  */
925
        ch2 = GET ();
926
        if (ch2 != EOF)
927
    UNGET (ch2);
928
        if (!IS_WHITESPACE (ch2))
929
    {
930
      state = 9;
931
      UNGET (ch);
932
      PUT (' ');
933
      break;
934
    }
935
      }
936
#endif
937
1.70M
    if (IS_COMMENT (ch)
938
1.70M
        || IS_LINE_SEPARATOR (ch)
939
1.70M
        || IS_PARALLEL_SEPARATOR (ch))
940
21.1k
      {
941
21.1k
        if (scrub_m68k_mri)
942
0
    {
943
      /* In MRI mode, we keep these spaces.  */
944
0
      UNGET (ch);
945
0
      PUT (' ');
946
0
      break;
947
0
    }
948
21.1k
        goto recycle;
949
21.1k
      }
950
951
    /* If we're in state 2 or 11, we've seen a non-white
952
       character followed by whitespace.  If the next character
953
       is ':', this is whitespace after a label name which we
954
       normally must ignore.  In MRI mode, though, spaces are
955
       not permitted between the label and the colon.  */
956
1.68M
    if ((state == 2 || state == 11)
957
1.68M
        && lex[ch] == LEX_IS_COLON
958
1.68M
        && ! scrub_m68k_mri)
959
3.74k
      {
960
3.74k
        state = 1;
961
3.74k
        PUT (ch);
962
3.74k
        break;
963
3.74k
      }
964
965
1.67M
    switch (state)
966
1.67M
      {
967
32.2k
      case 1:
968
        /* We can arrive here if we leave a leading whitespace
969
     character at the beginning of a line.  */
970
32.2k
        goto recycle;
971
215k
      case 2:
972
215k
        state = 3;
973
215k
        if (to + 1 < toend)
974
215k
    {
975
      /* Optimize common case by skipping UNGET/GET.  */
976
215k
      PUT (' '); /* Sp after opco */
977
215k
      goto recycle;
978
215k
    }
979
5
        UNGET (ch);
980
5
        PUT (' ');
981
0
        break;
982
116k
      case 3:
983
116k
#ifndef TC_KEEP_OPERAND_SPACES
984
        /* For TI C6X, we keep these spaces as they may separate
985
     functional unit specifiers from operands.  */
986
116k
        if (scrub_m68k_mri)
987
0
#endif
988
0
    {
989
      /* In MRI mode, we keep these spaces.  */
990
0
      UNGET (ch);
991
0
      PUT (' ');
992
0
      break;
993
0
    }
994
116k
        goto recycle; /* Sp in operands */
995
883k
      case 9:
996
883k
      case 10:
997
883k
#ifndef TC_KEEP_OPERAND_SPACES
998
883k
        if (scrub_m68k_mri)
999
0
#endif
1000
0
    {
1001
      /* In MRI mode, we keep these spaces.  */
1002
0
      state = 3;
1003
0
      UNGET (ch);
1004
0
      PUT (' ');
1005
0
      break;
1006
0
    }
1007
883k
        state = 10; /* Sp after symbol char */
1008
883k
        goto recycle;
1009
431k
      case 11:
1010
431k
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1011
0
    state = 1;
1012
431k
        else
1013
431k
    {
1014
      /* We know that ch is not ':', since we tested that
1015
         case above.  Therefore this is not a label, so it
1016
         must be the opcode, and we've just seen the
1017
         whitespace after it.  */
1018
431k
      state = 3;
1019
431k
    }
1020
431k
        UNGET (ch);
1021
431k
        PUT (' '); /* Sp after label definition.  */
1022
431k
        break;
1023
431k
      default:
1024
0
        BAD_CASE (state);
1025
1.67M
      }
1026
431k
    break;
1027
1028
431k
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1029
0
    ch2 = GET ();
1030
0
    if (ch2 == '*')
1031
0
      {
1032
0
        for (;;)
1033
0
    {
1034
0
      do
1035
0
        {
1036
0
          ch2 = GET ();
1037
0
          if (ch2 != EOF && IS_NEWLINE (ch2))
1038
0
      add_newlines++;
1039
0
        }
1040
0
      while (ch2 != EOF && ch2 != '*');
1041
1042
0
      while (ch2 == '*')
1043
0
        ch2 = GET ();
1044
1045
0
      if (ch2 == EOF || ch2 == '/')
1046
0
        break;
1047
1048
      /* This UNGET will ensure that we count newlines
1049
         correctly.  */
1050
0
      UNGET (ch2);
1051
0
    }
1052
1053
0
        if (ch2 == EOF)
1054
0
    as_warn (_("end of file in multiline comment"));
1055
1056
0
        ch = ' ';
1057
0
        goto recycle;
1058
0
      }
1059
#ifdef DOUBLESLASH_LINE_COMMENTS
1060
    else if (ch2 == '/')
1061
      {
1062
        do
1063
    {
1064
      ch = GET ();
1065
    }
1066
        while (ch != EOF && !IS_NEWLINE (ch));
1067
        if (ch == EOF)
1068
    as_warn ("end of file in comment; newline inserted");
1069
        state = 0;
1070
        PUT ('\n');
1071
        break;
1072
      }
1073
#endif
1074
0
    else
1075
0
      {
1076
0
        if (ch2 != EOF)
1077
0
    UNGET (ch2);
1078
0
        if (state == 9 || state == 10)
1079
0
    state = 3;
1080
0
        PUT (ch);
1081
0
      }
1082
0
    break;
1083
1084
714k
  case LEX_IS_STRINGQUOTE:
1085
714k
    quotechar = ch;
1086
714k
    if (state == 10)
1087
265k
      {
1088
        /* Preserve the whitespace in foo "bar".  */
1089
265k
        UNGET (ch);
1090
265k
        state = 3;
1091
265k
        PUT (' ');
1092
1093
        /* PUT didn't jump out.  We could just break, but we
1094
     know what will happen, so optimize a bit.  */
1095
265k
        ch = GET ();
1096
265k
        old_state = 9;
1097
265k
      }
1098
448k
    else if (state == 3)
1099
45.4k
      old_state = 9;
1100
403k
    else
1101
403k
      old_state = state;
1102
714k
    state = 5;
1103
714k
    PUT (ch);
1104
714k
    break;
1105
1106
714k
  case LEX_IS_ONECHAR_QUOTE:
1107
#ifdef H_TICK_HEX
1108
    if (state == 9 && enable_h_tick_hex)
1109
      {
1110
        char c;
1111
1112
        c = GET ();
1113
        as_warn ("'%c found after symbol", c);
1114
        UNGET (c);
1115
      }
1116
#endif
1117
224k
    if (state == 10)
1118
999
      {
1119
        /* Preserve the whitespace in foo 'b'.  */
1120
999
        UNGET (ch);
1121
999
        state = 3;
1122
999
        PUT (' ');
1123
999
        break;
1124
999
      }
1125
223k
    ch = GET ();
1126
223k
    if (ch == EOF)
1127
2
      {
1128
2
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1129
2
        ch = 0;
1130
2
      }
1131
223k
    if (ch == '\\')
1132
969
      {
1133
969
        ch = GET ();
1134
969
        if (ch == EOF)
1135
0
    {
1136
0
      as_warn (_("end of file in escape character"));
1137
0
      ch = '\\';
1138
0
    }
1139
969
        else
1140
969
    ch = process_escape (ch);
1141
969
      }
1142
223k
    sprintf (out_buf, "%d", (int) (unsigned char) ch);
1143
1144
    /* None of these 'x constants for us.  We want 'x'.  */
1145
223k
    if ((ch = GET ()) != '\'')
1146
214k
      {
1147
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1148
        as_warn (_("missing close quote; (assumed)"));
1149
#else
1150
214k
        if (ch != EOF)
1151
214k
    UNGET (ch);
1152
214k
#endif
1153
214k
      }
1154
223k
    if (strlen (out_buf) == 1)
1155
16.2k
      {
1156
16.2k
        PUT (out_buf[0]);
1157
16.2k
        break;
1158
16.2k
      }
1159
207k
    if (state == 9)
1160
11.0k
      old_state = 3;
1161
196k
    else
1162
196k
      old_state = state;
1163
207k
    state = -1;
1164
207k
    out_string = out_buf;
1165
207k
    PUT (*out_string++);
1166
207k
    break;
1167
1168
490k
  case LEX_IS_COLON:
1169
#ifdef KEEP_WHITE_AROUND_COLON
1170
    state = 9;
1171
#else
1172
490k
    if (state == 9 || state == 10)
1173
29.6k
      state = 3;
1174
460k
    else if (state != 3)
1175
447k
      state = 1;
1176
490k
#endif
1177
490k
    PUT (ch);
1178
490k
    break;
1179
1180
5.63M
  case LEX_IS_NEWLINE:
1181
    /* Roll out a bunch of newlines from inside comments, etc.  */
1182
5.63M
    if (add_newlines)
1183
1.56k
      {
1184
1.56k
        --add_newlines;
1185
1.56k
        UNGET (ch);
1186
1.56k
      }
1187
    /* Fall through.  */
1188
1189
6.58M
  case LEX_IS_LINE_SEPARATOR:
1190
6.58M
    state = 0;
1191
6.58M
    PUT (ch);
1192
6.58M
    break;
1193
1194
6.58M
  case LEX_IS_PARALLEL_SEPARATOR:
1195
0
    state = 1;
1196
0
    PUT (ch);
1197
0
    break;
1198
1199
#ifdef TC_V850
1200
  case LEX_IS_DOUBLEDASH_1ST:
1201
    ch2 = GET ();
1202
    if (ch2 != '-')
1203
      {
1204
        if (ch2 != EOF)
1205
    UNGET (ch2);
1206
        goto de_fault;
1207
      }
1208
    /* Read and skip to end of line.  */
1209
    do
1210
      {
1211
        ch = GET ();
1212
      }
1213
    while (ch != EOF && ch != '\n');
1214
1215
    if (ch == EOF)
1216
      as_warn (_("end of file in comment; newline inserted"));
1217
1218
    state = 0;
1219
    PUT ('\n');
1220
    break;
1221
#endif
1222
#ifdef DOUBLEBAR_PARALLEL
1223
  case LEX_IS_DOUBLEBAR_1ST:
1224
    ch2 = GET ();
1225
    if (ch2 != EOF)
1226
      UNGET (ch2);
1227
    if (ch2 != '|')
1228
      goto de_fault;
1229
1230
    /* Handle '||' in two states as invoking PUT twice might
1231
       result in the first one jumping out of this loop.  We'd
1232
       then lose track of the state and one '|' char.  */
1233
    state = 13;
1234
    PUT ('|');
1235
    break;
1236
#endif
1237
356k
  case LEX_IS_LINE_COMMENT_START:
1238
    /* FIXME-someday: The two character comment stuff was badly
1239
       thought out.  On i386, we want '/' as line comment start
1240
       AND we want C style comments.  hence this hack.  The
1241
       whole lexical process should be reworked.  xoxorich.  */
1242
356k
    if (ch == '/')
1243
143k
      {
1244
143k
        ch2 = GET ();
1245
143k
        if (ch2 == '*')
1246
13.8k
    {
1247
13.8k
      old_state = 3;
1248
13.8k
      state = -2;
1249
13.8k
      break;
1250
13.8k
    }
1251
129k
        else if (ch2 != EOF)
1252
129k
    {
1253
129k
      UNGET (ch2);
1254
129k
    }
1255
143k
      }
1256
1257
342k
    if (state == 0 || state == 1)  /* Only comment at start of line.  */
1258
135k
      {
1259
135k
        int startch;
1260
1261
135k
        startch = ch;
1262
1263
135k
        do
1264
138k
    {
1265
138k
      ch = GET ();
1266
138k
    }
1267
138k
        while (ch != EOF && IS_WHITESPACE (ch));
1268
1269
135k
        if (ch == EOF)
1270
5
    {
1271
5
      as_warn (_("end of file in comment; newline inserted"));
1272
5
      PUT ('\n');
1273
5
      break;
1274
5
    }
1275
1276
135k
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1277
113k
    {
1278
      /* Not a cpp line.  */
1279
2.33M
      while (ch != EOF && !IS_NEWLINE (ch))
1280
2.22M
        ch = GET ();
1281
113k
      if (ch == EOF)
1282
78
        {
1283
78
          as_warn (_("end of file in comment; newline inserted"));
1284
78
          PUT ('\n');
1285
78
        }
1286
112k
      else /* IS_NEWLINE (ch) */
1287
112k
        {
1288
          /* To process non-zero add_newlines.  */
1289
112k
          UNGET (ch);
1290
112k
        }
1291
113k
      state = 0;
1292
113k
      break;
1293
113k
    }
1294
        /* Looks like `# 123 "filename"' from cpp.  */
1295
22.1k
        UNGET (ch);
1296
22.1k
        old_state = 4;
1297
22.1k
        state = -1;
1298
22.1k
        if (scrub_m68k_mri)
1299
0
    out_string = "\tlinefile ";
1300
22.1k
        else
1301
22.1k
    out_string = "\t.linefile ";
1302
22.1k
        PUT (*out_string++);
1303
22.1k
        break;
1304
22.1k
      }
1305
1306
#ifdef TC_D10V
1307
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1308
       Trap is the only short insn that has a first operand that is
1309
       neither register nor label.
1310
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1311
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1312
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1313
       only character in line_comment_chars for d10v, hence we
1314
       can recognize it as such.  */
1315
    /* An alternative approach would be to reset the state to 1 when
1316
       we see '||', '<'- or '->', but that seems to be overkill.  */
1317
    if (state == 10)
1318
      PUT (' ');
1319
#endif
1320
    /* We have a line comment character which is not at the
1321
       start of a line.  If this is also a normal comment
1322
       character, fall through.  Otherwise treat it as a default
1323
       character.  */
1324
207k
    if (strchr (tc_comment_chars, ch) == NULL
1325
207k
        && (! scrub_m68k_mri
1326
118k
      || (ch != '!' && ch != '*')))
1327
118k
      goto de_fault;
1328
88.3k
    if (scrub_m68k_mri
1329
88.3k
        && (ch == '!' || ch == '*' || ch == '#')
1330
88.3k
        && state != 1
1331
88.3k
        && state != 10)
1332
0
      goto de_fault;
1333
    /* Fall through.  */
1334
88.3k
  case LEX_IS_COMMENT_START:
1335
#if defined TC_ARM && defined OBJ_ELF
1336
    /* On the ARM, `@' is the comment character.
1337
       Unfortunately this is also a special character in ELF .symver
1338
       directives (and .type, though we deal with those another way).
1339
       So we check if this line is such a directive, and treat
1340
       the character as default if so.  This is a hack.  */
1341
    if ((symver_state != NULL) && (*symver_state == 0))
1342
      goto de_fault;
1343
#endif
1344
1345
    /* Care is needed not to damage occurrences of \<comment-char>
1346
       by stripping the <comment-char> onwards.  Yuck.  */
1347
88.3k
    if ((to > tostart ? to[-1] : last_char) == '\\')
1348
      /* Do not treat the <comment-char> as a start-of-comment.  */
1349
93
      goto de_fault;
1350
1351
#ifdef WARN_COMMENTS
1352
    if (!found_comment)
1353
      found_comment_file = as_where (&found_comment);
1354
#endif
1355
88.2k
    do
1356
12.0M
      {
1357
12.0M
        ch = GET ();
1358
12.0M
      }
1359
12.0M
    while (ch != EOF && !IS_NEWLINE (ch));
1360
88.2k
    if (ch == EOF)
1361
62
      as_warn (_("end of file in comment; newline inserted"));
1362
88.2k
    state = 0;
1363
88.2k
    PUT ('\n');
1364
88.2k
    break;
1365
1366
#ifdef H_TICK_HEX
1367
  case LEX_IS_H:
1368
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1369
       the H' with 0x to make them gas-style hex characters.  */
1370
    if (enable_h_tick_hex)
1371
      {
1372
        char quot;
1373
1374
        quot = GET ();
1375
        if (quot == '\'')
1376
    {
1377
      UNGET ('x');
1378
      ch = '0';
1379
    }
1380
        else
1381
    UNGET (quot);
1382
      }
1383
#endif
1384
    /* Fall through.  */
1385
1386
4.27M
  case LEX_IS_SYMBOL_COMPONENT:
1387
4.27M
    if (state == 10)
1388
556k
      {
1389
        /* This is a symbol character following another symbol
1390
     character, with whitespace in between.  We skipped
1391
     the whitespace earlier, so output it now.  */
1392
556k
        UNGET (ch);
1393
556k
        state = 3;
1394
556k
        PUT (' ');
1395
556k
        break;
1396
556k
      }
1397
1398
#ifdef TC_Z80
1399
    /* "af'" is a symbol containing '\''.  */
1400
    if (state == 3 && (ch == 'a' || ch == 'A'))
1401
      {
1402
        state = 16;
1403
        PUT (ch);
1404
        ch = GET ();
1405
        if (ch == 'f' || ch == 'F')
1406
    {
1407
      state = 17;
1408
      PUT (ch);
1409
      break;
1410
    }
1411
        else
1412
    {
1413
      state = 9;
1414
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1415
        {
1416
          if (ch != EOF)
1417
      UNGET (ch);
1418
          break;
1419
        }
1420
    }
1421
      }
1422
#endif
1423
3.72M
    if (state == 3)
1424
1.34M
      state = 9;
1425
1426
    /* This is a common case.  Quickly copy CH and all the
1427
       following symbol component or normal characters.  */
1428
3.72M
    if (to + 1 < toend
1429
3.72M
        && mri_state == NULL
1430
#if defined TC_ARM && defined OBJ_ELF
1431
        && symver_state == NULL
1432
#endif
1433
3.72M
        )
1434
3.72M
      {
1435
3.72M
        char *s;
1436
3.72M
        ptrdiff_t len;
1437
1438
53.1M
        for (s = from; s < fromend; s++)
1439
53.1M
    {
1440
53.1M
      int type;
1441
1442
53.1M
      ch2 = *(unsigned char *) s;
1443
53.1M
      type = lex[ch2];
1444
53.1M
      if (type != 0
1445
53.1M
          && type != LEX_IS_SYMBOL_COMPONENT)
1446
3.72M
        break;
1447
53.1M
    }
1448
1449
3.72M
        if (s > from)
1450
    /* Handle the last character normally, for
1451
       simplicity.  */
1452
2.48M
    --s;
1453
1454
3.72M
        len = s - from;
1455
1456
3.72M
        if (len > (toend - to) - 1)
1457
579
    len = (toend - to) - 1;
1458
1459
3.72M
        if (len > 0)
1460
2.11M
    {
1461
2.11M
      PUT (ch);
1462
2.11M
      memcpy (to, from, len);
1463
2.11M
      to += len;
1464
2.11M
      from += len;
1465
2.11M
      if (to >= toend)
1466
611
        goto tofull;
1467
2.11M
      ch = GET ();
1468
2.11M
    }
1469
3.72M
      }
1470
1471
    /* Fall through.  */
1472
6.34M
  default:
1473
6.46M
  de_fault:
1474
    /* Some relatively `normal' character.  */
1475
6.46M
    if (state == 0)
1476
1.17M
      {
1477
1.17M
        state = 11; /* Now seeing label definition.  */
1478
1.17M
      }
1479
5.29M
    else if (state == 1)
1480
399k
      {
1481
399k
        state = 2;  /* Ditto.  */
1482
399k
      }
1483
4.89M
    else if (state == 9)
1484
1.91M
      {
1485
1.91M
        if (!IS_SYMBOL_COMPONENT (ch))
1486
179k
    state = 3;
1487
1.91M
      }
1488
2.98M
    else if (state == 10)
1489
48.1k
      {
1490
48.1k
        if (ch == '\\')
1491
209
    {
1492
      /* Special handling for backslash: a backslash may
1493
         be the beginning of a formal parameter (of a
1494
         macro) following another symbol character, with
1495
         whitespace in between.  If that is the case, we
1496
         output a space before the parameter.  Strictly
1497
         speaking, correct handling depends upon what the
1498
         macro parameter expands into; if the parameter
1499
         expands into something which does not start with
1500
         an operand character, then we don't want to keep
1501
         the space.  We don't have enough information to
1502
         make the right choice, so here we are making the
1503
         choice which is more likely to be correct.  */
1504
209
      if (to + 1 >= toend)
1505
1
        {
1506
          /* If we're near the end of the buffer, save the
1507
             character for the next time round.  Otherwise
1508
             we'll lose our state.  */
1509
1
          UNGET (ch);
1510
1
          goto tofull;
1511
1
        }
1512
208
      *to++ = ' ';
1513
208
    }
1514
1515
48.1k
        state = 3;
1516
48.1k
      }
1517
6.46M
    PUT (ch);
1518
6.46M
    break;
1519
17.2M
  }
1520
17.2M
    }
1521
1522
  /*NOTREACHED*/
1523
1524
8.48k
 fromeof:
1525
  /* We have reached the end of the input.  */
1526
8.48k
  if (to > tostart)
1527
8.48k
    last_char = to[-1];
1528
8.48k
  return to - tostart;
1529
1530
2.08k
 tofull:
1531
  /* The output buffer is full.  Save any input we have not yet
1532
     processed.  */
1533
2.08k
  if (fromend > from)
1534
1.82k
    {
1535
1.82k
      saved_input = from;
1536
1.82k
      saved_input_len = fromend - from;
1537
1.82k
    }
1538
263
  else
1539
263
    saved_input = NULL;
1540
1541
2.08k
  if (to > tostart)
1542
2.08k
    last_char = to[-1];
1543
2.08k
  return to - tostart;
1544
10.5k
}
1545
1546
/* Return amount of pending input.  */
1547
1548
size_t
1549
do_scrub_pending (void)
1550
26.7k
{
1551
26.7k
  size_t len = 0;
1552
26.7k
  if (saved_input)
1553
10
    len += saved_input_len;
1554
26.7k
  if (state == -1)
1555
5
    len += strlen (out_string);
1556
26.7k
  return len;
1557
26.7k
}