Coverage Report

Created: 2023-08-28 06:31

/src/binutils-gdb/gas/app.c
Line
Count
Source (jump to first uncovered line)
1
/* This is the Assembler Pre-Processor
2
   Copyright (C) 1987-2023 Free Software Foundation, Inc.
3
4
   This file is part of GAS, the GNU Assembler.
5
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
#if (__STDC__ != 1)
30
#ifndef const
31
#define const  /* empty */
32
#endif
33
#endif
34
35
#ifdef H_TICK_HEX
36
int enable_h_tick_hex = 0;
37
#endif
38
39
#ifdef TC_M68K
40
/* Whether we are scrubbing in m68k MRI mode.  This is different from
41
   flag_m68k_mri, because the two flags will be affected by the .mri
42
   pseudo-op at different times.  */
43
static int scrub_m68k_mri;
44
45
/* The pseudo-op which switches in and out of MRI mode.  See the
46
   comment in do_scrub_chars.  */
47
static const char mri_pseudo[] = ".mri 0";
48
#else
49
8.40M
#define scrub_m68k_mri 0
50
#endif
51
52
#if defined TC_ARM && defined OBJ_ELF
53
/* The pseudo-op for which we need to special-case `@' characters.
54
   See the comment in do_scrub_chars.  */
55
static const char   symver_pseudo[] = ".symver";
56
static const char * symver_state;
57
#endif
58
59
static char last_char;
60
61
static char lex[256];
62
static const char symbol_chars[] =
63
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
64
65
86.0M
#define LEX_IS_SYMBOL_COMPONENT   1
66
9.65M
#define LEX_IS_WHITESPACE   2
67
9.08M
#define LEX_IS_LINE_SEPARATOR   3
68
3.52M
#define LEX_IS_COMMENT_START    4
69
564k
#define LEX_IS_LINE_COMMENT_START 5
70
0
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
71
1.25M
#define LEX_IS_STRINGQUOTE    8
72
6.64M
#define LEX_IS_COLON      9
73
9.36M
#define LEX_IS_NEWLINE      10
74
1.26M
#define LEX_IS_ONECHAR_QUOTE    11
75
#ifdef TC_V850
76
#define LEX_IS_DOUBLEDASH_1ST   12
77
#endif
78
#ifdef TC_M32R
79
#define DOUBLEBAR_PARALLEL
80
#endif
81
#ifdef DOUBLEBAR_PARALLEL
82
#define LEX_IS_DOUBLEBAR_1ST    13
83
#endif
84
3.29M
#define LEX_IS_PARALLEL_SEPARATOR 14
85
#ifdef H_TICK_HEX
86
#define LEX_IS_H      15
87
#endif
88
4.74M
#define IS_SYMBOL_COMPONENT(c)    (lex[c] == LEX_IS_SYMBOL_COMPONENT)
89
5.22M
#define IS_WHITESPACE(c)    (lex[c] == LEX_IS_WHITESPACE)
90
6.67M
#define IS_LINE_SEPARATOR(c)    (lex[c] == LEX_IS_LINE_SEPARATOR)
91
3.29M
#define IS_PARALLEL_SEPARATOR(c)  (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
92
6.67M
#define IS_COMMENT(c)     (lex[c] == LEX_IS_COMMENT_START)
93
#define IS_LINE_COMMENT(c)    (lex[c] == LEX_IS_LINE_COMMENT_START)
94
5.69M
#define IS_NEWLINE(c)     (lex[c] == LEX_IS_NEWLINE)
95
96
static int process_escape (int);
97
98
/* FIXME-soon: The entire lexer/parser thingy should be
99
   built statically at compile time rather than dynamically
100
   each and every time the assembler is run.  xoxorich.  */
101
102
void
103
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
104
1.15k
{
105
1.15k
  const char *p;
106
1.15k
  int c;
107
108
1.15k
  lex[' '] = LEX_IS_WHITESPACE;
109
1.15k
  lex['\t'] = LEX_IS_WHITESPACE;
110
1.15k
  lex['\r'] = LEX_IS_WHITESPACE;
111
1.15k
  lex['\n'] = LEX_IS_NEWLINE;
112
1.15k
  lex[':'] = LEX_IS_COLON;
113
114
#ifdef TC_M68K
115
  scrub_m68k_mri = m68k_mri;
116
117
  if (! m68k_mri)
118
#endif
119
1.15k
    {
120
1.15k
      lex['"'] = LEX_IS_STRINGQUOTE;
121
122
1.15k
#if ! defined (TC_HPPA)
123
1.15k
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
124
1.15k
#endif
125
126
#ifdef SINGLE_QUOTE_STRINGS
127
      lex['\''] = LEX_IS_STRINGQUOTE;
128
#endif
129
1.15k
    }
130
131
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
132
     in state 5 of do_scrub_chars must be changed.  */
133
134
  /* Note that these override the previous defaults, e.g. if ';' is a
135
     comment char, then it isn't a line separator.  */
136
76.0k
  for (p = symbol_chars; *p; ++p)
137
74.9k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
138
139
148k
  for (c = 128; c < 256; ++c)
140
147k
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
141
142
1.15k
#ifdef tc_symbol_chars
143
  /* This macro permits the processor to specify all characters which
144
     may appear in an operand.  This will prevent the scrubber from
145
     discarding meaningful whitespace in certain cases.  The i386
146
     backend uses this to support prefixes, which can confuse the
147
     scrubber as to whether it is parsing operands or opcodes.  */
148
9.22k
  for (p = tc_symbol_chars; *p; ++p)
149
8.07k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
150
1.15k
#endif
151
152
  /* The m68k backend wants to be able to change comment_chars.  */
153
#ifndef tc_comment_chars
154
#define tc_comment_chars comment_chars
155
#endif
156
2.30k
  for (p = tc_comment_chars; *p; p++)
157
1.15k
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
158
159
3.45k
  for (p = line_comment_chars; *p; p++)
160
2.30k
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
161
162
1.15k
#ifndef tc_line_separator_chars
163
1.15k
#define tc_line_separator_chars line_separator_chars
164
1.15k
#endif
165
2.30k
  for (p = tc_line_separator_chars; *p; p++)
166
1.15k
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
167
168
#ifdef tc_parallel_separator_chars
169
  /* This macro permits the processor to specify all characters which
170
     separate parallel insns on the same line.  */
171
  for (p = tc_parallel_separator_chars; *p; p++)
172
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
173
#endif
174
175
  /* Only allow slash-star comments if slash is not in use.
176
     FIXME: This isn't right.  We should always permit them.  */
177
1.15k
  if (lex['/'] == 0)
178
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
179
180
#ifdef TC_M68K
181
  if (m68k_mri)
182
    {
183
      lex['\''] = LEX_IS_STRINGQUOTE;
184
      lex[';'] = LEX_IS_COMMENT_START;
185
      lex['*'] = LEX_IS_LINE_COMMENT_START;
186
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
187
   then it can't be used in an expression.  */
188
      lex['!'] = LEX_IS_LINE_COMMENT_START;
189
    }
190
#endif
191
192
#ifdef TC_V850
193
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
194
#endif
195
#ifdef DOUBLEBAR_PARALLEL
196
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
197
#endif
198
#ifdef TC_D30V
199
  /* Must do this if we want VLIW instructions with "->" or "<-".  */
200
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
201
#endif
202
203
#ifdef H_TICK_HEX
204
  if (enable_h_tick_hex)
205
    {
206
      lex['h'] = LEX_IS_H;
207
      lex['H'] = LEX_IS_H;
208
    }
209
#endif
210
1.15k
}
211
212
/* Saved state of the scrubber.  */
213
static int state;
214
static int old_state;
215
static const char *out_string;
216
static char out_buf[20];
217
static int add_newlines;
218
static char *saved_input;
219
static size_t saved_input_len;
220
static char input_buffer[32 * 1024];
221
static const char *mri_state;
222
static char mri_last_ch;
223
224
/* Data structure for saving the state of app across #include's.  Note that
225
   app is called asynchronously to the parsing of the .include's, so our
226
   state at the time .include is interpreted is completely unrelated.
227
   That's why we have to save it all.  */
228
229
struct app_save
230
{
231
  int          state;
232
  int          old_state;
233
  const char * out_string;
234
  char         out_buf[sizeof (out_buf)];
235
  int          add_newlines;
236
  char *       saved_input;
237
  size_t       saved_input_len;
238
#ifdef TC_M68K
239
  int          scrub_m68k_mri;
240
#endif
241
  const char * mri_state;
242
  char         mri_last_ch;
243
#if defined TC_ARM && defined OBJ_ELF
244
  const char * symver_state;
245
#endif
246
  char         last_char;
247
};
248
249
char *
250
app_push (void)
251
76.1k
{
252
76.1k
  struct app_save *saved;
253
254
76.1k
  saved = XNEW (struct app_save);
255
76.1k
  saved->state = state;
256
76.1k
  saved->old_state = old_state;
257
76.1k
  saved->out_string = out_string;
258
76.1k
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
259
76.1k
  saved->add_newlines = add_newlines;
260
76.1k
  if (saved_input == NULL)
261
64.7k
    saved->saved_input = NULL;
262
11.4k
  else
263
11.4k
    {
264
11.4k
      saved->saved_input = XNEWVEC (char, saved_input_len);
265
11.4k
      memcpy (saved->saved_input, saved_input, saved_input_len);
266
11.4k
      saved->saved_input_len = saved_input_len;
267
11.4k
    }
268
#ifdef TC_M68K
269
  saved->scrub_m68k_mri = scrub_m68k_mri;
270
#endif
271
76.1k
  saved->mri_state = mri_state;
272
76.1k
  saved->mri_last_ch = mri_last_ch;
273
#if defined TC_ARM && defined OBJ_ELF
274
  saved->symver_state = symver_state;
275
#endif
276
76.1k
  saved->last_char = last_char;
277
278
  /* do_scrub_begin() is not useful, just wastes time.  */
279
280
76.1k
  state = 0;
281
76.1k
  saved_input = NULL;
282
76.1k
  add_newlines = 0;
283
284
76.1k
  return (char *) saved;
285
76.1k
}
286
287
void
288
app_pop (char *arg)
289
76.1k
{
290
76.1k
  struct app_save *saved = (struct app_save *) arg;
291
292
  /* There is no do_scrub_end ().  */
293
76.1k
  state = saved->state;
294
76.1k
  old_state = saved->old_state;
295
76.1k
  out_string = saved->out_string;
296
76.1k
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
297
76.1k
  add_newlines = saved->add_newlines;
298
76.1k
  if (saved->saved_input == NULL)
299
64.7k
    saved_input = NULL;
300
11.4k
  else
301
11.4k
    {
302
11.4k
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
303
0
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
304
11.4k
      saved_input = input_buffer;
305
11.4k
      saved_input_len = saved->saved_input_len;
306
11.4k
      free (saved->saved_input);
307
11.4k
    }
308
#ifdef TC_M68K
309
  scrub_m68k_mri = saved->scrub_m68k_mri;
310
#endif
311
0
  mri_state = saved->mri_state;
312
76.1k
  mri_last_ch = saved->mri_last_ch;
313
#if defined TC_ARM && defined OBJ_ELF
314
  symver_state = saved->symver_state;
315
#endif
316
76.1k
  last_char = saved->last_char;
317
318
76.1k
  free (arg);
319
76.1k
}
320
321
/* @@ This assumes that \n &c are the same on host and target.  This is not
322
   necessarily true.  */
323
324
static int
325
process_escape (int ch)
326
141k
{
327
141k
  switch (ch)
328
141k
    {
329
3.97k
    case 'b':
330
3.97k
      return '\b';
331
65.9k
    case 'f':
332
65.9k
      return '\f';
333
4.11k
    case 'n':
334
4.11k
      return '\n';
335
1
    case 'r':
336
1
      return '\r';
337
6
    case 't':
338
6
      return '\t';
339
25
    case '\'':
340
25
      return '\'';
341
15
    case '"':
342
15
      return '\"';
343
67.6k
    default:
344
67.6k
      return ch;
345
141k
    }
346
141k
}
347
348
0
#define MULTIBYTE_WARN_COUNT_LIMIT 10
349
static unsigned int multibyte_warn_count = 0;
350
351
bool
352
scan_for_multibyte_characters (const unsigned char *  start,
353
             const unsigned char *  end,
354
             bool                   warn)
355
0
{
356
0
  if (end <= start)
357
0
    return false;
358
359
0
  if (warn && multibyte_warn_count > MULTIBYTE_WARN_COUNT_LIMIT)
360
0
    return false;
361
362
0
  bool found = false;
363
364
0
  while (start < end)
365
0
    {
366
0
      unsigned char c;
367
368
0
      if ((c = * start++) <= 0x7f)
369
0
  continue;
370
371
0
      if (!warn)
372
0
  return true;
373
374
0
      found = true;
375
376
0
      const char * filename;
377
0
      unsigned int lineno;
378
379
0
      filename = as_where (& lineno);
380
0
      if (filename == NULL)
381
0
  as_warn (_("multibyte character (%#x) encountered in input"), c);
382
0
      else if (lineno == 0)
383
0
  as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
384
0
      else
385
0
  as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
386
387
0
      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
388
0
  {
389
0
    as_warn (_("further multibyte character warnings suppressed"));
390
0
    break;
391
0
  }
392
0
    }
393
394
0
  return found;
395
0
}
396
397
/* This function is called to process input characters.  The GET
398
   parameter is used to retrieve more input characters.  GET should
399
   set its parameter to point to a buffer, and return the length of
400
   the buffer; it should return 0 at end of file.  The scrubbed output
401
   characters are put into the buffer starting at TOSTART; the TOSTART
402
   buffer is TOLEN bytes in length.  The function returns the number
403
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
404
   end of file was seen.  This function is arranged as a state
405
   machine, and saves its state so that it may return at any point.
406
   This is the way the old code used to work.  */
407
408
size_t
409
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
410
62.2k
{
411
62.2k
  char *to = tostart;
412
62.2k
  char *toend = tostart + tolen;
413
62.2k
  char *from;
414
62.2k
  char *fromend;
415
62.2k
  size_t fromlen;
416
62.2k
  int ch, ch2 = 0;
417
  /* Character that started the string we're working on.  */
418
62.2k
  static char quotechar;
419
420
  /*State 0: beginning of normal line
421
    1: After first whitespace on line (flush more white)
422
    2: After first non-white (opcode) on line (keep 1white)
423
    3: after second white on line (into operands) (flush white)
424
    4: after putting out a .linefile, put out digits
425
    5: parsing a string, then go to old-state
426
    6: putting out \ escape in a "d string.
427
    7: no longer used
428
    8: no longer used
429
    9: After seeing symbol char in state 3 (keep 1white after symchar)
430
   10: After seeing whitespace in state 9 (keep white before symchar)
431
   11: After seeing a symbol character in state 0 (eg a label definition)
432
   -1: output string in out_string and go to the state in old_state
433
   -2: flush text until a '*' '/' is seen, then go to state old_state
434
#ifdef TC_V850
435
   12: After seeing a dash, looking for a second dash as a start
436
       of comment.
437
#endif
438
#ifdef DOUBLEBAR_PARALLEL
439
   13: After seeing a vertical bar, looking for a second
440
       vertical bar as a parallel expression separator.
441
#endif
442
#ifdef TC_PREDICATE_START_CHAR
443
   14: After seeing a predicate start character at state 0, looking
444
       for a predicate end character as predicate.
445
   15: After seeing a predicate start character at state 1, looking
446
       for a predicate end character as predicate.
447
#endif
448
#ifdef TC_Z80
449
   16: After seeing an 'a' or an 'A' at the start of a symbol
450
   17: After seeing an 'f' or an 'F' in state 16
451
#endif
452
    */
453
454
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
455
     constructs like ``.loc 1 20''.  This was turning into ``.loc
456
     120''.  States 9 and 10 ensure that a space is never dropped in
457
     between characters which could appear in an identifier.  Ian
458
     Taylor, ian@cygnus.com.
459
460
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
461
     correctly on the PA (and any other target where colons are optional).
462
     Jeff Law, law@cs.utah.edu.
463
464
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
465
     get squashed into "cmp r1,r2||trap#1", with the all important space
466
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
467
468
  /* This macro gets the next input character.  */
469
470
62.2k
#define GET()             \
471
60.7M
  (from < fromend            \
472
60.7M
   ? * (unsigned char *) (from++)        \
473
60.7M
   : (saved_input = NULL,          \
474
65.5k
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
475
65.5k
      from = input_buffer,          \
476
65.5k
      fromend = from + fromlen,         \
477
65.5k
      (fromlen == 0            \
478
65.5k
       ? EOF              \
479
65.5k
       : * (unsigned char *) (from++))))
480
481
  /* This macro pushes a character back on the input stream.  */
482
483
5.10M
#define UNGET(uch) (*--from = (uch))
484
485
  /* This macro puts a character into the output buffer.  If this
486
     character fills the output buffer, this macro jumps to the label
487
     TOFULL.  We use this rather ugly approach because we need to
488
     handle two different termination conditions: EOF on the input
489
     stream, and a full output buffer.  It would be simpler if we
490
     always read in the entire input stream before processing it, but
491
     I don't want to make such a significant change to the assembler's
492
     memory usage.  */
493
494
62.2k
#define PUT(pch)        \
495
45.3M
  do            \
496
45.3M
    {           \
497
45.3M
      *to++ = (pch);        \
498
45.3M
      if (to >= toend)       \
499
45.3M
  goto tofull;       \
500
45.3M
    }           \
501
45.3M
  while (0)
502
503
62.2k
  if (saved_input != NULL)
504
1.66k
    {
505
1.66k
      from = saved_input;
506
1.66k
      fromend = from + saved_input_len;
507
1.66k
    }
508
60.5k
  else
509
60.5k
    {
510
60.5k
      fromlen = (*get) (input_buffer, sizeof input_buffer);
511
60.5k
      if (fromlen == 0)
512
1.03k
  return 0;
513
59.5k
      from = input_buffer;
514
59.5k
      fromend = from + fromlen;
515
516
59.5k
      if (multibyte_handling == multibyte_warn)
517
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
518
0
                (const unsigned char* ) fromend,
519
0
                true /* Generate warnings.  */);
520
59.5k
    }
521
522
38.4M
  while (1)
523
38.4M
    {
524
      /* The cases in this switch end with continue, in order to
525
   branch back to the top of this while loop and generate the
526
   next output character in the appropriate state.  */
527
38.4M
      switch (state)
528
38.4M
  {
529
2.69M
  case -1:
530
2.69M
    ch = *out_string++;
531
2.69M
    if (*out_string == '\0')
532
1.23M
      {
533
1.23M
        state = old_state;
534
1.23M
        old_state = 3;
535
1.23M
      }
536
2.69M
    PUT (ch);
537
2.69M
    continue;
538
539
2.69M
  case -2:
540
8.44k
    for (;;)
541
31.2k
      {
542
31.2k
        do
543
5.10M
    {
544
5.10M
      ch = GET ();
545
546
5.10M
      if (ch == EOF)
547
54
        {
548
54
          as_warn (_("end of file in comment"));
549
54
          goto fromeof;
550
54
        }
551
552
5.10M
      if (ch == '\n')
553
292k
        PUT ('\n');
554
5.10M
    }
555
5.10M
        while (ch != '*');
556
557
35.5k
        while ((ch = GET ()) == '*')
558
4.38k
    ;
559
560
31.1k
        if (ch == EOF)
561
0
    {
562
0
      as_warn (_("end of file in comment"));
563
0
      goto fromeof;
564
0
    }
565
566
31.1k
        if (ch == '/')
567
8.38k
    break;
568
569
22.8k
        UNGET (ch);
570
22.8k
      }
571
572
8.38k
    state = old_state;
573
8.38k
    UNGET (' ');
574
8.38k
    continue;
575
576
102k
  case 4:
577
102k
    ch = GET ();
578
102k
    if (ch == EOF)
579
41
      goto fromeof;
580
102k
    else if (ch >= '0' && ch <= '9')
581
59.7k
      PUT (ch);
582
42.5k
    else
583
42.5k
      {
584
42.7k
        while (ch != EOF && IS_WHITESPACE (ch))
585
240
    ch = GET ();
586
42.5k
        if (ch == '"')
587
38.4k
    {
588
38.4k
      quotechar = ch;
589
38.4k
      state = 5;
590
38.4k
      old_state = 3;
591
38.4k
      PUT (ch);
592
38.4k
    }
593
4.03k
        else
594
4.03k
    {
595
272k
      while (ch != EOF && ch != '\n')
596
268k
        ch = GET ();
597
4.03k
      state = 0;
598
4.03k
      PUT (ch);
599
4.03k
    }
600
42.5k
      }
601
102k
    continue;
602
603
4.54M
  case 5:
604
    /* We are going to copy everything up to a quote character,
605
       with special handling for a backslash.  We try to
606
       optimize the copying in the simple case without using the
607
       GET and PUT macros.  */
608
4.54M
    {
609
4.54M
      char *s;
610
4.54M
      ptrdiff_t len;
611
612
74.8M
      for (s = from; s < fromend; s++)
613
74.8M
        {
614
74.8M
    ch = *s;
615
74.8M
    if (ch == '\\'
616
74.8M
        || ch == quotechar
617
74.8M
        || ch == '\n')
618
4.53M
      break;
619
74.8M
        }
620
4.54M
      len = s - from;
621
4.54M
      if (len > toend - to)
622
530
        len = toend - to;
623
4.54M
      if (len > 0)
624
3.14M
        {
625
3.14M
    memcpy (to, from, len);
626
3.14M
    to += len;
627
3.14M
    from += len;
628
3.14M
    if (to >= toend)
629
632
      goto tofull;
630
3.14M
        }
631
4.54M
    }
632
633
4.53M
    ch = GET ();
634
4.53M
    if (ch == EOF)
635
1.94k
      {
636
        /* This buffer is here specifically so
637
     that the UNGET below will work.  */
638
1.94k
        static char one_char_buf[1];
639
640
1.94k
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
641
1.94k
        state = old_state;
642
1.94k
        from = fromend = one_char_buf + 1;
643
1.94k
        fromlen = 1;
644
1.94k
        UNGET ('\n');
645
1.94k
        PUT (quotechar);
646
1.94k
      }
647
4.53M
    else if (ch == quotechar)
648
1.28M
      {
649
1.28M
        state = old_state;
650
1.28M
        PUT (ch);
651
1.28M
      }
652
3.25M
    else if (TC_STRING_ESCAPES && ch == '\\')
653
286k
      {
654
286k
        state = 6;
655
286k
        PUT (ch);
656
286k
      }
657
2.96M
    else if (scrub_m68k_mri && ch == '\n')
658
0
      {
659
        /* Just quietly terminate the string.  This permits lines like
660
       bne  label loop if we haven't reach end yet.  */
661
0
        state = old_state;
662
0
        UNGET (ch);
663
0
        PUT ('\'');
664
0
      }
665
2.96M
    else
666
2.96M
      {
667
2.96M
        PUT (ch);
668
2.96M
      }
669
4.53M
    continue;
670
671
4.53M
  case 6:
672
286k
    state = 5;
673
286k
    ch = GET ();
674
286k
    switch (ch)
675
286k
      {
676
        /* Handle strings broken across lines, by turning '\n' into
677
     '\\' and 'n'.  */
678
1.09k
      case '\n':
679
1.09k
        UNGET ('n');
680
1.09k
        add_newlines++;
681
1.09k
        PUT ('\\');
682
1.09k
        continue;
683
684
1.09k
      case EOF:
685
1
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
686
1
        PUT (quotechar);
687
1
        continue;
688
689
3.28k
      case '"':
690
56.4k
      case '\\':
691
60.5k
      case 'b':
692
128k
      case 'f':
693
136k
      case 'n':
694
138k
      case 'r':
695
139k
      case 't':
696
204k
      case 'v':
697
205k
      case 'x':
698
207k
      case 'X':
699
210k
      case '0':
700
210k
      case '1':
701
210k
      case '2':
702
211k
      case '3':
703
211k
      case '4':
704
212k
      case '5':
705
213k
      case '6':
706
213k
      case '7':
707
213k
        break;
708
709
72.5k
      default:
710
#ifdef ONLY_STANDARD_ESCAPES
711
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
712
#endif
713
72.5k
        break;
714
286k
      }
715
285k
    PUT (ch);
716
285k
    continue;
717
718
#ifdef DOUBLEBAR_PARALLEL
719
  case 13:
720
    ch = GET ();
721
    if (ch != '|')
722
      abort ();
723
724
    /* Reset back to state 1 and pretend that we are parsing a
725
       line from just after the first white space.  */
726
    state = 1;
727
    PUT ('|');
728
#ifdef TC_TIC6X
729
    /* "||^" is used for SPMASKed instructions.  */
730
    ch = GET ();
731
    if (ch == EOF)
732
      goto fromeof;
733
    else if (ch == '^')
734
      PUT ('^');
735
    else
736
      UNGET (ch);
737
#endif
738
    continue;
739
#endif
740
#ifdef TC_Z80
741
  case 16:
742
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
743
    ch = GET ();
744
    if (ch == 'f' || ch == 'F')
745
      {
746
        state = 17;
747
        PUT (ch);
748
      }
749
    else
750
      {
751
        if (ch != EOF)
752
    UNGET (ch);
753
        state = 9;
754
        break;
755
      }
756
    /* Fall through.  */
757
  case 17:
758
    /* We have seen "af" at the start of a symbol,
759
       a ' here is a part of that symbol.  */
760
    ch = GET ();
761
    state = 9;
762
    if (ch == '\'')
763
      /* Change to avoid warning about unclosed string.  */
764
      PUT ('`');
765
    else if (ch != EOF)
766
      UNGET (ch);
767
    break;
768
#endif
769
38.4M
  }
770
771
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
772
773
      /* flushchar: */
774
30.8M
      ch = GET ();
775
776
#ifdef TC_PREDICATE_START_CHAR
777
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
778
  {
779
    state += 14;
780
    PUT (ch);
781
    continue;
782
  }
783
      else if (state == 14 || state == 15)
784
  {
785
    if (ch == TC_PREDICATE_END_CHAR)
786
      {
787
        state -= 14;
788
        PUT (ch);
789
        ch = GET ();
790
      }
791
    else
792
      {
793
        PUT (ch);
794
        continue;
795
      }
796
  }
797
#endif
798
799
33.1M
    recycle:
800
801
#if defined TC_ARM && defined OBJ_ELF
802
      /* We need to watch out for .symver directives.  See the comment later
803
   in this function.  */
804
      if (symver_state == NULL)
805
  {
806
    if ((state == 0 || state == 1) && ch == symver_pseudo[0])
807
      symver_state = symver_pseudo + 1;
808
  }
809
      else
810
  {
811
    /* We advance to the next state if we find the right
812
       character.  */
813
    if (ch != '\0' && (*symver_state == ch))
814
      ++symver_state;
815
    else if (*symver_state != '\0')
816
      /* We did not get the expected character, or we didn't
817
         get a valid terminating character after seeing the
818
         entire pseudo-op, so we must go back to the beginning.  */
819
      symver_state = NULL;
820
    else
821
      {
822
        /* We've read the entire pseudo-op.  If this is the end
823
     of the line, go back to the beginning.  */
824
        if (IS_NEWLINE (ch))
825
    symver_state = NULL;
826
      }
827
  }
828
#endif /* TC_ARM && OBJ_ELF */
829
830
#ifdef TC_M68K
831
      /* We want to have pseudo-ops which control whether we are in
832
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
833
   the scrubber, that means that we need a special purpose
834
   recognizer here.  */
835
      if (mri_state == NULL)
836
  {
837
    if ((state == 0 || state == 1)
838
        && ch == mri_pseudo[0])
839
      mri_state = mri_pseudo + 1;
840
  }
841
      else
842
  {
843
    /* We advance to the next state if we find the right
844
       character, or if we need a space character and we get any
845
       whitespace character, or if we need a '0' and we get a
846
       '1' (this is so that we only need one state to handle
847
       ``.mri 0'' and ``.mri 1'').  */
848
    if (ch != '\0'
849
        && (*mri_state == ch
850
      || (*mri_state == ' '
851
          && lex[ch] == LEX_IS_WHITESPACE)
852
      || (*mri_state == '0'
853
          && ch == '1')))
854
      {
855
        mri_last_ch = ch;
856
        ++mri_state;
857
      }
858
    else if (*mri_state != '\0'
859
       || (lex[ch] != LEX_IS_WHITESPACE
860
           && lex[ch] != LEX_IS_NEWLINE))
861
      {
862
        /* We did not get the expected character, or we didn't
863
     get a valid terminating character after seeing the
864
     entire pseudo-op, so we must go back to the
865
     beginning.  */
866
        mri_state = NULL;
867
      }
868
    else
869
      {
870
        /* We've read the entire pseudo-op.  mri_last_ch is
871
     either '0' or '1' indicating whether to enter or
872
     leave MRI mode.  */
873
        do_scrub_begin (mri_last_ch == '1');
874
        mri_state = NULL;
875
876
        /* We continue handling the character as usual.  The
877
     main gas reader must also handle the .mri pseudo-op
878
     to control expression parsing and the like.  */
879
      }
880
  }
881
#endif
882
883
33.1M
      if (ch == EOF)
884
59.0k
  {
885
59.0k
    if (state != 0)
886
2.49k
      {
887
2.49k
        as_warn (_("end of file not at end of a line; newline inserted"));
888
2.49k
        state = 0;
889
2.49k
        PUT ('\n');
890
2.49k
      }
891
59.0k
    goto fromeof;
892
59.0k
  }
893
894
33.1M
      switch (lex[ch])
895
33.1M
  {
896
4.42M
  case LEX_IS_WHITESPACE:
897
4.42M
    do
898
5.04M
      {
899
5.04M
        ch = GET ();
900
5.04M
      }
901
5.04M
    while (ch != EOF && IS_WHITESPACE (ch));
902
4.42M
    if (ch == EOF)
903
155
      goto fromeof;
904
905
4.42M
    if (state == 0)
906
1.08M
      {
907
        /* Preserve a single whitespace character at the
908
     beginning of a line.  */
909
1.08M
        state = 1;
910
1.08M
        UNGET (ch);
911
1.08M
        PUT (' ');
912
1.08M
        break;
913
1.08M
      }
914
915
#ifdef KEEP_WHITE_AROUND_COLON
916
    if (lex[ch] == LEX_IS_COLON)
917
      {
918
        /* Only keep this white if there's no white *after* the
919
     colon.  */
920
        ch2 = GET ();
921
        if (ch2 != EOF)
922
    UNGET (ch2);
923
        if (!IS_WHITESPACE (ch2))
924
    {
925
      state = 9;
926
      UNGET (ch);
927
      PUT (' ');
928
      break;
929
    }
930
      }
931
#endif
932
3.33M
    if (IS_COMMENT (ch)
933
3.33M
        || IS_LINE_SEPARATOR (ch)
934
3.33M
        || IS_PARALLEL_SEPARATOR (ch))
935
38.5k
      {
936
38.5k
        if (scrub_m68k_mri)
937
0
    {
938
      /* In MRI mode, we keep these spaces.  */
939
0
      UNGET (ch);
940
0
      PUT (' ');
941
0
      break;
942
0
    }
943
38.5k
        goto recycle;
944
38.5k
      }
945
946
    /* If we're in state 2 or 11, we've seen a non-white
947
       character followed by whitespace.  If the next character
948
       is ':', this is whitespace after a label name which we
949
       normally must ignore.  In MRI mode, though, spaces are
950
       not permitted between the label and the colon.  */
951
3.29M
    if ((state == 2 || state == 11)
952
3.29M
        && lex[ch] == LEX_IS_COLON
953
3.29M
        && ! scrub_m68k_mri)
954
6.97k
      {
955
6.97k
        state = 1;
956
6.97k
        PUT (ch);
957
6.97k
        break;
958
6.97k
      }
959
960
3.29M
    switch (state)
961
3.29M
      {
962
3.46k
      case 1:
963
        /* We can arrive here if we leave a leading whitespace
964
     character at the beginning of a line.  */
965
3.46k
        goto recycle;
966
760k
      case 2:
967
760k
        state = 3;
968
760k
        if (to + 1 < toend)
969
760k
    {
970
      /* Optimize common case by skipping UNGET/GET.  */
971
760k
      PUT (' '); /* Sp after opco */
972
760k
      goto recycle;
973
760k
    }
974
3
        UNGET (ch);
975
3
        PUT (' ');
976
0
        break;
977
77.0k
      case 3:
978
77.0k
#ifndef TC_KEEP_OPERAND_SPACES
979
        /* For TI C6X, we keep these spaces as they may separate
980
     functional unit specifiers from operands.  */
981
77.0k
        if (scrub_m68k_mri)
982
0
#endif
983
0
    {
984
      /* In MRI mode, we keep these spaces.  */
985
0
      UNGET (ch);
986
0
      PUT (' ');
987
0
      break;
988
0
    }
989
77.0k
        goto recycle; /* Sp in operands */
990
1.48M
      case 9:
991
1.48M
      case 10:
992
1.48M
#ifndef TC_KEEP_OPERAND_SPACES
993
1.48M
        if (scrub_m68k_mri)
994
0
#endif
995
0
    {
996
      /* In MRI mode, we keep these spaces.  */
997
0
      state = 3;
998
0
      UNGET (ch);
999
0
      PUT (' ');
1000
0
      break;
1001
0
    }
1002
1.48M
        state = 10; /* Sp after symbol char */
1003
1.48M
        goto recycle;
1004
961k
      case 11:
1005
961k
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1006
0
    state = 1;
1007
961k
        else
1008
961k
    {
1009
      /* We know that ch is not ':', since we tested that
1010
         case above.  Therefore this is not a label, so it
1011
         must be the opcode, and we've just seen the
1012
         whitespace after it.  */
1013
961k
      state = 3;
1014
961k
    }
1015
961k
        UNGET (ch);
1016
961k
        PUT (' '); /* Sp after label definition.  */
1017
961k
        break;
1018
961k
      default:
1019
0
        BAD_CASE (state);
1020
3.29M
      }
1021
961k
    break;
1022
1023
961k
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1024
0
    ch2 = GET ();
1025
0
    if (ch2 == '*')
1026
0
      {
1027
0
        for (;;)
1028
0
    {
1029
0
      do
1030
0
        {
1031
0
          ch2 = GET ();
1032
0
          if (ch2 != EOF && IS_NEWLINE (ch2))
1033
0
      add_newlines++;
1034
0
        }
1035
0
      while (ch2 != EOF && ch2 != '*');
1036
1037
0
      while (ch2 == '*')
1038
0
        ch2 = GET ();
1039
1040
0
      if (ch2 == EOF || ch2 == '/')
1041
0
        break;
1042
1043
      /* This UNGET will ensure that we count newlines
1044
         correctly.  */
1045
0
      UNGET (ch2);
1046
0
    }
1047
1048
0
        if (ch2 == EOF)
1049
0
    as_warn (_("end of file in multiline comment"));
1050
1051
0
        ch = ' ';
1052
0
        goto recycle;
1053
0
      }
1054
#ifdef DOUBLESLASH_LINE_COMMENTS
1055
    else if (ch2 == '/')
1056
      {
1057
        do
1058
    {
1059
      ch = GET ();
1060
    }
1061
        while (ch != EOF && !IS_NEWLINE (ch));
1062
        if (ch == EOF)
1063
    as_warn ("end of file in comment; newline inserted");
1064
        state = 0;
1065
        PUT ('\n');
1066
        break;
1067
      }
1068
#endif
1069
0
    else
1070
0
      {
1071
0
        if (ch2 != EOF)
1072
0
    UNGET (ch2);
1073
0
        if (state == 9 || state == 10)
1074
0
    state = 3;
1075
0
        PUT (ch);
1076
0
      }
1077
0
    break;
1078
1079
1.25M
  case LEX_IS_STRINGQUOTE:
1080
1.25M
    quotechar = ch;
1081
1.25M
    if (state == 10)
1082
9.25k
      {
1083
        /* Preserve the whitespace in foo "bar".  */
1084
9.25k
        UNGET (ch);
1085
9.25k
        state = 3;
1086
9.25k
        PUT (' ');
1087
1088
        /* PUT didn't jump out.  We could just break, but we
1089
     know what will happen, so optimize a bit.  */
1090
9.25k
        ch = GET ();
1091
9.25k
        old_state = 9;
1092
9.25k
      }
1093
1.24M
    else if (state == 3)
1094
61.8k
      old_state = 9;
1095
1.17M
    else
1096
1.17M
      old_state = state;
1097
1.25M
    state = 5;
1098
1.25M
    PUT (ch);
1099
1.25M
    break;
1100
1101
1.26M
  case LEX_IS_ONECHAR_QUOTE:
1102
#ifdef H_TICK_HEX
1103
    if (state == 9 && enable_h_tick_hex)
1104
      {
1105
        char c;
1106
1107
        c = GET ();
1108
        as_warn ("'%c found after symbol", c);
1109
        UNGET (c);
1110
      }
1111
#endif
1112
1.26M
    if (state == 10)
1113
66.9k
      {
1114
        /* Preserve the whitespace in foo 'b'.  */
1115
66.9k
        UNGET (ch);
1116
66.9k
        state = 3;
1117
66.9k
        PUT (' ');
1118
66.9k
        break;
1119
66.9k
      }
1120
1.20M
    ch = GET ();
1121
1.20M
    if (ch == EOF)
1122
8
      {
1123
8
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1124
8
        ch = 0;
1125
8
      }
1126
1.20M
    if (ch == '\\')
1127
141k
      {
1128
141k
        ch = GET ();
1129
141k
        if (ch == EOF)
1130
1
    {
1131
1
      as_warn (_("end of file in escape character"));
1132
1
      ch = '\\';
1133
1
    }
1134
141k
        else
1135
141k
    ch = process_escape (ch);
1136
141k
      }
1137
1.20M
    sprintf (out_buf, "%d", (int) (unsigned char) ch);
1138
1139
    /* None of these 'x constants for us.  We want 'x'.  */
1140
1.20M
    if ((ch = GET ()) != '\'')
1141
1.19M
      {
1142
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1143
        as_warn (_("missing close quote; (assumed)"));
1144
#else
1145
1.19M
        if (ch != EOF)
1146
1.19M
    UNGET (ch);
1147
1.19M
#endif
1148
1.19M
      }
1149
1.20M
    if (strlen (out_buf) == 1)
1150
7.91k
      {
1151
7.91k
        PUT (out_buf[0]);
1152
7.91k
        break;
1153
7.91k
      }
1154
1.19M
    if (state == 9)
1155
60.2k
      old_state = 3;
1156
1.13M
    else
1157
1.13M
      old_state = state;
1158
1.19M
    state = -1;
1159
1.19M
    out_string = out_buf;
1160
1.19M
    PUT (*out_string++);
1161
1.19M
    break;
1162
1163
1.61M
  case LEX_IS_COLON:
1164
#ifdef KEEP_WHITE_AROUND_COLON
1165
    state = 9;
1166
#else
1167
1.61M
    if (state == 9 || state == 10)
1168
97.3k
      state = 3;
1169
1.51M
    else if (state != 3)
1170
1.50M
      state = 1;
1171
1.61M
#endif
1172
1.61M
    PUT (ch);
1173
1.61M
    break;
1174
1175
3.66M
  case LEX_IS_NEWLINE:
1176
    /* Roll out a bunch of newlines from inside comments, etc.  */
1177
3.66M
    if (add_newlines)
1178
1.09k
      {
1179
1.09k
        --add_newlines;
1180
1.09k
        UNGET (ch);
1181
1.09k
      }
1182
    /* Fall through.  */
1183
1184
5.74M
  case LEX_IS_LINE_SEPARATOR:
1185
5.74M
    state = 0;
1186
5.74M
    PUT (ch);
1187
5.74M
    break;
1188
1189
5.74M
  case LEX_IS_PARALLEL_SEPARATOR:
1190
0
    state = 1;
1191
0
    PUT (ch);
1192
0
    break;
1193
1194
#ifdef TC_V850
1195
  case LEX_IS_DOUBLEDASH_1ST:
1196
    ch2 = GET ();
1197
    if (ch2 != '-')
1198
      {
1199
        if (ch2 != EOF)
1200
    UNGET (ch2);
1201
        goto de_fault;
1202
      }
1203
    /* Read and skip to end of line.  */
1204
    do
1205
      {
1206
        ch = GET ();
1207
      }
1208
    while (ch != EOF && ch != '\n');
1209
1210
    if (ch == EOF)
1211
      as_warn (_("end of file in comment; newline inserted"));
1212
1213
    state = 0;
1214
    PUT ('\n');
1215
    break;
1216
#endif
1217
#ifdef DOUBLEBAR_PARALLEL
1218
  case LEX_IS_DOUBLEBAR_1ST:
1219
    ch2 = GET ();
1220
    if (ch2 != EOF)
1221
      UNGET (ch2);
1222
    if (ch2 != '|')
1223
      goto de_fault;
1224
1225
    /* Handle '||' in two states as invoking PUT twice might
1226
       result in the first one jumping out of this loop.  We'd
1227
       then lose track of the state and one '|' char.  */
1228
    state = 13;
1229
    PUT ('|');
1230
    break;
1231
#endif
1232
561k
  case LEX_IS_LINE_COMMENT_START:
1233
    /* FIXME-someday: The two character comment stuff was badly
1234
       thought out.  On i386, we want '/' as line comment start
1235
       AND we want C style comments.  hence this hack.  The
1236
       whole lexical process should be reworked.  xoxorich.  */
1237
561k
    if (ch == '/')
1238
243k
      {
1239
243k
        ch2 = GET ();
1240
243k
        if (ch2 == '*')
1241
8.39k
    {
1242
8.39k
      old_state = 3;
1243
8.39k
      state = -2;
1244
8.39k
      break;
1245
8.39k
    }
1246
234k
        else if (ch2 != EOF)
1247
234k
    {
1248
234k
      UNGET (ch2);
1249
234k
    }
1250
243k
      }
1251
1252
553k
    if (state == 0 || state == 1)  /* Only comment at start of line.  */
1253
139k
      {
1254
139k
        int startch;
1255
1256
139k
        startch = ch;
1257
1258
139k
        do
1259
140k
    {
1260
140k
      ch = GET ();
1261
140k
    }
1262
140k
        while (ch != EOF && IS_WHITESPACE (ch));
1263
1264
139k
        if (ch == EOF)
1265
307
    {
1266
307
      as_warn (_("end of file in comment; newline inserted"));
1267
307
      PUT ('\n');
1268
307
      break;
1269
307
    }
1270
1271
139k
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1272
97.0k
    {
1273
      /* Not a cpp line.  */
1274
903k
      while (ch != EOF && !IS_NEWLINE (ch))
1275
805k
        ch = GET ();
1276
97.0k
      if (ch == EOF)
1277
44
        {
1278
44
          as_warn (_("end of file in comment; newline inserted"));
1279
44
          PUT ('\n');
1280
44
        }
1281
97.0k
      else /* IS_NEWLINE (ch) */
1282
97.0k
        {
1283
          /* To process non-zero add_newlines.  */
1284
97.0k
          UNGET (ch);
1285
97.0k
        }
1286
97.0k
      state = 0;
1287
97.0k
      break;
1288
97.0k
    }
1289
        /* Looks like `# 123 "filename"' from cpp.  */
1290
42.5k
        UNGET (ch);
1291
42.5k
        old_state = 4;
1292
42.5k
        state = -1;
1293
42.5k
        if (scrub_m68k_mri)
1294
0
    out_string = "\tlinefile ";
1295
42.5k
        else
1296
42.5k
    out_string = "\t.linefile ";
1297
42.5k
        PUT (*out_string++);
1298
42.5k
        break;
1299
42.5k
      }
1300
1301
#ifdef TC_D10V
1302
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1303
       Trap is the only short insn that has a first operand that is
1304
       neither register nor label.
1305
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1306
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1307
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1308
       only character in line_comment_chars for d10v, hence we
1309
       can recognize it as such.  */
1310
    /* An alternative approach would be to reset the state to 1 when
1311
       we see '||', '<'- or '->', but that seems to be overkill.  */
1312
    if (state == 10)
1313
      PUT (' ');
1314
#endif
1315
    /* We have a line comment character which is not at the
1316
       start of a line.  If this is also a normal comment
1317
       character, fall through.  Otherwise treat it as a default
1318
       character.  */
1319
413k
    if (strchr (tc_comment_chars, ch) == NULL
1320
413k
        && (! scrub_m68k_mri
1321
226k
      || (ch != '!' && ch != '*')))
1322
226k
      goto de_fault;
1323
186k
    if (scrub_m68k_mri
1324
186k
        && (ch == '!' || ch == '*' || ch == '#')
1325
186k
        && state != 1
1326
186k
        && state != 10)
1327
0
      goto de_fault;
1328
    /* Fall through.  */
1329
186k
  case LEX_IS_COMMENT_START:
1330
#if defined TC_ARM && defined OBJ_ELF
1331
    /* On the ARM, `@' is the comment character.
1332
       Unfortunately this is also a special character in ELF .symver
1333
       directives (and .type, though we deal with those another way).
1334
       So we check if this line is such a directive, and treat
1335
       the character as default if so.  This is a hack.  */
1336
    if ((symver_state != NULL) && (*symver_state == 0))
1337
      goto de_fault;
1338
#endif
1339
1340
    /* Care is needed not to damage occurrences of \<comment-char>
1341
       by stripping the <comment-char> onwards.  Yuck.  */
1342
186k
    if ((to > tostart ? to[-1] : last_char) == '\\')
1343
      /* Do not treat the <comment-char> as a start-of-comment.  */
1344
351
      goto de_fault;
1345
1346
#ifdef WARN_COMMENTS
1347
    if (!found_comment)
1348
      found_comment_file = as_where (&found_comment);
1349
#endif
1350
186k
    do
1351
4.79M
      {
1352
4.79M
        ch = GET ();
1353
4.79M
      }
1354
4.79M
    while (ch != EOF && !IS_NEWLINE (ch));
1355
186k
    if (ch == EOF)
1356
73
      as_warn (_("end of file in comment; newline inserted"));
1357
186k
    state = 0;
1358
186k
    PUT ('\n');
1359
186k
    break;
1360
1361
#ifdef H_TICK_HEX
1362
  case LEX_IS_H:
1363
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1364
       the H' with 0x to make them gas-style hex characters.  */
1365
    if (enable_h_tick_hex)
1366
      {
1367
        char quot;
1368
1369
        quot = GET ();
1370
        if (quot == '\'')
1371
    {
1372
      UNGET ('x');
1373
      ch = '0';
1374
    }
1375
        else
1376
    UNGET (quot);
1377
      }
1378
#endif
1379
    /* Fall through.  */
1380
1381
12.1M
  case LEX_IS_SYMBOL_COMPONENT:
1382
12.1M
    if (state == 10)
1383
1.38M
      {
1384
        /* This is a symbol character following another symbol
1385
     character, with whitespace in between.  We skipped
1386
     the whitespace earlier, so output it now.  */
1387
1.38M
        UNGET (ch);
1388
1.38M
        state = 3;
1389
1.38M
        PUT (' ');
1390
1.38M
        break;
1391
1.38M
      }
1392
1393
#ifdef TC_Z80
1394
    /* "af'" is a symbol containing '\''.  */
1395
    if (state == 3 && (ch == 'a' || ch == 'A'))
1396
      {
1397
        state = 16;
1398
        PUT (ch);
1399
        ch = GET ();
1400
        if (ch == 'f' || ch == 'F')
1401
    {
1402
      state = 17;
1403
      PUT (ch);
1404
      break;
1405
    }
1406
        else
1407
    {
1408
      state = 9;
1409
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1410
        {
1411
          if (ch != EOF)
1412
      UNGET (ch);
1413
          break;
1414
        }
1415
    }
1416
      }
1417
#endif
1418
10.7M
    if (state == 3)
1419
3.77M
      state = 9;
1420
1421
    /* This is a common case.  Quickly copy CH and all the
1422
       following symbol component or normal characters.  */
1423
10.7M
    if (to + 1 < toend
1424
10.7M
        && mri_state == NULL
1425
#if defined TC_ARM && defined OBJ_ELF
1426
        && symver_state == NULL
1427
#endif
1428
10.7M
        )
1429
10.7M
      {
1430
10.7M
        char *s;
1431
10.7M
        ptrdiff_t len;
1432
1433
81.0M
        for (s = from; s < fromend; s++)
1434
81.0M
    {
1435
81.0M
      int type;
1436
1437
81.0M
      ch2 = *(unsigned char *) s;
1438
81.0M
      type = lex[ch2];
1439
81.0M
      if (type != 0
1440
81.0M
          && type != LEX_IS_SYMBOL_COMPONENT)
1441
10.7M
        break;
1442
81.0M
    }
1443
1444
10.7M
        if (s > from)
1445
    /* Handle the last character normally, for
1446
       simplicity.  */
1447
7.54M
    --s;
1448
1449
10.7M
        len = s - from;
1450
1451
10.7M
        if (len > (toend - to) - 1)
1452
354
    len = (toend - to) - 1;
1453
1454
10.7M
        if (len > 0)
1455
5.99M
    {
1456
5.99M
      PUT (ch);
1457
5.99M
      memcpy (to, from, len);
1458
5.99M
      to += len;
1459
5.99M
      from += len;
1460
5.99M
      if (to >= toend)
1461
381
        goto tofull;
1462
5.99M
      ch = GET ();
1463
5.99M
    }
1464
10.7M
      }
1465
1466
    /* Fall through.  */
1467
16.8M
  default:
1468
17.1M
  de_fault:
1469
    /* Some relatively `normal' character.  */
1470
17.1M
    if (state == 0)
1471
2.82M
      {
1472
2.82M
        state = 11; /* Now seeing label definition.  */
1473
2.82M
      }
1474
14.2M
    else if (state == 1)
1475
2.52M
      {
1476
2.52M
        state = 2;  /* Ditto.  */
1477
2.52M
      }
1478
11.7M
    else if (state == 9)
1479
4.74M
      {
1480
4.74M
        if (!IS_SYMBOL_COMPONENT (ch))
1481
812k
    state = 3;
1482
4.74M
      }
1483
7.02M
    else if (state == 10)
1484
21.2k
      {
1485
21.2k
        if (ch == '\\')
1486
1.00k
    {
1487
      /* Special handling for backslash: a backslash may
1488
         be the beginning of a formal parameter (of a
1489
         macro) following another symbol character, with
1490
         whitespace in between.  If that is the case, we
1491
         output a space before the parameter.  Strictly
1492
         speaking, correct handling depends upon what the
1493
         macro parameter expands into; if the parameter
1494
         expands into something which does not start with
1495
         an operand character, then we don't want to keep
1496
         the space.  We don't have enough information to
1497
         make the right choice, so here we are making the
1498
         choice which is more likely to be correct.  */
1499
1.00k
      if (to + 1 >= toend)
1500
0
        {
1501
          /* If we're near the end of the buffer, save the
1502
             character for the next time round.  Otherwise
1503
             we'll lose our state.  */
1504
0
          UNGET (ch);
1505
0
          goto tofull;
1506
0
        }
1507
1.00k
      *to++ = ' ';
1508
1.00k
    }
1509
1510
21.2k
        state = 3;
1511
21.2k
      }
1512
17.1M
    PUT (ch);
1513
17.1M
    break;
1514
33.1M
  }
1515
33.1M
    }
1516
1517
  /*NOTREACHED*/
1518
1519
59.2k
 fromeof:
1520
  /* We have reached the end of the input.  */
1521
59.2k
  if (to > tostart)
1522
59.2k
    last_char = to[-1];
1523
59.2k
  return to - tostart;
1524
1525
1.90k
 tofull:
1526
  /* The output buffer is full.  Save any input we have not yet
1527
     processed.  */
1528
1.90k
  if (fromend > from)
1529
1.66k
    {
1530
1.66k
      saved_input = from;
1531
1.66k
      saved_input_len = fromend - from;
1532
1.66k
    }
1533
240
  else
1534
240
    saved_input = NULL;
1535
1536
1.90k
  if (to > tostart)
1537
1.90k
    last_char = to[-1];
1538
1.90k
  return to - tostart;
1539
61.1k
}
1540
1541
/* Return amount of pending input.  */
1542
1543
size_t
1544
do_scrub_pending (void)
1545
153k
{
1546
153k
  size_t len = 0;
1547
153k
  if (saved_input)
1548
603
    len += saved_input_len;
1549
153k
  if (state == -1)
1550
82
    len += strlen (out_string);
1551
153k
  return len;
1552
153k
}