Coverage Report

Created: 2023-06-29 07:09

/src/binutils-gdb/gas/app.c
Line
Count
Source (jump to first uncovered line)
1
/* This is the Assembler Pre-Processor
2
   Copyright (C) 1987-2023 Free Software Foundation, Inc.
3
4
   This file is part of GAS, the GNU Assembler.
5
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
27
#include "as.h"
28
29
#if (__STDC__ != 1)
30
#ifndef const
31
#define const  /* empty */
32
#endif
33
#endif
34
35
#ifdef H_TICK_HEX
36
int enable_h_tick_hex = 0;
37
#endif
38
39
#ifdef TC_M68K
40
/* Whether we are scrubbing in m68k MRI mode.  This is different from
41
   flag_m68k_mri, because the two flags will be affected by the .mri
42
   pseudo-op at different times.  */
43
static int scrub_m68k_mri;
44
45
/* The pseudo-op which switches in and out of MRI mode.  See the
46
   comment in do_scrub_chars.  */
47
static const char mri_pseudo[] = ".mri 0";
48
#else
49
2.99M
#define scrub_m68k_mri 0
50
#endif
51
52
#if defined TC_ARM && defined OBJ_ELF
53
/* The pseudo-op for which we need to special-case `@' characters.
54
   See the comment in do_scrub_chars.  */
55
static const char   symver_pseudo[] = ".symver";
56
static const char * symver_state;
57
#endif
58
59
static char last_char;
60
61
static char lex[256];
62
static const char symbol_chars[] =
63
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
64
65
30.9M
#define LEX_IS_SYMBOL_COMPONENT   1
66
2.23M
#define LEX_IS_WHITESPACE   2
67
3.24M
#define LEX_IS_LINE_SEPARATOR   3
68
914k
#define LEX_IS_COMMENT_START    4
69
204k
#define LEX_IS_LINE_COMMENT_START 5
70
0
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
71
309k
#define LEX_IS_STRINGQUOTE    8
72
2.87M
#define LEX_IS_COLON      9
73
6.54M
#define LEX_IS_NEWLINE      10
74
246k
#define LEX_IS_ONECHAR_QUOTE    11
75
#ifdef TC_V850
76
#define LEX_IS_DOUBLEDASH_1ST   12
77
#endif
78
#ifdef TC_M32R
79
#define DOUBLEBAR_PARALLEL
80
#endif
81
#ifdef DOUBLEBAR_PARALLEL
82
#define LEX_IS_DOUBLEBAR_1ST    13
83
#endif
84
842k
#define LEX_IS_PARALLEL_SEPARATOR 14
85
#ifdef H_TICK_HEX
86
#define LEX_IS_H      15
87
#endif
88
1.42M
#define IS_SYMBOL_COMPONENT(c)    (lex[c] == LEX_IS_SYMBOL_COMPONENT)
89
1.23M
#define IS_WHITESPACE(c)    (lex[c] == LEX_IS_WHITESPACE)
90
1.71M
#define IS_LINE_SEPARATOR(c)    (lex[c] == LEX_IS_LINE_SEPARATOR)
91
842k
#define IS_PARALLEL_SEPARATOR(c)  (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
92
1.71M
#define IS_COMMENT(c)     (lex[c] == LEX_IS_COMMENT_START)
93
#define IS_LINE_COMMENT(c)    (lex[c] == LEX_IS_LINE_COMMENT_START)
94
4.59M
#define IS_NEWLINE(c)     (lex[c] == LEX_IS_NEWLINE)
95
96
static int process_escape (int);
97
98
/* FIXME-soon: The entire lexer/parser thingy should be
99
   built statically at compile time rather than dynamically
100
   each and every time the assembler is run.  xoxorich.  */
101
102
void
103
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
104
633
{
105
633
  const char *p;
106
633
  int c;
107
108
633
  lex[' '] = LEX_IS_WHITESPACE;
109
633
  lex['\t'] = LEX_IS_WHITESPACE;
110
633
  lex['\r'] = LEX_IS_WHITESPACE;
111
633
  lex['\n'] = LEX_IS_NEWLINE;
112
633
  lex[':'] = LEX_IS_COLON;
113
114
#ifdef TC_M68K
115
  scrub_m68k_mri = m68k_mri;
116
117
  if (! m68k_mri)
118
#endif
119
633
    {
120
633
      lex['"'] = LEX_IS_STRINGQUOTE;
121
122
633
#if ! defined (TC_HPPA)
123
633
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
124
633
#endif
125
126
#ifdef SINGLE_QUOTE_STRINGS
127
      lex['\''] = LEX_IS_STRINGQUOTE;
128
#endif
129
633
    }
130
131
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
132
     in state 5 of do_scrub_chars must be changed.  */
133
134
  /* Note that these override the previous defaults, e.g. if ';' is a
135
     comment char, then it isn't a line separator.  */
136
41.7k
  for (p = symbol_chars; *p; ++p)
137
41.1k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
138
139
81.6k
  for (c = 128; c < 256; ++c)
140
81.0k
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
141
142
633
#ifdef tc_symbol_chars
143
  /* This macro permits the processor to specify all characters which
144
     may appears in an operand.  This will prevent the scrubber from
145
     discarding meaningful whitespace in certain cases.  The i386
146
     backend uses this to support prefixes, which can confuse the
147
     scrubber as to whether it is parsing operands or opcodes.  */
148
5.06k
  for (p = tc_symbol_chars; *p; ++p)
149
4.43k
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
150
633
#endif
151
152
  /* The m68k backend wants to be able to change comment_chars.  */
153
#ifndef tc_comment_chars
154
#define tc_comment_chars comment_chars
155
#endif
156
1.26k
  for (p = tc_comment_chars; *p; p++)
157
633
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
158
159
1.89k
  for (p = line_comment_chars; *p; p++)
160
1.26k
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
161
162
633
#ifndef tc_line_separator_chars
163
633
#define tc_line_separator_chars line_separator_chars
164
633
#endif
165
1.26k
  for (p = tc_line_separator_chars; *p; p++)
166
633
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
167
168
#ifdef tc_parallel_separator_chars
169
  /* This macro permits the processor to specify all characters which
170
     separate parallel insns on the same line.  */
171
  for (p = tc_parallel_separator_chars; *p; p++)
172
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
173
#endif
174
175
  /* Only allow slash-star comments if slash is not in use.
176
     FIXME: This isn't right.  We should always permit them.  */
177
633
  if (lex['/'] == 0)
178
0
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
179
180
#ifdef TC_M68K
181
  if (m68k_mri)
182
    {
183
      lex['\''] = LEX_IS_STRINGQUOTE;
184
      lex[';'] = LEX_IS_COMMENT_START;
185
      lex['*'] = LEX_IS_LINE_COMMENT_START;
186
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
187
   then it can't be used in an expression.  */
188
      lex['!'] = LEX_IS_LINE_COMMENT_START;
189
    }
190
#endif
191
192
#ifdef TC_V850
193
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
194
#endif
195
#ifdef DOUBLEBAR_PARALLEL
196
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
197
#endif
198
#ifdef TC_D30V
199
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
200
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
201
#endif
202
203
#ifdef H_TICK_HEX
204
  if (enable_h_tick_hex)
205
    {
206
      lex['h'] = LEX_IS_H;
207
      lex['H'] = LEX_IS_H;
208
    }
209
#endif
210
633
}
211
212
/* Saved state of the scrubber.  */
213
static int state;
214
static int old_state;
215
static const char *out_string;
216
static char out_buf[20];
217
static int add_newlines;
218
static char *saved_input;
219
static size_t saved_input_len;
220
static char input_buffer[32 * 1024];
221
static const char *mri_state;
222
static char mri_last_ch;
223
224
/* Data structure for saving the state of app across #include's.  Note that
225
   app is called asynchronously to the parsing of the .include's, so our
226
   state at the time .include is interpreted is completely unrelated.
227
   That's why we have to save it all.  */
228
229
struct app_save
230
{
231
  int          state;
232
  int          old_state;
233
  const char * out_string;
234
  char         out_buf[sizeof (out_buf)];
235
  int          add_newlines;
236
  char *       saved_input;
237
  size_t       saved_input_len;
238
#ifdef TC_M68K
239
  int          scrub_m68k_mri;
240
#endif
241
  const char * mri_state;
242
  char         mri_last_ch;
243
#if defined TC_ARM && defined OBJ_ELF
244
  const char * symver_state;
245
#endif
246
  char         last_char;
247
};
248
249
char *
250
app_push (void)
251
16.7k
{
252
16.7k
  struct app_save *saved;
253
254
16.7k
  saved = XNEW (struct app_save);
255
16.7k
  saved->state = state;
256
16.7k
  saved->old_state = old_state;
257
16.7k
  saved->out_string = out_string;
258
16.7k
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
259
16.7k
  saved->add_newlines = add_newlines;
260
16.7k
  if (saved_input == NULL)
261
9.18k
    saved->saved_input = NULL;
262
7.53k
  else
263
7.53k
    {
264
7.53k
      saved->saved_input = XNEWVEC (char, saved_input_len);
265
7.53k
      memcpy (saved->saved_input, saved_input, saved_input_len);
266
7.53k
      saved->saved_input_len = saved_input_len;
267
7.53k
    }
268
#ifdef TC_M68K
269
  saved->scrub_m68k_mri = scrub_m68k_mri;
270
#endif
271
16.7k
  saved->mri_state = mri_state;
272
16.7k
  saved->mri_last_ch = mri_last_ch;
273
#if defined TC_ARM && defined OBJ_ELF
274
  saved->symver_state = symver_state;
275
#endif
276
16.7k
  saved->last_char = last_char;
277
278
  /* do_scrub_begin() is not useful, just wastes time.  */
279
280
16.7k
  state = 0;
281
16.7k
  saved_input = NULL;
282
16.7k
  add_newlines = 0;
283
284
16.7k
  return (char *) saved;
285
16.7k
}
286
287
void
288
app_pop (char *arg)
289
16.7k
{
290
16.7k
  struct app_save *saved = (struct app_save *) arg;
291
292
  /* There is no do_scrub_end ().  */
293
16.7k
  state = saved->state;
294
16.7k
  old_state = saved->old_state;
295
16.7k
  out_string = saved->out_string;
296
16.7k
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
297
16.7k
  add_newlines = saved->add_newlines;
298
16.7k
  if (saved->saved_input == NULL)
299
9.17k
    saved_input = NULL;
300
7.53k
  else
301
7.53k
    {
302
7.53k
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
303
0
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
304
7.53k
      saved_input = input_buffer;
305
7.53k
      saved_input_len = saved->saved_input_len;
306
7.53k
      free (saved->saved_input);
307
7.53k
    }
308
#ifdef TC_M68K
309
  scrub_m68k_mri = saved->scrub_m68k_mri;
310
#endif
311
0
  mri_state = saved->mri_state;
312
16.7k
  mri_last_ch = saved->mri_last_ch;
313
#if defined TC_ARM && defined OBJ_ELF
314
  symver_state = saved->symver_state;
315
#endif
316
16.7k
  last_char = saved->last_char;
317
318
16.7k
  free (arg);
319
16.7k
}
320
321
/* @@ This assumes that \n &c are the same on host and target.  This is not
322
   necessarily true.  */
323
324
/* Translate CH, the character following a backslash, into the
   character the escape sequence stands for.  The letters b, f, n, r
   and t map to the matching control characters; a single or double
   quote maps to itself; any other value is returned unchanged.  */

static int
process_escape (int ch)
{
  /* Parallel tables: letters[i] after a backslash yields values[i].  */
  static const char letters[] = { 'b', 'f', 'n', 'r', 't', '\'', '"' };
  static const char values[] = { '\b', '\f', '\n', '\r', '\t', '\'', '"' };
  unsigned int i;

  for (i = 0; i < sizeof (letters); i++)
    if (ch == letters[i])
      return values[i];

  return ch;
}
347
348
0
#define MULTIBYTE_WARN_COUNT_LIMIT 10
349
static unsigned int multibyte_warn_count = 0;
350
351
bool
352
scan_for_multibyte_characters (const unsigned char *  start,
353
             const unsigned char *  end,
354
             bool                   warn)
355
0
{
356
0
  if (end <= start)
357
0
    return false;
358
359
0
  if (warn && multibyte_warn_count > MULTIBYTE_WARN_COUNT_LIMIT)
360
0
    return false;
361
362
0
  bool found = false;
363
364
0
  while (start < end)
365
0
    {
366
0
      unsigned char c;
367
368
0
      if ((c = * start++) <= 0x7f)
369
0
  continue;
370
371
0
      if (!warn)
372
0
  return true;
373
374
0
      found = true;
375
376
0
      const char * filename;
377
0
      unsigned int lineno;
378
379
0
      filename = as_where (& lineno);
380
0
      if (filename == NULL)
381
0
  as_warn (_("multibyte character (%#x) encountered in input"), c);
382
0
      else if (lineno == 0)
383
0
  as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
384
0
      else
385
0
  as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
386
387
0
      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
388
0
  {
389
0
    as_warn (_("further multibyte character warnings suppressed"));
390
0
    break;
391
0
  }
392
0
    }
393
394
0
  return found;
395
0
}
396
397
/* This function is called to process input characters.  The GET
398
   parameter is used to retrieve more input characters.  GET should
399
   set its parameter to point to a buffer, and return the length of
400
   the buffer; it should return 0 at end of file.  The scrubbed output
401
   characters are put into the buffer starting at TOSTART; the TOSTART
402
   buffer is TOLEN bytes in length.  The function returns the number
403
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
404
   end of file was seen.  This function is arranged as a state
405
   machine, and saves its state so that it may return at any point.
406
   This is the way the old code used to work.  */
407
408
size_t
409
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
410
10.2k
{
411
10.2k
  char *to = tostart;
412
10.2k
  char *toend = tostart + tolen;
413
10.2k
  char *from;
414
10.2k
  char *fromend;
415
10.2k
  size_t fromlen;
416
10.2k
  int ch, ch2 = 0;
417
  /* Character that started the string we're working on.  */
418
10.2k
  static char quotechar;
419
420
  /*State 0: beginning of normal line
421
    1: After first whitespace on line (flush more white)
422
    2: After first non-white (opcode) on line (keep 1white)
423
    3: after second white on line (into operands) (flush white)
424
    4: after putting out a .linefile, put out digits
425
    5: parsing a string, then go to old-state
426
    6: putting out \ escape in a "d string.
427
    7: no longer used
428
    8: no longer used
429
    9: After seeing symbol char in state 3 (keep 1white after symchar)
430
   10: After seeing whitespace in state 9 (keep white before symchar)
431
   11: After seeing a symbol character in state 0 (eg a label definition)
432
   -1: output string in out_string and go to the state in old_state
433
   -2: flush text until a '*' '/' is seen, then go to state old_state
434
#ifdef TC_V850
435
   12: After seeing a dash, looking for a second dash as a start
436
       of comment.
437
#endif
438
#ifdef DOUBLEBAR_PARALLEL
439
   13: After seeing a vertical bar, looking for a second
440
       vertical bar as a parallel expression separator.
441
#endif
442
#ifdef TC_PREDICATE_START_CHAR
443
   14: After seeing a predicate start character at state 0, looking
444
       for a predicate end character as predicate.
445
   15: After seeing a predicate start character at state 1, looking
446
       for a predicate end character as predicate.
447
#endif
448
#ifdef TC_Z80
449
   16: After seeing an 'a' or an 'A' at the start of a symbol
450
   17: After seeing an 'f' or an 'F' in state 16
451
#endif
452
    */
453
454
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
455
     constructs like ``.loc 1 20''.  This was turning into ``.loc
456
     120''.  States 9 and 10 ensure that a space is never dropped in
457
     between characters which could appear in an identifier.  Ian
458
     Taylor, ian@cygnus.com.
459
460
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
461
     correctly on the PA (and any other target where colons are optional).
462
     Jeff Law, law@cs.utah.edu.
463
464
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
465
     get squashed into "cmp r1,r2||trap#1", with the all important space
466
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
467
468
  /* This macro gets the next input character.  */
469
470
10.2k
#define GET()             \
471
28.6M
  (from < fromend            \
472
28.6M
   ? * (unsigned char *) (from++)        \
473
28.6M
   : (saved_input = NULL,          \
474
10.8k
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
475
10.8k
      from = input_buffer,          \
476
10.8k
      fromend = from + fromlen,         \
477
10.8k
      (fromlen == 0            \
478
10.8k
       ? EOF              \
479
10.8k
       : * (unsigned char *) (from++))))
480
481
  /* This macro pushes a character back on the input stream.  */
482
483
1.18M
#define UNGET(uch) (*--from = (uch))
484
485
  /* This macro puts a character into the output buffer.  If this
486
     character fills the output buffer, this macro jumps to the label
487
     TOFULL.  We use this rather ugly approach because we need to
488
     handle two different termination conditions: EOF on the input
489
     stream, and a full output buffer.  It would be simpler if we
490
     always read in the entire input stream before processing it, but
491
     I don't want to make such a significant change to the assembler's
492
     memory usage.  */
493
494
10.2k
#define PUT(pch)        \
495
20.1M
  do            \
496
20.1M
    {           \
497
20.1M
      *to++ = (pch);        \
498
20.1M
      if (to >= toend)       \
499
20.1M
  goto tofull;       \
500
20.1M
    }           \
501
20.1M
  while (0)
502
503
10.2k
  if (saved_input != NULL)
504
1.00k
    {
505
1.00k
      from = saved_input;
506
1.00k
      fromend = from + saved_input_len;
507
1.00k
    }
508
9.19k
  else
509
9.19k
    {
510
9.19k
      fromlen = (*get) (input_buffer, sizeof input_buffer);
511
9.19k
      if (fromlen == 0)
512
537
  return 0;
513
8.65k
      from = input_buffer;
514
8.65k
      fromend = from + fromlen;
515
516
8.65k
      if (multibyte_handling == multibyte_warn)
517
0
  (void) scan_for_multibyte_characters ((const unsigned char *) from,
518
0
                (const unsigned char* ) fromend,
519
0
                true /* Generate warnings.  */);
520
8.65k
    }
521
522
17.9M
  while (1)
523
17.9M
    {
524
      /* The cases in this switch end with continue, in order to
525
   branch back to the top of this while loop and generate the
526
   next output character in the appropriate state.  */
527
17.9M
      switch (state)
528
17.9M
  {
529
611k
  case -1:
530
611k
    ch = *out_string++;
531
611k
    if (*out_string == '\0')
532
257k
      {
533
257k
        state = old_state;
534
257k
        old_state = 3;
535
257k
      }
536
611k
    PUT (ch);
537
611k
    continue;
538
539
611k
  case -2:
540
647
    for (;;)
541
7.26k
      {
542
7.26k
        do
543
3.23M
    {
544
3.23M
      ch = GET ();
545
546
3.23M
      if (ch == EOF)
547
38
        {
548
38
          as_warn (_("end of file in comment"));
549
38
          goto fromeof;
550
38
        }
551
552
3.23M
      if (ch == '\n')
553
548k
        PUT ('\n');
554
3.23M
    }
555
3.23M
        while (ch != '*');
556
557
7.71k
        while ((ch = GET ()) == '*')
558
507
    ;
559
560
7.20k
        if (ch == EOF)
561
0
    {
562
0
      as_warn (_("end of file in comment"));
563
0
      goto fromeof;
564
0
    }
565
566
7.20k
        if (ch == '/')
567
591
    break;
568
569
6.61k
        UNGET (ch);
570
6.61k
      }
571
572
591
    state = old_state;
573
591
    UNGET (' ');
574
591
    continue;
575
576
38.5k
  case 4:
577
38.5k
    ch = GET ();
578
38.5k
    if (ch == EOF)
579
46
      goto fromeof;
580
38.5k
    else if (ch >= '0' && ch <= '9')
581
21.8k
      PUT (ch);
582
16.6k
    else
583
16.6k
      {
584
16.7k
        while (ch != EOF && IS_WHITESPACE (ch))
585
42
    ch = GET ();
586
16.6k
        if (ch == '"')
587
14.9k
    {
588
14.9k
      quotechar = ch;
589
14.9k
      state = 5;
590
14.9k
      old_state = 3;
591
14.9k
      PUT (ch);
592
14.9k
    }
593
1.73k
        else
594
1.73k
    {
595
82.0k
      while (ch != EOF && ch != '\n')
596
80.3k
        ch = GET ();
597
1.73k
      state = 0;
598
1.73k
      PUT (ch);
599
1.73k
    }
600
16.6k
      }
601
38.5k
    continue;
602
603
1.79M
  case 5:
604
    /* We are going to copy everything up to a quote character,
605
       with special handling for a backslash.  We try to
606
       optimize the copying in the simple case without using the
607
       GET and PUT macros.  */
608
1.79M
    {
609
1.79M
      char *s;
610
1.79M
      ptrdiff_t len;
611
612
27.1M
      for (s = from; s < fromend; s++)
613
27.1M
        {
614
27.1M
    ch = *s;
615
27.1M
    if (ch == '\\'
616
27.1M
        || ch == quotechar
617
27.1M
        || ch == '\n')
618
1.79M
      break;
619
27.1M
        }
620
1.79M
      len = s - from;
621
1.79M
      if (len > toend - to)
622
334
        len = toend - to;
623
1.79M
      if (len > 0)
624
645k
        {
625
645k
    memcpy (to, from, len);
626
645k
    to += len;
627
645k
    from += len;
628
645k
    if (to >= toend)
629
343
      goto tofull;
630
645k
        }
631
1.79M
    }
632
633
1.79M
    ch = GET ();
634
1.79M
    if (ch == EOF)
635
412
      {
636
        /* This buffer is here specifically so
637
     that the UNGET below will work.  */
638
412
        static char one_char_buf[1];
639
640
412
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
641
412
        state = old_state;
642
412
        from = fromend = one_char_buf + 1;
643
412
        fromlen = 1;
644
412
        UNGET ('\n');
645
412
        PUT (quotechar);
646
412
      }
647
1.79M
    else if (ch == quotechar)
648
323k
      {
649
323k
        state = old_state;
650
323k
        PUT (ch);
651
323k
      }
652
1.47M
    else if (TC_STRING_ESCAPES && ch == '\\')
653
315k
      {
654
315k
        state = 6;
655
315k
        PUT (ch);
656
315k
      }
657
1.15M
    else if (scrub_m68k_mri && ch == '\n')
658
0
      {
659
        /* Just quietly terminate the string.  This permits lines like
660
       bne  label loop if we haven't reach end yet.  */
661
0
        state = old_state;
662
0
        UNGET (ch);
663
0
        PUT ('\'');
664
0
      }
665
1.15M
    else
666
1.15M
      {
667
1.15M
        PUT (ch);
668
1.15M
      }
669
1.79M
    continue;
670
671
1.79M
  case 6:
672
315k
    state = 5;
673
315k
    ch = GET ();
674
315k
    switch (ch)
675
315k
      {
676
        /* Handle strings broken across lines, by turning '\n' into
677
     '\\' and 'n'.  */
678
654
      case '\n':
679
654
        UNGET ('n');
680
654
        add_newlines++;
681
654
        PUT ('\\');
682
654
        continue;
683
684
654
      case EOF:
685
0
        as_warn (_("end of file in string; '%c' inserted"), quotechar);
686
0
        PUT (quotechar);
687
0
        continue;
688
689
1.39k
      case '"':
690
213k
      case '\\':
691
216k
      case 'b':
692
217k
      case 'f':
693
224k
      case 'n':
694
225k
      case 'r':
695
225k
      case 't':
696
225k
      case 'v':
697
226k
      case 'x':
698
227k
      case 'X':
699
229k
      case '0':
700
229k
      case '1':
701
229k
      case '2':
702
230k
      case '3':
703
230k
      case '4':
704
230k
      case '5':
705
231k
      case '6':
706
231k
      case '7':
707
231k
        break;
708
709
83.3k
      default:
710
#ifdef ONLY_STANDARD_ESCAPES
711
        as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
712
#endif
713
83.3k
        break;
714
315k
      }
715
314k
    PUT (ch);
716
314k
    continue;
717
718
#ifdef DOUBLEBAR_PARALLEL
719
  case 13:
720
    ch = GET ();
721
    if (ch != '|')
722
      abort ();
723
724
    /* Reset back to state 1 and pretend that we are parsing a
725
       line from just after the first white space.  */
726
    state = 1;
727
    PUT ('|');
728
#ifdef TC_TIC6X
729
    /* "||^" is used for SPMASKed instructions.  */
730
    ch = GET ();
731
    if (ch == EOF)
732
      goto fromeof;
733
    else if (ch == '^')
734
      PUT ('^');
735
    else
736
      UNGET (ch);
737
#endif
738
    continue;
739
#endif
740
#ifdef TC_Z80
741
  case 16:
742
    /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
743
    ch = GET ();
744
    if (ch == 'f' || ch == 'F')
745
      {
746
        state = 17;
747
        PUT (ch);
748
      }
749
    else
750
      {
751
        if (ch != EOF)
752
    UNGET (ch);
753
        state = 9;
754
        break;
755
      }
756
    /* Fall through.  */
757
  case 17:
758
    /* We have seen "af" at the start of a symbol,
759
       a ' here is a part of that symbol.  */
760
    ch = GET ();
761
    state = 9;
762
    if (ch == '\'')
763
      /* Change to avoid warning about unclosed string.  */
764
      PUT ('`');
765
    else if (ch != EOF)
766
      UNGET (ch);
767
    break;
768
#endif
769
17.9M
  }
770
771
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
772
773
      /* flushchar: */
774
15.1M
      ch = GET ();
775
776
#ifdef TC_PREDICATE_START_CHAR
777
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
778
  {
779
    state += 14;
780
    PUT (ch);
781
    continue;
782
  }
783
      else if (state == 14 || state == 15)
784
  {
785
    if (ch == TC_PREDICATE_END_CHAR)
786
      {
787
        state -= 14;
788
        PUT (ch);
789
        ch = GET ();
790
      }
791
    else
792
      {
793
        PUT (ch);
794
        continue;
795
      }
796
  }
797
#endif
798
799
15.6M
    recycle:
800
801
#if defined TC_ARM && defined OBJ_ELF
802
      /* We need to watch out for .symver directives.  See the comment later
803
   in this function.  */
804
      if (symver_state == NULL)
805
  {
806
    if ((state == 0 || state == 1) && ch == symver_pseudo[0])
807
      symver_state = symver_pseudo + 1;
808
  }
809
      else
810
  {
811
    /* We advance to the next state if we find the right
812
       character.  */
813
    if (ch != '\0' && (*symver_state == ch))
814
      ++symver_state;
815
    else if (*symver_state != '\0')
816
      /* We did not get the expected character, or we didn't
817
         get a valid terminating character after seeing the
818
         entire pseudo-op, so we must go back to the beginning.  */
819
      symver_state = NULL;
820
    else
821
      {
822
        /* We've read the entire pseudo-op.  If this is the end
823
     of the line, go back to the beginning.  */
824
        if (IS_NEWLINE (ch))
825
    symver_state = NULL;
826
      }
827
  }
828
#endif /* TC_ARM && OBJ_ELF */
829
830
#ifdef TC_M68K
831
      /* We want to have pseudo-ops which control whether we are in
832
   MRI mode or not.  Unfortunately, since m68k MRI mode affects
833
   the scrubber, that means that we need a special purpose
834
   recognizer here.  */
835
      if (mri_state == NULL)
836
  {
837
    if ((state == 0 || state == 1)
838
        && ch == mri_pseudo[0])
839
      mri_state = mri_pseudo + 1;
840
  }
841
      else
842
  {
843
    /* We advance to the next state if we find the right
844
       character, or if we need a space character and we get any
845
       whitespace character, or if we need a '0' and we get a
846
       '1' (this is so that we only need one state to handle
847
       ``.mri 0'' and ``.mri 1'').  */
848
    if (ch != '\0'
849
        && (*mri_state == ch
850
      || (*mri_state == ' '
851
          && lex[ch] == LEX_IS_WHITESPACE)
852
      || (*mri_state == '0'
853
          && ch == '1')))
854
      {
855
        mri_last_ch = ch;
856
        ++mri_state;
857
      }
858
    else if (*mri_state != '\0'
859
       || (lex[ch] != LEX_IS_WHITESPACE
860
           && lex[ch] != LEX_IS_NEWLINE))
861
      {
862
        /* We did not get the expected character, or we didn't
863
     get a valid terminating character after seeing the
864
     entire pseudo-op, so we must go back to the
865
     beginning.  */
866
        mri_state = NULL;
867
      }
868
    else
869
      {
870
        /* We've read the entire pseudo-op.  mips_last_ch is
871
     either '0' or '1' indicating whether to enter or
872
     leave MRI mode.  */
873
        do_scrub_begin (mri_last_ch == '1');
874
        mri_state = NULL;
875
876
        /* We continue handling the character as usual.  The
877
     main gas reader must also handle the .mri pseudo-op
878
     to control expression parsing and the like.  */
879
      }
880
  }
881
#endif
882
883
15.6M
      if (ch == EOF)
884
8.10k
  {
885
8.10k
    if (state != 0)
886
542
      {
887
542
        as_warn (_("end of file not at end of a line; newline inserted"));
888
542
        state = 0;
889
542
        PUT ('\n');
890
542
      }
891
8.10k
    goto fromeof;
892
8.10k
  }
893
894
15.6M
      switch (lex[ch])
895
15.6M
  {
896
996k
  case LEX_IS_WHITESPACE:
897
996k
    do
898
1.15M
      {
899
1.15M
        ch = GET ();
900
1.15M
      }
901
1.15M
    while (ch != EOF && IS_WHITESPACE (ch));
902
996k
    if (ch == EOF)
903
288
      goto fromeof;
904
905
996k
    if (state == 0)
906
139k
      {
907
        /* Preserve a single whitespace character at the
908
     beginning of a line.  */
909
139k
        state = 1;
910
139k
        UNGET (ch);
911
139k
        PUT (' ');
912
139k
        break;
913
139k
      }
914
915
#ifdef KEEP_WHITE_AROUND_COLON
916
    if (lex[ch] == LEX_IS_COLON)
917
      {
918
        /* Only keep this white if there's no white *after* the
919
     colon.  */
920
        ch2 = GET ();
921
        if (ch2 != EOF)
922
    UNGET (ch2);
923
        if (!IS_WHITESPACE (ch2))
924
    {
925
      state = 9;
926
      UNGET (ch);
927
      PUT (' ');
928
      break;
929
    }
930
      }
931
#endif
932
856k
    if (IS_COMMENT (ch)
933
856k
        || IS_LINE_SEPARATOR (ch)
934
856k
        || IS_PARALLEL_SEPARATOR (ch))
935
13.9k
      {
936
13.9k
        if (scrub_m68k_mri)
937
0
    {
938
      /* In MRI mode, we keep these spaces.  */
939
0
      UNGET (ch);
940
0
      PUT (' ');
941
0
      break;
942
0
    }
943
13.9k
        goto recycle;
944
13.9k
      }
945
946
    /* If we're in state 2 or 11, we've seen a non-white
947
       character followed by whitespace.  If the next character
948
       is ':', this is whitespace after a label name which we
949
       normally must ignore.  In MRI mode, though, spaces are
950
       not permitted between the label and the colon.  */
951
842k
    if ((state == 2 || state == 11)
952
842k
        && lex[ch] == LEX_IS_COLON
953
842k
        && ! scrub_m68k_mri)
954
9.57k
      {
955
9.57k
        state = 1;
956
9.57k
        PUT (ch);
957
9.57k
        break;
958
9.57k
      }
959
960
833k
    switch (state)
961
833k
      {
962
273
      case 1:
963
        /* We can arrive here if we leave a leading whitespace
964
     character at the beginning of a line.  */
965
273
        goto recycle;
966
124k
      case 2:
967
124k
        state = 3;
968
124k
        if (to + 1 < toend)
969
124k
    {
970
      /* Optimize common case by skipping UNGET/GET.  */
971
124k
      PUT (' '); /* Sp after opco */
972
124k
      goto recycle;
973
124k
    }
974
1
        UNGET (ch);
975
1
        PUT (' ');
976
0
        break;
977
33.5k
      case 3:
978
33.5k
#ifndef TC_KEEP_OPERAND_SPACES
979
        /* For TI C6X, we keep these spaces as they may separate
980
     functional unit specifiers from operands.  */
981
33.5k
        if (scrub_m68k_mri)
982
0
#endif
983
0
    {
984
      /* In MRI mode, we keep these spaces.  */
985
0
      UNGET (ch);
986
0
      PUT (' ');
987
0
      break;
988
0
    }
989
33.5k
        goto recycle; /* Sp in operands */
990
352k
      case 9:
991
352k
      case 10:
992
352k
#ifndef TC_KEEP_OPERAND_SPACES
993
352k
        if (scrub_m68k_mri)
994
0
#endif
995
0
    {
996
      /* In MRI mode, we keep these spaces.  */
997
0
      state = 3;
998
0
      UNGET (ch);
999
0
      PUT (' ');
1000
0
      break;
1001
0
    }
1002
352k
        state = 10; /* Sp after symbol char */
1003
352k
        goto recycle;
1004
322k
      case 11:
1005
322k
        if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1006
0
    state = 1;
1007
322k
        else
1008
322k
    {
1009
      /* We know that ch is not ':', since we tested that
1010
         case above.  Therefore this is not a label, so it
1011
         must be the opcode, and we've just seen the
1012
         whitespace after it.  */
1013
322k
      state = 3;
1014
322k
    }
1015
322k
        UNGET (ch);
1016
322k
        PUT (' '); /* Sp after label definition.  */
1017
322k
        break;
1018
322k
      default:
1019
0
        BAD_CASE (state);
1020
833k
      }
1021
322k
    break;
1022
1023
322k
  case LEX_IS_TWOCHAR_COMMENT_1ST:
1024
0
    ch2 = GET ();
1025
0
    if (ch2 == '*')
1026
0
      {
1027
0
        for (;;)
1028
0
    {
1029
0
      do
1030
0
        {
1031
0
          ch2 = GET ();
1032
0
          if (ch2 != EOF && IS_NEWLINE (ch2))
1033
0
      add_newlines++;
1034
0
        }
1035
0
      while (ch2 != EOF && ch2 != '*');
1036
1037
0
      while (ch2 == '*')
1038
0
        ch2 = GET ();
1039
1040
0
      if (ch2 == EOF || ch2 == '/')
1041
0
        break;
1042
1043
      /* This UNGET will ensure that we count newlines
1044
         correctly.  */
1045
0
      UNGET (ch2);
1046
0
    }
1047
1048
0
        if (ch2 == EOF)
1049
0
    as_warn (_("end of file in multiline comment"));
1050
1051
0
        ch = ' ';
1052
0
        goto recycle;
1053
0
      }
1054
#ifdef DOUBLESLASH_LINE_COMMENTS
1055
    else if (ch2 == '/')
1056
      {
1057
        do
1058
    {
1059
      ch = GET ();
1060
    }
1061
        while (ch != EOF && !IS_NEWLINE (ch));
1062
        if (ch == EOF)
1063
    as_warn ("end of file in comment; newline inserted");
1064
        state = 0;
1065
        PUT ('\n');
1066
        break;
1067
      }
1068
#endif
1069
0
    else
1070
0
      {
1071
0
        if (ch2 != EOF)
1072
0
    UNGET (ch2);
1073
0
        if (state == 9 || state == 10)
1074
0
    state = 3;
1075
0
        PUT (ch);
1076
0
      }
1077
0
    break;
1078
1079
308k
  case LEX_IS_STRINGQUOTE:
1080
308k
    quotechar = ch;
1081
308k
    if (state == 10)
1082
2.00k
      {
1083
        /* Preserve the whitespace in foo "bar".  */
1084
2.00k
        UNGET (ch);
1085
2.00k
        state = 3;
1086
2.00k
        PUT (' ');
1087
1088
        /* PUT didn't jump out.  We could just break, but we
1089
     know what will happen, so optimize a bit.  */
1090
2.00k
        ch = GET ();
1091
2.00k
        old_state = 9;
1092
2.00k
      }
1093
306k
    else if (state == 3)
1094
18.8k
      old_state = 9;
1095
287k
    else
1096
287k
      old_state = state;
1097
308k
    state = 5;
1098
308k
    PUT (ch);
1099
308k
    break;
1100
1101
308k
  case LEX_IS_ONECHAR_QUOTE:
1102
#ifdef H_TICK_HEX
1103
    if (state == 9 && enable_h_tick_hex)
1104
      {
1105
        char c;
1106
1107
        c = GET ();
1108
        as_warn ("'%c found after symbol", c);
1109
        UNGET (c);
1110
      }
1111
#endif
1112
246k
    if (state == 10)
1113
835
      {
1114
        /* Preserve the whitespace in foo 'b'.  */
1115
835
        UNGET (ch);
1116
835
        state = 3;
1117
835
        PUT (' ');
1118
834
        break;
1119
835
      }
1120
245k
    ch = GET ();
1121
245k
    if (ch == EOF)
1122
11
      {
1123
11
        as_warn (_("end of file after a one-character quote; \\0 inserted"));
1124
11
        ch = 0;
1125
11
      }
1126
245k
    if (ch == '\\')
1127
9.23k
      {
1128
9.23k
        ch = GET ();
1129
9.23k
        if (ch == EOF)
1130
0
    {
1131
0
      as_warn (_("end of file in escape character"));
1132
0
      ch = '\\';
1133
0
    }
1134
9.23k
        else
1135
9.23k
    ch = process_escape (ch);
1136
9.23k
      }
1137
245k
    sprintf (out_buf, "%d", (int) (unsigned char) ch);
1138
1139
    /* None of these 'x constants for us.  We want 'x'.  */
1140
245k
    if ((ch = GET ()) != '\'')
1141
234k
      {
1142
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1143
        as_warn (_("missing close quote; (assumed)"));
1144
#else
1145
234k
        if (ch != EOF)
1146
234k
    UNGET (ch);
1147
234k
#endif
1148
234k
      }
1149
245k
    if (strlen (out_buf) == 1)
1150
4.98k
      {
1151
4.98k
        PUT (out_buf[0]);
1152
4.98k
        break;
1153
4.98k
      }
1154
240k
    if (state == 9)
1155
20.1k
      old_state = 3;
1156
220k
    else
1157
220k
      old_state = state;
1158
240k
    state = -1;
1159
240k
    out_string = out_buf;
1160
240k
    PUT (*out_string++);
1161
240k
    break;
1162
1163
1.57M
  case LEX_IS_COLON:
1164
#ifdef KEEP_WHITE_AROUND_COLON
1165
    state = 9;
1166
#else
1167
1.57M
    if (state == 9 || state == 10)
1168
322k
      state = 3;
1169
1.25M
    else if (state != 3)
1170
1.21M
      state = 1;
1171
1.57M
#endif
1172
1.57M
    PUT (ch);
1173
1.57M
    break;
1174
1175
1.95M
  case LEX_IS_NEWLINE:
1176
    /* Roll out a bunch of newlines from inside comments, etc.  */
1177
1.95M
    if (add_newlines)
1178
654
      {
1179
654
        --add_newlines;
1180
654
        UNGET (ch);
1181
654
      }
1182
    /* Fall through.  */
1183
1184
2.38M
  case LEX_IS_LINE_SEPARATOR:
1185
2.38M
    state = 0;
1186
2.38M
    PUT (ch);
1187
2.38M
    break;
1188
1189
2.38M
  case LEX_IS_PARALLEL_SEPARATOR:
1190
0
    state = 1;
1191
0
    PUT (ch);
1192
0
    break;
1193
1194
#ifdef TC_V850
1195
  case LEX_IS_DOUBLEDASH_1ST:
1196
    ch2 = GET ();
1197
    if (ch2 != '-')
1198
      {
1199
        if (ch2 != EOF)
1200
    UNGET (ch2);
1201
        goto de_fault;
1202
      }
1203
    /* Read and skip to end of line.  */
1204
    do
1205
      {
1206
        ch = GET ();
1207
      }
1208
    while (ch != EOF && ch != '\n');
1209
1210
    if (ch == EOF)
1211
      as_warn (_("end of file in comment; newline inserted"));
1212
1213
    state = 0;
1214
    PUT ('\n');
1215
    break;
1216
#endif
1217
#ifdef DOUBLEBAR_PARALLEL
1218
  case LEX_IS_DOUBLEBAR_1ST:
1219
    ch2 = GET ();
1220
    if (ch2 != EOF)
1221
      UNGET (ch2);
1222
    if (ch2 != '|')
1223
      goto de_fault;
1224
1225
    /* Handle '||' in two states as invoking PUT twice might
1226
       result in the first one jumping out of this loop.  We'd
1227
       then lose track of the state and one '|' char.  */
1228
    state = 13;
1229
    PUT ('|');
1230
    break;
1231
#endif
1232
203k
  case LEX_IS_LINE_COMMENT_START:
1233
    /* FIXME-someday: The two character comment stuff was badly
1234
       thought out.  On i386, we want '/' as line comment start
1235
       AND we want C style comments.  hence this hack.  The
1236
       whole lexical process should be reworked.  xoxorich.  */
1237
203k
    if (ch == '/')
1238
83.1k
      {
1239
83.1k
        ch2 = GET ();
1240
83.1k
        if (ch2 == '*')
1241
591
    {
1242
591
      old_state = 3;
1243
591
      state = -2;
1244
591
      break;
1245
591
    }
1246
82.5k
        else if (ch2 != EOF)
1247
82.5k
    {
1248
82.5k
      UNGET (ch2);
1249
82.5k
    }
1250
83.1k
      }
1251
1252
202k
    if (state == 0 || state == 1)  /* Only comment at start of line.  */
1253
71.3k
      {
1254
71.3k
        int startch;
1255
1256
71.3k
        startch = ch;
1257
1258
71.3k
        do
1259
71.7k
    {
1260
71.7k
      ch = GET ();
1261
71.7k
    }
1262
71.7k
        while (ch != EOF && IS_WHITESPACE (ch));
1263
1264
71.3k
        if (ch == EOF)
1265
161
    {
1266
161
      as_warn (_("end of file in comment; newline inserted"));
1267
161
      PUT ('\n');
1268
161
      break;
1269
161
    }
1270
1271
71.1k
        if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1272
54.4k
    {
1273
      /* Not a cpp line.  */
1274
732k
      while (ch != EOF && !IS_NEWLINE (ch))
1275
678k
        ch = GET ();
1276
54.4k
      if (ch == EOF)
1277
43
        {
1278
43
          as_warn (_("end of file in comment; newline inserted"));
1279
43
          PUT ('\n');
1280
43
        }
1281
54.4k
      else /* IS_NEWLINE (ch) */
1282
54.4k
        {
1283
          /* To process non-zero add_newlines.  */
1284
54.4k
          UNGET (ch);
1285
54.4k
        }
1286
54.4k
      state = 0;
1287
54.4k
      break;
1288
54.4k
    }
1289
        /* Looks like `# 123 "filename"' from cpp.  */
1290
16.6k
        UNGET (ch);
1291
16.6k
        old_state = 4;
1292
16.6k
        state = -1;
1293
16.6k
        if (scrub_m68k_mri)
1294
0
    out_string = "\tlinefile ";
1295
16.6k
        else
1296
16.6k
    out_string = "\t.linefile ";
1297
16.6k
        PUT (*out_string++);
1298
16.6k
        break;
1299
16.6k
      }
1300
1301
#ifdef TC_D10V
1302
    /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1303
       Trap is the only short insn that has a first operand that is
1304
       neither register nor label.
1305
       We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1306
       We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1307
       already LEX_IS_LINE_COMMENT_START.  However, it is the
1308
       only character in line_comment_chars for d10v, hence we
1309
       can recognize it as such.  */
1310
    /* An alternative approach would be to reset the state to 1 when
1311
       we see '||', '<'- or '->', but that seems to be overkill.  */
1312
    if (state == 10)
1313
      PUT (' ');
1314
#endif
1315
    /* We have a line comment character which is not at the
1316
       start of a line.  If this is also a normal comment
1317
       character, fall through.  Otherwise treat it as a default
1318
       character.  */
1319
131k
    if (strchr (tc_comment_chars, ch) == NULL
1320
131k
        && (! scrub_m68k_mri
1321
74.5k
      || (ch != '!' && ch != '*')))
1322
74.5k
      goto de_fault;
1323
56.7k
    if (scrub_m68k_mri
1324
56.7k
        && (ch == '!' || ch == '*' || ch == '#')
1325
56.7k
        && state != 1
1326
56.7k
        && state != 10)
1327
0
      goto de_fault;
1328
    /* Fall through.  */
1329
56.7k
  case LEX_IS_COMMENT_START:
1330
#if defined TC_ARM && defined OBJ_ELF
1331
    /* On the ARM, `@' is the comment character.
1332
       Unfortunately this is also a special character in ELF .symver
1333
       directives (and .type, though we deal with those another way).
1334
       So we check if this line is such a directive, and treat
1335
       the character as default if so.  This is a hack.  */
1336
    if ((symver_state != NULL) && (*symver_state == 0))
1337
      goto de_fault;
1338
#endif
1339
1340
    /* Care is needed not to damage occurrences of \<comment-char>
1341
       by stripping the <comment-char> onwards.  Yuck.  */
1342
56.7k
    if ((to > tostart ? to[-1] : last_char) == '\\')
1343
      /* Do not treat the <comment-char> as a start-of-comment.  */
1344
247
      goto de_fault;
1345
1346
#ifdef WARN_COMMENTS
1347
    if (!found_comment)
1348
      found_comment_file = as_where (&found_comment);
1349
#endif
1350
56.5k
    do
1351
3.86M
      {
1352
3.86M
        ch = GET ();
1353
3.86M
      }
1354
3.86M
    while (ch != EOF && !IS_NEWLINE (ch));
1355
56.5k
    if (ch == EOF)
1356
49
      as_warn (_("end of file in comment; newline inserted"));
1357
56.5k
    state = 0;
1358
56.5k
    PUT ('\n');
1359
56.5k
    break;
1360
1361
#ifdef H_TICK_HEX
1362
  case LEX_IS_H:
1363
    /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1364
       the H' with 0x to make them gas-style hex characters.  */
1365
    if (enable_h_tick_hex)
1366
      {
1367
        char quot;
1368
1369
        quot = GET ();
1370
        if (quot == '\'')
1371
    {
1372
      UNGET ('x');
1373
      ch = '0';
1374
    }
1375
        else
1376
    UNGET (quot);
1377
      }
1378
#endif
1379
    /* Fall through.  */
1380
1381
4.46M
  case LEX_IS_SYMBOL_COMPONENT:
1382
4.46M
    if (state == 10)
1383
322k
      {
1384
        /* This is a symbol character following another symbol
1385
     character, with whitespace in between.  We skipped
1386
     the whitespace earlier, so output it now.  */
1387
322k
        UNGET (ch);
1388
322k
        state = 3;
1389
322k
        PUT (' ');
1390
322k
        break;
1391
322k
      }
1392
1393
#ifdef TC_Z80
1394
    /* "af'" is a symbol containing '\''.  */
1395
    if (state == 3 && (ch == 'a' || ch == 'A'))
1396
      {
1397
        state = 16;
1398
        PUT (ch);
1399
        ch = GET ();
1400
        if (ch == 'f' || ch == 'F')
1401
    {
1402
      state = 17;
1403
      PUT (ch);
1404
      break;
1405
    }
1406
        else
1407
    {
1408
      state = 9;
1409
      if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1410
        {
1411
          if (ch != EOF)
1412
      UNGET (ch);
1413
          break;
1414
        }
1415
    }
1416
      }
1417
#endif
1418
4.14M
    if (state == 3)
1419
1.20M
      state = 9;
1420
1421
    /* This is a common case.  Quickly copy CH and all the
1422
       following symbol component or normal characters.  */
1423
4.14M
    if (to + 1 < toend
1424
4.14M
        && mri_state == NULL
1425
#if defined TC_ARM && defined OBJ_ELF
1426
        && symver_state == NULL
1427
#endif
1428
4.14M
        )
1429
4.14M
      {
1430
4.14M
        char *s;
1431
4.14M
        ptrdiff_t len;
1432
1433
31.4M
        for (s = from; s < fromend; s++)
1434
31.4M
    {
1435
31.4M
      int type;
1436
1437
31.4M
      ch2 = *(unsigned char *) s;
1438
31.4M
      type = lex[ch2];
1439
31.4M
      if (type != 0
1440
31.4M
          && type != LEX_IS_SYMBOL_COMPONENT)
1441
4.13M
        break;
1442
31.4M
    }
1443
1444
4.14M
        if (s > from)
1445
    /* Handle the last character normally, for
1446
       simplicity.  */
1447
2.13M
    --s;
1448
1449
4.14M
        len = s - from;
1450
1451
4.14M
        if (len > (toend - to) - 1)
1452
273
    len = (toend - to) - 1;
1453
1454
4.14M
        if (len > 0)
1455
1.61M
    {
1456
1.61M
      PUT (ch);
1457
1.61M
      memcpy (to, from, len);
1458
1.61M
      to += len;
1459
1.61M
      from += len;
1460
1.61M
      if (to >= toend)
1461
288
        goto tofull;
1462
1.61M
      ch = GET ();
1463
1.61M
    }
1464
4.14M
      }
1465
1466
    /* Fall through.  */
1467
9.64M
  default:
1468
9.71M
  de_fault:
1469
    /* Some relatively `normal' character.  */
1470
9.71M
    if (state == 0)
1471
1.03M
      {
1472
1.03M
        state = 11; /* Now seeing label definition.  */
1473
1.03M
      }
1474
8.68M
    else if (state == 1)
1475
1.34M
      {
1476
1.34M
        state = 2;  /* Ditto.  */
1477
1.34M
      }
1478
7.34M
    else if (state == 9)
1479
1.42M
      {
1480
1.42M
        if (!IS_SYMBOL_COMPONENT (ch))
1481
172k
    state = 3;
1482
1.42M
      }
1483
5.92M
    else if (state == 10)
1484
18.2k
      {
1485
18.2k
        if (ch == '\\')
1486
4.86k
    {
1487
      /* Special handling for backslash: a backslash may
1488
         be the beginning of a formal parameter (of a
1489
         macro) following another symbol character, with
1490
         whitespace in between.  If that is the case, we
1491
         output a space before the parameter.  Strictly
1492
         speaking, correct handling depends upon what the
1493
         macro parameter expands into; if the parameter
1494
         expands into something which does not start with
1495
         an operand character, then we don't want to keep
1496
         the space.  We don't have enough information to
1497
         make the right choice, so here we are making the
1498
         choice which is more likely to be correct.  */
1499
4.86k
      if (to + 1 >= toend)
1500
0
        {
1501
          /* If we're near the end of the buffer, save the
1502
             character for the next time round.  Otherwise
1503
             we'll lose our state.  */
1504
0
          UNGET (ch);
1505
0
          goto tofull;
1506
0
        }
1507
4.86k
      *to++ = ' ';
1508
4.86k
    }
1509
1510
18.2k
        state = 3;
1511
18.2k
      }
1512
9.71M
    PUT (ch);
1513
9.71M
    break;
1514
15.6M
  }
1515
15.6M
    }
1516
1517
  /*NOTREACHED*/
1518
1519
8.47k
 fromeof:
1520
  /* We have reached the end of the input.  */
1521
8.47k
  if (to > tostart)
1522
8.47k
    last_char = to[-1];
1523
8.47k
  return to - tostart;
1524
1525
1.18k
 tofull:
1526
  /* The output buffer is full.  Save any input we have not yet
1527
     processed.  */
1528
1.18k
  if (fromend > from)
1529
1.00k
    {
1530
1.00k
      saved_input = from;
1531
1.00k
      saved_input_len = fromend - from;
1532
1.00k
    }
1533
176
  else
1534
176
    saved_input = NULL;
1535
1536
1.18k
  if (to > tostart)
1537
1.18k
    last_char = to[-1];
1538
1.18k
  return to - tostart;
1539
9.66k
}
1540
1541
/* Return amount of pending input.  */
1542
1543
size_t
1544
do_scrub_pending (void)
1545
28.0k
{
1546
28.0k
  size_t len = 0;
1547
28.0k
  if (saved_input)
1548
173
    len += saved_input_len;
1549
28.0k
  if (state == -1)
1550
33
    len += strlen (out_string);
1551
28.0k
  return len;
1552
28.0k
}