/src/xpdf-4.04/xpdf/Lexer.cc

Source (jump to first uncovered line)
//========================================================================
//
// Lexer.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

#include <aconf.h>

#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include "gmempp.h"
#include "Lexer.h"
#include "Error.h"

//------------------------------------------------------------------------

// Character classification table, indexed by byte value (0x00-0xff).
//
//   1 = white space:  NUL, TAB, LF, FF, CR, SPACE
//   2 = delimiter:    % ( ) / < > [ ] { }
//   0 = regular character
//
// A '1' in this array means the character is white space.  A '1' or
// '2' means the character ends a name or command.  The table is only
// ever read, so it is declared const.
static const char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};

//------------------------------------------------------------------------
// Lexer
//------------------------------------------------------------------------

// Construct a lexer that reads from a single stream.
//
// The stream is wrapped in curStr, and a copy of that object is stored
// as the only element of a newly allocated array.  The array is marked
// as owned (freeArray), so the destructor deletes it.  The stream is
// reset so reading starts from its beginning.
Lexer::Lexer(XRef *xref, Stream *str) {
  Object streamCopy;

  // bookkeeping: first (and only) stream, array belongs to this lexer
  strPtr = 0;
  freeArray = gTrue;

  curStr.initStream(str);

  streams = new Array(xref);
  streams->add(curStr.copy(&streamCopy));

  curStr.streamReset();
}

// Construct a lexer that reads from <obj>, which is either a single
// stream or an array of streams.
//
// - Stream: a copy is placed in a newly allocated one-element array
//   which this lexer owns (freeArray is true).
// - Otherwise: the array held by <obj> is used directly and is not
//   owned (freeArray is false).
//   NOTE(review): this branch assumes a non-stream <obj> is an array;
//   nothing here checks that -- confirm callers guarantee it.
//
// If the resulting array is non-empty, its first element is loaded into
// curStr and reset; if it is empty, curStr is not set here.
Lexer::Lexer(XRef *xref, Object *obj) {
  Object streamCopy;

  strPtr = 0;

  if (!obj->isStream()) {
    freeArray = gFalse;
    streams = obj->getArray();
  } else {
    freeArray = gTrue;
    streams = new Array(xref);
    streams->add(obj->copy(&streamCopy));
  }

  // nothing to open when the array holds no streams
  if (streams->getLength() <= 0) {
    return;
  }
  streams->get(strPtr, &curStr);
  curStr.streamReset();
}

// Tear down the lexer: close and release the stream that is still
// current (if any), then delete the stream array, but only when this
// lexer allocated it (freeArray).
Lexer::~Lexer() {
  const GBool streamStillOpen = !curStr.isNone();

  if (streamStillOpen) {
    curStr.streamClose();
    curStr.free();
  }

  // a borrowed array is left alone
  if (!freeArray) {
    return;
  }
  delete streams;
}

// Consume and return the next character, moving on to the following
// stream in the array whenever the current one is exhausted.  Returns
// EOF once there is no current stream left.
int Lexer::getChar() {
  for (;;) {
    // no stream left to read from
    if (curStr.isNone()) {
      return EOF;
    }

    const int ch = curStr.streamGetChar();
    if (ch != EOF) {
      return ch;
    }

    // the current stream is exhausted: release it and, if the array
    // holds another stream, open that one and try again
    curStr.streamClose();
    curStr.free();
    ++strPtr;
    if (strPtr < streams->getLength()) {
      streams->get(strPtr, &curStr);
      curStr.streamReset();
    }
  }
}

// Peek at the next character of the current stream without consuming
// it.  Returns EOF when there is no current stream.  Unlike getChar(),
// this never advances to the next stream in the array.
int Lexer::lookChar() {
  return curStr.isNone() ? EOF : curStr.streamLookChar();
}

// Read the next token from the input and store it in <obj>; returns
// <obj>.
//
// Leading white space and '%' comments (which run to the end of the
// line) are skipped first.  The object is then initialized as one of:
//   - EOF     when the input is exhausted before a token starts
//   - int     for a number without a decimal point
//   - real    for a number containing a decimal point
//   - string  for a literal "(...)" string or a hex "<...>" string
//   - name    for "/..." (with #xx hex escapes decoded)
//   - cmd     for "[", "]", "<<", ">>" and any other bare word
//   - bool    for the words "true" / "false"
//   - null    for the word "null"
//   - error   for a stray ")", "{", "}", a lone ">", or a name that
//             contains a #00 escape
// Syntax problems are reported through error(); the lexer always keeps
// going and produces some object.
Object *Lexer::getObj(Object *obj) {
  char *p;
  int c, c2;
  GBool comment, neg, doubleMinus, done, invalid;
  int numParen;
  unsigned int xi;
  double xf, scale;
  GString *s;
  int n, m;

  // skip whitespace and comments
  comment = gFalse;
  while (1) {
    if ((c = getChar()) == EOF) {
      return obj->initEOF();
    }
    if (comment) {
      // a comment ends at the first CR or LF
      if (c == '\r' || c == '\n') {
        comment = gFalse;
      }
    } else if (c == '%') {
      comment = gTrue;
    } else if (specialChars[c] != 1) {
      break;
    }
  }

  // start reading token
  switch (c) {

  // number
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
  case '+': case '-': case '.':
    // Adobe's number lexer has some "interesting" behavior:
    // "--123" is interpreted as 0
    // "--123.4" is interpreted as -123.4 [I've seen this in the wild]
    // "50-100" is interpreted as 50 [I've seen this in the wild]
    // "50--100" is interpreted as 50
    // "50-100.0" is an error -- but older versions of Acrobat may
    //   have interpreted it as 50100.0 (?)
    // "50--100.0" is an error -- but older versions of Acrobat may
    //   have interpreted it as 50100.0 (?)
    // "50.0-100" is interpreted as 50.0 (or maybe 50.0100?)
    // "50.0--100" is interpreted as 50.0 (or maybe 50.0100?)
    // "-50-100" is interpreted as -50
    // "-" is interpreted as 0
    // "-." is interpreted as 0.0
    //
    // The integer value is accumulated in an unsigned int: an
    // arbitrarily long run of digits then wraps around with well-defined
    // modular arithmetic instead of overflowing a signed int (which is
    // undefined behavior).  xf tracks the same value as a double in
    // case a decimal point turns the token into a real.
    neg = gFalse;
    doubleMinus = gFalse;
    xi = 0;
    xf = 0;
    if (c == '+') {
      // just ignore it
    } else if (c == '-') {
      neg = gTrue;
      if (lookChar() == '-') {
        // swallow every extra leading '-'
        doubleMinus = gTrue;
        do {
          getChar();
        } while (lookChar() == '-');
      }
    } else if (c == '.') {
      goto doReal;
    } else {
      xi = (unsigned int)(c - '0');
      xf = c - '0';
    }
    while (1) {
      c = lookChar();
      if (isdigit(c)) {
        getChar();
        xi = xi * 10u + (unsigned int)(c - '0');
        // stop growing the double once it is huge; further digits are
        // still consumed
        if (xf < 1e20) {
          xf = xf * 10 + (c - '0');
        }
      } else if (c == '.') {
        getChar();
        goto doReal;
      } else {
        break;
      }
    }
    // discard any trailing '-' and digits ("50-100" -> 50)
    while ((c = lookChar()) == '-' || isdigit(c)) {
      getChar();
    }
    if (neg) {
      // negate in unsigned arithmetic so the most negative value
      // cannot overflow
      xi = 0u - xi;
    }
    if (doubleMinus) {
      xi = 0;
    }
    obj->initInt((int)xi);
    break;
  doReal:
    // fractional part: each digit contributes at the next decimal place;
    // a '-' inside the fraction is reported and skipped.  This loop
    // consumes every '-' and digit, so nothing of that kind is left
    // over when it ends.
    scale = 0.1;
    while (1) {
      c = lookChar();
      if (c == '-') {
        error(errSyntaxWarning, getPos(), "Badly formatted number");
        getChar();
        continue;
      }
      if (!isdigit(c)) {
        break;
      }
      getChar();
      xf = xf + scale * (c - '0');
      scale *= 0.1;
    }
    if (neg) {
      xf = -xf;
    }
    obj->initReal(xf);
    break;

  // string
  case '(':
    // Characters are collected in tokBuf; each time tokBuf fills up it
    // is flushed into the GString <s>.  c2 is the character to emit for
    // this iteration, or EOF when nothing should be emitted.
    p = tokBuf;
    n = 0;
    numParen = 1;
    done = gFalse;
    s = NULL;
    do {
      c2 = EOF;
      switch (c = getChar()) {

      case EOF:
        error(errSyntaxError, getPos(), "Unterminated string");
        done = gTrue;
        break;

      case '(':
        // nested parentheses are part of the string
        ++numParen;
        c2 = c;
        break;

      case ')':
        // only the parenthesis matching the opening one ends the string
        if (--numParen == 0) {
          done = gTrue;
        } else {
          c2 = c;
        }
        break;

      case '\r':
        // The PDF spec says that any literal end-of-line sequence
        // (LF, CR, CR+LF) is translated to a single LF char.
        c = lookChar();
        if (c == '\n') {
          getChar();
        }
        c2 = '\n';
        break;

      case '\\':
        switch (c = getChar()) {
        case 'n':
          c2 = '\n';
          break;
        case 'r':
          c2 = '\r';
          break;
        case 't':
          c2 = '\t';
          break;
        case 'b':
          c2 = '\b';
          break;
        case 'f':
          c2 = '\f';
          break;
        case '\\':
        case '(':
        case ')':
          c2 = c;
          break;
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
          // octal escape: one to three octal digits
          c2 = c - '0';
          c = lookChar();
          if (c >= '0' && c <= '7') {
            getChar();
            c2 = (c2 << 3) + (c - '0');
            c = lookChar();
            if (c >= '0' && c <= '7') {
              getChar();
              c2 = (c2 << 3) + (c - '0');
            }
          }
          break;
        case '\r':
          // backslash + end-of-line: line continuation, emits nothing
          c = lookChar();
          if (c == '\n') {
            getChar();
          }
          break;
        case '\n':
          break;
        case EOF:
          error(errSyntaxError, getPos(), "Unterminated string");
          done = gTrue;
          break;
        default:
          // unknown escape: the backslash is dropped
          c2 = c;
          break;
        }
        break;

      default:
        c2 = c;
        break;
      }

      if (c2 != EOF) {
        if (n == tokBufSize) {
          // tokBuf is full: move its contents into the GString
          if (!s) {
            s = new GString(tokBuf, tokBufSize);
          } else {
            s->append(tokBuf, tokBufSize);
          }
          p = tokBuf;
          n = 0;
        }
        *p++ = (char)c2;
        ++n;
      }
    } while (!done);
    if (!s) {
      s = new GString(tokBuf, n);
    } else {
      s->append(tokBuf, n);
    }
    obj->initString(s);
    break;

  // name
  case '/':
    // The first tokBufSize-1 characters are kept in tokBuf; a longer
    // name spills into the GString <s>.
    p = tokBuf;
    n = 0;
    s = NULL;
    invalid = gFalse;
    while ((c = lookChar()) != EOF && !specialChars[c]) {
      getChar();
      if (c == '#') {
        // "#xx" hex escape.  If the first digit is bad, the '#' itself
        // is stored; if the second digit is bad, the value of the first
        // digit (already consumed) is stored.
        c2 = lookChar();
        if (c2 >= '0' && c2 <= '9') {
          c = c2 - '0';
        } else if (c2 >= 'A' && c2 <= 'F') {
          c = c2 - 'A' + 10;
        } else if (c2 >= 'a' && c2 <= 'f') {
          c = c2 - 'a' + 10;
        } else {
          error(errSyntaxError, getPos(), "Invalid hex escape in name");
          goto notEscChar;
        }
        getChar();
        c2 = lookChar();
        if (c2 >= '0' && c2 <= '9') {
          c = (c << 4) + (c2 - '0');
        } else if (c2 >= 'A' && c2 <= 'F') {
          c = (c << 4) + (c2 - 'A' + 10);
        } else if (c2 >= 'a' && c2 <= 'f') {
          c = (c << 4) + (c2 - 'a' + 10);
        } else {
          error(errSyntaxError, getPos(), "Invalid hex escape in name");
          goto notEscChar;
        }
        getChar();
        if (c == 0) {
          // "#00" cannot be represented in a C string name
          invalid = gTrue;
        }
      }
     notEscChar:
      // the PDF spec claims that names are limited to 127 chars, but
      // Distiller 8 will produce longer names, and Acrobat 8 will
      // accept longer names
      ++n;
      if (n < tokBufSize) {
        *p++ = (char)c;
      } else if (n == tokBufSize) {
        // tokBuf just became full: switch over to the GString
        *p = (char)c;
        s = new GString(tokBuf, n);
      } else {
        s->append((char)c);
      }
    }
    if (invalid) {
      error(errSyntaxError, getPos(), "Null character in name");
      obj->initError();
      if (s) {
        delete s;
      }
    } else if (n < tokBufSize) {
      *p = '\0';
      obj->initName(tokBuf);
    } else {
      obj->initName(s->getCString());
      delete s;
    }
    break;

  // array punctuation
  case '[':
  case ']':
    tokBuf[0] = (char)c;
    tokBuf[1] = '\0';
    obj->initCmd(tokBuf);
    break;

  // hex string or dict punctuation
  case '<':
    c = lookChar();

    // dict punctuation
    if (c == '<') {
      getChar();
      tokBuf[0] = tokBuf[1] = '<';
      tokBuf[2] = '\0';
      obj->initCmd(tokBuf);

    // hex string
    } else {
      // m counts the hex digits gathered for the current byte (0 or 1),
      // c2 holds the partial byte value.  White space is skipped; any
      // other non-hex character is reported and counted as a zero digit.
      p = tokBuf;
      m = n = 0;
      c2 = 0;
      s = NULL;
      while (1) {
        c = getChar();
        if (c == '>') {
          break;
        } else if (c == EOF) {
          error(errSyntaxError, getPos(), "Unterminated hex string");
          break;
        } else if (specialChars[c] != 1) {
          c2 = c2 << 4;
          if (c >= '0' && c <= '9') {
            c2 += c - '0';
          } else if (c >= 'A' && c <= 'F') {
            c2 += c - 'A' + 10;
          } else if (c >= 'a' && c <= 'f') {
            c2 += c - 'a' + 10;
          } else {
            error(errSyntaxError, getPos(),
                  "Illegal character <{0:02x}> in hex string", c);
          }
          if (++m == 2) {
            if (n == tokBufSize) {
              // tokBuf is full: move its contents into the GString
              if (!s) {
                s = new GString(tokBuf, tokBufSize);
              } else {
                s->append(tokBuf, tokBufSize);
              }
              p = tokBuf;
              n = 0;
            }
            *p++ = (char)c2;
            ++n;
            c2 = 0;
            m = 0;
          }
        }
      }
      if (!s) {
        s = new GString(tokBuf, n);
      } else {
        s->append(tokBuf, n);
      }
      // an odd number of digits: the last one is the high nibble of a
      // final byte whose low nibble is zero
      if (m == 1) {
        s->append((char)(c2 << 4));
      }
      obj->initString(s);
    }
    break;

  // dict punctuation
  case '>':
    c = lookChar();
    if (c == '>') {
      getChar();
      tokBuf[0] = tokBuf[1] = '>';
      tokBuf[2] = '\0';
      obj->initCmd(tokBuf);
    } else {
      error(errSyntaxError, getPos(), "Illegal character '>'");
      obj->initError();
    }
    break;

  // error
  case ')':
  case '{':
  case '}':
    error(errSyntaxError, getPos(), "Illegal character '{0:c}'", c);
    obj->initError();
    break;

  // command
  default:
    // collect characters up to the next white space or delimiter; at
    // most tokBufSize-1 characters are kept (the character that hits the
    // limit is consumed but not stored)
    p = tokBuf;
    *p++ = (char)c;
    n = 1;
    while ((c = lookChar()) != EOF && !specialChars[c]) {
      getChar();
      if (++n == tokBufSize) {
        error(errSyntaxError, getPos(), "Command token too long");
        break;
      }
      *p++ = (char)c;
    }
    *p = '\0';
    if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
      obj->initBool(gTrue);
    } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
      obj->initBool(gFalse);
    } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
      obj->initNull();
    } else {
      obj->initCmd(tokBuf);
    }
    break;
  }

  return obj;
}

// Discard input up to and including the next end-of-line sequence
// (LF, CR, or CR+LF), or until EOF if no end-of-line is found.
void Lexer::skipToNextLine() {
  for (;;) {
    switch (getChar()) {
    case EOF:
    case '\n':
      return;
    case '\r':
      // treat CR+LF as a single line ending
      if (lookChar() == '\n') {
        getChar();
      }
      return;
    default:
      // ordinary character: keep skipping
      break;
    }
  }
}

// Consume and discard all remaining input, until getChar() reports EOF.
void Lexer::skipToEOF() {
  int ch;

  do {
    ch = getChar();
  } while (ch != EOF);
}

// Return true if <c> is a white space character, i.e. a byte value
// (0x00-0xff) whose specialChars entry is 1.  Values outside that range
// (such as EOF) are never white space.
GBool Lexer::isSpace(int c) {
  const GBool inByteRange = c >= 0 && c <= 0xff;

  // the range check must come first: it guards the table lookup
  return inByteRange && specialChars[c] == 1;
}

Coverage Report

Created: 2023-09-25 06:35

Line	Count	Source (jump to first uncovered line)
1		//========================================================================
2		//
3		// Lexer.cc
4		//
5		// Copyright 1996-2003 Glyph & Cog, LLC
6		//
7		//========================================================================
8
9		#include <aconf.h>
10
11		#ifdef USE_GCC_PRAGMAS
12		#pragma implementation
13		#endif
14
15		#include <stdlib.h>
16		#include <stddef.h>
17		#include <string.h>
18		#include <ctype.h>
19		#include "gmempp.h"
20		#include "Lexer.h"
21		#include "Error.h"
22
23		//------------------------------------------------------------------------
24
25		// A '1' in this array means the character is white space. A '1' or
26		// '2' means the character ends a name or command.
27		static char specialChars[256] = {
28		1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
29		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
30		1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
31		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
32		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
33		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
34		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
35		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
36		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
37		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
38		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
39		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
40		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
41		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
42		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
43		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx
44		};
45
46		//------------------------------------------------------------------------
47		// Lexer
48		//------------------------------------------------------------------------
49
50	132k	Lexer::Lexer(XRef *xref, Stream *str) {
51	132k	Object obj;
52
53	132k	curStr.initStream(str);
54	132k	streams = new Array(xref);
55	132k	streams->add(curStr.copy(&obj));
56	132k	strPtr = 0;
57	132k	freeArray = gTrue;
58	132k	curStr.streamReset();
59	132k	}
60
61	0	Lexer::Lexer(XRef *xref, Object *obj) {
62	0	Object obj2;
63
64	0	if (obj->isStream()) {
65	0	streams = new Array(xref);
66	0	freeArray = gTrue;
67	0	streams->add(obj->copy(&obj2));
68	0	} else {
69	0	streams = obj->getArray();
70	0	freeArray = gFalse;
71	0	}
72	0	strPtr = 0;
73	0	if (streams->getLength() > 0) {
74	0	streams->get(strPtr, &curStr);
75	0	curStr.streamReset();
76	0	}
77	0	}
78
79	132k	Lexer::~Lexer() {
80	132k	if (!curStr.isNone()) {
81	55.8k	curStr.streamClose();
82	55.8k	curStr.free();
83	55.8k	}
84	132k	if (freeArray) {
85	132k	delete streams;
86	132k	}
87	132k	}
88
89	752M	int Lexer::getChar() {
90	752M	int c;
91
92	752M	c = EOF;
93	752M	while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
94	76.3k	curStr.streamClose();
95	76.3k	curStr.free();
96	76.3k	++strPtr;
97	76.3k	if (strPtr < streams->getLength()) {
98	0	streams->get(strPtr, &curStr);
99	0	curStr.streamReset();
100	0	}
101	76.3k	}
102	752M	return c;
103	752M	}
104
105	161M	int Lexer::lookChar() {
106	161M	if (curStr.isNone()) {
107	209	return EOF;
108	209	}
109	161M	return curStr.streamLookChar();
110	161M	}
111
112	44.6M	Object *Lexer::getObj(Object *obj) {
113	44.6M	char *p;
114	44.6M	int c, c2;
115	44.6M	GBool comment, neg, doubleMinus, done, invalid;
116	44.6M	int numParen;
117	44.6M	int xi;
118	44.6M	double xf, scale;
119	44.6M	GString *s;
120	44.6M	int n, m;
121
122		// skip whitespace and comments
123	44.6M	comment = gFalse;
124	126M	while (1) {
125	126M	if ((c = getChar()) == EOF) {
126	10.6M	return obj->initEOF();
127	10.6M	}
128	115M	if (comment) {
129	15.6M	if (c == '\r' || c == '\n')
130	147k	comment = gFalse;
131	99.8M	} else if (c == '%') {
132	147k	comment = gTrue;
133	99.6M	} else if (specialChars[c] != 1) {
134	34.0M	break;
135	34.0M	}
136	115M	}
137
138		// start reading token
139	34.0M	switch (c) {
140
141		// number
142	2.85M	case '0': case '1': case '2': case '3': case '4':
143	3.86M	case '5': case '6': case '7': case '8': case '9':
144	4.33M	case '+': case '-': case '.':
145		// Adobe's number lexer has some "interesting" behavior:
146		// "--123" is interpreted as 0
147		// "--123.4" is interpreted as -123.4 [I've seen this in the wild]
148		// "50-100" is interpreted as 50 [I've seen this in the wild]
149		// "50--100" is interpreted as 50
150		// "50-100.0" is an error -- but older versions of Acrobat may
151		// have interpreted it as 50100.0 (?)
152		// "50--100.0" is an error -- but older versions of Acrobat may
153		// have interpreted it as 50100.0 (?)
154		// "50.0-100" is interpreted as 50.0 (or maybe 50.0100?)
155		// "50.0--100" is interpreted as 50.0 (or maybe 50.0100?)
156		// "-50-100" is interpreted as -50
157		// "-" is interpreted as 0
158		// "-." is interpreted as 0.0
159	4.33M	neg = gFalse;
160	4.33M	doubleMinus = gFalse;
161	4.33M	xf = xi = 0;
162	4.33M	if (c == '+') {
163		// just ignore it
164	4.31M	} else if (c == '-') {
165	245k	neg = gTrue;
166	245k	if (lookChar() == '-') {
167	10.6k	doubleMinus = gTrue;
168	19.1k	do {
169	19.1k	getChar();
170	19.1k	} while (lookChar() == '-');
171	10.6k	}
172	4.07M	} else if (c == '.') {
173	204k	goto doReal;
174	3.86M	} else {
175	3.86M	xf = xi = c - '0';
176	3.86M	}
177	8.33M	while (1) {
178	8.33M	c = lookChar();
179	8.33M	if (isdigit(c)) {
180	4.20M	getChar();
181	4.20M	xi = xi * 10 + (c - '0');
182	4.20M	if (xf < 1e20) {
183	4.16M	xf = xf * 10 + (c - '0');
184	4.16M	}
185	4.20M	} else if (c == '.') {
186	96.9k	getChar();
187	96.9k	goto doReal;
188	4.03M	} else {
189	4.03M	break;
190	4.03M	}
191	8.33M	}
192	4.04M	while ((c = lookChar()) == '-' || isdigit(c)) {
193	4.85k	getChar();
194	4.85k	}
195	4.03M	if (neg) {
196	239k	xi = -xi;
197	239k	}
198	4.03M	if (doubleMinus) {
199	10.6k	xi = 0;
200	10.6k	}
201	4.03M	obj->initInt(xi);
202	4.03M	break;
203	301k	doReal:
204	301k	scale = 0.1;
205	460k	while (1) {
206	460k	c = lookChar();
207	460k	if (c == '-') {
208	6.02k	error(errSyntaxWarning, getPos(), "Badly formatted number");
209	6.02k	getChar();
210	6.02k	continue;
211	6.02k	}
212	454k	if (!isdigit(c)) {
213	301k	break;
214	301k	}
215	152k	getChar();
216	152k	xf = xf + scale * (c - '0');
217	152k	scale *= 0.1;
218	152k	}
219	301k	while ((c = lookChar()) == '-' || isdigit(c)) {
220	0	getChar();
221	0	}
222	301k	if (neg) {
223	6.17k	xf = -xf;
224	6.17k	}
225	301k	obj->initReal(xf);
226	301k	break;
227
228		// string
229	105k	case '(':
230	105k	p = tokBuf;
231	105k	n = 0;
232	105k	numParen = 1;
233	105k	done = gFalse;
234	105k	s = NULL;
235	437M	do {
236	437M	c2 = EOF;
237	437M	switch (c = getChar()) {
238
239	6.01k	case EOF:
240	6.01k	error(errSyntaxError, getPos(), "Unterminated string");
241	6.01k	done = gTrue;
242	6.01k	break;
243
244	463k	case '(':
245	463k	++numParen;
246	463k	c2 = c;
247	463k	break;
248
249	520k	case ')':
250	520k	if (--numParen == 0) {
251	99.0k	done = gTrue;
252	420k	} else {
253	420k	c2 = c;
254	420k	}
255	520k	break;
256
257	4.54M	case '\r':
258		// The PDF spec says that any literal end-of-line sequence
259		// (LF, CR, CR+LF) is translated to a single LF char.
260	4.54M	c = lookChar();
261	4.54M	if (c == '\n') {
262	121k	getChar();
263	121k	}
264	4.54M	c2 = '\n';
265	4.54M	break;
266
267	235k	case '\\':
268	235k	switch (c = getChar()) {
269	6.28k	case 'n':
270	6.28k	c2 = '\n';
271	6.28k	break;
272	13.8k	case 'r':
273	13.8k	c2 = '\r';
274	13.8k	break;
275	23.1k	case 't':
276	23.1k	c2 = '\t';
277	23.1k	break;
278	533	case 'b':
279	533	c2 = '\b';
280	533	break;
281	1.41k	case 'f':
282	1.41k	c2 = '\f';
283	1.41k	break;
284	5.77k	case '\\':
285	9.05k	case '(':
286	11.4k	case ')':
287	11.4k	c2 = c;
288	11.4k	break;
289	9.00k	case '0': case '1': case '2': case '3':
290	32.5k	case '4': case '5': case '6': case '7':
291	32.5k	c2 = c - '0';
292	32.5k	c = lookChar();
293	32.5k	if (c >= '0' && c <= '7') {
294	6.00k	getChar();
295	6.00k	c2 = (c2 << 3) + (c - '0');
296	6.00k	c = lookChar();
297	6.00k	if (c >= '0' && c <= '7') {
298	985	getChar();
299	985	c2 = (c2 << 3) + (c - '0');
300	985	}
301	6.00k	}
302	32.5k	break;
303	1.63k	case '\r':
304	1.63k	c = lookChar();
305	1.63k	if (c == '\n') {
306	869	getChar();
307	869	}
308	1.63k	break;
309	5.76k	case '\n':
310	5.76k	break;
311	1	case EOF:
312	1	error(errSyntaxError, getPos(), "Unterminated string");
313	1	done = gTrue;
314	1	break;
315	138k	default:
316	138k	c2 = c;
317	138k	break;
318	235k	}
319	235k	break;
320
321	431M	default:
322	431M	c2 = c;
323	431M	break;
324	437M	}
325
326	437M	if (c2 != EOF) {
327	437M	if (n == tokBufSize) {
328	3.37M	if (!s)
329	56.2k	s = new GString(tokBuf, tokBufSize);
330	3.31M	else
331	3.31M	s->append(tokBuf, tokBufSize);
332	3.37M	p = tokBuf;
333	3.37M	n = 0;
334	3.37M	}
335	437M	*p++ = (char)c2;
336	437M	++n;
337	437M	}
338	437M	} while (!done);
339	105k	if (!s)
340	48.8k	s = new GString(tokBuf, n);
341	56.2k	else
342	56.2k	s->append(tokBuf, n);
343	105k	obj->initString(s);
344	105k	break;
345
346		// name
347	4.75M	case '/':
348	4.75M	p = tokBuf;
349	4.75M	n = 0;
350	4.75M	s = NULL;
351	4.75M	invalid = gFalse;
352	16.3M	while ((c = lookChar()) != EOF && !specialChars[c]) {
353	11.5M	getChar();
354	11.5M	if (c == '#') {
355	712k	c2 = lookChar();
356	712k	if (c2 >= '0' && c2 <= '9') {
357	2.10k	c = c2 - '0';
358	709k	} else if (c2 >= 'A' && c2 <= 'F') {
359	46.7k	c = c2 - 'A' + 10;
360	663k	} else if (c2 >= 'a' && c2 <= 'f') {
361	75.0k	c = c2 - 'a' + 10;
362	588k	} else {
363	588k	error(errSyntaxError, getPos(), "Invalid hex escape in name");
364	588k	goto notEscChar;
365	588k	}
366	123k	getChar();
367	123k	c2 = lookChar();
368	123k	if (c2 >= '0' && c2 <= '9') {
369	25.1k	c = (c << 4) + (c2 - '0');
370	98.6k	} else if (c2 >= 'A' && c2 <= 'F') {
371	46.3k	c = (c << 4) + (c2 - 'A' + 10);
372	52.3k	} else if (c2 >= 'a' && c2 <= 'f') {
373	258	c = (c << 4) + (c2 - 'a' + 10);
374	52.0k	} else {
375	52.0k	error(errSyntaxError, getPos(), "Invalid hex escape in name");
376	52.0k	goto notEscChar;
377	52.0k	}
378	71.7k	getChar();
379	71.7k	if (c == 0) {
380	261	invalid = gTrue;
381	261	}
382	71.7k	}
383	11.5M	notEscChar:
384		// the PDF spec claims that names are limited to 127 chars, but
385		// Distiller 8 will produce longer names, and Acrobat 8 will
386		// accept longer names
387	11.5M	++n;
388	11.5M	if (n < tokBufSize) {
389	10.4M	*p++ = (char)c;
390	10.4M	} else if (n == tokBufSize) {
391	562	*p = (char)c;
392	562	s = new GString(tokBuf, n);
393	1.08M	} else {
394	1.08M	s->append((char)c);
395	1.08M	}
396	11.5M	}
397	4.75M	if (invalid) {
398	131	error(errSyntaxError, getPos(), "Null character in name");
399	131	obj->initError();
400	131	if (s) {
401	89	delete s;
402	89	}
403	4.75M	} else if (n < tokBufSize) {
404	4.75M	*p = '\0';
405	4.75M	obj->initName(tokBuf);
406	4.75M	} else {
407	473	obj->initName(s->getCString());
408	473	delete s;
409	473	}
410	4.75M	break;
411
412		// array punctuation
413	4.86M	case '[':
414	5.21M	case ']':
415	5.21M	tokBuf[0] = (char)c;
416	5.21M	tokBuf[1] = '\0';
417	5.21M	obj->initCmd(tokBuf);
418	5.21M	break;
419
420		// hex string or dict punctuation
421	3.30M	case '<':
422	3.30M	c = lookChar();
423
424		// dict punctuation
425	3.30M	if (c == '<') {
426	2.69M	getChar();
427	2.69M	tokBuf[0] = tokBuf[1] = '<';
428	2.69M	tokBuf[2] = '\0';
429	2.69M	obj->initCmd(tokBuf);
430
431		// hex string
432	2.69M	} else {
433	606k	p = tokBuf;
434	606k	m = n = 0;
435	606k	c2 = 0;
436	606k	s = NULL;
437	58.7M	while (1) {
438	58.7M	c = getChar();
439	58.7M	if (c == '>') {
440	606k	break;
441	58.1M	} else if (c == EOF) {
442	688	error(errSyntaxError, getPos(), "Unterminated hex string");
443	688	break;
444	58.1M	} else if (specialChars[c] != 1) {
445	51.6M	c2 = c2 << 4;
446	51.6M	if (c >= '0' && c <= '9')
447	4.22M	c2 += c - '0';
448	47.4M	else if (c >= 'A' && c <= 'F')
449	678k	c2 += c - 'A' + 10;
450	46.7M	else if (c >= 'a' && c <= 'f')
451	6.64M	c2 += c - 'a' + 10;
452	40.1M	else
453	40.1M	error(errSyntaxError, getPos(),
454	40.1M	"Illegal character <{0:02x}> in hex string", c);
455	51.6M	if (++m == 2) {
456	25.7M	if (n == tokBufSize) {
457	48.0k	if (!s)
458	23.1k	s = new GString(tokBuf, tokBufSize);
459	24.9k	else
460	24.9k	s->append(tokBuf, tokBufSize);
461	48.0k	p = tokBuf;
462	48.0k	n = 0;
463	48.0k	}
464	25.7M	*p++ = (char)c2;
465	25.7M	++n;
466	25.7M	c2 = 0;
467	25.7M	m = 0;
468	25.7M	}
469	51.6M	}
470	58.7M	}
471	606k	if (!s)
472	583k	s = new GString(tokBuf, n);
473	23.1k	else
474	23.1k	s->append(tokBuf, n);
475	606k	if (m == 1)
476	224k	s->append((char)(c2 << 4));
477	606k	obj->initString(s);
478	606k	}
479	3.30M	break;
480
481		// dict punctuation
482	2.36M	case '>':
483	2.36M	c = lookChar();
484	2.36M	if (c == '>') {
485	1.42M	getChar();
486	1.42M	tokBuf[0] = tokBuf[1] = '>';
487	1.42M	tokBuf[2] = '\0';
488	1.42M	obj->initCmd(tokBuf);
489	1.42M	} else {
490	935k	error(errSyntaxError, getPos(), "Illegal character '>'");
491	935k	obj->initError();
492	935k	}
493	2.36M	break;
494
495		// error
496	698k	case ')':
497	1.17M	case '{':
498	1.27M	case '}':
499	1.27M	error(errSyntaxError, getPos(), "Illegal character '{0:c}'", c);
500	1.27M	obj->initError();
501	1.27M	break;
502
503		// command
504	12.6M	default:
505	12.6M	p = tokBuf;
506	12.6M	*p++ = (char)c;
507	12.6M	n = 1;
508	120M	while ((c = lookChar()) != EOF && !specialChars[c]) {
509	108M	getChar();
510	108M	if (++n == tokBufSize) {
511	348k	error(errSyntaxError, getPos(), "Command token too long");
512	348k	break;
513	348k	}
514	107M	*p++ = (char)c;
515	107M	}
516	12.6M	*p = '\0';
517	12.6M	if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
518	67.2k	obj->initBool(gTrue);
519	12.5M	} else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
520	492	obj->initBool(gFalse);
521	12.5M	} else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
522	283	obj->initNull();
523	12.5M	} else {
524	12.5M	obj->initCmd(tokBuf);
525	12.5M	}
526	12.6M	break;
527	34.0M	}
528
529	34.0M	return obj;
530	34.0M	}
531
532	26.8k	void Lexer::skipToNextLine() {
533	26.8k	int c;
534
535	94.1k	while (1) {
536	94.1k	c = getChar();
537	94.1k	if (c == EOF || c == '\n') {
538	7.83k	return;
539	7.83k	}
540	86.3k	if (c == '\r') {
541	19.0k	if ((c = lookChar()) == '\n') {
542	15.5k	getChar();
543	15.5k	}
544	19.0k	return;
545	19.0k	}
546	86.3k	}
547	26.8k	}
548
549	14.2k	void Lexer::skipToEOF() {
550	712k	while (getChar() != EOF) ;
551	14.2k	}
552
553	26.7M	GBool Lexer::isSpace(int c) {
554	26.7M	return c >= 0 && c <= 0xff && specialChars[c] == 1;
555	26.7M	}