Coverage Report

Created: 2026-04-09 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xpdf-4.06/xpdf/UTF8.cc
Line
Count
Source
1
//========================================================================
2
//
3
// UTF8.cc
4
//
5
// Copyright 2001-2017 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
#include <aconf.h>
10
#include "UTF8.h"
11
12
153k
int mapUTF8(Unicode u, char *buf, int bufSize) {
13
153k
  if        (u <= 0x0000007f) {
14
131k
    if (bufSize < 1) {
15
0
      return 0;
16
0
    }
17
131k
    buf[0] = (char)u;
18
131k
    return 1;
19
131k
  } else if (u <= 0x000007ff) {
20
16.1k
    if (bufSize < 2) {
21
0
      return 0;
22
0
    }
23
16.1k
    buf[0] = (char)(0xc0 + (u >> 6));
24
16.1k
    buf[1] = (char)(0x80 + (u & 0x3f));
25
16.1k
    return 2;
26
16.1k
  } else if (u <= 0x0000ffff) {
27
5.14k
    if (bufSize < 3) {
28
0
      return 0;
29
0
    }
30
5.14k
    buf[0] = (char)(0xe0 + (u >> 12));
31
5.14k
    buf[1] = (char)(0x80 + ((u >> 6) & 0x3f));
32
5.14k
    buf[2] = (char)(0x80 + (u & 0x3f));
33
5.14k
    return 3;
34
5.14k
  } else if (u <= 0x0010ffff) {
35
102
    if (bufSize < 4) {
36
0
      return 0;
37
0
    }
38
102
    buf[0] = (char)(0xf0 + (u >> 18));
39
102
    buf[1] = (char)(0x80 + ((u >> 12) & 0x3f));
40
102
    buf[2] = (char)(0x80 + ((u >> 6) & 0x3f));
41
102
    buf[3] = (char)(0x80 + (u & 0x3f));
42
102
    return 4;
43
115
  } else {
44
115
    return 0;
45
115
  }
46
153k
}
47
48
0
int mapUCS2(Unicode u, char *buf, int bufSize) {
49
0
  if (u <= 0xffff) {
50
0
    if (bufSize < 2) {
51
0
      return 0;
52
0
    }
53
0
    buf[0] = (char)((u >> 8) & 0xff);
54
0
    buf[1] = (char)(u & 0xff);
55
0
    return 2;
56
0
  } else {
57
0
    return 0;
58
0
  }
59
0
}
60
61
4.47M
GBool getUTF8(GString *s, int *i, Unicode *u) {
62
4.47M
  Guchar c0, c1, c2, c3, c4, c5;
63
64
4.47M
  if (*i >= s->getLength()) {
65
1.73k
    return gFalse;
66
1.73k
  }
67
4.47M
  c0 = (Guchar)s->getChar((*i)++);
68
4.47M
  if (c0 < 0x80) {
69
3.51M
    *u = (Unicode)c0;
70
3.51M
  } else if (c0 < 0xe0) {
71
586k
    if (*i < s->getLength() &&
72
586k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80) {
73
197k
      *i += 1;
74
197k
      *u = (Unicode)(((c0 & 0x1f) << 6) |
75
197k
         (c1 & 0x3f));
76
389k
    } else {
77
389k
      *u =  (Unicode)c0;
78
389k
    }
79
586k
  } else if (c0 < 0xf0) {
80
124k
    if (*i < s->getLength() - 1 &&
81
123k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
82
16.1k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80) {
83
5.54k
      *i += 2;
84
5.54k
      *u = (Unicode)(((c0 & 0x0f) << 12) |
85
5.54k
         ((c1 & 0x3f) << 6) |
86
5.54k
         (c2 & 0x3f));
87
118k
    } else {
88
118k
      *u = (Unicode)c0;
89
118k
    }
90
247k
  } else if (c0 < 0xf8) {
91
30.6k
    if (*i < s->getLength() - 2 &&
92
30.5k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
93
11.8k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
94
5.13k
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80) {
95
2.03k
      *i += 3;
96
2.03k
      *u = (Unicode)(((c0 & 0x07) << 18) |
97
2.03k
         ((c1 & 0x3f) << 12) |
98
2.03k
         ((c2 & 0x3f) << 6) |
99
2.03k
         (c3 & 0x3f));
100
28.5k
    } else {
101
28.5k
      *u = (Unicode)c0;
102
28.5k
    }
103
217k
  } else if (c0 < 0xfc) {
104
8.56k
    if (*i < s->getLength() - 3 &&
105
8.48k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
106
1.50k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
107
859
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
108
720
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80) {
109
614
      *i += 4;
110
614
      *u = (Unicode)(((c0 & 0x03) << 24) |
111
614
         ((c1 & 0x3f) << 18) |
112
614
         ((c2 & 0x3f) << 12) |
113
614
         ((c3 & 0x3f) << 6) |
114
614
         (c4 & 0x3f));
115
7.95k
    } else {
116
7.95k
      *u = (Unicode)c0;
117
7.95k
    }
118
208k
  } else if (c0 < 0xfe) {
119
33.2k
    if (*i < s->getLength() - 4 &&
120
32.6k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
121
3.60k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
122
2.75k
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
123
2.60k
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80 &&
124
2.24k
  ((c5 = (Guchar)s->getChar(*i + 4)) & 0xc0) == 0x80) {
125
1.73k
      *i += 5;
126
1.73k
      *u = (Unicode)(((c0 & 0x01) << 30) |
127
1.73k
         ((c1 & 0x3f) << 24) |
128
1.73k
         ((c2 & 0x3f) << 18) |
129
1.73k
         ((c3 & 0x3f) << 12) |
130
1.73k
         ((c4 & 0x3f) << 6) |
131
1.73k
         (c5 & 0x3f));
132
31.5k
    } else {
133
31.5k
      *u = (Unicode)c0;
134
31.5k
    }
135
175k
  } else {
136
175k
    *u = (Unicode)c0;
137
175k
  }
138
4.47M
  return gTrue;
139
4.47M
}
140
141
3.78M
GBool getUTF16BE(GString *s, int *i, Unicode *u) {
142
3.78M
  int w0, w1;
143
144
3.78M
  if (*i >= s->getLength() - 1) {
145
20.9k
    return gFalse;
146
20.9k
  }
147
3.76M
  w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
148
3.76M
  *i += 2;
149
3.76M
  if (w0 < 0xd800 || w0 >= 0xe000) {
150
3.67M
    *u = (Unicode)w0;
151
3.67M
  } else {
152
92.3k
    if (*i < s->getLength() - 1) {
153
91.8k
      w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
154
91.8k
      *i += 2;
155
91.8k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
156
91.8k
    } else {
157
439
      *u = (Unicode)w0;
158
439
    }
159
92.3k
  }
160
3.76M
  return gTrue;
161
3.78M
}
162
163
8.78M
GBool getUTF16LE(GString *s, int *i, Unicode *u) {
164
8.78M
  int w0, w1;
165
166
8.78M
  if (*i >= s->getLength() - 1) {
167
4.04k
    return gFalse;
168
4.04k
  }
169
8.77M
  w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
170
8.77M
  *i += 2;
171
8.77M
  if (w0 < 0xd800 || w0 >= 0xe000) {
172
8.76M
    *u = (Unicode)w0;
173
8.76M
  } else {
174
14.2k
    if (*i < s->getLength() - 1) {
175
13.9k
      w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
176
13.9k
      *i += 2;
177
13.9k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
178
13.9k
    } else {
179
270
      *u = (Unicode)w0;
180
270
    }
181
14.2k
  }
182
8.77M
  return gTrue;
183
8.78M
}