Coverage Report

Created: 2026-03-31 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xpdf-4.06/xpdf/UTF8.cc
Line
Count
Source
1
//========================================================================
2
//
3
// UTF8.cc
4
//
5
// Copyright 2001-2017 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
#include <aconf.h>
10
#include "UTF8.h"
11
12
37.4k
int mapUTF8(Unicode u, char *buf, int bufSize) {
13
37.4k
  if        (u <= 0x0000007f) {
14
34.3k
    if (bufSize < 1) {
15
0
      return 0;
16
0
    }
17
34.3k
    buf[0] = (char)u;
18
34.3k
    return 1;
19
34.3k
  } else if (u <= 0x000007ff) {
20
948
    if (bufSize < 2) {
21
0
      return 0;
22
0
    }
23
948
    buf[0] = (char)(0xc0 + (u >> 6));
24
948
    buf[1] = (char)(0x80 + (u & 0x3f));
25
948
    return 2;
26
2.18k
  } else if (u <= 0x0000ffff) {
27
1.99k
    if (bufSize < 3) {
28
0
      return 0;
29
0
    }
30
1.99k
    buf[0] = (char)(0xe0 + (u >> 12));
31
1.99k
    buf[1] = (char)(0x80 + ((u >> 6) & 0x3f));
32
1.99k
    buf[2] = (char)(0x80 + (u & 0x3f));
33
1.99k
    return 3;
34
1.99k
  } else if (u <= 0x0010ffff) {
35
83
    if (bufSize < 4) {
36
0
      return 0;
37
0
    }
38
83
    buf[0] = (char)(0xf0 + (u >> 18));
39
83
    buf[1] = (char)(0x80 + ((u >> 12) & 0x3f));
40
83
    buf[2] = (char)(0x80 + ((u >> 6) & 0x3f));
41
83
    buf[3] = (char)(0x80 + (u & 0x3f));
42
83
    return 4;
43
106
  } else {
44
106
    return 0;
45
106
  }
46
37.4k
}
47
48
0
int mapUCS2(Unicode u, char *buf, int bufSize) {
49
0
  if (u <= 0xffff) {
50
0
    if (bufSize < 2) {
51
0
      return 0;
52
0
    }
53
0
    buf[0] = (char)((u >> 8) & 0xff);
54
0
    buf[1] = (char)(u & 0xff);
55
0
    return 2;
56
0
  } else {
57
0
    return 0;
58
0
  }
59
0
}
60
61
3.41M
GBool getUTF8(GString *s, int *i, Unicode *u) {
62
3.41M
  Guchar c0, c1, c2, c3, c4, c5;
63
64
3.41M
  if (*i >= s->getLength()) {
65
962
    return gFalse;
66
962
  }
67
3.41M
  c0 = (Guchar)s->getChar((*i)++);
68
3.41M
  if (c0 < 0x80) {
69
2.61M
    *u = (Unicode)c0;
70
2.61M
  } else if (c0 < 0xe0) {
71
521k
    if (*i < s->getLength() &&
72
521k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80) {
73
190k
      *i += 1;
74
190k
      *u = (Unicode)(((c0 & 0x1f) << 6) |
75
190k
         (c1 & 0x3f));
76
330k
    } else {
77
330k
      *u =  (Unicode)c0;
78
330k
    }
79
521k
  } else if (c0 < 0xf0) {
80
100k
    if (*i < s->getLength() - 1 &&
81
100k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
82
14.5k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80) {
83
5.36k
      *i += 2;
84
5.36k
      *u = (Unicode)(((c0 & 0x0f) << 12) |
85
5.36k
         ((c1 & 0x3f) << 6) |
86
5.36k
         (c2 & 0x3f));
87
95.5k
    } else {
88
95.5k
      *u = (Unicode)c0;
89
95.5k
    }
90
174k
  } else if (c0 < 0xf8) {
91
24.6k
    if (*i < s->getLength() - 2 &&
92
24.5k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
93
8.47k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
94
3.63k
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80) {
95
1.67k
      *i += 3;
96
1.67k
      *u = (Unicode)(((c0 & 0x07) << 18) |
97
1.67k
         ((c1 & 0x3f) << 12) |
98
1.67k
         ((c2 & 0x3f) << 6) |
99
1.67k
         (c3 & 0x3f));
100
23.0k
    } else {
101
23.0k
      *u = (Unicode)c0;
102
23.0k
    }
103
149k
  } else if (c0 < 0xfc) {
104
7.85k
    if (*i < s->getLength() - 3 &&
105
7.77k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
106
1.71k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
107
866
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
108
765
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80) {
109
662
      *i += 4;
110
662
      *u = (Unicode)(((c0 & 0x03) << 24) |
111
662
         ((c1 & 0x3f) << 18) |
112
662
         ((c2 & 0x3f) << 12) |
113
662
         ((c3 & 0x3f) << 6) |
114
662
         (c4 & 0x3f));
115
7.19k
    } else {
116
7.19k
      *u = (Unicode)c0;
117
7.19k
    }
118
141k
  } else if (c0 < 0xfe) {
119
23.2k
    if (*i < s->getLength() - 4 &&
120
22.8k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
121
3.60k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
122
2.90k
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
123
2.71k
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80 &&
124
2.49k
  ((c5 = (Guchar)s->getChar(*i + 4)) & 0xc0) == 0x80) {
125
2.11k
      *i += 5;
126
2.11k
      *u = (Unicode)(((c0 & 0x01) << 30) |
127
2.11k
         ((c1 & 0x3f) << 24) |
128
2.11k
         ((c2 & 0x3f) << 18) |
129
2.11k
         ((c3 & 0x3f) << 12) |
130
2.11k
         ((c4 & 0x3f) << 6) |
131
2.11k
         (c5 & 0x3f));
132
21.1k
    } else {
133
21.1k
      *u = (Unicode)c0;
134
21.1k
    }
135
118k
  } else {
136
118k
    *u = (Unicode)c0;
137
118k
  }
138
3.41M
  return gTrue;
139
3.41M
}
140
141
3.06M
GBool getUTF16BE(GString *s, int *i, Unicode *u) {
142
3.06M
  int w0, w1;
143
144
3.06M
  if (*i >= s->getLength() - 1) {
145
14.0k
    return gFalse;
146
14.0k
  }
147
3.05M
  w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
148
3.05M
  *i += 2;
149
3.05M
  if (w0 < 0xd800 || w0 >= 0xe000) {
150
2.96M
    *u = (Unicode)w0;
151
2.96M
  } else {
152
91.0k
    if (*i < s->getLength() - 1) {
153
90.6k
      w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
154
90.6k
      *i += 2;
155
90.6k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
156
90.6k
    } else {
157
423
      *u = (Unicode)w0;
158
423
    }
159
91.0k
  }
160
3.05M
  return gTrue;
161
3.06M
}
162
163
5.95M
GBool getUTF16LE(GString *s, int *i, Unicode *u) {
164
5.95M
  int w0, w1;
165
166
5.95M
  if (*i >= s->getLength() - 1) {
167
1.45k
    return gFalse;
168
1.45k
  }
169
5.94M
  w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
170
5.94M
  *i += 2;
171
5.94M
  if (w0 < 0xd800 || w0 >= 0xe000) {
172
5.94M
    *u = (Unicode)w0;
173
5.94M
  } else {
174
9.13k
    if (*i < s->getLength() - 1) {
175
8.91k
      w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
176
8.91k
      *i += 2;
177
8.91k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
178
8.91k
    } else {
179
221
      *u = (Unicode)w0;
180
221
    }
181
9.13k
  }
182
5.94M
  return gTrue;
183
5.95M
}