Coverage Report

Created: 2026-04-12 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xpdf-4.06/xpdf/UTF8.cc
Line
Count
Source
1
//========================================================================
2
//
3
// UTF8.cc
4
//
5
// Copyright 2001-2017 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
#include <aconf.h>
10
#include "UTF8.h"
11
12
41.7k
int mapUTF8(Unicode u, char *buf, int bufSize) {
13
41.7k
  if        (u <= 0x0000007f) {
14
38.6k
    if (bufSize < 1) {
15
0
      return 0;
16
0
    }
17
38.6k
    buf[0] = (char)u;
18
38.6k
    return 1;
19
38.6k
  } else if (u <= 0x000007ff) {
20
962
    if (bufSize < 2) {
21
0
      return 0;
22
0
    }
23
962
    buf[0] = (char)(0xc0 + (u >> 6));
24
962
    buf[1] = (char)(0x80 + (u & 0x3f));
25
962
    return 2;
26
2.10k
  } else if (u <= 0x0000ffff) {
27
1.93k
    if (bufSize < 3) {
28
0
      return 0;
29
0
    }
30
1.93k
    buf[0] = (char)(0xe0 + (u >> 12));
31
1.93k
    buf[1] = (char)(0x80 + ((u >> 6) & 0x3f));
32
1.93k
    buf[2] = (char)(0x80 + (u & 0x3f));
33
1.93k
    return 3;
34
1.93k
  } else if (u <= 0x0010ffff) {
35
79
    if (bufSize < 4) {
36
0
      return 0;
37
0
    }
38
79
    buf[0] = (char)(0xf0 + (u >> 18));
39
79
    buf[1] = (char)(0x80 + ((u >> 12) & 0x3f));
40
79
    buf[2] = (char)(0x80 + ((u >> 6) & 0x3f));
41
79
    buf[3] = (char)(0x80 + (u & 0x3f));
42
79
    return 4;
43
95
  } else {
44
95
    return 0;
45
95
  }
46
41.7k
}
47
48
0
int mapUCS2(Unicode u, char *buf, int bufSize) {
49
0
  if (u <= 0xffff) {
50
0
    if (bufSize < 2) {
51
0
      return 0;
52
0
    }
53
0
    buf[0] = (char)((u >> 8) & 0xff);
54
0
    buf[1] = (char)(u & 0xff);
55
0
    return 2;
56
0
  } else {
57
0
    return 0;
58
0
  }
59
0
}
60
61
4.35M
GBool getUTF8(GString *s, int *i, Unicode *u) {
62
4.35M
  Guchar c0, c1, c2, c3, c4, c5;
63
64
4.35M
  if (*i >= s->getLength()) {
65
817
    return gFalse;
66
817
  }
67
4.35M
  c0 = (Guchar)s->getChar((*i)++);
68
4.35M
  if (c0 < 0x80) {
69
3.42M
    *u = (Unicode)c0;
70
3.42M
  } else if (c0 < 0xe0) {
71
574k
    if (*i < s->getLength() &&
72
574k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80) {
73
191k
      *i += 1;
74
191k
      *u = (Unicode)(((c0 & 0x1f) << 6) |
75
191k
         (c1 & 0x3f));
76
382k
    } else {
77
382k
      *u =  (Unicode)c0;
78
382k
    }
79
574k
  } else if (c0 < 0xf0) {
80
120k
    if (*i < s->getLength() - 1 &&
81
119k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
82
15.8k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80) {
83
5.39k
      *i += 2;
84
5.39k
      *u = (Unicode)(((c0 & 0x0f) << 12) |
85
5.39k
         ((c1 & 0x3f) << 6) |
86
5.39k
         (c2 & 0x3f));
87
114k
    } else {
88
114k
      *u = (Unicode)c0;
89
114k
    }
90
242k
  } else if (c0 < 0xf8) {
91
29.7k
    if (*i < s->getLength() - 2 &&
92
29.7k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
93
11.5k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
94
5.03k
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80) {
95
1.97k
      *i += 3;
96
1.97k
      *u = (Unicode)(((c0 & 0x07) << 18) |
97
1.97k
         ((c1 & 0x3f) << 12) |
98
1.97k
         ((c2 & 0x3f) << 6) |
99
1.97k
         (c3 & 0x3f));
100
27.8k
    } else {
101
27.8k
      *u = (Unicode)c0;
102
27.8k
    }
103
213k
  } else if (c0 < 0xfc) {
104
7.70k
    if (*i < s->getLength() - 3 &&
105
7.70k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
106
1.28k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
107
695
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
108
575
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80) {
109
575
      *i += 4;
110
575
      *u = (Unicode)(((c0 & 0x03) << 24) |
111
575
         ((c1 & 0x3f) << 18) |
112
575
         ((c2 & 0x3f) << 12) |
113
575
         ((c3 & 0x3f) << 6) |
114
575
         (c4 & 0x3f));
115
7.12k
    } else {
116
7.12k
      *u = (Unicode)c0;
117
7.12k
    }
118
205k
  } else if (c0 < 0xfe) {
119
32.8k
    if (*i < s->getLength() - 4 &&
120
32.2k
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
121
3.52k
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
122
2.69k
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
123
2.54k
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80 &&
124
2.19k
  ((c5 = (Guchar)s->getChar(*i + 4)) & 0xc0) == 0x80) {
125
1.71k
      *i += 5;
126
1.71k
      *u = (Unicode)(((c0 & 0x01) << 30) |
127
1.71k
         ((c1 & 0x3f) << 24) |
128
1.71k
         ((c2 & 0x3f) << 18) |
129
1.71k
         ((c3 & 0x3f) << 12) |
130
1.71k
         ((c4 & 0x3f) << 6) |
131
1.71k
         (c5 & 0x3f));
132
31.0k
    } else {
133
31.0k
      *u = (Unicode)c0;
134
31.0k
    }
135
172k
  } else {
136
172k
    *u = (Unicode)c0;
137
172k
  }
138
4.35M
  return gTrue;
139
4.35M
}
140
141
2.98M
GBool getUTF16BE(GString *s, int *i, Unicode *u) {
142
2.98M
  int w0, w1;
143
144
2.98M
  if (*i >= s->getLength() - 1) {
145
14.2k
    return gFalse;
146
14.2k
  }
147
2.97M
  w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
148
2.97M
  *i += 2;
149
2.97M
  if (w0 < 0xd800 || w0 >= 0xe000) {
150
2.88M
    *u = (Unicode)w0;
151
2.88M
  } else {
152
83.7k
    if (*i < s->getLength() - 1) {
153
83.4k
      w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
154
83.4k
      *i += 2;
155
83.4k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
156
83.4k
    } else {
157
383
      *u = (Unicode)w0;
158
383
    }
159
83.7k
  }
160
2.97M
  return gTrue;
161
2.98M
}
162
163
8.62M
GBool getUTF16LE(GString *s, int *i, Unicode *u) {
164
8.62M
  int w0, w1;
165
166
8.62M
  if (*i >= s->getLength() - 1) {
167
1.61k
    return gFalse;
168
1.61k
  }
169
8.62M
  w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
170
8.62M
  *i += 2;
171
8.62M
  if (w0 < 0xd800 || w0 >= 0xe000) {
172
8.60M
    *u = (Unicode)w0;
173
8.60M
  } else {
174
12.3k
    if (*i < s->getLength() - 1) {
175
12.1k
      w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
176
12.1k
      *i += 2;
177
12.1k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
178
12.1k
    } else {
179
190
      *u = (Unicode)w0;
180
190
    }
181
12.3k
  }
182
8.62M
  return gTrue;
183
8.62M
}