Coverage Report

Created: 2025-07-18 07:17

/src/xpdf-4.05/xpdf/UTF8.cc
Line
Count
Source (jump to first uncovered line)
1
//========================================================================
2
//
3
// UTF8.cc
4
//
5
// Copyright 2001-2017 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
#include <aconf.h>
10
#include "UTF8.h"
11
12
7.76k
int mapUTF8(Unicode u, char *buf, int bufSize) {
13
7.76k
  if        (u <= 0x0000007f) {
14
6.43k
    if (bufSize < 1) {
15
0
      return 0;
16
0
    }
17
6.43k
    buf[0] = (char)u;
18
6.43k
    return 1;
19
6.43k
  } else if (u <= 0x000007ff) {
20
923
    if (bufSize < 2) {
21
0
      return 0;
22
0
    }
23
923
    buf[0] = (char)(0xc0 + (u >> 6));
24
923
    buf[1] = (char)(0x80 + (u & 0x3f));
25
923
    return 2;
26
923
  } else if (u <= 0x0000ffff) {
27
355
    if (bufSize < 3) {
28
0
      return 0;
29
0
    }
30
355
    buf[0] = (char)(0xe0 + (u >> 12));
31
355
    buf[1] = (char)(0x80 + ((u >> 6) & 0x3f));
32
355
    buf[2] = (char)(0x80 + (u & 0x3f));
33
355
    return 3;
34
355
  } else if (u <= 0x0010ffff) {
35
14
    if (bufSize < 4) {
36
0
      return 0;
37
0
    }
38
14
    buf[0] = (char)(0xf0 + (u >> 18));
39
14
    buf[1] = (char)(0x80 + ((u >> 12) & 0x3f));
40
14
    buf[2] = (char)(0x80 + ((u >> 6) & 0x3f));
41
14
    buf[3] = (char)(0x80 + (u & 0x3f));
42
14
    return 4;
43
31
  } else {
44
31
    return 0;
45
31
  }
46
7.76k
}
47
48
0
int mapUCS2(Unicode u, char *buf, int bufSize) {
49
0
  if (u <= 0xffff) {
50
0
    if (bufSize < 2) {
51
0
      return 0;
52
0
    }
53
0
    buf[0] = (char)((u >> 8) & 0xff);
54
0
    buf[1] = (char)(u & 0xff);
55
0
    return 2;
56
0
  } else {
57
0
    return 0;
58
0
  }
59
0
}
60
61
0
GBool getUTF8(GString *s, int *i, Unicode *u) {
62
0
  Guchar c0, c1, c2, c3, c4, c5;
63
64
0
  if (*i >= s->getLength()) {
65
0
    return gFalse;
66
0
  }
67
0
  c0 = (Guchar)s->getChar((*i)++);
68
0
  if (c0 < 0x80) {
69
0
    *u = (Unicode)c0;
70
0
  } else if (c0 < 0xe0) {
71
0
    if (*i < s->getLength() &&
72
0
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80) {
73
0
      *i += 1;
74
0
      *u = (Unicode)(((c0 & 0x1f) << 6) |
75
0
         (c1 & 0x3f));
76
0
    } else {
77
0
      *u =  (Unicode)c0;
78
0
    }
79
0
  } else if (c0 < 0xf0) {
80
0
    if (*i < s->getLength() - 1 &&
81
0
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
82
0
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80) {
83
0
      *i += 2;
84
0
      *u = (Unicode)(((c0 & 0x0f) << 12) |
85
0
         ((c1 & 0x3f) << 6) |
86
0
         (c2 & 0x3f));
87
0
    } else {
88
0
      *u = (Unicode)c0;
89
0
    }
90
0
  } else if (c0 < 0xf8) {
91
0
    if (*i < s->getLength() - 2 &&
92
0
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
93
0
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
94
0
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80) {
95
0
      *i += 3;
96
0
      *u = (Unicode)(((c0 & 0x07) << 18) |
97
0
         ((c1 & 0x3f) << 12) |
98
0
         ((c2 & 0x3f) << 6) |
99
0
         (c3 & 0x3f));
100
0
    } else {
101
0
      *u = (Unicode)c0;
102
0
    }
103
0
  } else if (c0 < 0xfc) {
104
0
    if (*i < s->getLength() - 3 &&
105
0
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
106
0
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
107
0
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
108
0
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80) {
109
0
      *i += 4;
110
0
      *u = (Unicode)(((c0 & 0x03) << 24) |
111
0
         ((c1 & 0x3f) << 18) |
112
0
         ((c2 & 0x3f) << 12) |
113
0
         ((c3 & 0x3f) << 6) |
114
0
         (c4 & 0x3f));
115
0
    } else {
116
0
      *u = (Unicode)c0;
117
0
    }
118
0
  } else if (c0 < 0xfe) {
119
0
    if (*i < s->getLength() - 4 &&
120
0
  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
121
0
  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
122
0
  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
123
0
  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80 &&
124
0
  ((c5 = (Guchar)s->getChar(*i + 4)) & 0xc0) == 0x80) {
125
0
      *i += 5;
126
0
      *u = (Unicode)(((c0 & 0x01) << 30) |
127
0
         ((c1 & 0x3f) << 24) |
128
0
         ((c2 & 0x3f) << 18) |
129
0
         ((c3 & 0x3f) << 12) |
130
0
         ((c4 & 0x3f) << 6) |
131
0
         (c5 & 0x3f));
132
0
    } else {
133
0
      *u = (Unicode)c0;
134
0
    }
135
0
  } else {
136
0
    *u = (Unicode)c0;
137
0
  }
138
0
  return gTrue;
139
0
}
140
141
6.62M
GBool getUTF16BE(GString *s, int *i, Unicode *u) {
142
6.62M
  int w0, w1;
143
144
6.62M
  if (*i >= s->getLength() - 1) {
145
16.5k
    return gFalse;
146
16.5k
  }
147
6.61M
  w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
148
6.61M
  *i += 2;
149
6.61M
  if (w0 < 0xd800 || w0 >= 0xe000) {
150
6.47M
    *u = (Unicode)w0;
151
6.47M
  } else {
152
135k
    if (*i < s->getLength() - 1) {
153
134k
      w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
154
134k
      *i += 2;
155
134k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
156
134k
    } else {
157
906
      *u = (Unicode)w0;
158
906
    }
159
135k
  }
160
6.61M
  return gTrue;
161
6.62M
}
162
163
14.4M
GBool getUTF16LE(GString *s, int *i, Unicode *u) {
164
14.4M
  int w0, w1;
165
166
14.4M
  if (*i >= s->getLength() - 1) {
167
1.86k
    return gFalse;
168
1.86k
  }
169
14.4M
  w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
170
14.4M
  *i += 2;
171
14.4M
  if (w0 < 0xd800 || w0 >= 0xe000) {
172
14.4M
    *u = (Unicode)w0;
173
14.4M
  } else {
174
27.7k
    if (*i < s->getLength() - 1) {
175
27.5k
      w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
176
27.5k
      *i += 2;
177
27.5k
      *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
178
27.5k
    } else {
179
217
      *u = (Unicode)w0;
180
217
    }
181
27.7k
  }
182
14.4M
  return gTrue;
183
14.4M
}