/src/libvisio/src/lib/VSDMetaData.cpp
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* |
3 | | * This file is part of the libvisio project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | */ |
9 | | |
10 | | #include "VSDMetaData.h" |
11 | | |
12 | | #include <cassert> |
13 | | #include <cmath> |
14 | | #include <cstdio> |
15 | | #include <cstring> |
16 | | #include <string> |
17 | | #include <unicode/ucnv.h> |
18 | | #include <ctime> |
19 | | |
20 | | libvisio::VSDMetaData::VSDMetaData() |
21 | 488 | : m_idsAndOffsets(), m_typedPropertyValues(), m_metaData() |
22 | 488 | { |
23 | 488 | } |
24 | | |
25 | | libvisio::VSDMetaData::~VSDMetaData() |
26 | 488 | { |
27 | 488 | } |
28 | | |
// Property identifiers that readTypedPropertyValue() matches against when the
// property set's FMTID is d5cdd502-2e9c-101b-9397-08002b2cf9ae.
enum PIDDSI
{
  PIDDSI_CODEPAGE          = 0x01,
  PIDDSI_CATEGORY          = 0x02,
  PIDDSI_PRESFORMAT        = 0x03,
  PIDDSI_BYTECOUNT         = 0x04,
  PIDDSI_LINECOUNT         = 0x05,
  PIDDSI_PARACOUNT         = 0x06,
  PIDDSI_SLIDECOUNT        = 0x07,
  PIDDSI_NOTECOUNT         = 0x08,
  PIDDSI_HIDDENCOUNT       = 0x09,
  PIDDSI_MMCLIPCOUNT       = 0x0A,
  PIDDSI_SCALE             = 0x0B,
  PIDDSI_HEADINGPAIR       = 0x0C,
  PIDDSI_DOCPARTS          = 0x0D,
  PIDDSI_MANAGER           = 0x0E,
  PIDDSI_COMPANY           = 0x0F,
  PIDDSI_LINKSDIRTY        = 0x10,
  PIDDSI_CCHWITHSPACES     = 0x11,
  PIDDSI_SHAREDDOC         = 0x13,
  PIDDSI_LINKBASE          = 0x14,
  PIDDSI_HLINKS            = 0x15,
  PIDDSI_HYPERLINKSCHANGED = 0x16,
  PIDDSI_VERSION           = 0x17,
  PIDDSI_DIGSIG            = 0x18,
  PIDDSI_CONTENTTYPE       = 0x1A,
  PIDDSI_CONTENTSTATUS     = 0x1B,
  PIDDSI_LANGUAGE          = 0x1C,
  PIDDSI_DOCVERSION        = 0x1D
};
59 | | |
// Property identifiers that readTypedPropertyValue() matches against when the
// property set's FMTID is f29f85e0-4ff9-1068-ab91-08002b27b3d9.
// CODEPAGE_PROPERTY_IDENTIFIER is the entry getCodePage() searches for.
enum PIDSI
{
  CODEPAGE_PROPERTY_IDENTIFIER = 0x01,
  PIDSI_TITLE                  = 0x02,
  PIDSI_SUBJECT                = 0x03,
  PIDSI_AUTHOR                 = 0x04,
  PIDSI_KEYWORDS               = 0x05,
  PIDSI_COMMENTS               = 0x06,
  PIDSI_TEMPLATE               = 0x07,
  PIDSI_LASTAUTHOR             = 0x08,
  PIDSI_REVNUMBER              = 0x09,
  PIDSI_EDITTIME               = 0x0A,
  PIDSI_LASTPRINTED            = 0x0B,
  PIDSI_CREATE_DTM             = 0x0C,
  PIDSI_LASTSAVE_DTM           = 0x0D,
  PIDSI_PAGECOUNT              = 0x0E,
  PIDSI_WORDCOUNT              = 0x0F,
  PIDSI_CHARCOUNT              = 0x10,
  PIDSI_THUMBNAIL              = 0x11,
  PIDSI_APPNAME                = 0x12,
  PIDSI_DOC_SECURITY           = 0x13
};
82 | | |
83 | | bool libvisio::VSDMetaData::parse(librevenge::RVNGInputStream *input) |
84 | 845 | { |
85 | 845 | if (!input) |
86 | 0 | return false; |
87 | | |
88 | 845 | readPropertySetStream(input); |
89 | | |
90 | 845 | return true; |
91 | 845 | } |
92 | | |
93 | | void libvisio::VSDMetaData::readPropertySetStream(librevenge::RVNGInputStream *input) |
94 | 845 | { |
95 | | // ByteOrder |
96 | 845 | input->seek(2, librevenge::RVNG_SEEK_CUR); |
97 | | // Version |
98 | 845 | input->seek(2, librevenge::RVNG_SEEK_CUR); |
99 | | // SystemIdentifier |
100 | 845 | input->seek(4, librevenge::RVNG_SEEK_CUR); |
101 | | // CLSID |
102 | 845 | input->seek(16, librevenge::RVNG_SEEK_CUR); |
103 | | // NumPropertySets |
104 | 845 | input->seek(4, librevenge::RVNG_SEEK_CUR); |
105 | | // FMTID0 |
106 | | //input->seek(16, librevenge::RVNG_SEEK_CUR); |
107 | 845 | uint32_t data1 = readU32(input); |
108 | 845 | uint16_t data2 = readU16(input); |
109 | 845 | uint16_t data3 = readU16(input); |
110 | 845 | uint8_t data4[8]; |
111 | 845 | for (unsigned char &i : data4) |
112 | 6.76k | { |
113 | 6.76k | i = readU8(input); |
114 | 6.76k | } |
115 | | // Pretty-printed GUID is 36 bytes + the terminating null-character. |
116 | 845 | char FMTID0[37]; |
117 | 845 | sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3, |
118 | 845 | data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]); |
119 | | |
120 | 845 | uint32_t offset0 = readU32(input); |
121 | 845 | readPropertySet(input, offset0, FMTID0); |
122 | 845 | } |
123 | | |
124 | | void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID) |
125 | 845 | { |
126 | 845 | input->seek(offset, librevenge::RVNG_SEEK_SET); |
127 | | |
128 | | // Size |
129 | 845 | input->seek(4, librevenge::RVNG_SEEK_CUR); |
130 | 845 | uint32_t numProperties = readU32(input); |
131 | | // The exact size of a property is not known beforehand: check upper bound |
132 | 845 | if (numProperties > getRemainingLength(input) / 12) |
133 | 90 | numProperties = getRemainingLength(input) / 12; |
134 | 35.5k | for (uint32_t i = 0; i < numProperties; ++i) |
135 | 34.7k | readPropertyIdentifierAndOffset(input); |
136 | 10.2k | for (uint32_t i = 0; i < numProperties; ++i) |
137 | 9.41k | { |
138 | 9.41k | if (i >= m_idsAndOffsets.size()) |
139 | 0 | break; |
140 | 9.41k | readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID); |
141 | 9.41k | } |
142 | 845 | } |
143 | | |
144 | | uint32_t libvisio::VSDMetaData::getCodePage() |
145 | 3.10k | { |
146 | 20.9k | for (size_t i = 0; i < m_idsAndOffsets.size(); ++i) |
147 | 20.9k | { |
148 | 20.9k | if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER) |
149 | 3.03k | { |
150 | 3.03k | if (i >= m_typedPropertyValues.size()) |
151 | 81 | break; |
152 | 2.95k | return m_typedPropertyValues[i]; |
153 | 3.03k | } |
154 | 20.9k | } |
155 | | |
156 | 145 | return 0; |
157 | 3.10k | } |
158 | | |
159 | | void libvisio::VSDMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input) |
160 | 34.7k | { |
161 | 34.7k | uint32_t propertyIdentifier = readU32(input); |
162 | 34.7k | uint32_t offset = readU32(input); |
163 | 34.7k | m_idsAndOffsets.push_back(std::make_pair(propertyIdentifier, offset)); |
164 | 34.7k | } |
165 | | |
166 | 9.41k | #define VT_I2 0x0002 |
167 | 8.19k | #define VT_LPSTR 0x001E |
168 | | |
169 | | void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, |
170 | | uint32_t index, |
171 | | uint32_t offset, |
172 | | char *FMTID) |
173 | 9.41k | { |
174 | 9.41k | input->seek(offset, librevenge::RVNG_SEEK_SET); |
175 | 9.41k | uint16_t type = readU16(input); |
176 | | // Padding |
177 | 9.41k | input->seek(2, librevenge::RVNG_SEEK_CUR); |
178 | | |
179 | 9.41k | if (type == VT_I2) |
180 | 1.22k | { |
181 | 1.22k | uint16_t value = readU16(input); |
182 | 1.22k | m_typedPropertyValues[index] = value; |
183 | 1.22k | } |
184 | 8.19k | else if (type == VT_LPSTR) |
185 | 3.12k | { |
186 | 3.12k | librevenge::RVNGString string = readCodePageString(input); |
187 | 3.12k | if (!string.empty()) |
188 | 1.46k | { |
189 | 1.46k | if (index >= m_idsAndOffsets.size()) |
190 | 0 | return; |
191 | | |
192 | 1.46k | if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9")) |
193 | 1.23k | { |
194 | 1.23k | switch (m_idsAndOffsets[index].first) |
195 | 1.23k | { |
196 | 144 | case PIDSI_TITLE: |
197 | 144 | m_metaData.insert("dc:title", string); |
198 | 144 | break; |
199 | 56 | case PIDSI_SUBJECT: |
200 | 56 | m_metaData.insert("dc:subject", string); |
201 | 56 | break; |
202 | 301 | case PIDSI_AUTHOR: |
203 | 301 | m_metaData.insert("meta:initial-creator", string); |
204 | 301 | m_metaData.insert("dc:creator", string); |
205 | 301 | break; |
206 | 9 | case PIDSI_KEYWORDS: |
207 | 9 | m_metaData.insert("meta:keyword", string); |
208 | 9 | break; |
209 | 9 | case PIDSI_COMMENTS: |
210 | 9 | m_metaData.insert("dc:description", string); |
211 | 9 | break; |
212 | 194 | case PIDSI_TEMPLATE: |
213 | 194 | std::string templateHref(string.cstr()); |
214 | 194 | size_t found = templateHref.find_last_of("/\\"); |
215 | 194 | if (found != std::string::npos) |
216 | 187 | string = librevenge::RVNGString(templateHref.substr(found+1).c_str()); |
217 | 194 | m_metaData.insert("librevenge:template", string); |
218 | 194 | break; |
219 | 1.23k | } |
220 | 1.23k | } |
221 | 228 | else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae")) |
222 | 169 | { |
223 | 169 | switch (m_idsAndOffsets[index].first) |
224 | 169 | { |
225 | 9 | case PIDDSI_CATEGORY: |
226 | 9 | m_metaData.insert("librevenge:category", string); |
227 | 9 | break; |
228 | 4 | case PIDDSI_LINECOUNT: |
229 | | // this should actually be PIDDSI_COMPANY but this |
230 | | // is what company is mapped to |
231 | 4 | m_metaData.insert("librevenge:company", string); |
232 | 4 | break; |
233 | 3 | case PIDDSI_LANGUAGE: |
234 | 3 | m_metaData.insert("dc:language", string); |
235 | 3 | break; |
236 | 169 | } |
237 | 169 | } |
238 | 1.46k | } |
239 | 3.12k | } |
240 | 9.41k | } |
241 | | |
242 | | librevenge::RVNGString libvisio::VSDMetaData::readCodePageString(librevenge::RVNGInputStream *input) |
243 | 3.12k | { |
244 | 3.12k | uint32_t size = readU32(input); |
245 | 3.12k | if (size > getRemainingLength(input)) |
246 | 118 | size = getRemainingLength(input); |
247 | | |
248 | 3.12k | if (size == 0) |
249 | 20 | return librevenge::RVNGString(); |
250 | | |
251 | 3.10k | std::vector<unsigned char> characters; |
252 | 392k | for (uint32_t i = 0; i < size; ++i) |
253 | 389k | characters.push_back(readU8(input)); |
254 | | |
255 | 3.10k | uint32_t codepage = getCodePage(); |
256 | 3.10k | librevenge::RVNGString string; |
257 | | |
258 | 3.10k | if (codepage == 65001) |
259 | 79 | { |
260 | | // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx |
261 | | // says this is UTF-8. |
262 | 79 | characters.push_back(0); |
263 | 79 | string.append(reinterpret_cast<const char *>(characters.data())); |
264 | 79 | } |
265 | 3.02k | else |
266 | 3.02k | { |
267 | 3.02k | UErrorCode status = U_ZERO_ERROR; |
268 | 3.02k | UConverter *conv = nullptr; |
269 | | |
270 | 3.02k | switch (codepage) |
271 | 3.02k | { |
272 | 2.71k | case 1252: |
273 | | // http://msdn.microsoft.com/en-us/goglobal/bb964654 |
274 | 2.71k | conv = ucnv_open("windows-1252", &status); |
275 | 2.71k | break; |
276 | 3.02k | } |
277 | | |
278 | 3.02k | if (U_SUCCESS(status) && conv) |
279 | 2.71k | { |
280 | 2.71k | assert(!characters.empty()); |
281 | 2.71k | const auto *src = (const char *)characters.data(); |
282 | 2.71k | const char *srcLimit = (const char *)src + characters.size(); |
283 | 356k | while (src < srcLimit) |
284 | 353k | { |
285 | 353k | UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status); |
286 | 353k | if (U_SUCCESS(status) && U_IS_UNICODE_CHAR(ucs4Character)) |
287 | 353k | appendUCS4(string, ucs4Character); |
288 | 353k | } |
289 | 2.71k | } |
290 | | |
291 | 3.02k | if (conv) |
292 | 2.71k | ucnv_close(conv); |
293 | 3.02k | } |
294 | | |
295 | 3.10k | return string; |
296 | 3.10k | } |
297 | | |
298 | | bool libvisio::VSDMetaData::parseTimes(librevenge::RVNGInputStream *input) |
299 | 384 | { |
300 | | // Parse the header |
301 | | // HeaderSignature: 8 bytes |
302 | | // HeaderCLSID: 16 bytes |
303 | | // MinorVersion: 2 bytes |
304 | | // MajorVersion: 2 bytes |
305 | | // ByteOrder: 2 bytes |
306 | 384 | input->seek(30, librevenge::RVNG_SEEK_CUR); |
307 | 384 | uint16_t sectorShift = readU16(input); |
308 | | // MiniSectorShift: 2 bytes |
309 | | // Reserved: 6 bytes |
310 | | // NumDirectorySectors: 4 bytes |
311 | | // NumFATSectors: 4 bytes |
312 | 384 | input->seek(16, librevenge::RVNG_SEEK_CUR); |
313 | 384 | uint32_t firstDirSectorLocation = readU32(input); |
314 | | |
315 | | // Seek to the Root Directory Entry |
316 | 384 | size_t sectorSize = std::pow(2, sectorShift); |
317 | 384 | input->seek((firstDirSectorLocation + 1) * sectorSize, librevenge::RVNG_SEEK_SET); |
318 | | // DirectoryEntryName: 64 bytes |
319 | | // DirectoryEntryNameLength: 2 bytes |
320 | | // ObjectType: 1 byte |
321 | | // ColorFlag: 1 byte |
322 | | // LeftSiblingID: 4 bytes |
323 | | // RightSiblingID: 4 bytes |
324 | | // ChildID: 4 bytes |
325 | | // CLSID: 16 bytes |
326 | | // StateBits: 4 bytes |
327 | | // CreationTime: 8 bytes |
328 | 384 | input->seek(108, librevenge::RVNG_SEEK_CUR); |
329 | 384 | uint64_t modifiedTime = readU64(input); |
330 | | |
331 | | // modifiedTime is number of 100ns since Jan 1 1601 |
332 | 384 | const uint64_t epoch = uint64_t(116444736UL) * 100; |
333 | 384 | time_t sec = (modifiedTime / 10000000) - epoch; |
334 | 384 | const struct tm *time = localtime(&sec); |
335 | 384 | if (time) |
336 | 384 | { |
337 | 384 | static const int MAX_BUFFER = 1024; |
338 | 384 | char buffer[MAX_BUFFER]; |
339 | 384 | strftime(&buffer[0], MAX_BUFFER-1, "%Y-%m-%dT%H:%M:%SZ", time); |
340 | 384 | librevenge::RVNGString result; |
341 | 384 | result.append(buffer); |
342 | | // Visio UI uses modifiedTime for both purposes. |
343 | 384 | m_metaData.insert("meta:creation-date", result); |
344 | 384 | m_metaData.insert("dc:date", result); |
345 | 384 | return true; |
346 | 384 | } |
347 | 0 | return false; |
348 | 384 | } |
349 | | |
350 | | const librevenge::RVNGPropertyList &libvisio::VSDMetaData::getMetaData() |
351 | 384 | { |
352 | 384 | return m_metaData; |
353 | 384 | } |
354 | | |
355 | | /* vim:set shiftwidth=2 softtabstop=2 expandtab: */ |