/src/libmspub/src/lib/MSPUBMetaData.cpp
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* |
3 | | * This file is part of the libmspub project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | */ |
9 | | |
#include "MSPUBMetaData.h"

#include <cmath>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <limits>
#include <string>

#include "libmspub_utils.h"
19 | | |
20 | | libmspub::MSPUBMetaData::MSPUBMetaData() |
21 | 5.88k | : m_idsAndOffsets(), m_typedPropertyValues(), m_metaData() |
22 | 5.88k | { |
23 | 5.88k | } |
24 | | |
25 | | libmspub::MSPUBMetaData::~MSPUBMetaData() |
26 | 5.88k | { |
27 | 5.88k | } |
28 | | |
// Property identifiers that readTypedPropertyValue() matches when the
// property set FMTID is d5cdd502-2e9c-101b-9397-08002b2cf9ae.
// The numbering is not contiguous: 0x12 and 0x19 are not assigned here.
enum PIDDSI
{
  PIDDSI_CODEPAGE          = 0x01,
  PIDDSI_CATEGORY          = 0x02,
  PIDDSI_PRESFORMAT        = 0x03,
  PIDDSI_BYTECOUNT         = 0x04,
  PIDDSI_LINECOUNT         = 0x05,
  PIDDSI_PARACOUNT         = 0x06,
  PIDDSI_SLIDECOUNT        = 0x07,
  PIDDSI_NOTECOUNT         = 0x08,
  PIDDSI_HIDDENCOUNT       = 0x09,
  PIDDSI_MMCLIPCOUNT       = 0x0A,
  PIDDSI_SCALE             = 0x0B,
  PIDDSI_HEADINGPAIR       = 0x0C,
  PIDDSI_DOCPARTS          = 0x0D,
  PIDDSI_MANAGER           = 0x0E,
  PIDDSI_COMPANY           = 0x0F,
  PIDDSI_LINKSDIRTY        = 0x10,
  PIDDSI_CCHWITHSPACES     = 0x11,
  PIDDSI_SHAREDDOC         = 0x13,
  PIDDSI_LINKBASE          = 0x14,
  PIDDSI_HLINKS            = 0x15,
  PIDDSI_HYPERLINKSCHANGED = 0x16,
  PIDDSI_VERSION           = 0x17,
  PIDDSI_DIGSIG            = 0x18,
  PIDDSI_CONTENTTYPE       = 0x1A,
  PIDDSI_CONTENTSTATUS     = 0x1B,
  PIDDSI_LANGUAGE          = 0x1C,
  PIDDSI_DOCVERSION        = 0x1D
};
59 | | |
// Property identifiers that readTypedPropertyValue() matches when the
// property set FMTID is f29f85e0-4ff9-1068-ab91-08002b27b3d9.
// CODEPAGE_PROPERTY_IDENTIFIER is the entry getCodePage() searches for.
enum PIDSI
{
  CODEPAGE_PROPERTY_IDENTIFIER = 0x01,
  PIDSI_TITLE                  = 0x02,
  PIDSI_SUBJECT                = 0x03,
  PIDSI_AUTHOR                 = 0x04,
  PIDSI_KEYWORDS               = 0x05,
  PIDSI_COMMENTS               = 0x06,
  PIDSI_TEMPLATE               = 0x07,
  PIDSI_LASTAUTHOR             = 0x08,
  PIDSI_REVNUMBER              = 0x09,
  PIDSI_EDITTIME               = 0x0A,
  PIDSI_LASTPRINTED            = 0x0B,
  PIDSI_CREATE_DTM             = 0x0C,
  PIDSI_LASTSAVE_DTM           = 0x0D,
  PIDSI_PAGECOUNT              = 0x0E,
  PIDSI_WORDCOUNT              = 0x0F,
  PIDSI_CHARCOUNT              = 0x10,
  PIDSI_THUMBNAIL              = 0x11,
  PIDSI_APPNAME                = 0x12,
  PIDSI_DOC_SECURITY           = 0x13
};
82 | | |
83 | | bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input) |
84 | 3.66k | { |
85 | 3.66k | if (!input) |
86 | 0 | return false; |
87 | | |
88 | 3.66k | readPropertySetStream(input); |
89 | | |
90 | 3.66k | return true; |
91 | 3.66k | } |
92 | | |
93 | | void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream *input) |
94 | 3.66k | { |
95 | | // ByteOrder |
96 | 3.66k | input->seek(2, librevenge::RVNG_SEEK_CUR); |
97 | | // Version |
98 | 3.66k | input->seek(2, librevenge::RVNG_SEEK_CUR); |
99 | | // SystemIdentifier |
100 | 3.66k | input->seek(4, librevenge::RVNG_SEEK_CUR); |
101 | | // CLSID |
102 | 3.66k | input->seek(16, librevenge::RVNG_SEEK_CUR); |
103 | | // NumPropertySets |
104 | 3.66k | input->seek(4, librevenge::RVNG_SEEK_CUR); |
105 | | // FMTID0 |
106 | | //input->seek(16, librevenge::RVNG_SEEK_CUR); |
107 | 3.66k | uint32_t data1 = readU32(input); |
108 | 3.66k | uint16_t data2 = readU16(input); |
109 | 3.66k | uint16_t data3 = readU16(input); |
110 | 3.66k | uint8_t data4[8]; |
111 | 3.66k | for (unsigned char &i : data4) |
112 | 29.2k | { |
113 | 29.2k | i = readU8(input); |
114 | 29.2k | } |
115 | | // Pretty-printed GUID is 36 bytes + the terminating null-character. |
116 | 3.66k | char FMTID0[37]; |
117 | 3.66k | sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3, |
118 | 3.66k | data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]); |
119 | | |
120 | 3.66k | uint32_t offset0 = readU32(input); |
121 | 3.66k | readPropertySet(input, offset0, FMTID0); |
122 | 3.66k | } |
123 | | |
124 | | void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID) |
125 | 3.65k | { |
126 | 3.65k | input->seek(offset, librevenge::RVNG_SEEK_SET); |
127 | | |
128 | | // Size |
129 | 3.65k | input->seek(4, librevenge::RVNG_SEEK_CUR); |
130 | 3.65k | uint32_t numProperties = readU32(input); |
131 | 52.7k | for (uint32_t i = 0; i < numProperties; ++i) |
132 | 49.1k | readPropertyIdentifierAndOffset(input); |
133 | 19.1k | for (uint32_t i = 0; i < numProperties; ++i) |
134 | 15.4k | { |
135 | 15.4k | if (i >= m_idsAndOffsets.size()) |
136 | 0 | break; |
137 | 15.4k | readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID); |
138 | 15.4k | } |
139 | 3.65k | } |
140 | | |
141 | | uint32_t libmspub::MSPUBMetaData::getCodePage() |
142 | 2.68k | { |
143 | 56.3k | for (size_t i = 0; i < m_idsAndOffsets.size(); ++i) |
144 | 56.0k | { |
145 | 56.0k | if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER) |
146 | 2.28k | { |
147 | 2.28k | if (i >= m_typedPropertyValues.size()) |
148 | 282 | break; |
149 | 2.00k | return m_typedPropertyValues[i]; |
150 | 2.28k | } |
151 | 56.0k | } |
152 | | |
153 | 677 | return 0; |
154 | 2.68k | } |
155 | | |
156 | | void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input) |
157 | 49.1k | { |
158 | 49.1k | uint32_t propertyIdentifier = readU32(input); |
159 | 49.1k | uint32_t offset = readU32(input); |
160 | 49.1k | m_idsAndOffsets.push_back(std::make_pair(propertyIdentifier, offset)); |
161 | 49.1k | } |
162 | | |
163 | 15.4k | #define VT_I2 0x0002 |
164 | 10.2k | #define VT_LPSTR 0x001E |
165 | | |
166 | | void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, |
167 | | uint32_t index, |
168 | | uint32_t offset, |
169 | | char *FMTID) |
170 | 15.4k | { |
171 | 15.4k | input->seek(offset, librevenge::RVNG_SEEK_SET); |
172 | 15.4k | uint16_t type = readU16(input); |
173 | | // Padding |
174 | 15.4k | input->seek(2, librevenge::RVNG_SEEK_CUR); |
175 | | |
176 | 15.4k | if (type == VT_I2) |
177 | 5.15k | { |
178 | 5.15k | uint16_t value = readU16(input); |
179 | 5.15k | m_typedPropertyValues[index] = value; |
180 | 5.15k | } |
181 | 10.2k | else if (type == VT_LPSTR) |
182 | 2.86k | { |
183 | 2.86k | librevenge::RVNGString string = readCodePageString(input); |
184 | 2.86k | if (!string.empty()) |
185 | 1.09k | { |
186 | 1.09k | if (index >= m_idsAndOffsets.size()) |
187 | 0 | return; |
188 | | |
189 | 1.09k | if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9")) |
190 | 855 | { |
191 | 855 | switch (m_idsAndOffsets[index].first) |
192 | 855 | { |
193 | 9 | case PIDSI::PIDSI_TITLE: |
194 | 9 | m_metaData.insert("dc:title", string); |
195 | 9 | break; |
196 | 19 | case PIDSI::PIDSI_SUBJECT: |
197 | 19 | m_metaData.insert("dc:subject", string); |
198 | 19 | break; |
199 | 294 | case PIDSI::PIDSI_AUTHOR: |
200 | 294 | m_metaData.insert("meta:initial-creator", string); |
201 | 294 | m_metaData.insert("dc:creator", string); |
202 | 294 | break; |
203 | 10 | case PIDSI::PIDSI_KEYWORDS: |
204 | 10 | m_metaData.insert("meta:keyword", string); |
205 | 10 | break; |
206 | 9 | case PIDSI::PIDSI_COMMENTS: |
207 | 9 | m_metaData.insert("dc:description", string); |
208 | 9 | break; |
209 | 473 | case PIDSI::PIDSI_TEMPLATE: |
210 | 473 | std::string templateHref(string.cstr()); |
211 | 473 | size_t found = templateHref.find_last_of("/\\"); |
212 | 473 | if (found != std::string::npos) |
213 | 456 | string = librevenge::RVNGString(templateHref.substr(found+1).c_str()); |
214 | 473 | m_metaData.insert("librevenge:template", string); |
215 | 473 | break; |
216 | 855 | } |
217 | 855 | } |
218 | 236 | else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae")) |
219 | 31 | { |
220 | 31 | switch (m_idsAndOffsets[index].first) |
221 | 31 | { |
222 | 7 | case PIDDSI_CATEGORY: |
223 | 7 | m_metaData.insert("librevenge:category", string); |
224 | 7 | break; |
225 | 2 | case PIDDSI_LINECOUNT: |
226 | | // this should actually be PIDDSI_COMPANY but this |
227 | | // is what company is mapped to |
228 | 2 | m_metaData.insert("librevenge:company", string); |
229 | 2 | break; |
230 | 3 | case PIDDSI_LANGUAGE: |
231 | 3 | m_metaData.insert("dc:language", string); |
232 | 3 | break; |
233 | 31 | } |
234 | 31 | } |
235 | 1.09k | } |
236 | 2.86k | } |
237 | 15.4k | } |
238 | | |
239 | | librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::RVNGInputStream *input) |
240 | 2.86k | { |
241 | 2.86k | uint32_t size = readU32(input); |
242 | | |
243 | 2.86k | if (size == 0) |
244 | 134 | return librevenge::RVNGString(); |
245 | | |
246 | 2.72k | std::vector<unsigned char> characters; |
247 | 10.2M | for (uint32_t i = 0; i < size; ++i) |
248 | 10.2M | characters.push_back(readU8(input)); |
249 | | |
250 | 2.72k | uint32_t codepage = getCodePage(); |
251 | 2.72k | librevenge::RVNGString string; |
252 | | |
253 | 2.72k | if (codepage == 65001) |
254 | 129 | { |
255 | | // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx |
256 | | // says this is UTF-8. |
257 | 129 | characters.push_back(0); |
258 | 129 | string.append(reinterpret_cast<const char *>(characters.data())); |
259 | 129 | } |
260 | 2.60k | else |
261 | 2.60k | { |
262 | 2.60k | switch (codepage) |
263 | 2.60k | { |
264 | 1.75k | case 1252: |
265 | | // http://msdn.microsoft.com/en-us/goglobal/bb964654 |
266 | 1.75k | appendCharacters(string, characters, "windows-1252"); |
267 | 1.75k | break; |
268 | 797 | default: |
269 | 797 | MSPUB_DEBUG_MSG(("MSPUBMetaData::readCodePageString: Unknown codepage %u found\n", unsigned(codepage))); |
270 | 2.60k | } |
271 | 2.60k | } |
272 | | |
273 | 2.68k | return string; |
274 | 2.72k | } |
275 | | |
276 | | bool libmspub::MSPUBMetaData::parseTimes(librevenge::RVNGInputStream *input) |
277 | 5.63k | { |
278 | | // Parse the header |
279 | | // HeaderSignature: 8 bytes |
280 | | // HeaderCLSID: 16 bytes |
281 | | // MinorVersion: 2 bytes |
282 | | // MajorVersion: 2 bytes |
283 | | // ByteOrder: 2 bytes |
284 | 5.63k | input->seek(30, librevenge::RVNG_SEEK_CUR); |
285 | 5.63k | uint16_t sectorShift = readU16(input); |
286 | | // MiniSectorShift: 2 bytes |
287 | | // Reserved: 6 bytes |
288 | | // NumDirectorySectors: 4 bytes |
289 | | // NumFATSectors: 4 bytes |
290 | 5.63k | input->seek(16, librevenge::RVNG_SEEK_CUR); |
291 | 5.63k | uint32_t firstDirSectorLocation = readU32(input); |
292 | | |
293 | | // Seek to the Root Directory Entry |
294 | 5.63k | size_t sectorSize = std::pow(2, sectorShift); |
295 | 5.63k | input->seek((firstDirSectorLocation + 1) * sectorSize, librevenge::RVNG_SEEK_SET); |
296 | | // DirectoryEntryName: 64 bytes |
297 | | // DirectoryEntryNameLength: 2 bytes |
298 | | // ObjectType: 1 byte |
299 | | // ColorFlag: 1 byte |
300 | | // LeftSiblingID: 4 bytes |
301 | | // RightSiblingID: 4 bytes |
302 | | // ChildID: 4 bytes |
303 | | // CLSID: 16 bytes |
304 | | // StateBits: 4 bytes |
305 | | // CreationTime: 8 bytes |
306 | 5.63k | input->seek(108, librevenge::RVNG_SEEK_CUR); |
307 | 5.63k | uint64_t modifiedTime = readU64(input); |
308 | | |
309 | | // modifiedTime is number of 100ns since Jan 1 1601 |
310 | 5.63k | const uint64_t epoch = uint64_t(116444736UL) * 100; |
311 | 5.63k | time_t sec = (modifiedTime / 10000000) - epoch; |
312 | 5.63k | const struct tm *time = localtime(&sec); |
313 | 5.63k | if (time) |
314 | 5.55k | { |
315 | 5.55k | static const int MAX_BUFFER = 1024; |
316 | 5.55k | char buffer[MAX_BUFFER]; |
317 | 5.55k | strftime(&buffer[0], MAX_BUFFER-1, "%Y-%m-%dT%H:%M:%SZ", time); |
318 | 5.55k | librevenge::RVNGString result; |
319 | 5.55k | result.append(buffer); |
320 | | // Visio UI uses modifiedTime for both purposes. |
321 | 5.55k | m_metaData.insert("meta:creation-date", result); |
322 | 5.55k | m_metaData.insert("dc:date", result); |
323 | 5.55k | return true; |
324 | 5.55k | } |
325 | 77 | return false; |
326 | 5.63k | } |
327 | | |
328 | | const librevenge::RVNGPropertyList &libmspub::MSPUBMetaData::getMetaData() |
329 | 5.55k | { |
330 | 5.55k | return m_metaData; |
331 | 5.55k | } |
332 | | |
333 | | /* vim:set shiftwidth=2 softtabstop=2 expandtab: */ |