Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmspub/src/lib/MSPUBMetaData.cpp
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/*
3
 * This file is part of the libmspub project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 */
9
10
#include "MSPUBMetaData.h"
11
12
#include <cmath>
13
#include <cstdio>
14
#include <cstring>
15
#include <ctime>
16
#include <string>
17
18
#include "libmspub_utils.h"
19
20
// Builds an empty metadata holder: no property identifiers, no typed
// values and no collected metadata entries yet.
libmspub::MSPUBMetaData::MSPUBMetaData()
  : m_idsAndOffsets()
  , m_typedPropertyValues()
  , m_metaData()
{
}
24
25
// Nothing to release by hand; the members clean up after themselves.
libmspub::MSPUBMetaData::~MSPUBMetaData() = default;
28
29
// Property identifiers looked up when the property set's FMTID is
// d5cdd502-2e9c-101b-9397-08002b2cf9ae (see readTypedPropertyValue()).
// The numbering is not contiguous: 0x12 and 0x19 are not assigned here.
enum PIDDSI
{
  PIDDSI_CODEPAGE          = 0x01,
  PIDDSI_CATEGORY          = 0x02,
  PIDDSI_PRESFORMAT        = 0x03,
  PIDDSI_BYTECOUNT         = 0x04,
  PIDDSI_LINECOUNT         = 0x05,
  PIDDSI_PARACOUNT         = 0x06,
  PIDDSI_SLIDECOUNT        = 0x07,
  PIDDSI_NOTECOUNT         = 0x08,
  PIDDSI_HIDDENCOUNT       = 0x09,
  PIDDSI_MMCLIPCOUNT       = 0x0A,
  PIDDSI_SCALE             = 0x0B,
  PIDDSI_HEADINGPAIR       = 0x0C,
  PIDDSI_DOCPARTS          = 0x0D,
  PIDDSI_MANAGER           = 0x0E,
  PIDDSI_COMPANY           = 0x0F,
  PIDDSI_LINKSDIRTY        = 0x10,
  PIDDSI_CCHWITHSPACES     = 0x11,
  PIDDSI_SHAREDDOC         = 0x13,
  PIDDSI_LINKBASE          = 0x14,
  PIDDSI_HLINKS            = 0x15,
  PIDDSI_HYPERLINKSCHANGED = 0x16,
  PIDDSI_VERSION           = 0x17,
  PIDDSI_DIGSIG            = 0x18,
  PIDDSI_CONTENTTYPE       = 0x1A,
  PIDDSI_CONTENTSTATUS     = 0x1B,
  PIDDSI_LANGUAGE          = 0x1C,
  PIDDSI_DOCVERSION        = 0x1D
};
59
60
// Property identifiers looked up when the property set's FMTID is
// f29f85e0-4ff9-1068-ab91-08002b27b3d9 (see readTypedPropertyValue()).
// CODEPAGE_PROPERTY_IDENTIFIER is also what getCodePage() searches for.
enum PIDSI
{
  CODEPAGE_PROPERTY_IDENTIFIER = 0x01,
  PIDSI_TITLE                  = 0x02,
  PIDSI_SUBJECT                = 0x03,
  PIDSI_AUTHOR                 = 0x04,
  PIDSI_KEYWORDS               = 0x05,
  PIDSI_COMMENTS               = 0x06,
  PIDSI_TEMPLATE               = 0x07,
  PIDSI_LASTAUTHOR             = 0x08,
  PIDSI_REVNUMBER              = 0x09,
  PIDSI_EDITTIME               = 0x0A,
  PIDSI_LASTPRINTED            = 0x0B,
  PIDSI_CREATE_DTM             = 0x0C,
  PIDSI_LASTSAVE_DTM           = 0x0D,
  PIDSI_PAGECOUNT              = 0x0E,
  PIDSI_WORDCOUNT              = 0x0F,
  PIDSI_CHARCOUNT              = 0x10,
  PIDSI_THUMBNAIL              = 0x11,
  PIDSI_APPNAME                = 0x12,
  PIDSI_DOC_SECURITY           = 0x13
};
82
83
// Reads the property set stream available through \a input.
// Returns false only when no stream was supplied; otherwise the stream is
// handed to readPropertySetStream() and true is returned.
bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input)
{
  if (input)
  {
    readPropertySetStream(input);
    return true;
  }

  return false;
}
92
93
// Reads the header of a property set stream, formats the identifier of its
// first property set (FMTID0) as a lower-case GUID string and then parses
// that property set via readPropertySet().
void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream *input)
{
  // ByteOrder
  input->seek(2, librevenge::RVNG_SEEK_CUR);
  // Version
  input->seek(2, librevenge::RVNG_SEEK_CUR);
  // SystemIdentifier
  input->seek(4, librevenge::RVNG_SEEK_CUR);
  // CLSID
  input->seek(16, librevenge::RVNG_SEEK_CUR);
  // NumPropertySets
  input->seek(4, librevenge::RVNG_SEEK_CUR);

  // FMTID0: 16 bytes, read field by field so it can be pretty-printed.
  const uint32_t data1 = readU32(input);
  const uint16_t data2 = readU16(input);
  const uint16_t data3 = readU16(input);
  uint8_t data4[8];
  for (unsigned char &i : data4)
  {
    i = readU8(input);
  }

  // Pretty-printed GUID is 36 bytes + the terminating null-character.
  char FMTID0[37];
  // snprintf bounds the write to the buffer; the explicit casts make every
  // argument match the unsigned type that the %x conversions expect.
  std::snprintf(FMTID0, sizeof(FMTID0),
                "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                unsigned(data1), unsigned(data2), unsigned(data3),
                unsigned(data4[0]), unsigned(data4[1]), unsigned(data4[2]), unsigned(data4[3]),
                unsigned(data4[4]), unsigned(data4[5]), unsigned(data4[6]), unsigned(data4[7]));

  // Offset0: where the first property set starts within the stream.
  const uint32_t offset0 = readU32(input);
  readPropertySet(input, offset0, FMTID0);
}
123
124
// Parses one property set located at \a offset from the start of the stream.
// First the table of (identifier, offset) pairs is collected, then each
// property value is read at \a offset plus the offset stored for it.
// \a FMTID is passed through to readTypedPropertyValue() unchanged.
void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID)
{
  input->seek(offset, librevenge::RVNG_SEEK_SET);

  // Size
  input->seek(4, librevenge::RVNG_SEEK_CUR);
  const uint32_t numProperties = readU32(input);

  for (uint32_t pending = numProperties; pending != 0; --pending)
    readPropertyIdentifierAndOffset(input);

  // Never index past the identifier table, whatever count the file claims.
  for (uint32_t idx = 0; idx < numProperties && idx < m_idsAndOffsets.size(); ++idx)
  {
    const uint32_t valueOffset = offset + m_idsAndOffsets[idx].second;
    readTypedPropertyValue(input, idx, valueOffset, FMTID);
  }
}
140
141
uint32_t libmspub::MSPUBMetaData::getCodePage()
142
2.68k
{
143
56.3k
  for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
144
56.0k
  {
145
56.0k
    if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER)
146
2.28k
    {
147
2.28k
      if (i >= m_typedPropertyValues.size())
148
282
        break;
149
2.00k
      return m_typedPropertyValues[i];
150
2.28k
    }
151
56.0k
  }
152
153
677
  return 0;
154
2.68k
}
155
156
// Reads one (property identifier, offset) pair from \a input, identifier
// first, and appends it to m_idsAndOffsets.
void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input)
{
  const uint32_t id = readU32(input);
  const uint32_t valueOffset = readU32(input);
  const std::pair<uint32_t, uint32_t> entry(id, valueOffset);
  m_idsAndOffsets.push_back(entry);
}
162
163
15.4k
#define VT_I2 0x0002
164
10.2k
#define VT_LPSTR 0x001E
165
166
void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input,
167
                                                     uint32_t index,
168
                                                     uint32_t offset,
169
                                                     char *FMTID)
170
15.4k
{
171
15.4k
  input->seek(offset, librevenge::RVNG_SEEK_SET);
172
15.4k
  uint16_t type = readU16(input);
173
  // Padding
174
15.4k
  input->seek(2, librevenge::RVNG_SEEK_CUR);
175
176
15.4k
  if (type == VT_I2)
177
5.15k
  {
178
5.15k
    uint16_t value = readU16(input);
179
5.15k
    m_typedPropertyValues[index] = value;
180
5.15k
  }
181
10.2k
  else if (type == VT_LPSTR)
182
2.86k
  {
183
2.86k
    librevenge::RVNGString string = readCodePageString(input);
184
2.86k
    if (!string.empty())
185
1.09k
    {
186
1.09k
      if (index >= m_idsAndOffsets.size())
187
0
        return;
188
189
1.09k
      if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9"))
190
855
      {
191
855
        switch (m_idsAndOffsets[index].first)
192
855
        {
193
9
        case PIDSI::PIDSI_TITLE:
194
9
          m_metaData.insert("dc:title", string);
195
9
          break;
196
19
        case PIDSI::PIDSI_SUBJECT:
197
19
          m_metaData.insert("dc:subject", string);
198
19
          break;
199
294
        case PIDSI::PIDSI_AUTHOR:
200
294
          m_metaData.insert("meta:initial-creator", string);
201
294
          m_metaData.insert("dc:creator", string);
202
294
          break;
203
10
        case PIDSI::PIDSI_KEYWORDS:
204
10
          m_metaData.insert("meta:keyword", string);
205
10
          break;
206
9
        case PIDSI::PIDSI_COMMENTS:
207
9
          m_metaData.insert("dc:description", string);
208
9
          break;
209
473
        case PIDSI::PIDSI_TEMPLATE:
210
473
          std::string templateHref(string.cstr());
211
473
          size_t found = templateHref.find_last_of("/\\");
212
473
          if (found != std::string::npos)
213
456
            string = librevenge::RVNGString(templateHref.substr(found+1).c_str());
214
473
          m_metaData.insert("librevenge:template", string);
215
473
          break;
216
855
        }
217
855
      }
218
236
      else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae"))
219
31
      {
220
31
        switch (m_idsAndOffsets[index].first)
221
31
        {
222
7
        case PIDDSI_CATEGORY:
223
7
          m_metaData.insert("librevenge:category", string);
224
7
          break;
225
2
        case PIDDSI_LINECOUNT:
226
          // this should actually be PIDDSI_COMPANY but this
227
          // is what company is mapped to
228
2
          m_metaData.insert("librevenge:company", string);
229
2
          break;
230
3
        case PIDDSI_LANGUAGE:
231
3
          m_metaData.insert("dc:language", string);
232
3
          break;
233
31
        }
234
31
      }
235
1.09k
    }
236
2.86k
  }
237
15.4k
}
238
239
// Reads a length-prefixed byte string from \a input and converts it to an
// RVNGString according to the code page reported by getCodePage().
// Code page 65001 is taken as UTF-8 and 1252 as windows-1252; for any other
// code page a debug message is emitted and an empty string is returned.
// A zero length yields an empty string without reading anything further.
librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::RVNGInputStream *input)
{
  const uint32_t size = readU32(input);
  if (size == 0)
    return librevenge::RVNGString();

  std::vector<unsigned char> bytes;
  for (uint32_t left = size; left != 0; --left)
    bytes.push_back(readU8(input));

  const uint32_t codepage = getCodePage();
  librevenge::RVNGString decoded;

  if (codepage == 65001)
  {
    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
    // says this is UTF-8.
    bytes.push_back(0);
    decoded.append(reinterpret_cast<const char *>(bytes.data()));
  }
  else if (codepage == 1252)
  {
    // http://msdn.microsoft.com/en-us/goglobal/bb964654
    appendCharacters(decoded, bytes, "windows-1252");
  }
  else
  {
    MSPUB_DEBUG_MSG(("MSPUBMetaData::readCodePageString: Unknown codepage %u found\n", unsigned(codepage)));
  }

  return decoded;
}
275
276
// Extracts the modification time of the root directory entry of the
// compound file available through \a input and stores it, formatted as
// "YYYY-MM-DDTHH:MM:SSZ", under both "meta:creation-date" and "dc:date".
//
// Returns true when the date was stored, false when \a input is null, the
// sector shift read from the header is too large to be usable, or the
// timestamp cannot be converted to a calendar date.
bool libmspub::MSPUBMetaData::parseTimes(librevenge::RVNGInputStream *input)
{
  if (!input)
    return false;

  // Parse the header
  // HeaderSignature: 8 bytes
  // HeaderCLSID: 16 bytes
  // MinorVersion: 2 bytes
  // MajorVersion: 2 bytes
  // ByteOrder: 2 bytes
  input->seek(30, librevenge::RVNG_SEEK_CUR);
  const uint16_t sectorShift = readU16(input);
  // MiniSectorShift: 2 bytes
  // Reserved: 6 bytes
  // NumDirectorySectors: 4 bytes
  // NumFATSectors: 4 bytes
  input->seek(16, librevenge::RVNG_SEEK_CUR);
  const uint32_t firstDirSectorLocation = readU32(input);

  // The shift comes straight from the file; a value as wide as size_t (or
  // wider) cannot describe a real sector size and must not be shifted by.
  if (sectorShift >= sizeof(size_t) * 8)
    return false;

  // Seek to the Root Directory Entry
  const size_t sectorSize = size_t(1) << sectorShift;
  input->seek((firstDirSectorLocation + 1) * sectorSize, librevenge::RVNG_SEEK_SET);
  // DirectoryEntryName: 64 bytes
  // DirectoryEntryNameLength: 2 bytes
  // ObjectType: 1 byte
  // ColorFlag: 1 byte
  // LeftSiblingID: 4 bytes
  // RightSiblingID: 4 bytes
  // ChildID: 4 bytes
  // CLSID: 16 bytes
  // StateBits: 4 bytes
  // CreationTime: 8 bytes
  input->seek(108, librevenge::RVNG_SEEK_CUR);
  const uint64_t modifiedTime = readU64(input);

  // modifiedTime is number of 100ns since Jan 1 1601;
  // epoch is the number of seconds between Jan 1 1601 and Jan 1 1970.
  const uint64_t epoch = uint64_t(116444736UL) * 100;
  const std::time_t sec = (modifiedTime / 10000000) - epoch;

  // The formatted value carries the UTC designator 'Z', so the broken-down
  // time has to be UTC as well rather than local time.
  const struct tm *utc = std::gmtime(&sec);
  if (!utc)
    return false;

  char buffer[1024];
  // strftime returns 0 when nothing usable was written to the buffer.
  if (std::strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%SZ", utc) == 0)
    return false;

  librevenge::RVNGString result;
  result.append(buffer);
  // The modification time is reported for both creation and last change.
  m_metaData.insert("meta:creation-date", result);
  m_metaData.insert("dc:date", result);
  return true;
}
327
328
// Gives read-only access to the metadata entries collected so far.
const librevenge::RVNGPropertyList &libmspub::MSPUBMetaData::getMetaData()
{
  const librevenge::RVNGPropertyList &collected = m_metaData;
  return collected;
}
332
333
/* vim:set shiftwidth=2 softtabstop=2 expandtab: */