Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvisio/src/lib/VSDMetaData.cpp
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/*
3
 * This file is part of the libvisio project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 */
9
10
#include "VSDMetaData.h"
11
12
#include <cassert>
13
#include <cmath>
14
#include <cstdio>
15
#include <cstring>
16
#include <string>
17
#include <unicode/ucnv.h>
18
#include <ctime>
19
20
// Builds a parser with no property identifiers, no typed property values
// and an empty metadata property list.
libvisio::VSDMetaData::VSDMetaData()
  : m_idsAndOffsets()
  , m_typedPropertyValues()
  , m_metaData()
{
}
24
25
// Nothing to release explicitly: the members clean up after themselves.
libvisio::VSDMetaData::~VSDMetaData() = default;
28
29
// Property identifiers looked up by readTypedPropertyValue() when the
// property set's FMTID is d5cdd502-2e9c-101b-9397-08002b2cf9ae.
// The numbering is not contiguous: 0x12 and 0x19 are not assigned here.
enum PIDDSI
{
  PIDDSI_CODEPAGE          = 0x01,
  PIDDSI_CATEGORY          = 0x02,
  PIDDSI_PRESFORMAT        = 0x03,
  PIDDSI_BYTECOUNT         = 0x04,
  PIDDSI_LINECOUNT         = 0x05,
  PIDDSI_PARACOUNT         = 0x06,
  PIDDSI_SLIDECOUNT        = 0x07,
  PIDDSI_NOTECOUNT         = 0x08,
  PIDDSI_HIDDENCOUNT       = 0x09,
  PIDDSI_MMCLIPCOUNT       = 0x0A,
  PIDDSI_SCALE             = 0x0B,
  PIDDSI_HEADINGPAIR       = 0x0C,
  PIDDSI_DOCPARTS          = 0x0D,
  PIDDSI_MANAGER           = 0x0E,
  PIDDSI_COMPANY           = 0x0F,
  PIDDSI_LINKSDIRTY        = 0x10,
  PIDDSI_CCHWITHSPACES     = 0x11,
  PIDDSI_SHAREDDOC         = 0x13,
  PIDDSI_LINKBASE          = 0x14,
  PIDDSI_HLINKS            = 0x15,
  PIDDSI_HYPERLINKSCHANGED = 0x16,
  PIDDSI_VERSION           = 0x17,
  PIDDSI_DIGSIG            = 0x18,
  PIDDSI_CONTENTTYPE       = 0x1A,
  PIDDSI_CONTENTSTATUS     = 0x1B,
  PIDDSI_LANGUAGE          = 0x1C,
  PIDDSI_DOCVERSION        = 0x1D
};
59
60
// Property identifiers looked up by readTypedPropertyValue() when the
// property set's FMTID is f29f85e0-4ff9-1068-ab91-08002b27b3d9.
// CODEPAGE_PROPERTY_IDENTIFIER is also what getCodePage() searches for.
enum PIDSI
{
  CODEPAGE_PROPERTY_IDENTIFIER = 0x01,
  PIDSI_TITLE                  = 0x02,
  PIDSI_SUBJECT                = 0x03,
  PIDSI_AUTHOR                 = 0x04,
  PIDSI_KEYWORDS               = 0x05,
  PIDSI_COMMENTS               = 0x06,
  PIDSI_TEMPLATE               = 0x07,
  PIDSI_LASTAUTHOR             = 0x08,
  PIDSI_REVNUMBER              = 0x09,
  PIDSI_EDITTIME               = 0x0A,
  PIDSI_LASTPRINTED            = 0x0B,
  PIDSI_CREATE_DTM             = 0x0C,
  PIDSI_LASTSAVE_DTM           = 0x0D,
  PIDSI_PAGECOUNT              = 0x0E,
  PIDSI_WORDCOUNT              = 0x0F,
  PIDSI_CHARCOUNT              = 0x10,
  PIDSI_THUMBNAIL              = 0x11,
  PIDSI_APPNAME                = 0x12,
  PIDSI_DOC_SECURITY           = 0x13
};
82
83
// Reads a property set stream from input and collects the recognised
// properties. Returns false only when no stream was supplied; the result
// does not reflect whether any property was actually found.
// NOTE(review): nothing here resets m_idsAndOffsets, so repeated calls on
// the same object accumulate entries - confirm this is intended.
bool libvisio::VSDMetaData::parse(librevenge::RVNGInputStream *input)
{
  const bool haveStream = (input != nullptr);

  if (haveStream)
    readPropertySetStream(input);

  return haveStream;
}
92
93
// Reads the header of a property set stream, formats the first FMTID as a
// lower-case GUID string and hands the first property set over to
// readPropertySet().
//
// input must be positioned at the start of the stream and must not be null.
void libvisio::VSDMetaData::readPropertySetStream(librevenge::RVNGInputStream *input)
{
  // ByteOrder
  input->seek(2, librevenge::RVNG_SEEK_CUR);
  // Version
  input->seek(2, librevenge::RVNG_SEEK_CUR);
  // SystemIdentifier
  input->seek(4, librevenge::RVNG_SEEK_CUR);
  // CLSID
  input->seek(16, librevenge::RVNG_SEEK_CUR);
  // NumPropertySets
  input->seek(4, librevenge::RVNG_SEEK_CUR);

  // FMTID0: a 16-byte GUID read field by field
  const uint32_t data1 = readU32(input);
  const uint16_t data2 = readU16(input);
  const uint16_t data3 = readU16(input);
  uint8_t data4[8];
  for (uint8_t &byte : data4)
    byte = readU8(input);

  // Pretty-printed GUID is 36 bytes + the terminating null-character.
  // snprintf keeps the write bounded by the buffer size, and the casts make
  // every argument match the "unsigned int" that %x expects.
  char FMTID0[37];
  std::snprintf(FMTID0, sizeof(FMTID0),
                "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                static_cast<unsigned>(data1),
                static_cast<unsigned>(data2),
                static_cast<unsigned>(data3),
                static_cast<unsigned>(data4[0]), static_cast<unsigned>(data4[1]),
                static_cast<unsigned>(data4[2]), static_cast<unsigned>(data4[3]),
                static_cast<unsigned>(data4[4]), static_cast<unsigned>(data4[5]),
                static_cast<unsigned>(data4[6]), static_cast<unsigned>(data4[7]));

  // Offset0: where the first property set starts, from the stream beginning
  const uint32_t offset0 = readU32(input);
  readPropertySet(input, offset0, FMTID0);
}
123
124
// Reads one property set located at the absolute position offset: first
// the table of (identifier, offset) pairs, then the typed value of each
// property. FMTID is the GUID string of the set and is passed through to
// readTypedPropertyValue().
void libvisio::VSDMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID)
{
  input->seek(offset, librevenge::RVNG_SEEK_SET);

  // Size (not used)
  input->seek(4, librevenge::RVNG_SEEK_CUR);

  uint32_t propertyCount = readU32(input);
  // The exact size of a property is not known beforehand, so the declared
  // count is only clamped to an upper bound derived from the bytes left.
  if (propertyCount > getRemainingLength(input) / 12)
    propertyCount = getRemainingLength(input) / 12;

  for (uint32_t n = 0; n != propertyCount; ++n)
    readPropertyIdentifierAndOffset(input);

  // Property offsets are relative to the start of the property set.
  // NOTE(review): the index starts at 0 although m_idsAndOffsets is only
  // ever appended to; if entries from an earlier call are still present,
  // those are the ones used here - confirm this is intended.
  for (uint32_t n = 0; n < propertyCount && n < m_idsAndOffsets.size(); ++n)
    readTypedPropertyValue(input, n, offset + m_idsAndOffsets[n].second, FMTID);
}
143
144
uint32_t libvisio::VSDMetaData::getCodePage()
145
3.10k
{
146
20.9k
  for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
147
20.9k
  {
148
20.9k
    if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER)
149
3.03k
    {
150
3.03k
      if (i >= m_typedPropertyValues.size())
151
81
        break;
152
2.95k
      return m_typedPropertyValues[i];
153
3.03k
    }
154
20.9k
  }
155
156
145
  return 0;
157
3.10k
}
158
159
// Reads one (property identifier, offset) entry - two consecutive 32-bit
// values - and appends it to m_idsAndOffsets.
void libvisio::VSDMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input)
{
  // Two separate statements keep the read order explicit: identifier first.
  const uint32_t id = readU32(input);
  const uint32_t valueOffset = readU32(input);

  m_idsAndOffsets.emplace_back(id, valueOffset);
}
165
166
9.41k
#define VT_I2 0x0002
167
8.19k
#define VT_LPSTR 0x001E
168
169
void libvisio::VSDMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input,
170
                                                   uint32_t index,
171
                                                   uint32_t offset,
172
                                                   char *FMTID)
173
9.41k
{
174
9.41k
  input->seek(offset, librevenge::RVNG_SEEK_SET);
175
9.41k
  uint16_t type = readU16(input);
176
  // Padding
177
9.41k
  input->seek(2, librevenge::RVNG_SEEK_CUR);
178
179
9.41k
  if (type == VT_I2)
180
1.22k
  {
181
1.22k
    uint16_t value = readU16(input);
182
1.22k
    m_typedPropertyValues[index] = value;
183
1.22k
  }
184
8.19k
  else if (type == VT_LPSTR)
185
3.12k
  {
186
3.12k
    librevenge::RVNGString string = readCodePageString(input);
187
3.12k
    if (!string.empty())
188
1.46k
    {
189
1.46k
      if (index >= m_idsAndOffsets.size())
190
0
        return;
191
192
1.46k
      if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9"))
193
1.23k
      {
194
1.23k
        switch (m_idsAndOffsets[index].first)
195
1.23k
        {
196
144
        case PIDSI_TITLE:
197
144
          m_metaData.insert("dc:title", string);
198
144
          break;
199
56
        case PIDSI_SUBJECT:
200
56
          m_metaData.insert("dc:subject", string);
201
56
          break;
202
301
        case PIDSI_AUTHOR:
203
301
          m_metaData.insert("meta:initial-creator", string);
204
301
          m_metaData.insert("dc:creator", string);
205
301
          break;
206
9
        case PIDSI_KEYWORDS:
207
9
          m_metaData.insert("meta:keyword", string);
208
9
          break;
209
9
        case PIDSI_COMMENTS:
210
9
          m_metaData.insert("dc:description", string);
211
9
          break;
212
194
        case PIDSI_TEMPLATE:
213
194
          std::string templateHref(string.cstr());
214
194
          size_t found = templateHref.find_last_of("/\\");
215
194
          if (found != std::string::npos)
216
187
            string = librevenge::RVNGString(templateHref.substr(found+1).c_str());
217
194
          m_metaData.insert("librevenge:template", string);
218
194
          break;
219
1.23k
        }
220
1.23k
      }
221
228
      else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae"))
222
169
      {
223
169
        switch (m_idsAndOffsets[index].first)
224
169
        {
225
9
        case PIDDSI_CATEGORY:
226
9
          m_metaData.insert("librevenge:category", string);
227
9
          break;
228
4
        case PIDDSI_LINECOUNT:
229
          // this should actually be PIDDSI_COMPANY but this
230
          // is what company is mapped to
231
4
          m_metaData.insert("librevenge:company", string);
232
4
          break;
233
3
        case PIDDSI_LANGUAGE:
234
3
          m_metaData.insert("dc:language", string);
235
3
          break;
236
169
        }
237
169
      }
238
1.46k
    }
239
3.12k
  }
240
9.41k
}
241
242
// Reads a length-prefixed byte string and converts it to an RVNGString
// according to the code page reported by getCodePage().
//
// - The 32-bit length prefix is clamped to the bytes left in the stream.
// - Code page 65001 is taken as UTF-8 and appended up to the first NUL.
// - Code page 1252 is converted through ICU's "windows-1252" converter.
// - Any other code page gives an empty string.
//
// Returns an empty string when the (clamped) length is 0.
librevenge::RVNGString libvisio::VSDMetaData::readCodePageString(librevenge::RVNGInputStream *input)
{
  uint32_t size = readU32(input);
  if (size > getRemainingLength(input))
    size = getRemainingLength(input);

  if (size == 0)
    return librevenge::RVNGString();

  std::vector<unsigned char> characters;
  // one extra slot for the terminator the UTF-8 branch appends
  characters.reserve(static_cast<size_t>(size) + 1);
  for (uint32_t i = 0; i < size; ++i)
    characters.push_back(readU8(input));

  const uint32_t codepage = getCodePage();
  librevenge::RVNGString string;

  if (codepage == 65001)
  {
    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
    // says this is UTF-8.
    characters.push_back(0);
    string.append(reinterpret_cast<const char *>(characters.data()));
    return string;
  }

  UErrorCode status = U_ZERO_ERROR;
  UConverter *conv = nullptr;

  switch (codepage)
  {
  case 1252:
    // http://msdn.microsoft.com/en-us/goglobal/bb964654
    conv = ucnv_open("windows-1252", &status);
    break;
  default:
    break;
  }

  if (U_SUCCESS(status) && conv)
  {
    assert(!characters.empty());
    const char *src = reinterpret_cast<const char *>(characters.data());
    const char *const srcLimit = src + characters.size();
    while (src < srcLimit)
    {
      const UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
      // Once status holds a failure, ucnv_getNextUChar() returns at once
      // without consuming input, so continuing would never advance src.
      if (U_FAILURE(status))
        break;
      if (U_IS_UNICODE_CHAR(ucs4Character))
        appendUCS4(string, ucs4Character);
    }
  }

  if (conv)
    ucnv_close(conv);

  return string;
}
297
298
// Reads the modification time of the root directory entry of a compound
// file and stores it, formatted as "YYYY-MM-DDTHH:MM:SSZ", under both
// "meta:creation-date" and "dc:date".
//
// Returns true when the time was stored. Returns false when input is null,
// the sector shift read from the header cannot be represented, or the
// time cannot be converted or formatted.
bool libvisio::VSDMetaData::parseTimes(librevenge::RVNGInputStream *input)
{
  if (!input)
    return false;

  // Parse the header
  // HeaderSignature: 8 bytes
  // HeaderCLSID: 16 bytes
  // MinorVersion: 2 bytes
  // MajorVersion: 2 bytes
  // ByteOrder: 2 bytes
  input->seek(30, librevenge::RVNG_SEEK_CUR);
  const uint16_t sectorShift = readU16(input);
  // MiniSectorShift: 2 bytes
  // Reserved: 6 bytes
  // NumDirectorySectors: 4 bytes
  // NumFATSectors: 4 bytes
  input->seek(16, librevenge::RVNG_SEEK_CUR);
  const uint32_t firstDirSectorLocation = readU32(input);

  // sectorShift comes straight from the file. A shift as wide as size_t
  // cannot be represented, so bail out instead of relying on a
  // floating-point power whose conversion to size_t would be undefined.
  if (sectorShift >= sizeof(size_t) * 8)
    return false;
  const size_t sectorSize = static_cast<size_t>(1) << sectorShift;

  // Seek to the Root Directory Entry
  input->seek((firstDirSectorLocation + 1) * sectorSize, librevenge::RVNG_SEEK_SET);
  // DirectoryEntryName: 64 bytes
  // DirectoryEntryNameLength: 2 bytes
  // ObjectType: 1 byte
  // ColorFlag: 1 byte
  // LeftSiblingID: 4 bytes
  // RightSiblingID: 4 bytes
  // ChildID: 4 bytes
  // CLSID: 16 bytes
  // StateBits: 4 bytes
  // CreationTime: 8 bytes
  input->seek(108, librevenge::RVNG_SEEK_CUR);
  const uint64_t modifiedTime = readU64(input);

  // modifiedTime is number of 100ns since Jan 1 1601;
  // epoch is the number of seconds between Jan 1 1601 and Jan 1 1970
  const uint64_t epoch = uint64_t(116444736UL) * 100;
  const time_t sec = static_cast<time_t>(static_cast<int64_t>(modifiedTime / 10000000)
                                         - static_cast<int64_t>(epoch));

  // The output carries a literal 'Z' (UTC designator), so the broken-down
  // time has to be UTC rather than local time.
  const struct tm *const utc = gmtime(&sec);
  if (!utc)
    return false;

  static const int MAX_BUFFER = 1024;
  char buffer[MAX_BUFFER];
  // strftime() returns 0 when the result does not fit; the buffer content
  // is then unspecified and must not be used.
  if (strftime(&buffer[0], MAX_BUFFER, "%Y-%m-%dT%H:%M:%SZ", utc) == 0)
    return false;

  librevenge::RVNGString result;
  result.append(buffer);
  // Visio UI uses modifiedTime for both purposes.
  m_metaData.insert("meta:creation-date", result);
  m_metaData.insert("dc:date", result);
  return true;
}
349
350
// Gives read-only access to the metadata entries collected so far. The
// returned reference points at a member of this object.
const librevenge::RVNGPropertyList &libvisio::VSDMetaData::getMetaData()
{
  const librevenge::RVNGPropertyList &collected = m_metaData;
  return collected;
}
354
355
/* vim:set shiftwidth=2 softtabstop=2 expandtab: */