Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libwpd/src/lib/WP6Parser.cpp
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2
/* libwpd
3
 * Version: MPL 2.0 / LGPLv2.1+
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * Major Contributor(s):
10
 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
11
 * Copyright (C) 2002 Marc Maurer (uwog@uwog.net)
12
 *
13
 * For minor contributions see the git repository.
14
 *
15
 * Alternatively, the contents of this file may be used under the terms
16
 * of the GNU Lesser General Public License Version 2.1 or later
17
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
18
 * applicable instead of those above.
19
 *
20
 * For further information visit http://libwpd.sourceforge.net
21
 */
22
23
/* "This product is not manufactured, approved, or supported by
24
 * Corel Corporation or Corel Corporation Limited."
25
 */
26
27
#include <memory>
28
29
#include "WP6StylesListener.h"
30
#include "WP6ContentListener.h"
31
#include "WP6Parser.h"
32
#include "WPXHeader.h"
33
#include "WP6Header.h"
34
#include "WP60Header.h"
35
#include "WP61Header.h"
36
#include "WP6PrefixData.h"
37
#include "WP6Part.h"
38
#include "libwpd_internal.h"
39
#include "WP6DefaultInitialFontPacket.h"
40
#include "WPXTable.h"
41
42
// Creates a WordPerfect 6 parser. The stream, header and encryption pointers
// are forwarded unchanged to the WPXParser base; nothing else is initialised here.
WP6Parser::WP6Parser(librevenge::RVNGInputStream *input, WPXHeader *header, WPXEncryption *encryption)
  : WPXParser(input, header, encryption)
{
}
46
47
// The parser has no clean-up of its own to do, so the destructor is defaulted
// (still defined out of line, in this translation unit).
WP6Parser::~WP6Parser() = default;
50
51
// Builds the prefix data of the document from the given stream.
//
// The number of prefix indices is taken from the parser's header, which is
// downcast to WP6Header. The object is allocated with new and returned as a
// raw pointer; the caller visible in this file stores it in a std::unique_ptr.
//
// Returns nullptr when construction fails with anything other than a
// FileException (best-effort: the document is then parsed without prefix
// data). A FileException is logged and re-raised to the caller.
WP6PrefixData *WP6Parser::getPrefixData(librevenge::RVNGInputStream *input, WPXEncryption *encryption)
{
  try
  {
    const auto numPrefixIndices = static_cast<WP6Header *>(getHeader())->getNumPrefixIndices();
    return new WP6PrefixData(input, encryption, numPrefixIndices);
  }
  catch (const FileException &)
  {
    WPD_DEBUG_MSG(("WordPerfect: Prefix Data most likely corrupted.\n"));
    // TODO: Try to check packet after packet so that we try to recover at least the beginning if the corruption is not at
    //       the beginning.
    throw; // re-raise the exception in flight rather than constructing a fresh one
  }
  catch (...)
  {
    WPD_DEBUG_MSG(("WordPerfect: Prefix Data most likely corrupted. Trying to ignore.\n"));
    // TODO: Try to check packet after packet so that we try to recover at least the beginning if the corruption is not at
    //       the beginning.
    return nullptr;
  }
}
74
75
// Runs one complete pass over the main document body for the given listener:
// signals the start of the document, moves the stream to the document offset
// recorded in the header, parses the body and finally signals the end of the
// document.
void WP6Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP6Listener *listener)
{
  listener->startDocument();

  const auto bodyOffset = getHeader()->getDocumentOffset();
  input->seek(bodyOffset, librevenge::RVNG_SEEK_SET);
  WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n", static_cast<long>(input->tell())));

  parseDocument(input, encryption, listener);

  listener->endDocument();
}
87
88
// Characters emitted for the single-byte tokens 0x01..0x20 of a document body.
// parseDocument() indexes this table with (token - 1), so it must keep exactly
// 32 entries. Each value equals the Latin-1 code of the character named in
// its comment.
static const unsigned short extendedInternationalCharacterMap[] =
{
  0xE5, // lower case 'a' with ring above
  0xC5, // upper case 'a' with ring above
  0xE6, // lower case 'ae' ligature
  0xC6, // upper case 'ae' ligature
  0xE4, // lower case 'a' with diaeresis
  0xC4, // upper case 'a' with diaeresis
  0xE1, // lower case 'a' with acute
  0xE0, // lower case 'a' with grave
  0xE2, // lower case 'a' with circumflex
  0xE3, // lower case 'a' with tilde
  0xC3, // upper case 'a' with tilde
  0xE7, // lower case 'c' with cedilla
  0xC7, // upper case 'c' with cedilla
  0xEB, // lower case 'e' with diaeresis
  0xE9, // lower case 'e' with acute
  0xC9, // upper case 'e' with acute
  0xE8, // lower case 'e' with grave
  0xEA, // lower case 'e' with circumflex
  0xED, // lower case 'i' with acute
  0xF1, // lower case 'n' with tilde
  0xD1, // upper case 'n' with tilde
  0xF8, // lower case 'o' with stroke
  0xD8, // upper case 'o' with stroke
  0xF5, // lower case 'o' with tilde
  0xD5, // upper case 'o' with tilde
  0xF6, // lower case 'o' with diaeresis
  0xD6, // upper case 'o' with diaeresis
  0xFC, // lower case 'u' with diaeresis
  0xDC, // upper case 'u' with diaeresis
  0xFA, // lower case 'u' with acute
  0xF9, // lower case 'u' with grave
  0xDF  // sharp s (double s)
};
123
124
// parseDocument: parses a document body (may call itself recursively, on other streams, or itself)
125
void WP6Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP6Listener *listener)
126
26.5k
{
127
36.8M
  while (!input->isEnd())
128
36.8M
  {
129
36.8M
    unsigned char readVal;
130
36.8M
    readVal = readU8(input, encryption);
131
132
36.8M
    if (readVal == (unsigned char)0x00)
133
5.63M
    {
134
      // do nothing: this token is meaningless and is likely just corruption
135
5.63M
    }
136
31.1M
    else if (readVal <= (unsigned char)0x20)
137
13.9M
    {
138
13.9M
      listener->insertCharacter(extendedInternationalCharacterMap[(readVal-1)]);
139
13.9M
    }
140
17.2M
    else if (readVal >= (unsigned char)0x21 && readVal <= (unsigned char)0x7F)
141
5.95M
    {
142
      // normal ASCII characters
143
5.95M
      listener->insertCharacter((unsigned)readVal);
144
5.95M
    }
145
11.2M
    else
146
11.2M
    {
147
11.2M
      std::unique_ptr<WP6Part> part(WP6Part::constructPart(input, encryption, readVal));
148
11.2M
      if (part)
149
7.57M
        part->parse(listener);
150
11.2M
    }
151
36.8M
  }
152
26.5k
}
153
154
// Asks the first prefix data packet of the given type, if there is one, to
// parse itself into the listener. Does nothing when prefixData is null or
// holds no packet of that type.
void WP6Parser::parsePacket(WP6PrefixData *prefixData, int type, WP6Listener *listener)
{
  if (prefixData)
  {
    const std::pair< MPDP_CIter, MPDP_CIter > packets = prefixData->getPrefixDataPacketsOfType(type);
    const bool hasPacket = packets.first != packets.second;
    if (hasPacket)
      packets.first->second->parse(listener);
  }
}
165
166
// Asks every prefix data packet of the given type, in the order returned by
// the prefix data, to parse itself into the listener. Does nothing when
// prefixData is null.
void WP6Parser::parsePackets(WP6PrefixData *prefixData, int type, WP6Listener *listener)
{
  if (prefixData)
  {
    const std::pair< MPDP_CIter, MPDP_CIter > packets = prefixData->getPrefixDataPacketsOfType(type);
    MPDP_CIter packet = packets.first;
    while (packet != packets.second)
    {
      packet->second->parse(listener);
      ++packet;
    }
  }
}
177
178
// WP6Parser::parse() reads AND parses a wordperfect document, passing any retrieved low-level
179
// information to a low-level listener
180
void WP6Parser::parse(librevenge::RVNGTextInterface *documentInterface)
181
0
{
182
0
  std::list<WPXPageSpan> pageList;
183
0
  WPXTableList tableList;
184
185
0
  librevenge::RVNGInputStream *input = getInput();
186
0
  WPXEncryption *encryption = getEncryption();
187
188
0
  std::unique_ptr<WP6PrefixData> prefixData(getPrefixData(input, encryption));
189
190
  // do a "first-pass" parse of the document
191
  // gather table border information, page properties (per-page)
192
0
  WP6StylesListener stylesListener(pageList, tableList);
193
0
  stylesListener.setPrefixData(prefixData.get());
194
0
  parse(input, encryption, &stylesListener);
195
196
  // postprocess the pageList == remove duplicate page spans due to the page breaks
197
0
  auto previousPage = pageList.begin();
198
0
  for (auto Iter=pageList.begin(); Iter != pageList.end(); /* Iter++ */)
199
0
  {
200
0
    if ((Iter != previousPage) && ((*previousPage)==(*Iter)))
201
0
    {
202
0
      (*previousPage).setPageSpan((*previousPage).getPageSpan() + (*Iter).getPageSpan());
203
0
      Iter = pageList.erase(Iter);
204
0
    }
205
0
    else
206
0
    {
207
0
      previousPage = Iter;
208
0
      ++Iter;
209
0
    }
210
0
  }
211
212
  // second pass: here is where we actually send the messages to the target app
213
  // that are necessary to emit the body of the target document
214
0
  WP6ContentListener listener(pageList, tableList, documentInterface);
215
0
  listener.setPrefixData(prefixData.get());
216
217
  // get the relevant initial prefix packets out of storage and tell them to parse
218
  // themselves
219
0
  parsePacket(prefixData.get(), WP6_INDEX_HEADER_EXTENDED_DOCUMENT_SUMMARY, &listener);
220
0
  parsePacket(prefixData.get(), WP6_INDEX_HEADER_INITIAL_FONT, &listener);
221
0
  parsePackets(prefixData.get(), WP6_INDEX_HEADER_OUTLINE_STYLE, &listener);
222
223
0
  parse(input, encryption, &listener);
224
0
}
225
226
// Parses the parser's input as a sub-document in two passes and reports the
// result to documentInterface. Both passes run parseDocument() with no
// encryption object; the stream is rewound to offset 0 between them.
//
// Throws FileException: a FileException raised during either pass is logged
// and re-raised to the caller.
void WP6Parser::parseSubDocument(librevenge::RVNGTextInterface *documentInterface)
{
  std::list<WPXPageSpan> pageList;
  WPXTableList tableList;

  librevenge::RVNGInputStream *input = getInput();

  try
  {
    // first pass: styles listener (given pageList and tableList to work on)
    WP6StylesListener stylesListener(pageList, tableList);
    stylesListener.startSubDocument();
    parseDocument(input, nullptr, &stylesListener);
    stylesListener.endSubDocument();

    // rewind so that the second pass sees the same bytes again
    input->seek(0, librevenge::RVNG_SEEK_SET);

    // second pass: content listener that talks to documentInterface
    WP6ContentListener listener(pageList, tableList, documentInterface);
    listener.startSubDocument();
    parseDocument(input, nullptr, &listener);
    listener.endSubDocument();
  }
  catch (const FileException &)
  {
    // trailing newline added to match the other debug messages in this file
    WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely.\n"));
    throw; // re-raise the exception in flight rather than constructing a fresh one
  }
}
253
/* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */