Coverage Report

Created: 2026-03-12 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libwpd/src/lib/WP42Parser.cpp
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2
/* libwpd
3
 * Version: MPL 2.0 / LGPLv2.1+
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * Major Contributor(s):
10
 * Copyright (C) 2003 William Lachance (wrlach@gmail.com)
11
 * Copyright (C) 2003 Marc Maurer (uwog@uwog.net)
12
 * Copyright (C) 2006 Fridrich Strba (fridrich.strba@bluewin.ch)
13
 *
14
 * For minor contributions see the git repository.
15
 *
16
 * Alternatively, the contents of this file may be used under the terms
17
 * of the GNU Lesser General Public License Version 2.1 or later
18
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
19
 * applicable instead of those above.
20
 *
21
 * For further information visit http://libwpd.sourceforge.net
22
 */
23
24
/* "This product is not manufactured, approved, or supported by
25
 * Corel Corporation or Corel Corporation Limited."
26
 */
27
28
#include "WP42Parser.h"
29
30
#include <memory>
31
32
#include "WP42Part.h"
33
#include "WPXHeader.h"
34
#include "libwpd_internal.h"
35
#include "WPXTable.h"
36
#include "WP42FileStructure.h"
37
#include "WP42StylesListener.h"
38
#include "WP42ContentListener.h"
39
40
/** Creates a WordPerfect 4.2 parser.
 *
 * Nothing is done beyond initialising the WPXParser base: the stream and the
 * encryption object are forwarded unchanged and nullptr is supplied as the
 * base class's second constructor argument.
 *
 * \param input      stream the document is read from
 * \param encryption encryption object forwarded to the base class
 */
WP42Parser::WP42Parser(librevenge::RVNGInputStream *input, WPXEncryption *encryption)
  : WPXParser(input, nullptr, encryption)
{
}
44
45
/** Destructor; the parser has no clean-up of its own to perform. */
WP42Parser::~WP42Parser() = default;
48
49
/** Performs one complete pass over the document for a single listener.
 *
 * The listener is told that the document starts, the stream is rewound to
 * offset 0, the body is handed to parseDocument() and finally the listener is
 * told that the document ends.
 *
 * \param input      stream to read; it is repositioned to its beginning
 * \param encryption forwarded unchanged to parseDocument()
 * \param listener   receiver of the start/end notifications and of everything
 *                   parseDocument() emits
 */
void WP42Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP42Listener *listener)
{
  listener->startDocument();

  // each pass starts reading at the first byte of the stream
  input->seek(0, librevenge::RVNG_SEEK_SET);

  WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n", static_cast<long>(input->tell())));

  parseDocument(input, encryption, listener);

  listener->endDocument();
}
61
62
// parseDocument: parses a document body (may call itself recursively, on other streams, or itself)
63
void WP42Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP42Listener *listener)
64
66.6k
{
65
6.06M
  while (!input->isEnd())
66
6.00M
  {
67
6.00M
    unsigned char readVal;
68
6.00M
    readVal = readU8(input, encryption);
69
70
6.00M
    if (readVal < (unsigned char)0x20)
71
2.14M
    {
72
2.14M
      WPD_DEBUG_MSG(("Offset: %i, Handling Control Character 0x%2x\n", (unsigned int)input->tell(), readVal));
73
74
2.14M
      switch (readVal)
75
2.14M
      {
76
2.56k
      case 0x09: // tab
77
2.56k
        listener->insertTab(0, 0.0);
78
2.56k
        break;
79
49.1k
      case 0x0A: // hard new line
80
49.1k
        listener->insertEOL();
81
49.1k
        break;
82
2.32k
      case 0x0B: // soft new page
83
2.32k
        listener->insertBreak(WPX_SOFT_PAGE_BREAK);
84
2.32k
        break;
85
1.94M
      case 0x0C: // hard new page
86
1.94M
        listener->insertBreak(WPX_PAGE_BREAK);
87
1.94M
        break;
88
1.86k
      case 0x0D: // soft new line
89
1.86k
        listener->insertCharacter(' ');
90
1.86k
        break;
91
143k
      default:
92
        // unsupported or undocumented token, ignore
93
143k
        break;
94
2.14M
      }
95
2.14M
    }
96
3.85M
    else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7F)
97
3.67M
    {
98
3.67M
      WPD_DEBUG_MSG(("Offset: %i, Handling Ascii Character 0x%2x\n", (unsigned int)input->tell(), readVal));
99
100
      // normal ASCII characters
101
3.67M
      listener->insertCharacter(readVal);
102
3.67M
    }
103
178k
    else if (readVal >= (unsigned char)0x80 && readVal <= (unsigned char)0xBF)
104
12.0k
    {
105
12.0k
      WPD_DEBUG_MSG(("Offset: %i, Handling Single Character Function 0x%2x\n", (unsigned int)input->tell(), readVal));
106
107
      // single character function codes
108
12.0k
      switch (readVal)
109
12.0k
      {
110
807
      case 0x92:
111
807
        listener->attributeChange(true, WP42_ATTRIBUTE_STRIKE_OUT);
112
807
        break;
113
243
      case 0x93:
114
243
        listener->attributeChange(false, WP42_ATTRIBUTE_STRIKE_OUT);
115
243
        break;
116
369
      case 0x94:
117
369
        listener->attributeChange(true, WP42_ATTRIBUTE_UNDERLINE);
118
369
        break;
119
257
      case 0x95:
120
257
        listener->attributeChange(false, WP42_ATTRIBUTE_UNDERLINE);
121
257
        break;
122
123
320
      case 0x90:
124
320
        listener->attributeChange(true, WP42_ATTRIBUTE_REDLINE);
125
320
        break;
126
215
      case 0x91:
127
215
        listener->attributeChange(false, WP42_ATTRIBUTE_REDLINE);
128
215
        break;
129
130
419
      case 0x9C:
131
419
        listener->attributeChange(false, WP42_ATTRIBUTE_BOLD);
132
419
        break;
133
288
      case 0x9D:
134
288
        listener->attributeChange(true, WP42_ATTRIBUTE_BOLD);
135
288
        break;
136
137
836
      case 0xB2:
138
836
        listener->attributeChange(true, WP42_ATTRIBUTE_ITALICS);
139
836
        break;
140
219
      case 0xB3:
141
219
        listener->attributeChange(false, WP42_ATTRIBUTE_ITALICS);
142
219
        break;
143
3.36k
      case 0xB4:
144
3.36k
        listener->attributeChange(true, WP42_ATTRIBUTE_SHADOW);
145
3.36k
        break;
146
214
      case 0xB5:
147
214
        listener->attributeChange(false, WP42_ATTRIBUTE_SHADOW);
148
214
        break;
149
150
4.45k
      default:
151
        // unsupported or undocumented token, ignore
152
4.45k
        break;
153
12.0k
      }
154
12.0k
    }
155
166k
    else if (readVal >= (unsigned char)0xC0 && readVal <= (unsigned char)0xFE)
156
152k
    {
157
152k
      std::unique_ptr<WP42Part> part(WP42Part::constructPart(input, encryption, readVal));
158
152k
      if (part)
159
151k
        part->parse(listener);
160
152k
    }
161
    // ignore the rest since they are not documented and at least 0xFF is a special character that
162
    // marks end of variable length part in variable length multi-byte functions
163
6.00M
  }
164
66.6k
}
165
166
/** Parses the whole document and sends it to \a documentInterface.
 *
 * The document is read twice from the parser's own input stream:
 *  - a first pass with a WP42StylesListener fills the page list;
 *  - consecutive equal page spans in that list are then merged;
 *  - a second pass with a WP42ContentListener emits the document body to
 *    \a documentInterface using the merged page list.
 *
 * \param documentInterface receiver of the generated document
 * \throws FileException propagated unchanged when either pass raises it
 */
void WP42Parser::parse(librevenge::RVNGTextInterface *documentInterface)
{
  librevenge::RVNGInputStream *input = getInput();
  WPXEncryption *encryption = getEncryption();
  std::list<WPXPageSpan> pageList;

  try
  {
    // do a "first-pass" parse of the document
    // gather table border information, page properties (per-page)
    WP42StylesListener stylesListener(pageList);
    parse(input, encryption, &stylesListener);

    // postprocess the pageList == remove duplicate page spans due to the page breaks:
    // a span equal to its predecessor is folded into the predecessor, whose
    // span count grows by the span count of the removed entry
    auto previousPage = pageList.begin();
    for (auto iter = pageList.begin(); iter != pageList.end();)
    {
      if ((iter != previousPage) && (*previousPage == *iter))
      {
        previousPage->setPageSpan(previousPage->getPageSpan() + iter->getPageSpan());
        // erase() returns the element after the removed one, so the
        // iterator is not advanced a second time
        iter = pageList.erase(iter);
      }
      else
      {
        previousPage = iter;
        ++iter;
      }
    }

    // second pass: here is where we actually send the messages to the target app
    // that are necessary to emit the body of the target document
    WP42ContentListener listener(pageList, documentInterface);
    parse(input, encryption, &listener);
  }
  catch (const FileException &)
  {
    WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
    // rethrow the exception that was caught instead of building a new one
    throw;
  }
}
207
208
/** Parses the parser's input stream as a sub-document.
 *
 * Two passes are made over the stream, both without an encryption object:
 * first with a WP42StylesListener that fills the page list, then with a
 * WP42ContentListener that emits the content to \a documentInterface. Each
 * pass is bracketed by startSubDocument() / endSubDocument() on its listener.
 *
 * parseDocument() reads until the stream reports its end, so the stream is
 * put back to the position it had on entry before the second pass; without
 * that the content pass would find nothing left to read.
 *
 * \param documentInterface receiver of the generated sub-document content
 * \throws FileException propagated unchanged when either pass raises it
 */
void WP42Parser::parseSubDocument(librevenge::RVNGTextInterface *documentInterface)
{
  std::list<WPXPageSpan> pageList;

  librevenge::RVNGInputStream *input = getInput();

  try
  {
    // remember where the sub-document starts so both passes read the same bytes
    const long startPosition = input->tell();

    WP42StylesListener stylesListener(pageList);
    stylesListener.startSubDocument();
    parseDocument(input, nullptr, &stylesListener);
    stylesListener.endSubDocument();

    // the styles pass consumed the stream up to its end: go back to the start
    input->seek(startPosition, librevenge::RVNG_SEEK_SET);

    WP42ContentListener listener(pageList, documentInterface);
    listener.startSubDocument();
    parseDocument(input, nullptr, &listener);
    listener.endSubDocument();
  }
  catch (const FileException &)
  {
    WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
    // rethrow the exception that was caught instead of building a new one
    throw;
  }
}
232
/* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */