Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libwpd/src/lib/WP1Parser.cpp
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2
/* libwpd
3
 * Version: MPL 2.0 / LGPLv2.1+
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * Major Contributor(s):
10
 * Copyright (C) 2003 William Lachance (wrlach@gmail.com)
11
 * Copyright (C) 2003 Marc Maurer (uwog@uwog.net)
12
 * Copyright (C) 2006 Fridrich Strba (fridrich.strba@bluewin.ch)
13
 *
14
 * For minor contributions see the git repository.
15
 *
16
 * Alternatively, the contents of this file may be used under the terms
17
 * of the GNU Lesser General Public License Version 2.1 or later
18
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
19
 * applicable instead of those above.
20
 *
21
 * For further information visit http://libwpd.sourceforge.net
22
 */
23
24
/* "This product is not manufactured, approved, or supported by
25
 * Corel Corporation or Corel Corporation Limited."
26
 */
27
28
#include "WP1Parser.h"
29
30
#include <memory>
31
32
#include "WP1Part.h"
33
#include "libwpd_internal.h"
34
#include "WP1FileStructure.h"
35
#include "WP1StylesListener.h"
36
#include "WP1ContentListener.h"
37
38
// Constructs a WordPerfect 1 parser.
// The input stream and the (possibly null) encryption object are handed to
// the WPXParser base class; nullptr is passed as the base constructor's
// second argument.
WP1Parser::WP1Parser(librevenge::RVNGInputStream *input, WPXEncryption *encryption)
  : WPXParser(input, nullptr, encryption)
{
}
42
43
// Nothing to release here: the destructor body is empty, so the
// compiler-generated behaviour is all that is required.
WP1Parser::~WP1Parser() = default;
46
47
// Performs one full pass over the document with the given listener:
// signals the start of the document, positions the stream at the start of
// the body, parses the body and finally signals the end of the document.
void WP1Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP1Listener *listener)
{
  listener->startDocument();

  // When an encryption object is supplied, parsing starts at offset 6;
  // otherwise it starts at the very beginning of the stream.
  const long bodyOffset = encryption ? 6 : 0;
  input->seek(bodyOffset, librevenge::RVNG_SEEK_SET);

  WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n",(long)input->tell()));

  parseDocument(input, encryption, listener);

  listener->endDocument();
}
62
63
// parseDocument: parses a document body (may call itself recursively, on other streams, or itself)
64
void WP1Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP1Listener *listener)
65
0
{
66
0
  while (!input->isEnd())
67
0
  {
68
0
    unsigned char readVal;
69
0
    readVal = readU8(input, encryption);
70
71
0
    if (readVal < (unsigned char)0x20)
72
0
    {
73
0
      WPD_DEBUG_MSG(("Offset: %i, Handling Control Character 0x%2x\n", (unsigned int)input->tell(), readVal));
74
75
0
      switch (readVal)
76
0
      {
77
0
      case 0x09: // tab
78
0
        listener->insertTab();
79
0
        break;
80
0
      case 0x0A: // hard new line
81
0
        listener->insertEOL();
82
0
        break;
83
0
      case 0x0B: // soft new page
84
0
        listener->insertBreak(WPX_SOFT_PAGE_BREAK);
85
0
        break;
86
0
      case 0x0C: // hard new page
87
0
        listener->insertBreak(WPX_PAGE_BREAK);
88
0
        break;
89
0
      case 0x0D: // soft new line
90
0
        listener->insertCharacter(' ');
91
0
        break;
92
0
      default:
93
        // unsupported or undocumented token, ignore
94
0
        break;
95
0
      }
96
0
    }
97
0
    else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7F)
98
0
    {
99
      // normal ASCII characters
100
0
      listener->insertCharacter(readVal);
101
0
    }
102
0
    else if (readVal >= (unsigned char)0x80 && readVal <= (unsigned char)0xBF)
103
0
    {
104
0
      WPD_DEBUG_MSG(("Offset: %i, Handling Single Character Function 0x%2x\n", (unsigned int)input->tell(), readVal));
105
106
      // single character function codes
107
0
      switch (readVal)
108
0
      {
109
0
      case 0x90:
110
0
        listener->attributeChange(true, WP1_ATTRIBUTE_REDLINE);
111
0
        break;
112
0
      case 0x91:
113
0
        listener->attributeChange(false, WP1_ATTRIBUTE_REDLINE);
114
0
        break;
115
116
0
      case 0x92:
117
0
        listener->attributeChange(true, WP1_ATTRIBUTE_STRIKE_OUT);
118
0
        break;
119
0
      case 0x93:
120
0
        listener->attributeChange(false, WP1_ATTRIBUTE_STRIKE_OUT);
121
0
        break;
122
123
0
      case 0x94:
124
0
        listener->attributeChange(true, WP1_ATTRIBUTE_UNDERLINE);
125
0
        break;
126
0
      case 0x95:
127
0
        listener->attributeChange(false, WP1_ATTRIBUTE_UNDERLINE);
128
0
        break;
129
130
0
      case 0x9C:
131
0
        listener->attributeChange(false, WP1_ATTRIBUTE_BOLD);
132
0
        break;
133
0
      case 0x9D:
134
0
        listener->attributeChange(true, WP1_ATTRIBUTE_BOLD);
135
0
        break;
136
137
0
      case 0xB2:
138
0
        listener->attributeChange(true, WP1_ATTRIBUTE_ITALICS);
139
0
        break;
140
0
      case 0xB3:
141
0
        listener->attributeChange(false, WP1_ATTRIBUTE_ITALICS);
142
0
        break;
143
144
0
      case 0xB4:
145
0
        listener->attributeChange(true, WP1_ATTRIBUTE_SHADOW);
146
0
        break;
147
0
      case 0xB5:
148
0
        listener->attributeChange(false, WP1_ATTRIBUTE_SHADOW);
149
0
        break;
150
151
0
      case 0xB6:
152
0
        listener->attributeChange(true, WP1_ATTRIBUTE_OUTLINE);
153
0
        break;
154
0
      case 0xB7:
155
0
        listener->attributeChange(false, WP1_ATTRIBUTE_OUTLINE);
156
0
        break;
157
158
0
      case 0xBC:
159
0
        listener->attributeChange(true, WP1_ATTRIBUTE_SUPERSCRIPT);
160
0
        break;
161
0
      case 0xB9:
162
0
        listener->attributeChange(false, WP1_ATTRIBUTE_SUPERSCRIPT);
163
0
        break;
164
165
0
      case 0xBD:
166
0
        listener->attributeChange(true, WP1_ATTRIBUTE_SUBSCRIPT);
167
0
        break;
168
0
      case 0xB8:
169
0
        listener->attributeChange(false, WP1_ATTRIBUTE_SUBSCRIPT);
170
0
        break;
171
172
0
      default:
173
        // unsupported or undocumented token, ignore
174
0
        break;
175
0
      }
176
0
    }
177
0
    else if (readVal >= (unsigned char)0xC0 && readVal <= (unsigned char)0xFE)
178
0
    {
179
0
      std::unique_ptr<WP1Part> part(WP1Part::constructPart(input, encryption, readVal));
180
0
      if (part)
181
0
        part->parse(listener);
182
0
    }
183
    // ignore the rest since they are not documented and at least 0xFF is a special character that
184
    // marks end of variable length part in variable length multi-byte functions
185
0
  }
186
0
}
187
188
// Parses the whole document and sends it to documentInterface.
// The document is read twice:
//   1. a styles pass (WP1StylesListener) that fills pageList with the page
//      spans found in the document;
//   2. a content pass (WP1ContentListener) that emits the document through
//      documentInterface using the page list gathered in pass 1.
// Throws FileException if either pass raises one; the exception is logged
// and propagated to the caller unchanged.
void WP1Parser::parse(librevenge::RVNGTextInterface *documentInterface)
{
  librevenge::RVNGInputStream *input = getInput();
  WPXEncryption *encryption = getEncryption();
  std::list<WPXPageSpan> pageList;

  try
  {
    // do a "first-pass" parse of the document
    // gather table border information, page properties (per-page)
    WP1StylesListener stylesListener(pageList);
    parse(input, encryption, &stylesListener);

    // postprocess the pageList == remove duplicate page spans due to the page breaks:
    // every page span equal to the one kept just before it is folded into
    // that one by adding up their spans, then erased from the list.
    auto previousPage = pageList.begin();
    for (auto page = pageList.begin(); page != pageList.end();)
    {
      if ((page != previousPage) && (*previousPage == *page))
      {
        previousPage->setPageSpan(previousPage->getPageSpan() + page->getPageSpan());
        // erase() returns the element following the removed one
        page = pageList.erase(page);
      }
      else
      {
        previousPage = page;
        ++page;
      }
    }

    // second pass: here is where we actually send the messages to the target app
    // that are necessary to emit the body of the target document
    WP1ContentListener listener(pageList, documentInterface);
    parse(input, encryption, &listener);
  }
  catch (const FileException &)
  {
    WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
    // rethrow the exception that was caught instead of building a new one
    throw;
  }
}
229
230
// Parses the parser's input stream as a sub-document and sends it to
// documentInterface. The stream is read twice without encryption: first
// with a WP1StylesListener to collect the page list, then, after rewinding
// to the start of the stream, with a WP1ContentListener that emits the
// content. Each pass is bracketed by startSubDocument()/endSubDocument().
// Throws FileException if either pass raises one; the exception is logged
// and propagated to the caller unchanged.
void WP1Parser::parseSubDocument(librevenge::RVNGTextInterface *documentInterface)
{
  std::list<WPXPageSpan> pageList;

  librevenge::RVNGInputStream *input = getInput();

  try
  {
    // first pass: gather page information
    WP1StylesListener stylesListener(pageList);
    stylesListener.startSubDocument();
    parseDocument(input, nullptr, &stylesListener);
    stylesListener.endSubDocument();

    // rewind so that the content pass sees the same bytes again
    input->seek(0, librevenge::RVNG_SEEK_SET);

    // second pass: emit the actual content
    WP1ContentListener listener(pageList, documentInterface);
    listener.startSubDocument();
    parseDocument(input, nullptr, &listener);
    listener.endSubDocument();
  }
  catch (const FileException &)
  {
    WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
    // rethrow the exception that was caught instead of building a new one
    throw;
  }
}
256
/* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */