/src/libwpd/src/lib/WP1Parser.cpp
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */ |
2 | | /* libwpd |
3 | | * Version: MPL 2.0 / LGPLv2.1+ |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * Major Contributor(s): |
10 | | * Copyright (C) 2003 William Lachance (wrlach@gmail.com) |
11 | | * Copyright (C) 2003 Marc Maurer (uwog@uwog.net) |
12 | | * Copyright (C) 2006 Fridrich Strba (fridrich.strba@bluewin.ch) |
13 | | * |
14 | | * For minor contributions see the git repository. |
15 | | * |
16 | | * Alternatively, the contents of this file may be used under the terms |
17 | | * of the GNU Lesser General Public License Version 2.1 or later |
18 | | * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are |
19 | | * applicable instead of those above. |
20 | | * |
21 | | * For further information visit http://libwpd.sourceforge.net |
22 | | */ |
23 | | |
24 | | /* "This product is not manufactured, approved, or supported by |
25 | | * Corel Corporation or Corel Corporation Limited." |
26 | | */ |
27 | | |
28 | | #include "WP1Parser.h" |
29 | | |
30 | | #include <memory> |
31 | | |
32 | | #include "WP1Part.h" |
33 | | #include "libwpd_internal.h" |
34 | | #include "WP1FileStructure.h" |
35 | | #include "WP1StylesListener.h" |
36 | | #include "WP1ContentListener.h" |
37 | | |
38 | | WP1Parser::WP1Parser(librevenge::RVNGInputStream *input, WPXEncryption *encryption) : |
39 | 0 | WPXParser(input, nullptr, encryption) |
40 | 0 | { |
41 | 0 | } |
42 | | |
43 | | WP1Parser::~WP1Parser() |
44 | 0 | { |
45 | 0 | } |
46 | | |
47 | | void WP1Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP1Listener *listener) |
48 | 0 | { |
49 | 0 | listener->startDocument(); |
50 | |
|
51 | 0 | if (encryption) |
52 | 0 | input->seek(6, librevenge::RVNG_SEEK_SET); |
53 | 0 | else |
54 | 0 | input->seek(0, librevenge::RVNG_SEEK_SET); |
55 | |
|
56 | 0 | WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n",(long)input->tell())); |
57 | |
|
58 | 0 | parseDocument(input, encryption, listener); |
59 | |
|
60 | 0 | listener->endDocument(); |
61 | 0 | } |
62 | | |
63 | | // parseDocument: parses a document body (may call itself recursively, on other streams, or itself) |
64 | | void WP1Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP1Listener *listener) |
65 | 0 | { |
66 | 0 | while (!input->isEnd()) |
67 | 0 | { |
68 | 0 | unsigned char readVal; |
69 | 0 | readVal = readU8(input, encryption); |
70 | |
|
71 | 0 | if (readVal < (unsigned char)0x20) |
72 | 0 | { |
73 | 0 | WPD_DEBUG_MSG(("Offset: %i, Handling Control Character 0x%2x\n", (unsigned int)input->tell(), readVal)); |
74 | |
|
75 | 0 | switch (readVal) |
76 | 0 | { |
77 | 0 | case 0x09: // tab |
78 | 0 | listener->insertTab(); |
79 | 0 | break; |
80 | 0 | case 0x0A: // hard new line |
81 | 0 | listener->insertEOL(); |
82 | 0 | break; |
83 | 0 | case 0x0B: // soft new page |
84 | 0 | listener->insertBreak(WPX_SOFT_PAGE_BREAK); |
85 | 0 | break; |
86 | 0 | case 0x0C: // hard new page |
87 | 0 | listener->insertBreak(WPX_PAGE_BREAK); |
88 | 0 | break; |
89 | 0 | case 0x0D: // soft new line |
90 | 0 | listener->insertCharacter(' '); |
91 | 0 | break; |
92 | 0 | default: |
93 | | // unsupported or undocumented token, ignore |
94 | 0 | break; |
95 | 0 | } |
96 | 0 | } |
97 | 0 | else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7F) |
98 | 0 | { |
99 | | // normal ASCII characters |
100 | 0 | listener->insertCharacter(readVal); |
101 | 0 | } |
102 | 0 | else if (readVal >= (unsigned char)0x80 && readVal <= (unsigned char)0xBF) |
103 | 0 | { |
104 | 0 | WPD_DEBUG_MSG(("Offset: %i, Handling Single Character Function 0x%2x\n", (unsigned int)input->tell(), readVal)); |
105 | | |
106 | | // single character function codes |
107 | 0 | switch (readVal) |
108 | 0 | { |
109 | 0 | case 0x90: |
110 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_REDLINE); |
111 | 0 | break; |
112 | 0 | case 0x91: |
113 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_REDLINE); |
114 | 0 | break; |
115 | | |
116 | 0 | case 0x92: |
117 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_STRIKE_OUT); |
118 | 0 | break; |
119 | 0 | case 0x93: |
120 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_STRIKE_OUT); |
121 | 0 | break; |
122 | | |
123 | 0 | case 0x94: |
124 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_UNDERLINE); |
125 | 0 | break; |
126 | 0 | case 0x95: |
127 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_UNDERLINE); |
128 | 0 | break; |
129 | | |
130 | 0 | case 0x9C: |
131 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_BOLD); |
132 | 0 | break; |
133 | 0 | case 0x9D: |
134 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_BOLD); |
135 | 0 | break; |
136 | | |
137 | 0 | case 0xB2: |
138 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_ITALICS); |
139 | 0 | break; |
140 | 0 | case 0xB3: |
141 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_ITALICS); |
142 | 0 | break; |
143 | | |
144 | 0 | case 0xB4: |
145 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_SHADOW); |
146 | 0 | break; |
147 | 0 | case 0xB5: |
148 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_SHADOW); |
149 | 0 | break; |
150 | | |
151 | 0 | case 0xB6: |
152 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_OUTLINE); |
153 | 0 | break; |
154 | 0 | case 0xB7: |
155 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_OUTLINE); |
156 | 0 | break; |
157 | | |
158 | 0 | case 0xBC: |
159 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_SUPERSCRIPT); |
160 | 0 | break; |
161 | 0 | case 0xB9: |
162 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_SUPERSCRIPT); |
163 | 0 | break; |
164 | | |
165 | 0 | case 0xBD: |
166 | 0 | listener->attributeChange(true, WP1_ATTRIBUTE_SUBSCRIPT); |
167 | 0 | break; |
168 | 0 | case 0xB8: |
169 | 0 | listener->attributeChange(false, WP1_ATTRIBUTE_SUBSCRIPT); |
170 | 0 | break; |
171 | | |
172 | 0 | default: |
173 | | // unsupported or undocumented token, ignore |
174 | 0 | break; |
175 | 0 | } |
176 | 0 | } |
177 | 0 | else if (readVal >= (unsigned char)0xC0 && readVal <= (unsigned char)0xFE) |
178 | 0 | { |
179 | 0 | std::unique_ptr<WP1Part> part(WP1Part::constructPart(input, encryption, readVal)); |
180 | 0 | if (part) |
181 | 0 | part->parse(listener); |
182 | 0 | } |
183 | | // ignore the rest since they are not documented and at least 0xFF is a special character that |
184 | | // marks end of variable length part in variable length multi-byte functions |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | void WP1Parser::parse(librevenge::RVNGTextInterface *documentInterface) |
189 | 0 | { |
190 | 0 | librevenge::RVNGInputStream *input = getInput(); |
191 | 0 | WPXEncryption *encryption = getEncryption(); |
192 | 0 | std::list<WPXPageSpan> pageList; |
193 | |
|
194 | 0 | try |
195 | 0 | { |
196 | | // do a "first-pass" parse of the document |
197 | | // gather table border information, page properties (per-page) |
198 | 0 | WP1StylesListener stylesListener(pageList); |
199 | 0 | parse(input, encryption, &stylesListener); |
200 | | |
201 | | // postprocess the pageList == remove duplicate page spans due to the page breaks |
202 | 0 | auto previousPage = pageList.begin(); |
203 | 0 | for (auto Iter=pageList.begin(); Iter != pageList.end();) |
204 | 0 | { |
205 | 0 | if ((Iter != previousPage) && ((*previousPage)==(*Iter))) |
206 | 0 | { |
207 | 0 | (*previousPage).setPageSpan((*previousPage).getPageSpan() + (*Iter).getPageSpan()); |
208 | 0 | Iter = pageList.erase(Iter); |
209 | 0 | } |
210 | 0 | else |
211 | 0 | { |
212 | 0 | previousPage = Iter; |
213 | 0 | ++Iter; |
214 | 0 | } |
215 | 0 | } |
216 | | |
217 | | // second pass: here is where we actually send the messages to the target app |
218 | | // that are necessary to emit the body of the target document |
219 | 0 | WP1ContentListener listener(pageList, documentInterface); |
220 | 0 | parse(input, encryption, &listener); |
221 | 0 | } |
222 | 0 | catch (FileException) |
223 | 0 | { |
224 | 0 | WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely.")); |
225 | 0 | throw FileException(); |
226 | 0 | } |
227 | |
|
228 | 0 | } |
229 | | |
230 | | void WP1Parser::parseSubDocument(librevenge::RVNGTextInterface *documentInterface) |
231 | 0 | { |
232 | 0 | std::list<WPXPageSpan> pageList; |
233 | |
|
234 | 0 | librevenge::RVNGInputStream *input = getInput(); |
235 | |
|
236 | 0 | try |
237 | 0 | { |
238 | 0 | WP1StylesListener stylesListener(pageList); |
239 | 0 | stylesListener.startSubDocument(); |
240 | 0 | parseDocument(input, nullptr, &stylesListener); |
241 | 0 | stylesListener.endSubDocument(); |
242 | |
|
243 | 0 | input->seek(0, librevenge::RVNG_SEEK_SET); |
244 | |
|
245 | 0 | WP1ContentListener listener(pageList, documentInterface); |
246 | 0 | listener.startSubDocument(); |
247 | 0 | parseDocument(input, nullptr, &listener); |
248 | 0 | listener.endSubDocument(); |
249 | 0 | } |
250 | 0 | catch (FileException) |
251 | 0 | { |
252 | 0 | WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely.")); |
253 | 0 | throw FileException(); |
254 | 0 | } |
255 | 0 | } |
256 | | /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */ |