/src/libwpd/src/lib/WP42Parser.cpp
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */ |
2 | | /* libwpd |
3 | | * Version: MPL 2.0 / LGPLv2.1+ |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * Major Contributor(s): |
10 | | * Copyright (C) 2003 William Lachance (wrlach@gmail.com) |
11 | | * Copyright (C) 2003 Marc Maurer (uwog@uwog.net) |
12 | | * Copyright (C) 2006 Fridrich Strba (fridrich.strba@bluewin.ch) |
13 | | * |
14 | | * For minor contributions see the git repository. |
15 | | * |
16 | | * Alternatively, the contents of this file may be used under the terms |
17 | | * of the GNU Lesser General Public License Version 2.1 or later |
18 | | * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are |
19 | | * applicable instead of those above. |
20 | | * |
21 | | * For further information visit http://libwpd.sourceforge.net |
22 | | */ |
23 | | |
24 | | /* "This product is not manufactured, approved, or supported by |
25 | | * Corel Corporation or Corel Corporation Limited." |
26 | | */ |
27 | | |
28 | | #include "WP42Parser.h" |
29 | | |
30 | | #include <memory> |
31 | | |
32 | | #include "WP42Part.h" |
33 | | #include "WPXHeader.h" |
34 | | #include "libwpd_internal.h" |
35 | | #include "WPXTable.h" |
36 | | #include "WP42FileStructure.h" |
37 | | #include "WP42StylesListener.h" |
38 | | #include "WP42ContentListener.h" |
39 | | |
40 | | WP42Parser::WP42Parser(librevenge::RVNGInputStream *input, WPXEncryption *encryption) : |
41 | 873 | WPXParser(input, nullptr, encryption) |
42 | 873 | { |
43 | 873 | } |
44 | | |
45 | | WP42Parser::~WP42Parser() |
46 | 873 | { |
47 | 873 | } |
48 | | |
49 | | void WP42Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP42Listener *listener) |
50 | 1.61k | { |
51 | 1.61k | listener->startDocument(); |
52 | | |
53 | 1.61k | input->seek(0, librevenge::RVNG_SEEK_SET); |
54 | | |
55 | 1.61k | WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n",(long)input->tell())); |
56 | | |
57 | 1.61k | parseDocument(input, encryption, listener); |
58 | | |
59 | 1.61k | listener->endDocument(); |
60 | 1.61k | } |
61 | | |
62 | | // parseDocument: parses a document body (may call itself recursively, on other streams, or itself) |
63 | | void WP42Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP42Listener *listener) |
64 | 66.6k | { |
65 | 6.06M | while (!input->isEnd()) |
66 | 6.00M | { |
67 | 6.00M | unsigned char readVal; |
68 | 6.00M | readVal = readU8(input, encryption); |
69 | | |
70 | 6.00M | if (readVal < (unsigned char)0x20) |
71 | 2.14M | { |
72 | 2.14M | WPD_DEBUG_MSG(("Offset: %i, Handling Control Character 0x%2x\n", (unsigned int)input->tell(), readVal)); |
73 | | |
74 | 2.14M | switch (readVal) |
75 | 2.14M | { |
76 | 2.56k | case 0x09: // tab |
77 | 2.56k | listener->insertTab(0, 0.0); |
78 | 2.56k | break; |
79 | 49.1k | case 0x0A: // hard new line |
80 | 49.1k | listener->insertEOL(); |
81 | 49.1k | break; |
82 | 2.32k | case 0x0B: // soft new page |
83 | 2.32k | listener->insertBreak(WPX_SOFT_PAGE_BREAK); |
84 | 2.32k | break; |
85 | 1.94M | case 0x0C: // hard new page |
86 | 1.94M | listener->insertBreak(WPX_PAGE_BREAK); |
87 | 1.94M | break; |
88 | 1.86k | case 0x0D: // soft new line |
89 | 1.86k | listener->insertCharacter(' '); |
90 | 1.86k | break; |
91 | 143k | default: |
92 | | // unsupported or undocumented token, ignore |
93 | 143k | break; |
94 | 2.14M | } |
95 | 2.14M | } |
96 | 3.85M | else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7F) |
97 | 3.67M | { |
98 | 3.67M | WPD_DEBUG_MSG(("Offset: %i, Handling Ascii Character 0x%2x\n", (unsigned int)input->tell(), readVal)); |
99 | | |
100 | | // normal ASCII characters |
101 | 3.67M | listener->insertCharacter(readVal); |
102 | 3.67M | } |
103 | 178k | else if (readVal >= (unsigned char)0x80 && readVal <= (unsigned char)0xBF) |
104 | 12.0k | { |
105 | 12.0k | WPD_DEBUG_MSG(("Offset: %i, Handling Single Character Function 0x%2x\n", (unsigned int)input->tell(), readVal)); |
106 | | |
107 | | // single character function codes |
108 | 12.0k | switch (readVal) |
109 | 12.0k | { |
110 | 807 | case 0x92: |
111 | 807 | listener->attributeChange(true, WP42_ATTRIBUTE_STRIKE_OUT); |
112 | 807 | break; |
113 | 243 | case 0x93: |
114 | 243 | listener->attributeChange(false, WP42_ATTRIBUTE_STRIKE_OUT); |
115 | 243 | break; |
116 | 369 | case 0x94: |
117 | 369 | listener->attributeChange(true, WP42_ATTRIBUTE_UNDERLINE); |
118 | 369 | break; |
119 | 257 | case 0x95: |
120 | 257 | listener->attributeChange(false, WP42_ATTRIBUTE_UNDERLINE); |
121 | 257 | break; |
122 | | |
123 | 320 | case 0x90: |
124 | 320 | listener->attributeChange(true, WP42_ATTRIBUTE_REDLINE); |
125 | 320 | break; |
126 | 215 | case 0x91: |
127 | 215 | listener->attributeChange(false, WP42_ATTRIBUTE_REDLINE); |
128 | 215 | break; |
129 | | |
130 | 419 | case 0x9C: |
131 | 419 | listener->attributeChange(false, WP42_ATTRIBUTE_BOLD); |
132 | 419 | break; |
133 | 288 | case 0x9D: |
134 | 288 | listener->attributeChange(true, WP42_ATTRIBUTE_BOLD); |
135 | 288 | break; |
136 | | |
137 | 836 | case 0xB2: |
138 | 836 | listener->attributeChange(true, WP42_ATTRIBUTE_ITALICS); |
139 | 836 | break; |
140 | 219 | case 0xB3: |
141 | 219 | listener->attributeChange(false, WP42_ATTRIBUTE_ITALICS); |
142 | 219 | break; |
143 | 3.36k | case 0xB4: |
144 | 3.36k | listener->attributeChange(true, WP42_ATTRIBUTE_SHADOW); |
145 | 3.36k | break; |
146 | 214 | case 0xB5: |
147 | 214 | listener->attributeChange(false, WP42_ATTRIBUTE_SHADOW); |
148 | 214 | break; |
149 | | |
150 | 4.45k | default: |
151 | | // unsupported or undocumented token, ignore |
152 | 4.45k | break; |
153 | 12.0k | } |
154 | 12.0k | } |
155 | 166k | else if (readVal >= (unsigned char)0xC0 && readVal <= (unsigned char)0xFE) |
156 | 152k | { |
157 | 152k | std::unique_ptr<WP42Part> part(WP42Part::constructPart(input, encryption, readVal)); |
158 | 152k | if (part) |
159 | 151k | part->parse(listener); |
160 | 152k | } |
161 | | // ignore the rest since they are not documented and at least 0xFF is a special character that |
162 | | // marks end of variable length part in variable length multi-byte functions |
163 | 6.00M | } |
164 | 66.6k | } |
165 | | |
166 | | void WP42Parser::parse(librevenge::RVNGTextInterface *documentInterface) |
167 | 873 | { |
168 | 873 | librevenge::RVNGInputStream *input = getInput(); |
169 | 873 | WPXEncryption *encryption = getEncryption(); |
170 | 873 | std::list<WPXPageSpan> pageList; |
171 | | |
172 | 873 | try |
173 | 873 | { |
174 | | // do a "first-pass" parse of the document |
175 | | // gather table border information, page properties (per-page) |
176 | 873 | WP42StylesListener stylesListener(pageList); |
177 | 873 | parse(input, encryption, &stylesListener); |
178 | | |
179 | | // postprocess the pageList == remove duplicate page spans due to the page breaks |
180 | 873 | auto previousPage = pageList.begin(); |
181 | 958k | for (auto Iter=pageList.begin(); Iter != pageList.end();) |
182 | 957k | { |
183 | 957k | if ((Iter != previousPage) && ((*previousPage)==(*Iter))) |
184 | 892k | { |
185 | 892k | (*previousPage).setPageSpan((*previousPage).getPageSpan() + (*Iter).getPageSpan()); |
186 | 892k | Iter = pageList.erase(Iter); |
187 | 892k | } |
188 | 64.6k | else |
189 | 64.6k | { |
190 | 64.6k | previousPage = Iter; |
191 | 64.6k | ++Iter; |
192 | 64.6k | } |
193 | 957k | } |
194 | | |
195 | | // second pass: here is where we actually send the messages to the target app |
196 | | // that are necessary to emit the body of the target document |
197 | 873 | WP42ContentListener listener(pageList, documentInterface); |
198 | 873 | parse(input, encryption, &listener); |
199 | 873 | } |
200 | 873 | catch (FileException) |
201 | 873 | { |
202 | 130 | WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely.")); |
203 | 130 | throw FileException(); |
204 | 130 | } |
205 | | |
206 | 873 | } |
207 | | |
208 | | void WP42Parser::parseSubDocument(librevenge::RVNGTextInterface *documentInterface) |
209 | 0 | { |
210 | 0 | std::list<WPXPageSpan> pageList; |
211 | |
|
212 | 0 | librevenge::RVNGInputStream *input = getInput(); |
213 | |
|
214 | 0 | try |
215 | 0 | { |
216 | 0 | WP42StylesListener stylesListener(pageList); |
217 | 0 | stylesListener.startSubDocument(); |
218 | 0 | parseDocument(input, nullptr, &stylesListener); |
219 | 0 | stylesListener.endSubDocument(); |
220 | |
|
221 | 0 | WP42ContentListener listener(pageList, documentInterface); |
222 | 0 | listener.startSubDocument(); |
223 | 0 | parseDocument(input, nullptr, &listener); |
224 | 0 | listener.endSubDocument(); |
225 | 0 | } |
226 | 0 | catch (FileException) |
227 | 0 | { |
228 | 0 | WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely.")); |
229 | 0 | throw FileException(); |
230 | 0 | } |
231 | 0 | } |
232 | | /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */ |