/src/libwpd/src/lib/WP3Parser.cpp
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */ |
2 | | /* libwpd |
3 | | * Version: MPL 2.0 / LGPLv2.1+ |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * Major Contributor(s): |
10 | | * Copyright (C) 2004 Marc Maurer (uwog@uwog.net) |
11 | | * Copyright (C) 2004 Fridrich Strba (fridrich.strba@bluewin.ch) |
12 | | * |
13 | | * For minor contributions see the git repository. |
14 | | * |
15 | | * Alternatively, the contents of this file may be used under the terms |
16 | | * of the GNU Lesser General Public License Version 2.1 or later |
17 | | * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are |
18 | | * applicable instead of those above. |
19 | | * |
20 | | * For further information visit http://libwpd.sourceforge.net |
21 | | */ |
22 | | |
23 | | /* "This product is not manufactured, approved, or supported by |
24 | | * Corel Corporation or Corel Corporation Limited." |
25 | | */ |
26 | | |
27 | | #include "WP3Parser.h" |
28 | | |
29 | | #include <memory> |
30 | | |
31 | | #include "WPXHeader.h" |
32 | | #include "WP3Part.h" |
33 | | #include "WP3ContentListener.h" |
34 | | #include "WP3StylesListener.h" |
35 | | #include "WP3ResourceFork.h" |
36 | | #include "libwpd_internal.h" |
37 | | #include "WPXTable.h" |
38 | | #include "WPXTableList.h" |
39 | | |
40 | | WP3Parser::WP3Parser(librevenge::RVNGInputStream *input, WPXHeader *header, WPXEncryption *encryption) : |
41 | 5.00k | WPXParser(input, header, encryption) |
42 | 5.00k | { |
43 | 5.00k | } |
44 | | |
45 | | WP3Parser::~WP3Parser() |
46 | 5.00k | { |
47 | 5.00k | } |
48 | | |
49 | | WP3ResourceFork *WP3Parser::getResourceFork(librevenge::RVNGInputStream *input, WPXEncryption *encryption) |
50 | 5.00k | { |
51 | | // Certain WP2 documents actually don't contain resource fork, so check for its existence |
52 | 5.00k | if (!getHeader() || getHeader()->getDocumentOffset() <= 0x10) |
53 | 4.65k | { |
54 | 4.65k | WPD_DEBUG_MSG(("WP3Parser: Document does not contain resource fork\n")); |
55 | 4.65k | return nullptr; |
56 | 4.65k | } |
57 | | |
58 | 356 | return new WP3ResourceFork(input, encryption); |
59 | 5.00k | } |
60 | | |
61 | | void WP3Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener) |
62 | 9.34k | { |
63 | 9.34k | listener->startDocument(); |
64 | | |
65 | 9.34k | input->seek(getHeader()->getDocumentOffset(), librevenge::RVNG_SEEK_SET); |
66 | | |
67 | 9.34k | WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n",(long)input->tell())); |
68 | | |
69 | 9.34k | parseDocument(input, encryption, listener); |
70 | | |
71 | 9.34k | listener->endDocument(); |
72 | 9.34k | } |
73 | | |
74 | | // parseDocument: parses a document body (may call itself recursively, on other streams, or itself) |
75 | | void WP3Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener) |
76 | 205k | { |
77 | 97.0M | while (!input->isEnd()) |
78 | 96.8M | { |
79 | 96.8M | unsigned char readVal; |
80 | 96.8M | readVal = readU8(input, encryption); |
81 | | |
82 | 96.8M | if (readVal == 0 || readVal == 0x7F || readVal == 0xFF) |
83 | 28.9M | { |
84 | | // FIXME: VERIFY: is this IF clause correct? (0xFF seems to be OK at least) |
85 | | // do nothing: this token is meaningless and is likely just corruption |
86 | 28.9M | } |
87 | 67.8M | else if (readVal >= (unsigned char)0x01 && readVal <= (unsigned char)0x1F) |
88 | 21.2M | { |
89 | | // control characters ? |
90 | 21.2M | } |
91 | 46.6M | else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7E) |
92 | 20.8M | { |
93 | 20.8M | listener->insertCharacter(readVal); |
94 | 20.8M | } |
95 | 25.7M | else |
96 | 25.7M | { |
97 | 25.7M | std::unique_ptr<WP3Part> part(WP3Part::constructPart(input, encryption, readVal)); |
98 | 25.7M | if (part) |
99 | 4.61M | part->parse(listener); |
100 | 25.7M | } |
101 | 96.8M | } |
102 | 205k | } |
103 | | |
104 | | void WP3Parser::parse(librevenge::RVNGTextInterface *textInterface) |
105 | 5.00k | { |
106 | 5.00k | librevenge::RVNGInputStream *input = getInput(); |
107 | 5.00k | WPXEncryption *encryption = getEncryption(); |
108 | 5.00k | std::list<WPXPageSpan> pageList; |
109 | 5.00k | WPXTableList tableList; |
110 | | |
111 | 5.00k | try |
112 | 5.00k | { |
113 | 5.00k | const std::unique_ptr<WP3ResourceFork> resourceFork{getResourceFork(input, encryption)}; |
114 | | |
115 | | // do a "first-pass" parse of the document |
116 | | // gather table border information, page properties (per-page) |
117 | 5.00k | WP3StylesListener stylesListener(pageList, tableList); |
118 | 5.00k | stylesListener.setResourceFork(resourceFork.get()); |
119 | 5.00k | parse(input, encryption, &stylesListener); |
120 | | |
121 | | // postprocess the pageList == remove duplicate page spans due to the page breaks |
122 | 5.00k | auto previousPage = pageList.begin(); |
123 | 35.3k | for (auto Iter=pageList.begin(); Iter != pageList.end(); /* Iter++ */) |
124 | 30.3k | { |
125 | 30.3k | if ((Iter != previousPage) && (*previousPage==*Iter)) |
126 | 18.8k | { |
127 | 18.8k | (*previousPage).setPageSpan((*previousPage).getPageSpan() + (*Iter).getPageSpan()); |
128 | 18.8k | Iter = pageList.erase(Iter); |
129 | 18.8k | } |
130 | 11.4k | else |
131 | 11.4k | { |
132 | 11.4k | previousPage = Iter; |
133 | 11.4k | ++Iter; |
134 | 11.4k | } |
135 | 30.3k | } |
136 | | |
137 | | // second pass: here is where we actually send the messages to the target app |
138 | | // that are necessary to emit the body of the target document |
139 | 5.00k | WP3ContentListener listener(pageList, textInterface); // FIXME: SHOULD BE CONTENT_LISTENER, AND SHOULD BE PASSED TABLE DATA! |
140 | 5.00k | listener.setResourceFork(resourceFork.get()); |
141 | 5.00k | parse(input, encryption, &listener); |
142 | 5.00k | } |
143 | 5.00k | catch (FileException) |
144 | 5.00k | { |
145 | 515 | WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely.")); |
146 | 515 | throw FileException(); |
147 | 515 | } |
148 | 5.00k | } |
149 | | |
150 | | void WP3Parser::parseSubDocument(librevenge::RVNGTextInterface *textInterface) |
151 | 0 | { |
152 | 0 | std::list<WPXPageSpan> pageList; |
153 | 0 | WPXTableList tableList; |
154 | |
|
155 | 0 | librevenge::RVNGInputStream *input = getInput(); |
156 | |
|
157 | 0 | try |
158 | 0 | { |
159 | 0 | WP3StylesListener stylesListener(pageList, tableList); |
160 | 0 | stylesListener.startSubDocument(); |
161 | 0 | parseDocument(input, nullptr, &stylesListener); |
162 | 0 | stylesListener.endSubDocument(); |
163 | |
|
164 | 0 | input->seek(0, librevenge::RVNG_SEEK_SET); |
165 | |
|
166 | 0 | WP3ContentListener listener(pageList, textInterface); |
167 | 0 | listener.startSubDocument(); |
168 | 0 | parseDocument(input, nullptr, &listener); |
169 | 0 | listener.endSubDocument(); |
170 | 0 | } |
171 | 0 | catch (FileException) |
172 | 0 | { |
173 | 0 | WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely.")); |
174 | 0 | throw FileException(); |
175 | 0 | } |
176 | 0 | } |
177 | | |
178 | | /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */ |