/src/libmwaw/src/lib/MacWrtProParser.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */ |
2 | | |
3 | | /* libmwaw |
4 | | * Version: MPL 2.0 / LGPLv2+ |
5 | | * |
6 | | * The contents of this file are subject to the Mozilla Public License Version |
7 | | * 2.0 (the "License"); you may not use this file except in compliance with |
8 | | * the License or as specified alternatively below. You may obtain a copy of |
9 | | * the License at http://www.mozilla.org/MPL/ |
10 | | * |
11 | | * Software distributed under the License is distributed on an "AS IS" basis, |
12 | | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
13 | | * for the specific language governing rights and limitations under the |
14 | | * License. |
15 | | * |
16 | | * Major Contributor(s): |
17 | | * Copyright (C) 2002 William Lachance (wrlach@gmail.com) |
18 | | * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net) |
19 | | * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch) |
20 | | * Copyright (C) 2006, 2007 Andrew Ziem |
21 | | * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr) |
22 | | * |
23 | | * |
24 | | * All Rights Reserved. |
25 | | * |
26 | | * For minor contributions see the git repository. |
27 | | * |
28 | | * Alternatively, the contents of this file may be used under the terms of |
29 | | * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"), |
30 | | * in which case the provisions of the LGPLv2+ are applicable |
31 | | * instead of those above. |
32 | | */ |
33 | | |
34 | | #include <algorithm> |
35 | | #include <iomanip> |
36 | | #include <iostream> |
37 | | #include <limits> |
38 | | #include <map> |
39 | | #include <memory> |
40 | | #include <set> |
41 | | #include <sstream> |
42 | | |
43 | | #include <librevenge/librevenge.h> |
44 | | |
45 | | #include "MWAWCell.hxx" |
46 | | #include "MWAWFontConverter.hxx" |
47 | | #include "MWAWHeader.hxx" |
48 | | #include "MWAWPosition.hxx" |
49 | | #include "MWAWPictMac.hxx" |
50 | | #include "MWAWPrinter.hxx" |
51 | | #include "MWAWStream.hxx" |
52 | | #include "MWAWStringStream.hxx" |
53 | | #include "MWAWSubDocument.hxx" |
54 | | #include "MWAWTextListener.hxx" |
55 | | |
56 | | #include "MacWrtProStructures.hxx" |
57 | | |
58 | | #include "MacWrtProParser.hxx" |
59 | | |
60 | | /** Internal: the structures of a MacWrtProParser */ |
61 | | namespace MacWrtProParserInternal |
62 | | { |
63 | | //////////////////////////////////////// |
64 | | //! Internal: a struct used to store a zone |
65 | | struct Zone { |
66 | | Zone() |
67 | 19.7k | : m_type(-1) |
68 | 19.7k | , m_blockId(0) |
69 | 19.7k | , m_stream() |
70 | 19.7k | , m_parsed(false) |
71 | 19.7k | { |
72 | 19.7k | } |
73 | | ~Zone() |
74 | 19.7k | { |
75 | 19.7k | } |
76 | | |
77 | | //! the type : 0(text), 1(graphic) |
78 | | int m_type; |
79 | | |
80 | | //! the first block id |
81 | | int m_blockId; |
82 | | |
83 | | //! the storage |
84 | | std::shared_ptr<MWAWStream> m_stream; |
85 | | |
86 | | //! true if the zone has been sent |
87 | | bool m_parsed; |
88 | | }; |
89 | | |
90 | | //! Internal: a struct used to store the data of a text zone: a type, a text length and an id |
91 | | struct TextZoneData { |
92 | | TextZoneData() |
93 | 141k | : m_type(-1) |
94 | 141k | , m_length(0) |
95 | 141k | , m_id(0) |
96 | 141k | { |
97 | 141k | } |
98 | | friend std::ostream &operator<<(std::ostream &o, TextZoneData const &tData) |
99 | 0 | { |
100 | 0 | switch (tData.m_type) { |
101 | 0 | case 0: |
102 | 0 | o << "C" << tData.m_id << ","; |
103 | 0 | break; |
104 | 0 | case 1: |
105 | 0 | o << "P" << tData.m_id << ","; |
106 | 0 | break; |
107 | 0 | default: |
108 | 0 | o << "type=" << tData.m_type << ",id=" << tData.m_id << ","; |
109 | 0 | break; |
110 | 0 | } |
111 | 0 | o << "nC=" << tData.m_length << ","; |
112 | 0 | return o; |
113 | 0 | } |
114 | | //! the type |
115 | | int m_type; |
116 | | //! the text length |
117 | | int m_length; |
118 | | //! an id |
119 | | int m_id; |
120 | | }; |
121 | | |
122 | | //! Internal: a struct used to store a token |
123 | | struct Token { |
124 | | Token() |
125 | 9.79k | : m_type(-1) |
126 | 9.79k | , m_length(0) |
127 | 9.79k | , m_blockId(-1) |
128 | 9.79k | , m_box() |
129 | 9.79k | { |
130 | 39.1k | for (auto &fl : m_flags) fl = 0; |
131 | 9.79k | } |
132 | | //! operator<< |
133 | | friend std::ostream &operator<<(std::ostream &o, Token const &tkn) |
134 | 0 | { |
135 | 0 | o << "nC=" << tkn.m_length << ","; |
136 | 0 | switch (tkn.m_type) { |
137 | 0 | case 1: |
138 | 0 | o << "pagenumber,"; |
139 | 0 | break; |
140 | 0 | case 2: |
141 | 0 | o << "footnote(pos),"; |
142 | 0 | break; |
143 | 0 | case 3: |
144 | 0 | o << "footnote(content),"; |
145 | 0 | break; |
146 | 0 | case 4: |
147 | 0 | o << "figure,"; |
148 | 0 | break; |
149 | 0 | case 5: |
150 | 0 | o << "hyphen,"; |
151 | 0 | break; |
152 | 0 | case 6: |
153 | 0 | o << "date,"; |
154 | 0 | break; |
155 | 0 | case 7: |
156 | 0 | o << "time,"; |
157 | 0 | break; |
158 | 0 | case 8: |
159 | 0 | o << "title,"; |
160 | 0 | break; |
161 | 0 | case 9: |
162 | 0 | o << "revision,"; |
163 | 0 | break; |
164 | 0 | case 10: |
165 | 0 | o << "sectionnumber,"; |
166 | 0 | break; |
167 | 0 | default: |
168 | 0 | o << "#type=" << tkn.m_type << ","; |
169 | 0 | } |
170 | 0 | if (tkn.m_blockId >= 0) o << "blockId=" << tkn.m_blockId << ","; |
171 | 0 | for (int i = 0; i < 4; i++) { |
172 | 0 | if (tkn.m_flags[i]) o << "fl" << i << "=" << std::hex << tkn.m_flags[i] << ","; |
173 | 0 | } |
174 | 0 | return o; |
175 | 0 | } |
176 | | //! the type |
177 | | int m_type; |
178 | | //! the text length |
179 | | int m_length; |
180 | | //! the block id |
181 | | int m_blockId; |
182 | | //! the bounding box (filled in MWII for a figure) |
183 | | MWAWBox2f m_box; |
184 | | //! some flags |
185 | | unsigned int m_flags[4]; |
186 | | }; |
187 | | |
188 | | //! Internal: a struct used to store a text zone |
189 | | struct TextZone { |
190 | | TextZone() |
191 | 17.0k | : m_textLength(0) |
192 | 17.0k | , m_entries() |
193 | 17.0k | , m_tokens() |
194 | 17.0k | , m_parsed(false) |
195 | 17.0k | { |
196 | 17.0k | } |
197 | | |
198 | | //! the text length |
199 | | int m_textLength; |
200 | | |
201 | | //! the list of entries |
202 | | std::vector<MWAWEntry> m_entries; |
203 | | |
204 | | //! the two lists of ids (character ids, paragraph ids) |
205 | | std::vector<TextZoneData> m_ids[2]; |
206 | | |
207 | | //! the tokens list |
208 | | std::vector<Token> m_tokens; |
209 | | |
210 | | //! true if the zone has been sent |
211 | | bool m_parsed; |
212 | | }; |
213 | | |
214 | | |
215 | | //////////////////////////////////////// |
216 | | //! Internal: the state of a MacWrtProParser |
217 | | struct State { |
218 | | //! constructor |
219 | | State() |
220 | 86.9k | : m_parsedBlocks() |
221 | 86.9k | , m_dataMap() |
222 | 86.9k | , m_textMap() |
223 | 86.9k | , m_graphicIdsCallByTokens() |
224 | 86.9k | , m_fileNumPages(0) |
225 | 86.9k | , m_col(1) |
226 | 86.9k | , m_colSeparator(0.16667) |
227 | 86.9k | , m_actPage(0) |
228 | 86.9k | , m_numPages(0) |
229 | 86.9k | , m_hasTitlePage(false) |
230 | 86.9k | { |
231 | 86.9k | } |
232 | | |
233 | | //! the set of blocks which have already been retrieved |
234 | | std::set<int> m_parsedBlocks; |
235 | | |
236 | | //! the map blockId -> data zone |
237 | | std::map<int, std::shared_ptr<Zone> > m_dataMap; |
238 | | |
239 | | //! the map blockId -> text zone |
240 | | std::map<int, std::shared_ptr<TextZone> > m_textMap; |
241 | | |
242 | | //! the list of graphic ids called by tokens |
243 | | std::vector<int> m_graphicIdsCallByTokens; |
244 | | |
245 | | int m_fileNumPages /** the number of page in MWII */; |
246 | | int m_col /** the number of columns in MWII */; |
247 | | double m_colSeparator /** the columns separator in inch MWII */; |
248 | | int m_actPage /** the actual page */, m_numPages /** the number of page of the final document */; |
249 | | bool m_hasTitlePage /** flag to know if we have a title page */; |
250 | | }; |
251 | | |
252 | | //////////////////////////////////////// |
253 | | //! Internal: the subdocument of a MacWrtProParser |
254 | | class SubDocument final : public MWAWSubDocument |
255 | | { |
256 | | public: |
257 | | SubDocument(MacWrtProParser &pars, MWAWInputStreamPtr const &input, int zoneId) |
258 | 19.3k | : MWAWSubDocument(&pars, input, MWAWEntry()) |
259 | 19.3k | , m_id(zoneId) |
260 | 19.3k | { |
261 | 19.3k | } |
262 | | |
263 | | //! destructor |
264 | 0 | ~SubDocument() final {} |
265 | | |
266 | | //! operator!= |
267 | | bool operator!=(MWAWSubDocument const &doc) const final; |
268 | | |
269 | | //! the parser function |
270 | | void parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType type) final; |
271 | | |
272 | | protected: |
273 | | //! the subdocument id |
274 | | int m_id; |
275 | | }; |
276 | | |
277 | | void SubDocument::parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType /*type*/) |
278 | 16.9k | { |
279 | 16.9k | if (m_id == -3) return; // empty block |
280 | 16.9k | if (!listener.get()) { |
281 | 0 | MWAW_DEBUG_MSG(("MacWrtProParserInternal::SubDocument::parse: no listener\n")); |
282 | 0 | return; |
283 | 0 | } |
284 | 16.9k | auto *parser = dynamic_cast<MacWrtProParser *>(m_parser); |
285 | 16.9k | if (!parser) { |
286 | 0 | MWAW_DEBUG_MSG(("MacWrtProParserInternal::SubDocument::parse: no parser\n")); |
287 | 0 | return; |
288 | 0 | } |
289 | | |
290 | 16.9k | long pos = m_input->tell(); |
291 | 16.9k | if (parser->m_structures.get()) |
292 | 16.9k | parser->m_structures->send(m_id); |
293 | 16.9k | m_input->seek(pos, librevenge::RVNG_SEEK_SET); |
294 | 16.9k | } |
295 | | |
296 | | bool SubDocument::operator!=(MWAWSubDocument const &doc) const |
297 | 42 | { |
298 | 42 | if (MWAWSubDocument::operator!=(doc)) return true; |
299 | 42 | auto const *sDoc = dynamic_cast<SubDocument const *>(&doc); |
300 | 42 | if (!sDoc) return true; |
301 | 42 | if (m_id != sDoc->m_id) return true; |
302 | 20 | return false; |
303 | 42 | } |
304 | | } |
305 | | |
306 | | |
307 | | //////////////////////////////////////////////////////////// |
308 | | // constructor/destructor, ... |
309 | | //////////////////////////////////////////////////////////// |
310 | | MacWrtProParser::MacWrtProParser(MWAWInputStreamPtr const &input, MWAWRSRCParserPtr const &rsrcParser, MWAWHeader *header) |
311 | 36.5k | : MWAWTextParser(input, rsrcParser, header) |
312 | 36.5k | , m_state() |
313 | 36.5k | , m_structures() |
314 | 36.5k | { |
315 | 36.5k | init(); |
316 | 36.5k | } |
317 | | |
318 | | MacWrtProParser::~MacWrtProParser() |
319 | 36.5k | { |
320 | 36.5k | } |
321 | | |
322 | | void MacWrtProParser::init() |
323 | 36.5k | { |
324 | 36.5k | resetTextListener(); |
325 | 36.5k | setAsciiName("main-1"); |
326 | | |
327 | 36.5k | m_state.reset(new MacWrtProParserInternal::State); |
328 | 36.5k | m_structures.reset(new MacWrtProStructures(*this)); |
329 | | |
330 | | // reduce the margins (in case the page is not defined) |
331 | 36.5k | getPageSpan().setMargins(0.1); |
332 | 36.5k | } |
333 | | |
334 | | //////////////////////////////////////////////////////////// |
335 | | // position and height |
336 | | //////////////////////////////////////////////////////////// |
337 | | int MacWrtProParser::numColumns(double &sep) const |
338 | 5.21k | { |
339 | 5.21k | sep=m_state->m_colSeparator; |
340 | 5.21k | if (m_state->m_col <= 1) return 1; |
341 | 2.72k | return m_state->m_col; |
342 | 5.21k | } |
343 | | |
344 | | //////////////////////////////////////////////////////////// |
345 | | // new page |
346 | | //////////////////////////////////////////////////////////// |
347 | | void MacWrtProParser::newPage(int number, bool softBreak) |
348 | 210k | { |
349 | 210k | if (number <= m_state->m_actPage) return; |
350 | 210k | if (number > m_state->m_numPages) { |
351 | 193k | MWAW_DEBUG_MSG(("MacWrtProParser::newPage: can not create new page\n")); |
352 | 193k | return; |
353 | 193k | } |
354 | | |
355 | 35.0k | while (m_state->m_actPage < number) { |
356 | 17.5k | m_state->m_actPage++; |
357 | 17.5k | if (!getTextListener() || m_state->m_actPage == 1) |
358 | 13.3k | continue; |
359 | 4.15k | if (softBreak) |
360 | 866 | getTextListener()->insertBreak(MWAWTextListener::SoftPageBreak); |
361 | 3.29k | else |
362 | 3.29k | getTextListener()->insertBreak(MWAWTextListener::PageBreak); |
363 | 4.15k | } |
364 | 17.5k | } |
365 | | |
366 | | bool MacWrtProParser::hasTitlePage() const |
367 | 3.16k | { |
368 | 3.16k | return m_state->m_hasTitlePage; |
369 | 3.16k | } |
370 | | |
371 | | std::vector<int> const &MacWrtProParser::getGraphicIdCalledByToken() const |
372 | 10.4k | { |
373 | 10.4k | return m_state->m_graphicIdsCallByTokens; |
374 | 10.4k | } |
375 | | |
376 | | std::shared_ptr<MWAWSubDocument> MacWrtProParser::getSubDocument(int blockId) |
377 | 13.2k | { |
378 | 13.2k | return std::make_shared<MacWrtProParserInternal::SubDocument>(*this, getInput(), blockId); |
379 | 13.2k | } |
380 | | |
381 | | //////////////////////////////////////////////////////////// |
382 | | // the parser |
383 | | //////////////////////////////////////////////////////////// |
384 | | void MacWrtProParser::parse(librevenge::RVNGTextInterface *docInterface) |
385 | 13.9k | { |
386 | 13.9k | if (!getInput().get() || !checkHeader(nullptr)) throw(libmwaw::ParseException()); |
387 | 13.9k | bool ok = true; |
388 | 13.9k | try { |
389 | 13.9k | m_state->m_parsedBlocks.clear(); |
390 | | |
391 | | // create the asciiFile |
392 | 13.9k | ascii().setStream(getInput()); |
393 | 13.9k | ascii().open(asciiName()); |
394 | | |
395 | 13.9k | checkHeader(nullptr); |
396 | | |
397 | 13.9k | ok = createZones(); |
398 | 13.9k | if (ok) { |
399 | 13.6k | createDocument(docInterface); |
400 | 13.6k | if (m_structures) { |
401 | 13.6k | m_structures->sendMainZone(); |
402 | 13.6k | m_structures->flushExtra(); |
403 | 13.6k | } |
404 | 13.6k | } |
405 | | |
406 | | #ifdef DEBUG |
407 | | if (version()>0) { |
408 | | std::vector<int> freeList; |
409 | | getFreeZoneList(freeList); |
410 | | for (auto bl : freeList) { |
411 | | ascii().addPos((bl-1)*0x100); |
412 | | ascii().addNote("Entries(Free)"); |
413 | | } |
414 | | } |
415 | | checkUnparsed(); |
416 | | #endif |
417 | | |
418 | 13.9k | ascii().reset(); |
419 | 13.9k | } |
420 | 13.9k | catch (...) { |
421 | 234 | MWAW_DEBUG_MSG(("MacWrtProParser::parse: exception catched when parsing\n")); |
422 | 234 | ok = false; |
423 | 234 | } |
424 | | |
425 | 13.9k | resetTextListener(); |
426 | 13.9k | if (!ok) throw(libmwaw::ParseException()); |
427 | 13.9k | } |
428 | | |
429 | | //////////////////////////////////////////////////////////// |
430 | | // stream (internal) |
431 | | //////////////////////////////////////////////////////////// |
432 | | std::shared_ptr<MWAWStream> MacWrtProParser::getStreamForBlock(int block) |
433 | 33.7k | { |
434 | 33.7k | MWAWInputStreamPtr input=getInput(); |
435 | 33.7k | std::shared_ptr<MWAWStream> res; |
436 | 33.7k | int const vers=version(); |
437 | 33.7k | if (block<3 || !input->checkPosition(0x100*block)) { |
438 | 1.63k | MWAW_DEBUG_MSG(("MacWrtProParser::getStreamForBlock: unexpected block=%d\n", block)); |
439 | 1.63k | return res; |
440 | 1.63k | } |
441 | 32.0k | int const linkSz = vers<= 0 ? 2 : 4; |
442 | 32.0k | input->seek(0x100*block-linkSz, librevenge::RVNG_SEEK_SET); |
443 | 32.0k | if (input->readULong(linkSz)==0) { |
444 | 12.6k | m_state->m_parsedBlocks.insert(block-1); |
445 | 12.6k | input->seek(0x100*(block-1), librevenge::RVNG_SEEK_SET); |
446 | 12.6k | res.reset(new MWAWStream(input, ascii())); |
447 | 12.6k | res->m_bof=0x100*(block-1); |
448 | 12.6k | res->m_eof=0x100*block-linkSz; |
449 | 12.6k | return res; |
450 | 12.6k | } |
451 | 19.4k | int const fBlock=block; |
452 | 19.4k | int numBlock=1, decal=0; |
453 | 19.4k | std::shared_ptr<MWAWStringStream> resInput; |
454 | 90.9k | while (true) { |
455 | 90.9k | --block; // block i : (i-1)00..i00 |
456 | 90.9k | if (numBlock<=0 || block<2 || unsigned(block)+unsigned(numBlock)>=std::numeric_limits<int>::max() || |
457 | 90.5k | !input->checkPosition(0x100*unsigned(block+numBlock))) { |
458 | 10.5k | MWAW_DEBUG_MSG(("MacWrtProParser::getStreamForBlock: can not read some block %dx%d\n", block, numBlock)); |
459 | 10.5k | break; |
460 | 10.5k | } |
461 | 80.3k | bool ok=true; |
462 | 276k | for (int bl=block; bl<block+numBlock; ++bl) { |
463 | 196k | if (m_state->m_parsedBlocks.find(bl)!=m_state->m_parsedBlocks.end()) { |
464 | 433 | MWAW_DEBUG_MSG(("MacWrtProParser::getStreamForBlock: block %d is already m_state->m_parsedBlockss\n", bl)); |
465 | 433 | ok=false; |
466 | 433 | break; |
467 | 433 | } |
468 | 195k | m_state->m_parsedBlocks.insert(bl); |
469 | 195k | } |
470 | 80.3k | if (!ok) break; |
471 | 79.9k | ascii().skipZone(0x100*block, 0x100*(block+numBlock)-1); |
472 | 79.9k | input->seek(0x100*block+decal, librevenge::RVNG_SEEK_SET); |
473 | 79.9k | unsigned long read; |
474 | 79.9k | unsigned long sz=static_cast<unsigned long>(0x100*numBlock-linkSz-decal); |
475 | 79.9k | const unsigned char *dt = input->read(sz, read); |
476 | 79.9k | if (!dt || read != sz) { |
477 | 549 | MWAW_DEBUG_MSG(("MacWrtProParser::getStreamForBlock: can not read some data\n")); |
478 | 549 | break; |
479 | 549 | } |
480 | 79.4k | if (!resInput) |
481 | 19.4k | resInput.reset(new MWAWStringStream(dt, unsigned(sz))); |
482 | 59.9k | else |
483 | 59.9k | resInput->append(dt, unsigned(sz)); |
484 | 79.4k | decal=0; |
485 | 79.4k | numBlock=1; |
486 | 79.4k | block=int(input->readLong(linkSz)); |
487 | 79.4k | if (block==0) break; |
488 | 75.8k | if (block<0) { |
489 | 34.3k | block*=-1; |
490 | 34.3k | if (block<3 || !input->checkPosition(0x100*(block-1)+linkSz)) { |
491 | 4.28k | MWAW_DEBUG_MSG(("MacWrtProParser::getStreamForBlock: bad block %d\n", block)); |
492 | 4.28k | break; |
493 | 4.28k | } |
494 | 30.0k | input->seek(0x100*(block-1), librevenge::RVNG_SEEK_SET); |
495 | 30.0k | numBlock=int(input->readULong(linkSz)); |
496 | 30.0k | decal=linkSz; |
497 | 30.0k | } |
498 | 75.8k | } |
499 | 19.4k | if (!resInput) return res; |
500 | 19.4k | res.reset(new MWAWStream(std::make_shared<MWAWInputStream>(resInput, false))); |
501 | 19.4k | std::stringstream s; |
502 | 19.4k | s << "DataZone" << std::hex << fBlock << std::dec; |
503 | 19.4k | res->m_ascii.open(s.str()); |
504 | 19.4k | res->m_input->seek(0, librevenge::RVNG_SEEK_SET); |
505 | 19.4k | return res; |
506 | 19.4k | } |
507 | | |
508 | | //////////////////////////////////////////////////////////// |
509 | | // return the chained list of blocks (used to get the free blocks) |
510 | | //////////////////////////////////////////////////////////// |
511 | | bool MacWrtProParser::getFreeZoneList(std::vector<int> &blockLists) |
512 | 0 | { |
513 | 0 | blockLists.clear(); |
514 | 0 | MWAWInputStreamPtr input = getInput(); |
515 | 0 | if (!input->checkPosition(0x200) || version() <= 0) |
516 | 0 | return false; |
517 | 0 | input->seek(0x200-4, librevenge::RVNG_SEEK_SET); |
518 | 0 | int blockId=int(input->readULong(4)); |
519 | 0 | if (!blockId) return true; |
520 | | |
521 | 0 | if (blockId<2 || !input->checkPosition(blockId*0x100)) { |
522 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::getFreeZoneList: find a bad free block=%x\n", unsigned(blockId))); |
523 | 0 | return false; |
524 | 0 | } |
525 | 0 | libmwaw::DebugStream f; |
526 | 0 | f << "Entries(Free):"; |
527 | 0 | input->seek((blockId-1)*0x100, librevenge::RVNG_SEEK_SET); |
528 | | // FIXME: use (blockId)*0x100-4 to get the complete list when there is more than 63/64 free blocks |
529 | | // Note: the different blocks seem to be linked together: N free block -> N-1 free block -> ... |
530 | 0 | for (int b=0; b<63; ++b) { // checkme limit=63 or 64 |
531 | 0 | int bId=int(input->readULong(4)); |
532 | 0 | if (bId==0) break; |
533 | 0 | if (bId<2 || !input->checkPosition(bId*0x100) || m_state->m_parsedBlocks.find(bId-1)!=m_state->m_parsedBlocks.end()) { |
534 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::getFreeZoneList: find a bad block %x\n", unsigned(bId))); |
535 | 0 | f << "###" << std::hex << bId << std::dec << ","; |
536 | 0 | break; |
537 | 0 | } |
538 | 0 | f << std::hex << bId << std::dec << ","; |
539 | 0 | blockLists.push_back(bId); |
540 | 0 | m_state->m_parsedBlocks.insert(bId-1); |
541 | 0 | } |
542 | 0 | ascii().addPos((blockId-1)*0x100); |
543 | 0 | ascii().addNote(f.str().c_str()); |
544 | 0 | if (input->tell()!=blockId*0x100) ascii().addDelimiter(input->tell(),'|'); |
545 | 0 | return blockLists.size() != 0; |
546 | 0 | } |
547 | | |
548 | | //////////////////////////////////////////////////////////// |
549 | | // create the document |
550 | | //////////////////////////////////////////////////////////// |
551 | | void MacWrtProParser::createDocument(librevenge::RVNGTextInterface *documentInterface) |
552 | 13.6k | { |
553 | 13.6k | if (!documentInterface) return; |
554 | 13.6k | if (getTextListener()) { |
555 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::createDocument: listener already exist\n")); |
556 | 0 | return; |
557 | 0 | } |
558 | | |
559 | | // update the page |
560 | 13.6k | m_state->m_actPage = 0; |
561 | 13.6k | int numPages = m_structures ? m_structures->numPages() : 0; |
562 | 13.6k | if (numPages <= 0) numPages = 1; |
563 | 13.6k | m_state->m_numPages = numPages; |
564 | | |
565 | | // create the page list |
566 | 13.6k | std::vector<MWAWPageSpan> pageList; |
567 | 31.3k | for (int i = 0; i < m_state->m_numPages;) { |
568 | 17.6k | MWAWPageSpan ps(getPageSpan()); |
569 | 17.6k | if (m_structures) |
570 | 17.6k | m_structures->updatePageSpan(i, m_state->m_hasTitlePage, ps); |
571 | 17.6k | pageList.push_back(ps); |
572 | 17.6k | i+=std::max<int>(1,ps.getPageSpan()); |
573 | 17.6k | } |
574 | | |
575 | | // |
576 | 13.6k | MWAWTextListenerPtr listen(new MWAWTextListener(*getParserState(), pageList, documentInterface)); |
577 | 13.6k | setTextListener(listen); |
578 | 13.6k | listen->startDocument(); |
579 | 13.6k | } |
580 | | |
581 | | |
582 | | //////////////////////////////////////////////////////////// |
583 | | // |
584 | | // Intermediate level |
585 | | // |
586 | | //////////////////////////////////////////////////////////// |
587 | | bool MacWrtProParser::createZones() |
588 | 13.9k | { |
589 | 13.9k | MWAWInputStreamPtr input = getInput(); |
590 | 13.9k | long pos = input->tell(); |
591 | | |
592 | 13.9k | if (!readPrintInfo()) { |
593 | | // can happen in some valid files |
594 | 6.93k | ascii().addPos(pos); |
595 | 6.93k | ascii().addNote("Entries(PrintInfo):###"); |
596 | 6.93k | input->seek(pos+0x78, librevenge::RVNG_SEEK_SET); |
597 | 6.93k | } |
598 | | |
599 | 13.9k | pos = input->tell(); |
600 | 13.9k | if (!readDocHeader()) { |
601 | 0 | ascii().addPos(pos); |
602 | 0 | ascii().addNote("##Entries(Data0)"); |
603 | 0 | } |
604 | | |
605 | | // ok now ask the structure manager to retrieve its data |
606 | 13.9k | auto stream=getStreamForBlock(3); |
607 | 13.9k | if (!stream) |
608 | 0 | return false; |
609 | 13.9k | return m_structures->createZones(stream, m_state->m_fileNumPages); |
610 | 13.9k | } |
611 | | |
612 | | //////////////////////////////////////////////////////////// |
613 | | // |
614 | | // Low level |
615 | | // |
616 | | //////////////////////////////////////////////////////////// |
617 | | |
618 | | //////////////////////////////////////////////////////////// |
619 | | // read the header |
620 | | //////////////////////////////////////////////////////////// |
621 | | bool MacWrtProParser::checkHeader(MWAWHeader *header, bool strict) |
622 | 50.4k | { |
623 | 50.4k | *m_state = MacWrtProParserInternal::State(); |
624 | | |
625 | 50.4k | MWAWInputStreamPtr input = getInput(); |
626 | 50.4k | if (!input || !input->hasDataFork()) |
627 | 0 | return false; |
628 | | |
629 | 50.4k | libmwaw::DebugStream f; |
630 | 50.4k | int const headerSize=4; |
631 | 50.4k | if (!input->checkPosition(0x300)) { |
632 | 313 | MWAW_DEBUG_MSG(("MacWrtProParser::checkHeader: file is too short\n")); |
633 | 313 | return false; |
634 | 313 | } |
635 | 50.1k | input->seek(0,librevenge::RVNG_SEEK_SET); |
636 | | |
637 | 50.1k | auto vers = static_cast<int>(input->readULong(2)); |
638 | 50.1k | auto val = static_cast<int>(input->readULong(2)); |
639 | | |
640 | 50.1k | f << "FileHeader:"; |
641 | 50.1k | switch (vers) { |
642 | 13.3k | case 0x2e: |
643 | 13.3k | vers = 0; |
644 | 13.3k | if (val != 0x2e) |
645 | 0 | return false; |
646 | 13.3k | break; |
647 | 36.7k | case 4: |
648 | 36.7k | vers = 1; |
649 | 36.7k | if (val != 4) { |
650 | | #ifdef DEBUG |
651 | | if (strict || val < 3 || val > 5) |
652 | | return false; |
653 | | f << "#unk=" << val << ","; |
654 | | #else |
655 | 0 | return false; |
656 | 0 | #endif |
657 | 0 | } |
658 | 36.7k | break; |
659 | 36.7k | default: |
660 | 9 | MWAW_DEBUG_MSG(("MacWrtProParser::checkHeader: unknown version\n")); |
661 | 9 | return false; |
662 | 50.1k | } |
663 | 50.1k | setVersion(vers); |
664 | 50.1k | f << "vers=" << vers << ","; |
665 | 50.1k | if (strict) { |
666 | 15.5k | if (vers) { |
667 | 11.2k | input->seek(0xdd, librevenge::RVNG_SEEK_SET); |
668 | | // "MP" always seems to be at this position |
669 | 11.2k | if (input->readULong(2) != 0x4d50) |
670 | 4.71k | return false; |
671 | 11.2k | } |
672 | 4.30k | else if (!readPrintInfo()) { // last chance, check DocHeader |
673 | 2.90k | input->seek(4+0x78+2, librevenge::RVNG_SEEK_SET); |
674 | 2.90k | val=static_cast<int>(input->readULong(2)); |
675 | 2.90k | if ((val&0x0280)!=0x0280) return false; |
676 | 4.34k | for (int i=0; i<4; ++i) { |
677 | 3.60k | val=static_cast<int>(input->readLong(1)); |
678 | 3.60k | if (val<-1 || val>1) return false; |
679 | 3.60k | } |
680 | 1.04k | } |
681 | 15.5k | } |
682 | | |
683 | | |
684 | | // ok, we can finish initialization |
685 | 43.2k | if (header) |
686 | 15.4k | header->reset(MWAWDocument::MWAW_T_MACWRITEPRO, version()); |
687 | | |
688 | | // |
689 | 43.2k | input->seek(headerSize, librevenge::RVNG_SEEK_SET); |
690 | | |
691 | 43.2k | ascii().addPos(0); |
692 | 43.2k | ascii().addNote(f.str().c_str()); |
693 | 43.2k | ascii().addPos(headerSize); |
694 | | |
695 | 43.2k | return true; |
696 | 50.1k | } |
697 | | |
698 | | //////////////////////////////////////////////////////////// |
699 | | // read the print info |
700 | | //////////////////////////////////////////////////////////// |
701 | | bool MacWrtProParser::readPrintInfo() |
702 | 18.2k | { |
703 | 18.2k | MWAWInputStreamPtr input = getInput(); |
704 | 18.2k | long pos = input->tell(); |
705 | 18.2k | libmwaw::DebugStream f; |
706 | | // print info |
707 | 18.2k | libmwaw::PrinterInfo info; |
708 | 18.2k | if (!info.read(input)) return false; |
709 | 9.93k | f << "Entries(PrintInfo):"<< info; |
710 | | |
711 | 9.93k | MWAWVec2i paperSize = info.paper().size(); |
712 | 9.93k | MWAWVec2i pageSize = info.page().size(); |
713 | 9.93k | if (pageSize.x() <= 0 || pageSize.y() <= 0 || |
714 | 9.05k | paperSize.x() <= 0 || paperSize.y() <= 0) return false; |
715 | | |
716 | | // define margin from print info |
717 | 8.37k | MWAWVec2i lTopMargin= -1 * info.paper().pos(0); |
718 | 8.37k | MWAWVec2i rBotMargin=info.paper().pos(1) - info.page().pos(1); |
719 | | |
720 | | // move margin left | top |
721 | 8.37k | int decalX = lTopMargin.x() > 14 ? lTopMargin.x()-14 : 0; |
722 | 8.37k | int decalY = lTopMargin.y() > 14 ? lTopMargin.y()-14 : 0; |
723 | 8.37k | lTopMargin -= MWAWVec2i(decalX, decalY); |
724 | 8.37k | rBotMargin += MWAWVec2i(decalX, decalY); |
725 | | |
726 | | // decrease right | bottom |
727 | 8.37k | int rightMarg = rBotMargin.x() -10; |
728 | 8.37k | if (rightMarg < 0) rightMarg=0; |
729 | 8.37k | int botMarg = rBotMargin.y() -10; |
730 | 8.37k | if (botMarg < 0) botMarg=0; |
731 | | |
732 | 8.37k | getPageSpan().setMarginTop(lTopMargin.y()/72.0); |
733 | 8.37k | getPageSpan().setMarginBottom(botMarg/72.0); |
734 | 8.37k | getPageSpan().setMarginLeft(lTopMargin.x()/72.0); |
735 | 8.37k | getPageSpan().setMarginRight(rightMarg/72.0); |
736 | 8.37k | getPageSpan().setFormLength(paperSize.y()/72.); |
737 | 8.37k | getPageSpan().setFormWidth(paperSize.x()/72.); |
738 | | |
739 | 8.37k | ascii().addPos(pos); |
740 | 8.37k | ascii().addNote(f.str().c_str()); |
741 | 8.37k | input->seek(pos+0x78, librevenge::RVNG_SEEK_SET); |
742 | 8.37k | if (long(input->tell()) != pos+0x78) { |
743 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::readPrintInfo: file is too short\n")); |
744 | 0 | return false; |
745 | 0 | } |
746 | 8.37k | ascii().addPos(input->tell()); |
747 | | |
748 | 8.37k | return true; |
749 | 8.37k | } |
750 | | |
751 | | //////////////////////////////////////////////////////////// |
752 | | // read the document header |
753 | | //////////////////////////////////////////////////////////// |
754 | | bool MacWrtProParser::readDocHeader() |
755 | 13.9k | { |
756 | 13.9k | MWAWInputStreamPtr input = getInput(); |
757 | 13.9k | long pos = input->tell(); |
758 | 13.9k | libmwaw::DebugStream f; |
759 | | |
760 | 13.9k | f << "Entries(Data0):"; |
761 | 13.9k | long val; |
762 | 13.9k | if (version()==0) { |
763 | 3.48k | val = input->readLong(2); // always 0 ? |
764 | 3.48k | if (val) f << "f0=" << val << ","; |
765 | | /* fl0=[2|6|82|86], fl1=[80|a0|a4], other 0|1|-1 */ |
766 | 34.8k | for (int i = 0; i < 9; i++) { |
767 | 31.3k | val = (i<2) ? int(input->readULong(1)) : input->readLong(1); |
768 | 31.3k | if (i==0 && (val&4)) { |
769 | 1.24k | f << "has[titlePage],"; |
770 | 1.24k | m_state->m_hasTitlePage=true; |
771 | 1.24k | val &= 0xfb; // 2 or 82 |
772 | 1.24k | } |
773 | 31.3k | if (!val) continue; |
774 | 19.1k | if (i < 2) |
775 | 4.40k | f << "fl" << i << "=" << std::hex << val << std::dec << ","; |
776 | 14.6k | else |
777 | 14.6k | f << "fl" << i << "=" << val << ","; |
778 | 19.1k | } |
779 | 3.48k | val = input->readLong(2); // always 612 ? |
780 | 3.48k | if (val != 0x612) f << "f1=" << val << ","; |
781 | 3.48k | val = input->readLong(1); // always 1 ? |
782 | 3.48k | if (val != 1) f << "f2=" << val << ","; |
783 | 3.48k | val = input->readLong(2); // always 2 ? |
784 | 3.48k | if (val != 2) f << "f3=" << val << ","; |
785 | 3.48k | val = input->readLong(2); // always 12c ? |
786 | 3.48k | if (val != 0x12c) f << "f4=" << val << ","; |
787 | 13.9k | for (int i = 0; i < 3; i++) { // 0, 0, 3c, a small number |
788 | 10.4k | val = input->readLong(2); |
789 | 10.4k | if (val) f << "g" << i << "=" << val << ","; |
790 | 10.4k | } |
791 | 3.48k | m_state->m_fileNumPages = int(input->readLong(2)); |
792 | 3.48k | if (m_state->m_fileNumPages!=1) |
793 | 3.42k | f << "num[pages]=" << m_state->m_fileNumPages << ","; |
794 | | /* then |
795 | | 0009000020000000fd803333000600000000000120 | |
796 | | 000c000020000000fd803333000600000000000180 | |
797 | | 000c000020000000fd8033330006000000000001a0 | |
798 | | 000c0000200e0000fd8033330006000000000001a0 | |
799 | | 00240000200e0000fd8033330006000000000001a0 |
800 | | |
801 | | and |
802 | | 000001000000016f66000000000000000800090001000000 |
803 | | */ |
804 | 3.48k | } |
805 | 10.4k | else { |
806 | 10.4k | val = input->readLong(1); // always 0 ? |
807 | 10.4k | if (val) f << "unkn=" << val << ","; |
808 | 10.4k | auto N=static_cast<int>(input->readLong(2)); // find 2, a, 9e, 1a |
809 | 10.4k | f << "N?=" << N << ","; |
810 | 10.4k | N = static_cast<int>(input->readLong(1)); // almost always 0, find one time 6 ? |
811 | 10.4k | if (N) f << "N1?=" << N << ","; |
812 | 10.4k | val = static_cast<int>(input->readLong(2)); // almost always 0x622, find also 0 and 12 |
813 | 10.4k | f << "f0=" << std::hex << val << std::dec << ","; |
814 | 10.4k | val = static_cast<int>(input->readLong(1)); // always 0 ? |
815 | 10.4k | if (val) f << "unkn1=" << val << ","; |
816 | 10.4k | N = static_cast<int>(input->readLong(2)); |
817 | 10.4k | f << "N2?=" << N << ","; |
818 | 10.4k | val = input->readLong(1); // almost always 1 ( find one time 2) |
819 | 10.4k | f << "f1=" << val << ","; |
820 | 10.4k | int const defVal[] = { 0x64, 0/*small number between 1 and 8*/, 0x24 }; |
821 | 41.7k | for (int i = 0; i < 3; i++) { |
822 | 31.3k | val = input->readLong(2); |
823 | 31.3k | if (i==1) { |
824 | 10.4k | m_state->m_fileNumPages = int(val); |
825 | 10.4k | if (m_state->m_fileNumPages!=1) |
826 | 10.4k | f << "num[pages]=" << val << ","; |
827 | 10.4k | continue; |
828 | 10.4k | } |
829 | 20.8k | if (val != defVal[i]) |
830 | 9.27k | f << "f" << i+2 << "=" << val << ","; |
831 | 20.8k | } |
832 | 62.6k | for (int i = 5; i < 10; i++) { // always 0 ? |
833 | 52.1k | val = input->readLong(1); |
834 | 52.1k | if (val) |
835 | 15.7k | f << "f" << i << "=" << val << ","; |
836 | 52.1k | } |
837 | 10.4k | val = input->readLong(2); // always 480 ? |
838 | 10.4k | if (val != 0x480) f << "f10=" << val << ","; |
839 | 10.4k | val = long(input->readULong(1)); // always 0 ? |
840 | 10.4k | if (val) f << "f11=" << val << ","; |
841 | 10.4k | } |
842 | 13.9k | float dim[6]; |
843 | 13.9k | bool ok = true; |
844 | 83.4k | for (auto &d : dim) { |
845 | 83.4k | d = float(input->readLong(4))/65356.f; |
846 | 83.4k | if (d<0) ok=false; |
847 | 83.4k | } |
848 | 13.9k | if (ok) ok = dim[0] > dim[2]+dim[3] && dim[1] > dim[4]+dim[5]; |
849 | | |
850 | 13.9k | if (ok) { |
851 | 6.10k | getPageSpan().setMarginTop(double(dim[2])/72.0); |
852 | 6.10k | getPageSpan().setMarginLeft(double(dim[4])/72.0); |
853 | | /* decrease a little the right/bottom margin to allow fonts discrepancy*/ |
854 | 6.10k | getPageSpan().setMarginBottom((dim[3]<36) ? 0.0 : double(dim[3])/72.0-0.1); |
855 | 6.10k | getPageSpan().setMarginRight((dim[5]<18) ? 0.0 : double(dim[5])/72.0-0.1); |
856 | 6.10k | getPageSpan().setFormLength(double(dim[0])/72.); |
857 | 6.10k | getPageSpan().setFormWidth(double(dim[1])/72.); |
858 | 6.10k | } |
859 | 7.80k | else { |
860 | 7.80k | MWAW_DEBUG_MSG(("MacWrtProParser::readDocHeader: find odd page dimensions, ignored\n")); |
861 | 7.80k | f << "#"; |
862 | 7.80k | } |
863 | 13.9k | f << "dim=" << dim[1] << "x" << dim[0] << ","; |
864 | 13.9k | f << "margins=["; // top, bottom, left, right |
865 | 69.5k | for (int i = 2; i < 6; i++) f << dim[i] << ","; |
866 | 13.9k | f << "],"; |
867 | 13.9k | if (version()==0) { |
868 | 3.48k | m_state->m_col = static_cast<int>(input->readLong(2)); |
869 | 3.48k | if (m_state->m_col != 1) f << "col=" << m_state->m_col << ","; |
870 | 3.48k | m_state->m_colSeparator=double(input->readLong(4))/65536./72.; |
871 | 3.48k | f << "col[sep]=" << m_state->m_colSeparator << "in,"; |
872 | 3.48k | } |
873 | | |
874 | 13.9k | ascii().addDelimiter(input->tell(), '|'); |
875 | 13.9k | if (version()>=1) { |
876 | | /** then find |
877 | | 000000fd0000000000018200000100002f00 |
878 | | 44[40|80] followed by something like a7c3ec07|a7c4c3c6 : 2 date |
879 | | 6f6600000000000000080009000105050506010401 |
880 | | */ |
881 | 10.4k | input->seek(20, librevenge::RVNG_SEEK_CUR); |
882 | 10.4k | ascii().addDelimiter(input->tell(), '|'); |
883 | 31.3k | for (int i=0; i<2; ++i) |
884 | 20.8k | f << "date" << i << "=" << convertDateToDebugString(unsigned(input->readULong(4))); |
885 | 10.4k | ascii().addDelimiter(input->tell(), '|'); |
886 | 10.4k | } |
887 | 13.9k | ascii().addPos(pos); |
888 | 13.9k | ascii().addNote(f.str().c_str()); |
889 | | |
890 | 13.9k | f.str(""); |
891 | 13.9k | f << "Data0-A:"; |
892 | 13.9k | if (version()==0) { |
893 | 3.48k | input->seek(pos+120, librevenge::RVNG_SEEK_SET); |
894 | 3.48k | pos = input->tell(); |
895 | 3.48k | f << "selection=[" << input->readULong(4) << "x" << input->readULong(4) << ","; |
896 | 10.4k | for (int i=0; i<2; ++i) { |
897 | 6.96k | val = long(input->readULong(2)); |
898 | 6.96k | if (!val) continue; |
899 | 4.56k | f << (i==0 ? "zone" : "pg") << "=" << val << ","; |
900 | 4.56k | } |
901 | 3.48k | f << "],"; |
902 | 3.48k | } |
903 | 10.4k | else { |
904 | 10.4k | input->seek(pos+97, librevenge::RVNG_SEEK_SET); |
905 | 10.4k | pos = input->tell(); |
906 | 10.4k | val = long(input->readULong(2)); |
907 | 10.4k | if (val != 0x4d50) // MP |
908 | 4.62k | f << "#keyWord=" << std::hex << val <<std::dec; |
909 | | //always 4, 4, 6 ? |
910 | 41.7k | for (int i = 0; i < 3; i++) { |
911 | 31.3k | val = input->readLong(1); |
912 | 31.3k | if ((i==2 && val!=6) || (i < 2 && val != 4)) |
913 | 14.0k | f << "f" << i << "=" << val << ","; |
914 | 31.3k | } |
915 | 73.0k | for (int i = 3; i < 9; i++) { // always 0 ? |
916 | 62.6k | val = input->readLong(2); |
917 | 62.6k | if (val) f << "f" << i << "=" << val << ","; |
918 | 62.6k | } |
919 | 10.4k | } |
920 | | // some dim ? |
921 | 13.9k | f << "dim=["; |
922 | 69.5k | for (int i = 0; i < 4; i++) |
923 | 55.6k | f << input->readLong(2) << ","; |
924 | 13.9k | f << "],"; |
925 | | // always 0x48 0x48 |
926 | 41.7k | for (int i = 0; i < 2; i++) { |
927 | 27.8k | val = input->readLong(2); |
928 | 27.8k | if (val != 0x48) f << "g" << i << "=" << val << ","; |
929 | 27.8k | } |
930 | | // always 0 ? |
931 | 570k | for (int i = 2; i < 42; i++) { |
932 | 556k | val = long(input->readULong(2)); |
933 | 556k | if (val) f << "g" << i << "=" << std::hex << val << std::dec << ","; |
934 | 556k | } |
935 | 13.9k | ascii().addPos(pos); |
936 | 13.9k | ascii().addNote(f.str().c_str()); |
937 | | |
938 | | // then junk ? (ie. find a string portion, a list of 0...), |
939 | 13.9k | pos = input->tell(); |
940 | 13.9k | f.str(""); |
941 | 13.9k | f << "Data0-B:"; |
942 | 13.9k | ascii().addPos(pos); |
943 | 13.9k | ascii().addNote(f.str().c_str()); |
944 | | |
945 | | // interesting data seems to begin again in 0x200... |
946 | 13.9k | input->seek(0x200, librevenge::RVNG_SEEK_SET); |
947 | 13.9k | ascii().addPos(input->tell()); |
948 | 13.9k | ascii().addNote("_"); |
949 | 13.9k | return true; |
950 | 13.9k | } |
951 | | |
952 | | //////////////////////////////////////////////////////////// |
953 | | // try to parse a data zone |
954 | | //////////////////////////////////////////////////////////// |
955 | | bool MacWrtProParser::parseDataZone(int blockId, int type) |
956 | 20.1k | { |
957 | 20.1k | if (m_state->m_dataMap.find(blockId) != m_state->m_dataMap.end()) |
958 | 116 | return true; |
959 | 20.0k | if (blockId < 1) { |
960 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::parseDataZone: block %d seems bad\n", blockId)); |
961 | 0 | return false; |
962 | 0 | } |
963 | 20.0k | if (m_state->m_parsedBlocks.find(blockId-1) != m_state->m_parsedBlocks.end()) { |
964 | 241 | MWAW_DEBUG_MSG(("MacWrtProParser::parseDataZone: block %d is already parsed\n", blockId)); |
965 | 241 | return false; |
966 | 241 | } |
967 | | |
968 | 19.7k | auto input=getInput(); |
969 | 19.7k | long pos=input->tell(); |
970 | 19.7k | std::shared_ptr<MacWrtProParserInternal::Zone> zone(new MacWrtProParserInternal::Zone); |
971 | 19.7k | zone->m_blockId = blockId; |
972 | 19.7k | zone->m_type = type; |
973 | 19.7k | auto &stream = zone->m_stream = getStreamForBlock(blockId); |
974 | 19.7k | if (!stream) |
975 | 1.63k | return false; |
976 | 18.1k | m_state->m_dataMap[blockId] = zone; |
977 | | |
978 | | // ok init is done |
979 | 18.1k | if (type == 0) |
980 | 17.0k | parseTextZone(zone); |
981 | 1.11k | else if (type == 1) |
982 | 1.11k | ; |
983 | 0 | else { |
984 | 0 | libmwaw::DebugStream f; |
985 | 0 | f << "Entries(DataZone):type" << type; |
986 | 0 | stream->m_ascii.addPos(stream->m_input->tell()); |
987 | 0 | stream->m_ascii.addNote(f.str().c_str()); |
988 | 0 | } |
989 | 18.1k | input->seek(pos, librevenge::RVNG_SEEK_SET); |
990 | 18.1k | return true; |
991 | 19.7k | } |
992 | | |
993 | | bool MacWrtProParser::parseTextZone(std::shared_ptr<MacWrtProParserInternal::Zone> zone) |
994 | 17.0k | { |
995 | 17.0k | if (!zone || !zone->m_stream) return false; |
996 | 17.0k | if (zone->m_type != 0) { |
997 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::parseTextZone: not a text zone\n")); |
998 | 0 | return false; |
999 | 0 | } |
1000 | | |
1001 | 17.0k | auto &stream = zone->m_stream; |
1002 | 17.0k | MWAWInputStreamPtr input = stream->m_input; |
1003 | 17.0k | MWAWInputStreamPtr fileInput = getInput(); |
1004 | 17.0k | libmwaw::DebugFile &asciiFile = stream->m_ascii; |
1005 | 17.0k | libmwaw::DebugStream f; |
1006 | | |
1007 | 17.0k | std::shared_ptr<MacWrtProParserInternal::TextZone> text(new MacWrtProParserInternal::TextZone); |
1008 | | |
1009 | 17.0k | long pos = input->tell(); |
1010 | 17.0k | f << "Entries(TextZone):"; |
1011 | 17.0k | text->m_textLength = static_cast<int>(input->readLong(4)); |
1012 | 17.0k | f << "textLength=" << text->m_textLength << ","; |
1013 | | |
1014 | 17.0k | asciiFile.addPos(pos); |
1015 | 17.0k | asciiFile.addNote(f.str().c_str()); |
1016 | | |
1017 | 17.0k | if (!readTextEntries(zone, text->m_entries, text->m_textLength)) |
1018 | 2.26k | return false; |
1019 | 14.7k | m_state->m_textMap[zone->m_blockId] = text; |
1020 | | |
1021 | 14.7k | int n=0; |
1022 | 17.8k | for (auto &entry : text->m_entries) { |
1023 | 17.8k | n++; |
1024 | 17.8k | if (!fileInput->checkPosition(entry.begin())) { |
1025 | 394 | MWAW_DEBUG_MSG(("MacWrtProParser::parseTextZone: bad block id for block %ld\n", long(n-1))); |
1026 | 394 | entry.setBegin(-1); |
1027 | 394 | } |
1028 | 17.8k | } |
1029 | 41.8k | for (int i = 0; i < 2; i++) { |
1030 | 28.7k | if (!readTextIds(zone, text->m_ids[i], text->m_textLength, i)) |
1031 | 1.70k | return true; |
1032 | 28.7k | } |
1033 | | |
1034 | 13.0k | if (!readTextTokens(zone, text->m_tokens, text->m_textLength)) |
1035 | 1.36k | return true; |
1036 | | |
1037 | 11.6k | asciiFile.addPos(input->tell()); |
1038 | 11.6k | asciiFile.addNote("TextZone(end)"); |
1039 | | |
1040 | 11.6k | return true; |
1041 | 13.0k | } |
1042 | | |
1043 | | bool MacWrtProParser::readTextEntries(std::shared_ptr<MacWrtProParserInternal::Zone> zone, |
1044 | | std::vector<MWAWEntry> &res, int textLength) |
1045 | 17.0k | { |
1046 | 17.0k | res.resize(0); |
1047 | 17.0k | int vers = version(); |
1048 | 17.0k | int expectedSize = vers == 0 ? 4 : 6; |
1049 | 17.0k | auto &stream=zone->m_stream; |
1050 | 17.0k | MWAWInputStreamPtr input = stream->m_input; |
1051 | 17.0k | libmwaw::DebugFile &asciiFile = stream->m_ascii; |
1052 | 17.0k | libmwaw::DebugStream f; |
1053 | 17.0k | long pos = input->tell(); |
1054 | | |
1055 | 17.0k | auto sz = static_cast<int>(input->readULong(4)); |
1056 | 17.0k | long endPos = pos+sz+4; |
1057 | 17.0k | if ((sz%expectedSize) != 0 || (long)((unsigned long)pos+(unsigned long)sz)<pos || !stream->checkPosition(endPos)) { |
1058 | 2.26k | MWAW_DEBUG_MSG(("MacWrtProParser::readTextEntries: find an odd size\n")); |
1059 | 2.26k | return false; |
1060 | 2.26k | } |
1061 | | |
1062 | 14.7k | int numElt = sz/expectedSize; |
1063 | 14.7k | f << "TextZone:entry(header),N=" << numElt << ","; |
1064 | 14.7k | asciiFile.addPos(pos); |
1065 | 14.7k | asciiFile.addNote(f.str().c_str()); |
1066 | | |
1067 | 14.7k | int remainLength = textLength; |
1068 | 32.8k | for (int i = 0; i < numElt; i++) { |
1069 | 20.1k | pos = input->tell(); |
1070 | 20.1k | f.str(""); |
1071 | 20.1k | f << "TextZone-" << i << ":entry,"; |
1072 | 20.1k | int unkn = 0; |
1073 | 20.1k | if (vers >= 1) { |
1074 | 20.1k | unkn = static_cast<int>(input->readLong(2)); |
1075 | 20.1k | if (unkn) f << "unkn=" << unkn << ","; |
1076 | 20.1k | } |
1077 | 20.1k | auto bl = static_cast<int>(input->readLong(2)); |
1078 | 20.1k | f << "block=" << std::hex << bl << std::dec << ","; |
1079 | 20.1k | auto nChar = static_cast<int>(input->readULong(2)); |
1080 | 20.1k | f << "blockSz=" << nChar; |
1081 | | |
1082 | 20.1k | if (nChar > remainLength || nChar > 256) { |
1083 | 1.73k | MWAW_DEBUG_MSG(("MacWrtProParser::readTextEntries: bad size for block %d\n", i)); |
1084 | 1.73k | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1085 | 1.73k | break; |
1086 | 1.73k | } |
1087 | 18.3k | remainLength -= nChar; |
1088 | 18.3k | bool ok = bl >= 3 && m_state->m_parsedBlocks.find(bl-1) == m_state->m_parsedBlocks.end(); |
1089 | 18.3k | if (!ok) { |
1090 | 312 | MWAW_DEBUG_MSG(("MacWrtProParser::readTextEntries: bad block id for block %d\n", i)); |
1091 | 312 | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1092 | 312 | break; |
1093 | 312 | } |
1094 | | |
1095 | 18.0k | m_state->m_parsedBlocks.insert(bl-1); |
1096 | 18.0k | asciiFile.addPos(pos); |
1097 | 18.0k | asciiFile.addNote(f.str().c_str()); |
1098 | 18.0k | if (nChar==0) continue; |
1099 | | |
1100 | 17.8k | MWAWEntry entry; |
1101 | 17.8k | entry.setId(unkn); |
1102 | 17.8k | entry.setBegin((bl-1)*0x100); |
1103 | 17.8k | entry.setLength(nChar); |
1104 | 17.8k | res.push_back(entry); |
1105 | 17.8k | } |
1106 | | |
1107 | 14.7k | if (remainLength) { |
1108 | 2.73k | MWAW_DEBUG_MSG(("MacWrtProParser::readTextEntries: can not find %d characters\n", remainLength)); |
1109 | 2.73k | asciiFile.addPos(input->tell()); |
1110 | 2.73k | asciiFile.addNote("TextEntry-#"); |
1111 | 2.73k | } |
1112 | | |
1113 | 14.7k | input->seek(endPos, librevenge::RVNG_SEEK_SET); |
1114 | 14.7k | return true; |
1115 | 17.0k | } |
1116 | | |
1117 | | bool MacWrtProParser::readTextIds(std::shared_ptr<MacWrtProParserInternal::Zone> zone, |
1118 | | std::vector<MacWrtProParserInternal::TextZoneData> &res, |
1119 | | int textLength, int type) |
1120 | 28.7k | { |
1121 | 28.7k | res.resize(0); |
1122 | 28.7k | auto &stream=zone->m_stream; |
1123 | 28.7k | MWAWInputStreamPtr input = stream->m_input; |
1124 | 28.7k | libmwaw::DebugFile &asciiFile = stream->m_ascii; |
1125 | 28.7k | libmwaw::DebugStream f; |
1126 | 28.7k | long pos = input->tell(); |
1127 | | |
1128 | 28.7k | auto val = static_cast<int>(input->readULong(2)); |
1129 | 28.7k | auto sz = static_cast<int>(input->readULong(2)); |
1130 | 28.7k | if (sz == 0) { |
1131 | 1.77k | asciiFile.addPos(pos); |
1132 | 1.77k | asciiFile.addNote("_"); |
1133 | 1.77k | return true; |
1134 | 1.77k | } |
1135 | | |
1136 | 26.9k | long endPos = pos+sz+4; |
1137 | 26.9k | if ((sz%6) != 0 || !stream->checkPosition(endPos)) { |
1138 | 1.28k | MWAW_DEBUG_MSG(("MacWrtProParser::readTextIds: find an odd size\n")); |
1139 | 1.28k | return false; |
1140 | 1.28k | } |
1141 | | |
1142 | 25.6k | int numElt = sz/6; |
1143 | 25.6k | f << "TextZone:type=" << type << "(header),N=" << numElt << ","; |
1144 | 25.6k | if (val) f << "unkn=" << val << ","; |
1145 | 25.6k | asciiFile.addPos(pos); |
1146 | 25.6k | asciiFile.addNote(f.str().c_str()); |
1147 | | |
1148 | 25.6k | long remainLength = textLength; |
1149 | 164k | for (int i = 0; i < numElt; i++) { |
1150 | 141k | MacWrtProParserInternal::TextZoneData data; |
1151 | 141k | data.m_type = type; |
1152 | 141k | pos = input->tell(); |
1153 | 141k | data.m_id = static_cast<int>(input->readLong(2)); |
1154 | 141k | auto nChar = long(input->readULong(4)); |
1155 | 141k | data.m_length = static_cast<int>(nChar); |
1156 | 141k | f.str(""); |
1157 | 141k | f << "TextZone-" << i<< ":" << data; |
1158 | | |
1159 | 141k | if (nChar > remainLength) { |
1160 | 2.71k | MWAW_DEBUG_MSG(("MacWrtProParser::readTextIds: bad size for block %d\n", i)); |
1161 | 2.71k | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1162 | 2.71k | break; |
1163 | 2.71k | } |
1164 | 139k | remainLength -= nChar; |
1165 | 139k | asciiFile.addPos(pos); |
1166 | 139k | asciiFile.addNote(f.str().c_str()); |
1167 | 139k | if (nChar==0) continue; |
1168 | | |
1169 | 127k | res.push_back(data); |
1170 | 127k | } |
1171 | | |
1172 | 25.6k | if (remainLength) { |
1173 | 3.95k | MWAW_DEBUG_MSG(("MacWrtProParser::readTextIds: can not find %ld characters\n", remainLength)); |
1174 | 3.95k | asciiFile.addPos(input->tell()); |
1175 | 3.95k | asciiFile.addNote("TextZone:id-#"); |
1176 | 3.95k | } |
1177 | | |
1178 | 25.6k | input->seek(endPos, librevenge::RVNG_SEEK_SET); |
1179 | 25.6k | return res.size() != 0; |
1180 | 26.9k | } |
1181 | | |
1182 | | bool MacWrtProParser::readTextTokens(std::shared_ptr<MacWrtProParserInternal::Zone> zone, |
1183 | | std::vector<MacWrtProParserInternal::Token> &res, |
1184 | | int textLength) |
1185 | 13.0k | { |
1186 | 13.0k | res.resize(0); |
1187 | 13.0k | int vers = version(); |
1188 | 13.0k | int expectedSz = vers==0 ? 8 : 10; |
1189 | 13.0k | auto &stream=zone->m_stream; |
1190 | 13.0k | MWAWInputStreamPtr input = stream->m_input; |
1191 | 13.0k | libmwaw::DebugFile &asciiFile = stream->m_ascii; |
1192 | 13.0k | libmwaw::DebugStream f; |
1193 | 13.0k | long pos = input->tell(); |
1194 | | |
1195 | 13.0k | auto val = static_cast<int>(input->readULong(2)); |
1196 | 13.0k | if (val && vers == 0) { |
1197 | 4 | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1198 | 4 | asciiFile.addPos(pos); |
1199 | 4 | asciiFile.addNote("_"); |
1200 | 4 | return true; |
1201 | 4 | } |
1202 | 13.0k | long sz = static_cast<int>(input->readULong(2)); |
1203 | 13.0k | if (sz == 0) { |
1204 | 8.85k | asciiFile.addPos(pos); |
1205 | 8.85k | asciiFile.addNote("_"); |
1206 | 8.85k | return true; |
1207 | 8.85k | } |
1208 | | |
1209 | 4.21k | long endPos = pos+sz+4; |
1210 | 4.21k | if ((sz%expectedSz) != 0 || !stream->checkPosition(endPos)) { |
1211 | 1.31k | MWAW_DEBUG_MSG(("MacWrtProParser::readTextTokens: find an odd size\n")); |
1212 | 1.31k | return false; |
1213 | 1.31k | } |
1214 | | |
1215 | 2.89k | auto numElt = int(sz/expectedSz); |
1216 | 2.89k | f << "TextZone:token(header),N=" << numElt << ","; |
1217 | 2.89k | if (val) f << "unkn=" << val << ","; |
1218 | 2.89k | asciiFile.addPos(pos); |
1219 | 2.89k | asciiFile.addNote(f.str().c_str()); |
1220 | | |
1221 | 2.89k | long remainLength = textLength; |
1222 | 2.89k | int numFootnotes = 0; |
1223 | 2.89k | std::vector<int> pictPos; |
1224 | 12.3k | for (int i = 0; i < numElt; i++) { |
1225 | 9.79k | f.str(""); |
1226 | 9.79k | pos = input->tell(); |
1227 | | |
1228 | 9.79k | MacWrtProParserInternal::Token data; |
1229 | 9.79k | data.m_type = static_cast<int>(input->readULong(1)); |
1230 | 9.79k | if (vers==0) { // check me |
1231 | 10 | switch (data.m_type) { |
1232 | 0 | case 2: // page number |
1233 | 0 | data.m_type=1; |
1234 | 0 | break; |
1235 | 1 | case 3: // footnote content |
1236 | 1 | break; |
1237 | 0 | case 4: // figure |
1238 | 0 | break; |
1239 | 0 | case 5: // footnote pos |
1240 | 0 | data.m_type=2; |
1241 | 0 | data.m_blockId = ++numFootnotes; // for MW2 |
1242 | 0 | break; |
1243 | 0 | case 0x15: // Fixme: must find other date |
1244 | 0 | case 0x17: // date alpha |
1245 | 0 | data.m_type=6; |
1246 | 0 | break; |
1247 | 0 | case 0x1a: // time |
1248 | 0 | data.m_type=7; |
1249 | 0 | break; |
1250 | 9 | default: |
1251 | 9 | MWAW_DEBUG_MSG(("MacWrtProParser::readTextTokens: unknown block type %d\n", data.m_type)); |
1252 | 9 | f << "#type=" << data.m_type << ","; |
1253 | 9 | data.m_type = -1; |
1254 | 9 | break; |
1255 | 10 | } |
1256 | 10 | } |
1257 | 9.79k | data.m_flags[0] = static_cast<unsigned int>(input->readULong(1)); |
1258 | 9.79k | auto nChar = long(input->readULong(vers == 0 ? 2 : 4)); |
1259 | 9.79k | data.m_length = static_cast<int>(nChar); |
1260 | | |
1261 | 9.79k | if (vers==0) |
1262 | 10 | data.m_flags[1]=static_cast<unsigned int>(input->readULong(4)); // some kind of ID |
1263 | 9.78k | else { |
1264 | 29.3k | for (int j = 1; j < 3; j++) data.m_flags[j] = static_cast<unsigned int>(input->readULong(1)); |
1265 | 9.78k | data.m_blockId = static_cast<int>(input->readULong(2)); |
1266 | 9.78k | } |
1267 | 9.79k | f << "TextZone-" << i<< ":token," << data; |
1268 | 9.79k | if (nChar > remainLength) { |
1269 | 302 | MWAW_DEBUG_MSG(("MacWrtProParser::readTextTokens: bad size for block %d\n", i)); |
1270 | 302 | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1271 | 302 | break; |
1272 | 302 | } |
1273 | 9.49k | remainLength -= nChar; |
1274 | 9.49k | asciiFile.addPos(pos); |
1275 | 9.49k | asciiFile.addNote(f.str().c_str()); |
1276 | 9.49k | if (data.m_type == 4) pictPos.push_back(static_cast<int>(res.size())); |
1277 | 9.49k | res.push_back(data); |
1278 | | |
1279 | 9.49k | if (vers == 1 && data.m_blockId && (data.m_type == 2 || data.m_type == 4)) |
1280 | 1.67k | m_state->m_graphicIdsCallByTokens.push_back(data.m_blockId); |
1281 | 9.49k | } |
1282 | 2.89k | input->seek(endPos, librevenge::RVNG_SEEK_SET); |
1283 | 2.89k | if (vers == 0 && pictPos.size()) { |
1284 | 0 | size_t numPict = pictPos.size(); |
1285 | | // checkme always inverted ? |
1286 | 0 | for (size_t i = numPict; i > 0; i--) { |
1287 | 0 | MacWrtProParserInternal::Token &token = res[size_t(pictPos[i-1])]; |
1288 | 0 | pos = input->tell(); |
1289 | 0 | f.str(""); |
1290 | 0 | f << "TextZone-pict" << i-1<< ":"; |
1291 | 0 | val = static_cast<int>(input->readLong(2)); |
1292 | 0 | if (val) f << "unkn=" << val << ","; |
1293 | 0 | auto blockId = static_cast<int>(input->readULong(2)); |
1294 | 0 | if (blockId) { |
1295 | 0 | token.m_blockId = blockId; |
1296 | 0 | f << "block=" << blockId << ","; |
1297 | 0 | parseDataZone(blockId,1); |
1298 | 0 | } |
1299 | 0 | sz = long(input->readULong(4)); |
1300 | 0 | f << "sz=" << std::hex << sz << std::dec << ","; |
1301 | 0 | int dim[4]; |
1302 | 0 | for (auto &d : dim) d = static_cast<int>(input->readLong(2)); |
1303 | 0 | token.m_box = MWAWBox2f(MWAWVec2f(float(dim[1]),float(dim[0])), MWAWVec2f(float(dim[3]),float(dim[2]))); |
1304 | 0 | f << "dim=" << token.m_box << ","; |
1305 | 0 | for (auto &d : dim) d = static_cast<int>(input->readLong(2)); |
1306 | 0 | f << "dim2=" << MWAWBox2i(MWAWVec2i(dim[1],dim[0]), MWAWVec2i(dim[3],dim[2])) << ","; |
1307 | | // followed by junk ? |
1308 | 0 | ascii().addDelimiter(input->tell(),'|'); |
1309 | 0 | input->seek(pos+62, librevenge::RVNG_SEEK_SET); |
1310 | 0 | asciiFile.addPos(pos); |
1311 | 0 | asciiFile.addNote(f.str().c_str()); |
1312 | 0 | } |
1313 | 0 | } |
1314 | | |
1315 | 2.89k | return res.size() != 0; |
1316 | 2.89k | } |
1317 | | |
1318 | | //////////////////////////////////////////////////////////// |
1319 | | // try to send a empty zone |
1320 | | //////////////////////////////////////////////////////////// |
1321 | | bool MacWrtProParser::sendEmptyFrameZone(MWAWPosition const &pos, MWAWGraphicStyle const &style) |
1322 | 32 | { |
1323 | 32 | std::shared_ptr<MacWrtProParserInternal::SubDocument> subdoc |
1324 | 32 | (new MacWrtProParserInternal::SubDocument(*this, getInput(), -3)); |
1325 | 32 | if (getTextListener()) |
1326 | 32 | getTextListener()->insertTextBox(pos, subdoc, style); |
1327 | 32 | return true; |
1328 | 32 | } |
1329 | | |
1330 | | //////////////////////////////////////////////////////////// |
1331 | | // try to send a text |
1332 | | //////////////////////////////////////////////////////////// |
1333 | | int MacWrtProParser::findNumHardBreaks(int blockId) |
1334 | 297 | { |
1335 | 297 | auto it = m_state->m_textMap.find(blockId); |
1336 | 297 | if (it == m_state->m_textMap.end()) { |
1337 | 262 | MWAW_DEBUG_MSG(("MacWrtProParser::findNumHardBreaks: can not find text zone\n")); |
1338 | 262 | return 0; |
1339 | 262 | } |
1340 | 35 | return findNumHardBreaks(it->second); |
1341 | 297 | } |
1342 | | |
1343 | | int MacWrtProParser::findNumHardBreaks(std::shared_ptr<MacWrtProParserInternal::TextZone> zone) |
1344 | 35 | { |
1345 | 35 | if (!zone->m_entries.size()) return 0; |
1346 | 1 | int num = 0; |
1347 | 1 | MWAWInputStreamPtr input = getInput(); |
1348 | 1 | for (auto const &entry : zone->m_entries) { |
1349 | 1 | input->seek(entry.begin(), librevenge::RVNG_SEEK_SET); |
1350 | 65 | for (long j = 0; j < entry.length(); j++) { |
1351 | 64 | switch (input->readULong(1)) { |
1352 | 2 | case 0xc: // hard page |
1353 | 2 | case 0xb: // difficult to differentiate column/page break so... |
1354 | 2 | num++; |
1355 | 2 | break; |
1356 | 62 | default: |
1357 | 62 | break; |
1358 | 64 | } |
1359 | 64 | } |
1360 | 1 | } |
1361 | 1 | return num; |
1362 | 1 | } |
1363 | | |
1364 | | //////////////////////////////////////////////////////////// |
1365 | | // try to send a text |
1366 | | //////////////////////////////////////////////////////////// |
1367 | | bool MacWrtProParser::sendTextZone(int blockId, bool mainZone) |
1368 | 19.2k | { |
1369 | 19.2k | auto it = m_state->m_textMap.find(blockId); |
1370 | 19.2k | if (it == m_state->m_textMap.end()) { |
1371 | 3.51k | MWAW_DEBUG_MSG(("MacWrtProParser::sendTextZone: can not find text zone %x\n", unsigned(blockId))); |
1372 | 3.51k | return false; |
1373 | 3.51k | } |
1374 | 15.7k | sendText(it->second, mainZone); |
1375 | 15.7k | return true; |
1376 | 19.2k | } |
1377 | | |
1378 | | bool MacWrtProParser::sendTextBoxZone(int blockId, MWAWPosition const &pos, MWAWGraphicStyle const &style) |
1379 | 5.41k | { |
1380 | 5.41k | std::shared_ptr<MacWrtProParserInternal::SubDocument> subdoc |
1381 | 5.41k | (new MacWrtProParserInternal::SubDocument(*this, getInput(), blockId)); |
1382 | 5.41k | if (getTextListener()) |
1383 | 5.41k | getTextListener()->insertTextBox(pos, subdoc, style); |
1384 | 5.41k | return true; |
1385 | 5.41k | } |
1386 | | |
namespace MacWrtProParserInternal
{
/** Internal and low level: structure used to sort the position of data */
struct DataPosition {
  //! constructor
  explicit DataPosition(int type=-1, int id=-1, long pos=0)
    : m_type(type)
    , m_id(id)
    , m_pos(pos)
  {
  }
  //! the type
  int m_type;
  //! an id
  int m_id;
  //! the position
  long m_pos;
  //! the comparison structure
  struct Compare {
    /** comparison function: orders by position, then by type, then by id.
     *
     * The fields are compared directly (and not by testing the sign of their
     * difference), so that the result stays a valid ordering even when the
     * difference of two values can not be represented.
     */
    bool operator()(DataPosition const &p1, DataPosition const &p2) const
    {
      if (p1.m_pos != p2.m_pos) return p1.m_pos < p2.m_pos;
      if (p1.m_type != p2.m_type) return p1.m_type < p2.m_type;
      return p1.m_id < p2.m_id;
    }
  };
};
}
1419 | | |
1420 | | bool MacWrtProParser::sendText(std::shared_ptr<MacWrtProParserInternal::TextZone> zone, bool mainZone) |
1421 | 15.7k | { |
1422 | 15.7k | if (!zone->m_entries.size()) // can happen in header/footer |
1423 | 1.65k | return false; |
1424 | 14.0k | int vers = version(); |
1425 | 14.0k | MacWrtProStructuresListenerState listenerState(m_structures, mainZone, vers); |
1426 | 14.0k | MacWrtProParserInternal::DataPosition::Compare compareFunction; |
1427 | 14.0k | std::set<MacWrtProParserInternal::DataPosition, MacWrtProParserInternal::DataPosition::Compare> |
1428 | 14.0k | set(compareFunction); |
1429 | 14.0k | long cPos = 0; |
1430 | 32.9k | for (size_t i = 0; i < zone->m_entries.size(); i++) { |
1431 | 18.8k | set.insert(MacWrtProParserInternal::DataPosition(3, static_cast<int>(i), cPos)); |
1432 | 18.8k | cPos += zone->m_entries[i].length(); |
1433 | 18.8k | } |
1434 | 14.0k | set.insert(MacWrtProParserInternal::DataPosition(4, 0, cPos)); |
1435 | 14.0k | cPos = 0; |
1436 | 24.5k | for (size_t i = 0; i < zone->m_tokens.size(); i++) { |
1437 | 10.5k | cPos += zone->m_tokens[i].m_length; |
1438 | 10.5k | set.insert(MacWrtProParserInternal::DataPosition(2, static_cast<int>(i), cPos)); |
1439 | 10.5k | } |
1440 | 42.2k | for (int id = 0; id < 2; id++) { |
1441 | 28.1k | cPos = 0; |
1442 | 165k | for (size_t i = 0; i < zone->m_ids[id].size(); i++) { |
1443 | 137k | set.insert(MacWrtProParserInternal::DataPosition(1-id, static_cast<int>(i), cPos)); |
1444 | 137k | cPos += zone->m_ids[id][i].m_length; |
1445 | 137k | } |
1446 | 28.1k | } |
1447 | 14.0k | std::vector<int> pageBreaks=listenerState.getPageBreaksPos(); |
1448 | 17.8k | for (size_t i = 0; i < pageBreaks.size(); i++) { |
1449 | 3.78k | if (pageBreaks[i]<=0 || pageBreaks[i] >= zone->m_textLength) { |
1450 | 50 | if (pageBreaks[i] >= zone->m_textLength+1) { |
1451 | 50 | MWAW_DEBUG_MSG(("MacWrtProParser::sendText: page breaks seems bad\n")); |
1452 | 50 | } |
1453 | 50 | break; |
1454 | 50 | } |
1455 | 3.73k | set.insert(MacWrtProParserInternal::DataPosition(-1, static_cast<int>(i), pageBreaks[i])); |
1456 | 3.73k | } |
1457 | | |
1458 | 14.0k | MWAWInputStreamPtr input = getInput(); |
1459 | 14.0k | long pos = zone->m_entries[0].begin(); |
1460 | 14.0k | long asciiPos = pos; |
1461 | 14.0k | if (pos > 0) |
1462 | 13.7k | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1463 | | |
1464 | 14.0k | libmwaw::DebugStream f, f2; |
1465 | 14.0k | cPos = 0; |
1466 | 184k | for (auto const &data : set) { |
1467 | 184k | long oldPos = pos; |
1468 | 184k | if (data.m_pos < cPos) { |
1469 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::sendText: position go backward, stop...\n")); |
1470 | 0 | break; |
1471 | 0 | } |
1472 | 184k | if (data.m_pos != cPos) { |
1473 | 135k | if (pos > 0) { |
1474 | 133k | std::string text(""); |
1475 | 41.8M | for (long i = cPos; i < data.m_pos && !input->isEnd(); i++) { |
1476 | 41.7M | auto ch = char(input->readULong(1)); |
1477 | 41.7M | if (!ch) |
1478 | 13.5M | text+= "#"; |
1479 | 28.1M | else { |
1480 | 28.1M | listenerState.sendChar(ch); |
1481 | 28.1M | if (ch > 0 && ch < 20 && ch != 0xd && ch != 0x9) text+="#"; |
1482 | 28.1M | text+=ch; |
1483 | 28.1M | } |
1484 | 41.7M | } |
1485 | 133k | f << "'" << text << "'"; |
1486 | 133k | } |
1487 | | |
1488 | 135k | if (pos > 0 && f.str().length()) { |
1489 | 0 | f2.str(""); |
1490 | 0 | f2 << "Entries(TextContent):" << f.str(); |
1491 | 0 | f.str(""); |
1492 | 0 | ascii().addPos(asciiPos); |
1493 | 0 | ascii().addNote(f2.str().c_str()); |
1494 | 0 | pos += (data.m_pos-cPos); |
1495 | 0 | } |
1496 | | |
1497 | 135k | cPos = data.m_pos; |
1498 | 135k | } |
1499 | 184k | switch (data.m_type) { |
1500 | 3.73k | case -1: |
1501 | 3.73k | listenerState.insertSoftPageBreak(); |
1502 | 3.73k | break; |
1503 | 14.0k | case 4: |
1504 | 32.9k | case 3: |
1505 | 32.9k | if (pos > 0 && (pos&0xFF)) |
1506 | 0 | ascii().addDelimiter(pos,'|'); |
1507 | 32.9k | if (data.m_type == 3) { |
1508 | 18.8k | pos = zone->m_entries[size_t(data.m_id)].begin(); |
1509 | 18.8k | if (pos > 0) |
1510 | 18.4k | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1511 | 18.8k | } |
1512 | 32.9k | break; |
1513 | 10.5k | case 2: { |
1514 | | // save the position because we read some extra data ( footnote, table, textbox) |
1515 | 10.5k | long actPos = input->tell(); |
1516 | 10.5k | switch (zone->m_tokens[size_t(data.m_id)].m_type) { |
1517 | 1.13k | case 1: |
1518 | 1.13k | if (getTextListener()) getTextListener()->insertField(MWAWField(MWAWField::PageNumber)); |
1519 | 1.13k | break; |
1520 | 1.07k | case 2: |
1521 | 1.07k | if (vers == 1 && listenerState.isSent(zone->m_tokens[size_t(data.m_id)].m_blockId)) { |
1522 | 423 | MWAW_DEBUG_MSG(("MacWrtProParser::sendText: footnote is already sent...\n")); |
1523 | 423 | } |
1524 | 654 | else { |
1525 | 654 | int id = zone->m_tokens[size_t(data.m_id)].m_blockId; |
1526 | 654 | if (vers == 0) id = -id; |
1527 | 654 | MWAWSubDocumentPtr subdoc(new MacWrtProParserInternal::SubDocument(*this, getInput(), id)); |
1528 | 654 | getTextListener()->insertNote(MWAWNote(MWAWNote::FootNote), subdoc); |
1529 | 654 | } |
1530 | 1.07k | break; |
1531 | 1.53k | case 3: |
1532 | 1.53k | break; // footnote content, ok |
1533 | 939 | case 4: |
1534 | 939 | if (vers==0) { |
1535 | 0 | MWAWPosition pictPos(MWAWVec2f(0,0), zone->m_tokens[size_t(data.m_id)].m_box.size(), librevenge::RVNG_POINT); |
1536 | 0 | pictPos.setRelativePosition(MWAWPosition::Char, MWAWPosition::XLeft, MWAWPosition::YBottom); |
1537 | 0 | sendPictureZone(zone->m_tokens[size_t(data.m_id)].m_blockId, pictPos); |
1538 | 0 | } |
1539 | 939 | else |
1540 | 939 | listenerState.send(zone->m_tokens[size_t(data.m_id)].m_blockId); |
1541 | 939 | break; |
1542 | 9 | case 5: |
1543 | 9 | break; // hyphen ok |
1544 | 1.14k | case 6: |
1545 | 1.14k | if (getTextListener()) getTextListener()->insertField(MWAWField(MWAWField::Date)); |
1546 | 1.14k | break; |
1547 | 1.15k | case 7: |
1548 | 1.15k | if (getTextListener()) getTextListener()->insertField(MWAWField(MWAWField::Time)); |
1549 | 1.15k | break; |
1550 | 1.07k | case 8: |
1551 | 1.07k | if (getTextListener()) getTextListener()->insertField(MWAWField(MWAWField::Title)); |
1552 | 1.07k | break; |
1553 | 1.07k | case 9: |
1554 | 1.07k | if (getTextListener()) getTextListener()->insertUnicodeString(librevenge::RVNGString("#REVISION#")); |
1555 | 1.07k | break; |
1556 | 1.12k | case 10: |
1557 | 1.12k | if (getTextListener()) { |
1558 | 1.12k | int numSection = listenerState.numSection()+1; |
1559 | 1.12k | std::stringstream s; |
1560 | 1.12k | s << numSection; |
1561 | 1.12k | getTextListener()->insertUnicodeString(librevenge::RVNGString(s.str().c_str())); |
1562 | 1.12k | } |
1563 | 1.12k | break; |
1564 | 249 | default: |
1565 | 249 | break; |
1566 | 10.5k | } |
1567 | 10.5k | f << "token[" << zone->m_tokens[size_t(data.m_id)] << "],"; |
1568 | 10.5k | input->seek(actPos, librevenge::RVNG_SEEK_SET); |
1569 | 10.5k | break; |
1570 | 10.5k | } |
1571 | 75.6k | case 1: |
1572 | 75.6k | if (m_structures) |
1573 | 75.6k | listenerState.sendFont(zone->m_ids[0][size_t(data.m_id)].m_id); |
1574 | 75.6k | f << "[C" << zone->m_ids[0][size_t(data.m_id)].m_id << "],"; |
1575 | 75.6k | break; |
1576 | 61.4k | case 0: |
1577 | 61.4k | if (m_structures) |
1578 | 61.4k | listenerState.sendParagraph(zone->m_ids[1][size_t(data.m_id)].m_id); |
1579 | 61.4k | f << "[P" << zone->m_ids[1][size_t(data.m_id)].m_id << "],"; |
1580 | 61.4k | break; |
1581 | 0 | default: { |
1582 | 0 | static bool firstError = true; |
1583 | 0 | if (firstError) { |
1584 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::sendText: find unexpected data type...\n")); |
1585 | 0 | firstError = false; |
1586 | 0 | } |
1587 | 0 | f << "#"; |
1588 | 0 | break; |
1589 | 10.5k | } |
1590 | | |
1591 | 184k | } |
1592 | 184k | if (pos >= 0 && pos != oldPos) |
1593 | 4.69k | asciiPos = pos; |
1594 | 184k | } |
1595 | | |
1596 | 14.0k | return true; |
1597 | 14.0k | } |
1598 | | |
1599 | | |
1600 | | //////////////////////////////////////////////////////////// |
1601 | | // try to send a picture |
1602 | | //////////////////////////////////////////////////////////// |
1603 | | bool MacWrtProParser::sendPictureZone(int blockId, MWAWPosition const &pictPos, |
1604 | | MWAWGraphicStyle const &style) |
1605 | 1.68k | { |
1606 | 1.68k | auto it = m_state->m_dataMap.find(blockId); |
1607 | 1.68k | if (it == m_state->m_dataMap.end()) { |
1608 | 457 | MWAW_DEBUG_MSG(("MacWrtProParser::sendPictureZone: can not find picture zone\n")); |
1609 | 457 | return false; |
1610 | 457 | } |
1611 | 1.22k | sendPicture(it->second, pictPos, style); |
1612 | 1.22k | return true; |
1613 | 1.68k | } |
1614 | | |
1615 | | bool MacWrtProParser::sendPicture(std::shared_ptr<MacWrtProParserInternal::Zone> zone, |
1616 | | MWAWPosition pictPos, MWAWGraphicStyle const &style) |
1617 | 1.22k | { |
1618 | 1.22k | if (!zone) return false; |
1619 | 1.22k | if (zone->m_type != 1) { |
1620 | 15 | MWAW_DEBUG_MSG(("MacWrtProParser::sendPicture: not a picture date\n")); |
1621 | 15 | return false; |
1622 | 15 | } |
1623 | | |
1624 | 1.21k | zone->m_parsed = true; |
1625 | | |
1626 | | // ok init is done |
1627 | 1.21k | auto &stream=zone->m_stream; |
1628 | 1.21k | MWAWInputStreamPtr input = stream->m_input; |
1629 | 1.21k | libmwaw::DebugFile &asciiFile = stream->m_ascii; |
1630 | 1.21k | long pos=stream->m_bof; |
1631 | 1.21k | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1632 | 1.21k | libmwaw::DebugStream f; |
1633 | | |
1634 | 1.21k | f << "Entries(PICT),"; |
1635 | 1.21k | asciiFile.addPos(pos); |
1636 | 1.21k | asciiFile.addNote(f.str().c_str()); |
1637 | | |
1638 | 1.21k | auto pictSize = long(input->readULong(4)); |
1639 | 1.21k | if (pictSize < 10 || !stream->checkPosition(pos+4+pictSize)) { |
1640 | 792 | MWAW_DEBUG_MSG(("MacWrtProParser::sendPicture: oops a pb with pictSize\n")); |
1641 | 792 | asciiFile.addPos(4); |
1642 | 792 | asciiFile.addNote("#PICT"); |
1643 | 792 | return false; |
1644 | 792 | } |
1645 | 420 | std::shared_ptr<MWAWPict> pict(MWAWPictData::get(input, static_cast<int>(pictSize))); |
1646 | 420 | if (!pict) { |
1647 | | // sometimes this just fails because the pictSize is not correct |
1648 | 217 | input->seek(pos+14, librevenge::RVNG_SEEK_SET); |
1649 | 217 | if (input->readULong(2) == 0x1101) { // try to force the size to be ok |
1650 | 0 | librevenge::RVNGBinaryData data; |
1651 | 0 | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1652 | 0 | input->readDataBlock(4+pictSize, data); |
1653 | 0 | auto *dataPtr=const_cast<unsigned char *>(data.getDataBuffer()); |
1654 | 0 | if (!dataPtr) { |
1655 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::sendPicture: oops where is the picture...\n")); |
1656 | 0 | return false; |
1657 | 0 | } |
1658 | | |
1659 | 0 | dataPtr[4]=dataPtr[2]; |
1660 | 0 | dataPtr[5]=dataPtr[3]; |
1661 | |
|
1662 | 0 | MWAWInputStreamPtr pictInput=MWAWInputStream::get(data, false); |
1663 | 0 | if (!pictInput) { |
1664 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::sendPicture: oops where is the picture input...\n")); |
1665 | 0 | return false; |
1666 | 0 | } |
1667 | | |
1668 | 0 | pictInput->seek(4, librevenge::RVNG_SEEK_SET); |
1669 | 0 | pict.reset(MWAWPictData::get(pictInput, static_cast<int>(pictSize))); |
1670 | 0 | } |
1671 | 217 | } |
1672 | | |
1673 | | #ifdef DEBUG_WITH_FILES |
1674 | | asciiFile.skipZone(pos+4, pos+4+pictSize-1); |
1675 | | librevenge::RVNGBinaryData file; |
1676 | | input->seek(pos+4, librevenge::RVNG_SEEK_SET); |
1677 | | input->readDataBlock(pictSize, file); |
1678 | | static int volatile pictName = 0; |
1679 | | f.str(""); |
1680 | | f << "PICT-" << ++pictName; |
1681 | | libmwaw::Debug::dumpFile(file, f.str().c_str()); |
1682 | | asciiFile.addPos(pos+4+pictSize); |
1683 | | asciiFile.addNote("PICT(end)"); |
1684 | | #endif |
1685 | | |
1686 | 420 | if (!pict) { // ok, we can not do anything except sending the data... |
1687 | 217 | MWAW_DEBUG_MSG(("MacWrtProParser::sendPicture: no sure this is a picture\n")); |
1688 | 217 | if (pictPos.size().x() <= 0 || pictPos.size().y() <= 0) |
1689 | 16 | pictPos=MWAWPosition(MWAWVec2f(0,0),MWAWVec2f(100.,100.), librevenge::RVNG_POINT); |
1690 | 217 | if (getTextListener()) { |
1691 | 217 | librevenge::RVNGBinaryData data; |
1692 | 217 | input->seek(pos+4, librevenge::RVNG_SEEK_SET); |
1693 | 217 | input->readDataBlock(pictSize, data); |
1694 | 217 | getTextListener()->insertPicture(pictPos, MWAWEmbeddedObject(data, "image/pict"), style); |
1695 | 217 | } |
1696 | 217 | return true; |
1697 | 217 | } |
1698 | | |
1699 | 203 | if (pictPos.size().x() <= 0 || pictPos.size().y() <= 0) { |
1700 | 20 | pictPos.setOrigin(MWAWVec2f(0,0)); |
1701 | 20 | pictPos.setSize(pict->getBdBox().size()); |
1702 | 20 | pictPos.setUnit(librevenge::RVNG_POINT); |
1703 | 20 | } |
1704 | 203 | if (pict->getBdBox().size().x() > 0 && pict->getBdBox().size().y() > 0) |
1705 | 0 | pictPos.setNaturalSize(pict->getBdBox().size()); |
1706 | | |
1707 | 203 | if (getTextListener()) { |
1708 | 203 | MWAWEmbeddedObject picture; |
1709 | 203 | if (pict->getBinary(picture)) |
1710 | 203 | getTextListener()->insertPicture(pictPos, picture, style); |
1711 | 203 | } |
1712 | 203 | return true; |
1713 | 420 | } |
1714 | | |
1715 | | //////////////////////////////////////////////////////////// |
1716 | | // some debug functions |
1717 | | //////////////////////////////////////////////////////////// |
1718 | | void MacWrtProParser::checkUnparsed() |
1719 | 0 | { |
1720 | 0 | MWAWInputStreamPtr input = getInput(); |
1721 | 0 | libmwaw::DebugStream f; |
1722 | |
|
1723 | 0 | long pos; |
1724 | 0 | std::stringstream notParsed; |
1725 | 0 | for (int bl = 3; bl < 1000; bl++) { |
1726 | 0 | if (m_state->m_parsedBlocks.find(bl) != m_state->m_parsedBlocks.end()) |
1727 | 0 | continue; |
1728 | | |
1729 | 0 | pos = bl*0x100; |
1730 | 0 | input->seek(pos, librevenge::RVNG_SEEK_SET); |
1731 | 0 | if (input->isEnd()) break; |
1732 | 0 | notParsed << std::hex << bl << std::dec << ","; |
1733 | | |
1734 | | // normaly there must remains only text entry... |
1735 | 0 | f.str(""); |
1736 | 0 | f << "Entries(Unparsed):"; |
1737 | |
|
1738 | 0 | std::string text(""); |
1739 | 0 | bool findZero = false; |
1740 | 0 | for (int c = 0; c < 256; c++) { |
1741 | 0 | auto ch = char(input->readULong(1)); |
1742 | 0 | if (!ch) { |
1743 | 0 | if (findZero) { |
1744 | 0 | input->seek(-1, librevenge::RVNG_SEEK_CUR); |
1745 | 0 | break; |
1746 | 0 | } |
1747 | 0 | findZero = true; |
1748 | 0 | continue; |
1749 | 0 | } |
1750 | 0 | if (findZero) { |
1751 | 0 | text += "#"; |
1752 | 0 | findZero = false; |
1753 | 0 | } |
1754 | 0 | text+=ch; |
1755 | 0 | } |
1756 | 0 | f << text; |
1757 | 0 | if (long(input->tell()) != pos+256) |
1758 | 0 | ascii().addDelimiter(input->tell(),'|'); |
1759 | 0 | ascii().addPos(pos); |
1760 | 0 | ascii().addNote(f.str().c_str()); |
1761 | 0 | } |
1762 | 0 | if (!notParsed.str().empty()) { |
1763 | 0 | MWAW_DEBUG_MSG(("MacWrtProParser::checkUnparsed: not parsed %s\n", notParsed.str().c_str())); |
1764 | 0 | } |
1765 | 0 | } |
1766 | | |
1767 | | std::string MacWrtProParser::convertDateToDebugString(unsigned dt) |
1768 | 58.4k | { |
1769 | 58.4k | int Y, M, D, HH, MM, SS; |
1770 | 58.4k | MWAWCellContent::double2Date(double(dt/3600/24)+1460., Y, M, D); // change the reference date from 1/1/1904 to 1/1/1900 |
1771 | 58.4k | double time=double(dt%(3600*24))/3600/24; |
1772 | 58.4k | MWAWCellContent::double2Time(time, HH, MM, SS); |
1773 | 58.4k | std::stringstream s; |
1774 | 58.4k | s << D << "/" << M << "/" << Y << " " << HH << ":" << MM << ","; |
1775 | 58.4k | return s.str(); |
1776 | 58.4k | } |
1777 | | |
1778 | | // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: |