Coverage Report

Created: 2026-03-12 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmwaw/src/lib/EDocParser.cxx
Line
Count
Source
1
/* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2
3
/* libmwaw
4
* Version: MPL 2.0 / LGPLv2+
5
*
6
* The contents of this file are subject to the Mozilla Public License Version
7
* 2.0 (the "License"); you may not use this file except in compliance with
8
* the License or as specified alternatively below. You may obtain a copy of
9
* the License at http://www.mozilla.org/MPL/
10
*
11
* Software distributed under the License is distributed on an "AS IS" basis,
12
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
* for the specific language governing rights and limitations under the
14
* License.
15
*
16
* Major Contributor(s):
17
* Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18
* Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19
* Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20
* Copyright (C) 2006, 2007 Andrew Ziem
21
* Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22
*
23
*
24
* All Rights Reserved.
25
*
26
* For minor contributions see the git repository.
27
*
28
* Alternatively, the contents of this file may be used under the terms of
29
* the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30
* in which case the provisions of the LGPLv2+ are applicable
31
* instead of those above.
32
*/
33
34
#include <algorithm>
35
#include <iomanip>
36
#include <iostream>
37
#include <limits>
38
#include <set>
39
#include <sstream>
40
41
#include <librevenge/librevenge.h>
42
43
#include "MWAWTextListener.hxx"
44
#include "MWAWFont.hxx"
45
#include "MWAWFontConverter.hxx"
46
#include "MWAWHeader.hxx"
47
#include "MWAWParagraph.hxx"
48
#include "MWAWPictData.hxx"
49
#include "MWAWPosition.hxx"
50
#include "MWAWPrinter.hxx"
51
#include "MWAWRSRCParser.hxx"
52
#include "MWAWSubDocument.hxx"
53
54
#include "libmwaw_internal.hxx"
55
56
#include "EDocParser.hxx"
57
58
/** Internal: the structures of a EDocParser */
59
namespace EDocParserInternal
60
{
61
////////////////////////////////////////
62
//! Internal: an index entry of a EDocParser (one line of the final index page)
struct Index {
  //! constructor: level 0, empty text, unknown page (-1), no extra data
  Index()
    : m_levelId(0)
    , m_text()
    , m_page(-1)
    , m_extra()
  {
  }
  //! operator<<: writes the fields which are set, followed by the extra data
  friend std::ostream &operator<<(std::ostream &o, Index const &index)
  {
    if (!index.m_text.empty())
      o << "text=\"" << index.m_text << "\",";
    if (index.m_levelId != 0)
      o << "levelId=" << index.m_levelId << ",";
    // only strictly positive pages are printed
    if (index.m_page > 0)
      o << "page=" << index.m_page << ",";
    return o << index.m_extra;
  }
  //! the level id
  int m_levelId;
  //! the text
  std::string m_text;
  //! the page number
  int m_page;
  //! extra data
  std::string m_extra;
};
90
91
////////////////////////////////////////
92
//! Internal: the state of a EDocParser
93
struct State {
94
  //! constructor
95
  State()
96
6.42k
    : m_compressed(false)
97
6.42k
    , m_maxPictId(0)
98
6.42k
    , m_idCPICMap()
99
6.42k
    , m_idPICTMap()
100
6.42k
    , m_indexList()
101
6.42k
    , m_actPage(0)
102
6.42k
    , m_numPages(0)
103
6.42k
    , m_headerHeight(0)
104
6.42k
    , m_footerHeight(0)
105
6.42k
  {
106
6.42k
  }
107
  //! a flag to know if the data are compressed or not
108
  bool m_compressed;
109
  //! the maximum of picture to read
110
  int m_maxPictId;
111
  //! a map id -> cPIC zone
112
  std::map<int,MWAWEntry> m_idCPICMap;
113
  //! a map id -> PICT zone
114
  std::map<int,MWAWEntry> m_idPICTMap;
115
  //! the index list
116
  std::vector<Index> m_indexList;
117
  int m_actPage /** the actual page */, m_numPages /** the number of page of the final document */;
118
119
  int m_headerHeight /** the header height if known */,
120
      m_footerHeight /** the footer height if known */;
121
};
122
123
}
124
125
////////////////////////////////////////////////////////////
126
// constructor/destructor, ...
127
////////////////////////////////////////////////////////////
128
//! constructor: forwards the streams/header to MWAWTextParser, then calls init() to build the state
EDocParser::EDocParser(MWAWInputStreamPtr const &input, MWAWRSRCParserPtr const &rsrcParser, MWAWHeader *header)
  : MWAWTextParser(input, rsrcParser, header), m_state()
{
  init();
}
134
135
//! destructor: nothing to release explicitly
EDocParser::~EDocParser() = default;
138
139
void EDocParser::init()
140
2.65k
{
141
2.65k
  resetTextListener();
142
143
2.65k
  m_state.reset(new EDocParserInternal::State);
144
145
  // no margins ( ie. the document is a set of picture corresponding to each page )
146
2.65k
  getPageSpan().setMargins(0.01);
147
2.65k
}
148
149
//! returns the input stream of the resource fork parser
MWAWInputStreamPtr EDocParser::rsrcInput()
{
  MWAWRSRCParserPtr parser = getRSRCParser();
  return parser->getInput();
}
153
154
//! returns the debug file of the resource fork parser
libmwaw::DebugFile &EDocParser::rsrcAscii()
{
  MWAWRSRCParserPtr parser = getRSRCParser();
  return parser->ascii();
}
158
159
////////////////////////////////////////////////////////////
160
// new page
161
////////////////////////////////////////////////////////////
162
void EDocParser::newPage(int number)
163
1.96k
{
164
1.96k
  if (number <= m_state->m_actPage || number > m_state->m_numPages)
165
0
    return;
166
167
3.92k
  while (m_state->m_actPage < number) {
168
1.96k
    m_state->m_actPage++;
169
1.96k
    if (!getTextListener() || m_state->m_actPage == 1)
170
978
      continue;
171
985
    getTextListener()->insertBreak(MWAWTextListener::PageBreak);
172
985
  }
173
1.96k
}
174
175
////////////////////////////////////////////////////////////
176
// the parser
177
////////////////////////////////////////////////////////////
178
/** parses the document and sends its content to \a docInterface.
 *
 * Throws libmwaw::ParseException if an input is missing, if the header check
 * fails, if the zones can not be created or if any exception is raised while
 * parsing. */
void EDocParser::parse(librevenge::RVNGTextInterface *docInterface)
{
  // preconditions, checked in the same order as they are cheap to test
  if (!getInput().get())
    throw(libmwaw::ParseException());
  if (!getRSRCParser())
    throw(libmwaw::ParseException());
  if (!checkHeader(nullptr))
    throw(libmwaw::ParseException());

  bool ok = false;
  try {
    // checkHeader also resets the state
    checkHeader(nullptr);
    ok = createZones();
    if (ok) {
      createDocument(docInterface);
      sendContents();
#ifdef DEBUG
      flushExtra();
#endif
    }
    ascii().reset();
  }
  catch (...) {
    MWAW_DEBUG_MSG(("EDocParser::parse: exception catched when parsing\n"));
    ok = false;
  }

  // the listener is released whatever the result is
  resetTextListener();
  if (!ok)
    throw(libmwaw::ParseException());
}
202
203
////////////////////////////////////////////////////////////
204
// create the document
205
////////////////////////////////////////////////////////////
206
//! creates the text listener and starts the document (does nothing without interface or if a listener exists)
void EDocParser::createDocument(librevenge::RVNGTextInterface *documentInterface)
{
  if (!documentInterface)
    return;
  if (getTextListener()) {
    MWAW_DEBUG_MSG(("EDocParser::createDocument: listener already exist\n"));
    return;
  }

  // update the page
  m_state->m_actPage = 0;

  // one page by picture, one more page if there is an index, and at least one page
  int const indexPages = m_state->m_indexList.empty() ? 0 : 1;
  int const numPages = std::max(1, m_state->m_maxPictId + indexPages);
  m_state->m_numPages = numPages;

  // create the page list: a single span which covers all the pages
  MWAWPageSpan ps(getPageSpan());
  ps.setPageSpan(numPages+1);
  std::vector<MWAWPageSpan> pageList;
  pageList.push_back(ps);

  MWAWTextListenerPtr listen(new MWAWTextListener(*getParserState(), pageList, documentInterface));
  setTextListener(listen);
  listen->startDocument();
}
232
233
234
////////////////////////////////////////////////////////////
235
//
236
// Intermediate level
237
//
238
////////////////////////////////////////////////////////////
239
bool EDocParser::createZones()
240
1.10k
{
241
1.10k
  MWAWRSRCParserPtr rsrcParser = getRSRCParser();
242
1.10k
  auto const &entryMap = rsrcParser->getEntriesMap();
243
244
  // the 128 zone
245
1.10k
  auto it = entryMap.lower_bound("eDcF");
246
2.00k
  while (it != entryMap.end()) {
247
1.00k
    if (it->first != "eDcF")
248
106
      break;
249
897
    MWAWEntry const &entry = it++->second;
250
897
    readFontsName(entry);
251
897
  }
252
1.10k
  it = entryMap.lower_bound("eIdx");
253
1.10k
  while (it != entryMap.end()) {
254
76
    if (it->first != "eIdx")
255
76
      break;
256
0
    MWAWEntry const &entry = it++->second;
257
0
    readIndex(entry);
258
0
  }
259
1.10k
  it = entryMap.lower_bound("Info");
260
2.96k
  while (it != entryMap.end()) {
261
2.91k
    if (it->first != "Info")
262
1.05k
      break;
263
1.85k
    MWAWEntry const &entry = it++->second;
264
1.85k
    readInfo(entry);
265
1.85k
  }
266
1.10k
  bool res=findContents();
267
#ifdef DEBUG_WITH_FILES
268
  // get rid of the default application resource
269
  libmwaw::DebugFile &ascFile = rsrcAscii();
270
  static char const* const appliRsrc[]= {
271
    // default, Dialog (3000: DLOG,DITL,DLGX,dctb","ictb","STR ")
272
    "ALRT","BNDL","CNTL","CURS","CDEF", "DLOG","DLGX","DITL","FREF","ICON",
273
    "ICN#","MENU","SIZE","WIND",
274
    "cicn","crsr","dctb","icl4","icl8", "ics4","ics8","ics#","ictb","mstr",
275
    "snd ",
276
    "eSRD"
277
  };
278
  for (int r=0; r < 14+11+1; r++) {
279
    it = entryMap.lower_bound(appliRsrc[r]);
280
    while (it != entryMap.end()) {
281
      if (it->first != appliRsrc[r])
282
        break;
283
      MWAWEntry const &entry = it++->second;
284
      if (entry.isParsed()) continue;
285
      entry.setParsed(true);
286
      ascFile.skipZone(entry.begin()-4,entry.end()-1);
287
    }
288
  }
289
#endif
290
1.10k
  return res;
291
1.10k
}
292
293
bool EDocParser::findContents()
294
1.10k
{
295
1.10k
  MWAWRSRCParserPtr rsrcParser = getRSRCParser();
296
1.10k
  auto const &entryMap = rsrcParser->getEntriesMap();
297
298
  /* if the data is compress, we must look for cPIC zone ; if not,
299
     we look for the first PICT zone.
300
     Note: maybe we can also find text in TEXT zone, but I never see that
301
   */
302
1.10k
  char const *wh[2]= {"cPIC", "PICT"};
303
3.31k
  for (int st = 0; st < 2; st++) {
304
2.20k
    std::map<int, MWAWEntry> &map=st==0 ? m_state->m_idCPICMap : m_state->m_idPICTMap;
305
2.20k
    std::set<int> seens;
306
2.20k
    auto it = entryMap.lower_bound(wh[st]);
307
3.23k
    while (it != entryMap.end()) {
308
3.05k
      if (it->first != wh[st])
309
2.02k
        break;
310
1.02k
      MWAWEntry const &entry = it++->second;
311
1.02k
      map[entry.id()]= entry;
312
1.02k
      seens.insert(entry.id());
313
1.02k
    }
314
2.20k
    if (seens.empty() || m_state->m_maxPictId)
315
1.18k
      continue;
316
1.02k
    auto sIt=seens.lower_bound(1);
317
1.02k
    if (sIt==seens.end()|| *sIt>10)
318
42
      continue;
319
978
    int maxId=*sIt;
320
1.95k
    while (sIt!=seens.end() && *sIt<maxId+5)
321
978
      maxId=*(sIt++);
322
978
    m_state->m_maxPictId=maxId;
323
978
    m_state->m_compressed=(st==0);
324
978
  }
325
326
1.10k
  return true;
327
1.10k
}
328
329
bool EDocParser::sendContents()
330
1.10k
{
331
1.10k
  bool compressed=m_state->m_compressed;
332
1.10k
  int actPage=0;
333
3.06k
  for (int i=1; i <= m_state->m_maxPictId; i++) {
334
1.96k
    newPage(++actPage);
335
1.96k
    sendPicture(i, compressed);
336
1.96k
  }
337
1.10k
  if (!m_state->m_indexList.empty()) {
338
0
    newPage(++actPage);
339
0
    sendIndex();
340
0
  }
341
1.10k
  return true;
342
1.10k
}
343
344
bool EDocParser::sendPicture(int pictId, bool compressed)
345
1.96k
{
346
1.96k
  if (!getTextListener()) {
347
0
    MWAW_DEBUG_MSG(("EDocParser::sendPicture: can not find the listener\n"));
348
0
    return false;
349
0
  }
350
1.96k
  std::map<int, MWAWEntry>::const_iterator it;
351
1.96k
  librevenge::RVNGBinaryData data;
352
1.96k
  if (compressed) {
353
1.96k
    it = m_state->m_idCPICMap.find(pictId);
354
1.96k
    if (it==m_state->m_idCPICMap.end() || !decodeZone(it->second,data))
355
1.24k
      return false;
356
1.96k
  }
357
0
  else {
358
0
    it = m_state->m_idPICTMap.find(pictId);
359
0
    if (it==m_state->m_idPICTMap.end() ||
360
0
        !getRSRCParser()->parsePICT(it->second, data))
361
0
      return false;
362
0
  }
363
364
721
  auto dataSz=int(data.size());
365
721
  if (!dataSz)
366
0
    return false;
367
721
  MWAWInputStreamPtr pictInput=MWAWInputStream::get(data, false);
368
721
  if (!pictInput) {
369
0
    MWAW_DEBUG_MSG(("EDocParser::sendPicture: oops can not find an input\n"));
370
0
    return false;
371
0
  }
372
721
  MWAWBox2f box;
373
721
  auto res = MWAWPictData::check(pictInput, dataSz,box);
374
721
  if (res == MWAWPict::MWAW_R_BAD) {
375
230
    MWAW_DEBUG_MSG(("EDocParser::sendPicture: can not find the picture\n"));
376
230
    return false;
377
230
  }
378
491
  pictInput->seek(0,librevenge::RVNG_SEEK_SET);
379
491
  std::shared_ptr<MWAWPict> thePict(MWAWPictData::get(pictInput, dataSz));
380
491
  MWAWPosition pictPos=MWAWPosition(MWAWVec2f(0,0),box.size(), librevenge::RVNG_POINT);
381
491
  pictPos.setRelativePosition(MWAWPosition::Char);
382
491
  if (thePict) {
383
491
    MWAWEmbeddedObject picture;
384
491
    if (thePict->getBinary(picture))
385
491
      getTextListener()->insertPicture(pictPos, picture);
386
491
  }
387
491
  return true;
388
721
}
389
390
//! debug only: sends the cPIC and PICT zones which are not marked as parsed
void EDocParser::flushExtra()
{
#ifdef DEBUG
  for (auto const &idEntry : m_state->m_idCPICMap) {
    if (!idEntry.second.isParsed())
      sendPicture(idEntry.second.id(), true);
  }
  for (auto const &idEntry : m_state->m_idPICTMap) {
    if (!idEntry.second.isParsed())
      sendPicture(idEntry.second.id(), false);
  }
#endif
}
405
406
////////////////////////////////////////////////////////////
407
//
408
// Low level
409
//
410
////////////////////////////////////////////////////////////
411
412
413
// the font name
414
/** reads a font names zone: a 2 bytes counter N followed by N records of
 * 0x100 bytes. The content is only written in the debug file.
 *
 * \return false if the zone length or the counter is inconsistent */
bool EDocParser::readFontsName(MWAWEntry const &entry)
{
  long const length = entry.length();
  // the length must be 2 + a multiple of 0x100
  if (!entry.valid() || (length%0x100)!=2) {
    MWAW_DEBUG_MSG(("EDocParser::readFontsName: the entry seems very short\n"));
    return false;
  }

  entry.setParsed(true);
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  long pos = entry.begin();
  input->seek(pos, librevenge::RVNG_SEEK_SET);

  libmwaw::DebugStream f;
  f << "Entries(FontsName):";
  if (entry.id()!=128)
    f << "#id=" << entry.id() << ",";
  auto const N = static_cast<int>(input->readULong(2));
  f << "N=" << N << ",";
  ascFile.addPos(pos-4);
  ascFile.addNote(f.str().c_str());
  if (N*0x100+2 != length) {
    MWAW_DEBUG_MSG(("EDocParser::readFontsName: the number of elements seems bad\n"));
    return false;
  }

  for (int i = 0; i < N; ++i) {
    pos = input->tell();

    f.str("");
    f << "FontsName-" << i << ":";
    // the name: a length byte followed by the characters
    auto const fSz = static_cast<int>(input->readULong(1));
    if (fSz > 0 && fSz < 255) {
      std::string name;
      for (int c = 0; c < fSz; ++c)
        name.push_back(char(input->readULong(1)));
      f << "\"" << name << "\",";
    }
    else {
      f << "##" << fSz << ",";
      MWAW_DEBUG_MSG(("EDocParser::readFontsName: the font name %d seems bad\n", i));
    }

    // the remaining 112 shorts of the record start at offset 32
    input->seek(pos+32, librevenge::RVNG_SEEK_SET);
    for (int j = 0; j < 112; ++j) { // always 0 .
      auto const val = static_cast<int>(input->readLong(2));
      if (val != 0)
        f << "f" << j << "=" << val << ",";
    }
    ascFile.addPos(pos);
    ascFile.addNote(f.str().c_str());
    // go to the next record
    input->seek(pos+0x100, librevenge::RVNG_SEEK_SET);
  }
  return true;
}
466
467
// the index
468
bool EDocParser::sendIndex()
469
0
{
470
0
  if (!getTextListener()) {
471
0
    MWAW_DEBUG_MSG(("EDocParser::sendIndex: can not find the listener\n"));
472
0
    return false;
473
0
  }
474
0
  if (!m_state->m_indexList.size())
475
0
    return true;
476
477
0
  double w = getPageWidth();
478
0
  MWAWParagraph para;
479
0
  MWAWTabStop tab;
480
0
  tab.m_alignment = MWAWTabStop::RIGHT;
481
0
  tab.m_leaderCharacter='.';
482
0
  tab.m_position = w-0.3;
483
484
0
  para.m_tabs->push_back(tab);
485
0
  para.m_marginsUnit=librevenge::RVNG_INCH;
486
487
0
  MWAWFont cFont(3,10);
488
0
  cFont.setFlags(MWAWFont::boldBit);
489
0
  MWAWFont actFont(3,12);
490
491
0
  getTextListener()->insertEOL();
492
0
  std::stringstream ss;
493
0
  for (auto const &index : m_state->m_indexList) {
494
0
    para.m_margins[0] = 0.3*double(index.m_levelId+1);
495
0
    getTextListener()->setParagraph(para);
496
0
    getTextListener()->setFont(actFont);
497
0
    for (char c : index.m_text)
498
0
      getTextListener()->insertCharacter(static_cast<unsigned char>(c));
499
500
0
    if (index.m_page >= 0) {
501
0
      getTextListener()->setFont(cFont);
502
0
      getTextListener()->insertTab();
503
0
      ss.str("");
504
0
      ss << index.m_page;
505
0
      getTextListener()->insertUnicodeString(librevenge::RVNGString(ss.str().c_str()));
506
0
    }
507
0
    getTextListener()->insertEOL();
508
0
  }
509
0
  return true;
510
0
}
511
512
/** reads an index zone: a 20 bytes header (which contains the number of
 * entries N) followed by N entries; each entry which is read is appended to
 * the state index list.
 *
 * \return false if the zone is too short or if an entry can not be read
 * (the entries read before the problem remain in the list) */
bool EDocParser::readIndex(MWAWEntry const &entry)
{
  long const length = entry.length();
  if (!entry.valid() || length < 20) {
    MWAW_DEBUG_MSG(("EDocParser::readIndex: the entry seems very short\n"));
    return false;
  }

  entry.setParsed(true);
  long const endPos = entry.end();
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  long pos = entry.begin();
  input->seek(pos, librevenge::RVNG_SEEK_SET);

  // the header
  libmwaw::DebugStream f;
  f << "Entries(Index):";
  if (entry.id()!=128)
    f << "#id=" << entry.id() << ",";
  auto val = static_cast<int>(input->readULong(2));
  if (val != 0) // 100 ?
    f << "f0=" << std::hex << val << std::dec << ",";
  auto const N = static_cast<int>(input->readULong(2));
  f << "N=" << N << ",";
  for (int i = 0; i < 8; ++i) { // always 0
    val = static_cast<int>(input->readLong(2));
    if (val != 0)
      f << "f" << i << "=" << val << ",";
  }
  ascFile.addPos(pos-4);
  ascFile.addNote(f.str().c_str());
  // each entry needs at least 14 bytes
  if (N*14+20 > length) {
    MWAW_DEBUG_MSG(("EDocParser::readIndex: the number of elements seems bad\n"));
    return false;
  }

  // the entries
  for (int i = 0; i < N; ++i) {
    pos = input->tell();
    f.str("");
    if (pos+14 > endPos) {
      f << "Index-" << i << ":###";
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());

      MWAW_DEBUG_MSG(("EDocParser::readIndex: can not read index %d\n", i));
      return false;
    }

    EDocParserInternal::Index index;
    val = static_cast<int>(input->readULong(1)); // 0|80
    if (val != 0)
      f << "fl=" << std::hex << val << std::dec << ",";
    index.m_levelId = static_cast<int>(input->readULong(1));
    index.m_page = static_cast<int>(input->readLong(2));
    // f1: y pos, other 0
    for (int j = 0; j < 4; ++j) {
      val = static_cast<int>(input->readLong(2));
      if (val != 0)
        f << "f" << j << "=" << val << ",";
    }

    // the text: a length byte (at offset 12) followed by the characters
    auto const fSz = static_cast<int>(input->readULong(1));
    if (pos+13+fSz > endPos) {
      index.m_extra = f.str();
      f.str("");
      f << "Index-" << i << ":" << index << "###";
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());

      MWAW_DEBUG_MSG(("EDocParser::readIndex: can not read index %d text\n", i));
      return false;
    }
    std::string text;
    for (int j = 0; j < fSz; ++j)
      text.push_back(char(input->readULong(1)));
    index.m_text = text;
    index.m_extra = f.str();
    m_state->m_indexList.push_back(index);

    f.str("");
    f << "Index-" << i << ":" << index;
    // skip one more byte when the text length is even
    if ((fSz%2) == 0)
      input->seek(1, librevenge::RVNG_SEEK_CUR);
    ascFile.addPos(pos);
    ascFile.addNote(f.str().c_str());
  }
  return true;
}
595
596
// the document information
597
/** reads a document information zone (at least 0x68 bytes).
 *
 * The only data which are kept are the page dimensions, which are stored in
 * the page span when both are in ]100,2000[; the other fields are only
 * written in the debug file.
 *
 * \return false if the zone is too short */
bool EDocParser::readInfo(MWAWEntry const &entry)
{
  long const length = entry.length();
  if (!entry.valid() || length < 0x68) {
    MWAW_DEBUG_MSG(("EDocParser::readInfo: the entry seems very short\n"));
    return false;
  }

  entry.setParsed(true);
  long const pos = entry.begin();
  long const endPos = entry.end();
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  input->seek(pos, librevenge::RVNG_SEEK_SET);

  libmwaw::DebugStream f;
  f << "Entries(Info):";
  if (entry.id()!=128)
    f << "#id=" << entry.id() << ",";
  int val;
  for (int i = 0; i < 4; ++i) { // f0=0, other big number
    val = static_cast<int>(input->readULong(2));
    if (val != 0)
      f << "f" << i << "=" << std::hex << val << std::dec << ",";
  }
  // creator, file name: two strings stored in 32 bytes fields
  for (int i = 0; i < 2; ++i) {
    auto const sz = static_cast<int>(input->readULong(1));
    if (sz <= 31) {
      std::string name;
      for (int c = 0; c < sz; ++c)
        name.push_back(char(input->readULong(1)));
      f << name << ",";
    }
    else {
      MWAW_DEBUG_MSG(("EDocParser::readInfo: can not read string %d\n", i));
      f << "###,";
    }
    // go to the end of the field
    input->seek(pos+8+(i+1)*32, librevenge::RVNG_SEEK_SET);
  }
  for (int i = 0; i < 5; ++i) { // always 4, 0, 210, 0, 0 ?
    val = static_cast<int>(input->readLong(2));
    if (val != 0)
      f << "g" << i << "=" << val << ",";
  }

  // the page dimensions: length then width, divided by 72 when stored in the page span
  int dim[2];
  for (auto &d : dim)
    d = static_cast<int>(input->readLong(2));
  f << "dim?=" << dim[1] << "x" << dim[0] << ",";
  bool const dimOk = dim[1]>100 && dim[1]<2000 && dim[0]>100 && dim[0]<2000;
  if (dimOk) {
    getPageSpan().setFormLength(double(dim[0])/72.);
    getPageSpan().setFormWidth(double(dim[1])/72.);
  }
  else {
    MWAW_DEBUG_MSG(("EDocParser::readInfo: the page dimension seems bad\n"));
    f << "###,";
  }

  auto const N = static_cast<int>(input->readLong(2));
  f << "numPict?=" << N << ","; // seems ok in eDcR, but no in eSRD
  for (int i = 0; i < 2; ++i) { // fl0=hasIndex ?, fl1=0
    val = static_cast<int>(input->readLong(1));
    if (val != 0)
      f << "fl" << i << "=" << val << ",";
  }
  val = static_cast<int>(input->readLong(2)); // 0 or bf
  if (val != 0)
    f << "g5=" << val << ",";
  for (int i = 0; i < 3; ++i) { // 3 big number: some size?
    val = static_cast<int>(input->readULong(4));
    if (val != 0)
      f << "f" << i << "=" << std::hex << val << std::dec << ",";
  }

  ascFile.addDelimiter(input->tell(),'|');
  ascFile.addPos(pos-4);
  ascFile.addNote(f.str().c_str());
  // note the unparsed remainder of the zone, if any
  if (input->tell() != endPos) {
    ascFile.addPos(input->tell());
    ascFile.addNote("Info(II)");
  }
  return true;
}
676
677
// code to uncompress data ( very low level)
678
namespace EDocParserInternal
679
{
680
//! very low structure to help uncompress data
681
struct DeflateStruct {
682
  //! constructor
683
  DeflateStruct(long size, long initSize)
684
922
    : m_toWrite(size)
685
922
    , m_data()
686
922
    , m_circQueue(0x2000,0)
687
922
    , m_circQueuePos(0)
688
922
    , m_numDelayed(0)
689
922
    , m_delayedChar('\0')
690
922
  {
691
922
    m_data.reserve(size_t(initSize));
692
922
  }
693
  //! true if we have build of the data
694
  bool isEnd() const
695
246k
  {
696
246k
    return m_toWrite <= 0;
697
246k
  }
698
  //! push a new character
699
  bool push(unsigned char c)
700
729k
  {
701
729k
    if (m_toWrite <= 0) return false;
702
727k
    m_circQueue[m_circQueuePos++]=c;
703
727k
    if (m_circQueuePos==0x2000)
704
0
      m_circQueuePos=0;
705
727k
    if (m_numDelayed)
706
14.6k
      return treatDelayed(c);
707
712k
    if (c==0x81 && m_toWrite!=1) {
708
9.21k
      m_numDelayed++;
709
9.21k
      return true;
710
9.21k
    }
711
703k
    m_delayedChar=c;
712
703k
    m_data.push_back(c);
713
703k
    m_toWrite--;
714
703k
    return true;
715
712k
  }
716
  //! send a duplicated part of the data
717
  bool sendDuplicated(int num, int depl);
718
  //! check if there is delayed char, if so treat them
719
  bool treatDelayed(unsigned char c);
720
  //! return the content of the block in dt
721
  bool getBinaryData(librevenge::RVNGBinaryData &dt) const
722
721
  {
723
721
    dt.clear();
724
721
    if (m_data.empty()) return false;
725
721
    unsigned char const *firstPos=&m_data[0];
726
721
    dt.append(firstPos, static_cast<unsigned long>(m_data.size()));
727
721
    return true;
728
721
  }
729
protected:
730
  //! the number of data that we need to write
731
  long m_toWrite;
732
  //! the resulting data
733
  std::vector<unsigned char> m_data;
734
735
  //! a circular queue
736
  std::vector<unsigned char> m_circQueue;
737
  //! the position in the circular queue
738
  size_t m_circQueuePos;
739
  //! the number of character delayed
740
  int m_numDelayed;
741
  //! the delayed character
742
  unsigned char m_delayedChar;
743
private:
744
  DeflateStruct(DeflateStruct const &orig) = delete;
745
  DeflateStruct &operator=(DeflateStruct const &orig) = delete;
746
};
747
748
bool DeflateStruct::sendDuplicated(int num, int depl)
749
91.7k
{
750
91.7k
  int readPos=int(m_circQueuePos)+depl;
751
100k
  while (readPos < 0) readPos+=0x2000;
752
91.7k
  while (readPos >= 0x2000) readPos-=0x2000;
753
754
668k
  while (num-->0) {
755
577k
    push(m_circQueue[size_t(readPos++)]);
756
577k
    if (readPos==0x2000)
757
411
      readPos=0;
758
577k
  }
759
91.7k
  return true;
760
91.7k
}
761
bool DeflateStruct::treatDelayed(unsigned char c)
762
14.6k
{
763
14.6k
  if (m_toWrite <= 0)
764
0
    return false;
765
14.6k
  if (m_numDelayed==1) {
766
9.21k
    if (c==0x82) {
767
5.48k
      m_numDelayed++;
768
5.48k
      return true;
769
5.48k
    }
770
3.73k
    m_delayedChar=0x81;
771
3.73k
    m_data.push_back(m_delayedChar);
772
3.73k
    if (--m_toWrite==0) return true;
773
3.72k
    if (c==0x81 && m_toWrite==1)
774
3
      return true;
775
3.72k
    m_numDelayed=0;
776
3.72k
    m_delayedChar=c;
777
3.72k
    m_data.push_back(c);
778
3.72k
    m_toWrite--;
779
3.72k
    return true;
780
3.72k
  }
781
782
5.48k
  m_numDelayed=0;
783
5.48k
  if (c==0) {
784
1.42k
    m_data.push_back(0x81);
785
1.42k
    if (--m_toWrite==0) return true;
786
1.42k
    m_delayedChar=0x82;
787
1.42k
    m_data.push_back(m_delayedChar);
788
1.42k
    m_toWrite--;
789
1.42k
    return true;
790
1.42k
  }
791
4.05k
  if (c-1 > m_toWrite) return false;
792
50.5k
  for (int i = 0; i < int(c-1); i++)
793
46.5k
    m_data.push_back(m_delayedChar);
794
4.00k
  m_toWrite -= (c-1);
795
4.00k
  return true;
796
4.05k
}
797
}
798
799
/** uncompresses a cPIC zone.
 *
 * The zone begins with three 4 bytes values: the zone length, the size of the
 * uncompressed data and a check sum (which is only written in the debug file).
 * It is followed by one or more blocks; each block contains three tables of
 * code lengths (stored as nibbles), then a bit stream which codes either a
 * literal byte (first table) or a copy of previous data defined by a number
 * of bytes (second table) and a displacement (third table + 6 bits).
 *
 * \param entry the zone to decode
 * \param data the uncompressed data (cleared on entry)
 * \return true if some data have been uncompressed; false if the header or a
 * table is invalid or if no data have been produced */
bool EDocParser::decodeZone(MWAWEntry const &entry, librevenge::RVNGBinaryData &data)
{
  data.clear();
  long length = entry.length();
  if (!entry.valid() || length<0x21+12) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: the entry seems very short\n"));
    return false;
  }
  entry.setParsed(true);
  long pos = entry.begin();
  long endPos = entry.end();
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  input->seek(pos, librevenge::RVNG_SEEK_SET);

  libmwaw::DebugStream f;
  f << "Entries(CompressZone):";
  if (long(input->readULong(4))!=length) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: unexpected zone size\n"));
    return false;
  }
  auto zoneSize=long(input->readULong(4));
  f << "sz[final]=" << std::hex << zoneSize << std::dec << ",";

  // the value is read as an unsigned 32 bits integer: when long is a 32 bits
  // type, a big value becomes negative and must also be rejected, if not, it
  // would be used as the capacity to reserve for the result
  if (zoneSize <= 0) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: unexpected final zone size\n"));
    return false;
  }
  f << "checkSum=" << std::hex << input->readULong(4) << std::dec << ",";

  ascFile.addPos(pos-4);
  ascFile.addNote(f.str().c_str());

  // make an initial size estimate to avoid big allocation in case zoneSize is damaged
  const long maxInputSize = input->size() - input->tell();
  const long initSize = (zoneSize / 4 > maxInputSize) ? 4 * maxInputSize : zoneSize;
  EDocParserInternal::DeflateStruct deflate(zoneSize, initSize);
  // the maximum number of bytes of each code length table (two code lengths by byte)
  int const maxData[]= {0x80, 0x20, 0x40};

  while (!deflate.isEnd() && input->tell() < endPos-3) {
    // only find a simple compress zone but seems ok to have more

    // vectors32K[st]: 15 bits prefix -> symbol ; originalValues[st]: symbol -> code length
    std::vector<unsigned char> vectors32K[3];
    std::vector<unsigned char> originalValues[3];
    for (int st=0; st < 3; st++) {
      pos=input->tell();
      f.str("");
      f << "CompressZone[data" << st << "]:";
      auto num=static_cast<int>(input->readULong(1));
      f << "num=" << num << ",";
      if (num > maxData[st] || pos+1+num > endPos) {
        MWAW_DEBUG_MSG(("EDocParser::decodeZone: find unexpected num of data : %d for zone %d\n", num, st));
        f << "###";

        ascFile.addPos(pos);
        ascFile.addNote(f.str().c_str());
        return false;
      }

      // read the code lengths and sort the symbols by code length
      // (symbols with the same length remain in increasing order)
      std::multimap<int,int> lengthToSymbol;
      originalValues[st].resize(size_t(maxData[st])*2, 0);
      for (int i = 0; i < num; i++) {
        int const val=static_cast<int>(input->readULong(1));
        int const nibbles[2]= {val>>4, val&0xF};
        for (int b=0; b < 2; b++) {
          originalValues[st][size_t(2*i+b)]=static_cast<unsigned char>(nibbles[b]);
          if (nibbles[b]==0) // unused symbol
            continue;
          lengthToSymbol.insert(std::multimap<int,int>::value_type(nibbles[b],2*i+b));
        }
      }

      // a symbol with code length l fills 0x8000>>l consecutive cells
      vectors32K[st].resize(0x8000,0);
      int writePos=0;
      for (auto const &it : lengthToSymbol) {
        int const n=0x8000>>(it.first);
        if (writePos+n>0x8000) {
          MWAW_DEBUG_MSG(("EDocParser::decodeZone: find unexpected value writePos=%x for zone %d\n",static_cast<unsigned int>(writePos+n), st));

          f << "###";

          ascFile.addPos(pos);
          ascFile.addNote(f.str().c_str());
          return false;
        }
        std::fill_n(vectors32K[st].begin()+writePos, n, static_cast<unsigned char>(it.second));
        writePos+=n;
      }

      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());
    }

    pos = input->tell();
    // the bit buffer: the 16 high bits of value are the next bits to decode,
    // numBits is the number of valid bits stored after them
    int numBits=0;
    unsigned int value=(static_cast<unsigned int>(input->readULong(2)))<<16;
    // removes nBits bits from the buffer, reading 2 more bytes if needed
    auto skipBits = [&](int nBits) {
      if (numBits<nBits) {
        value = (value<<numBits);
        nBits -= numBits;
        value |= static_cast<unsigned int>(input->readULong(2));
        numBits=16;
      }
      value=(value<<nBits);
      numBits-=nBits;
    };

    // NOTE(review): the counter decreases by 2 or 3 and the loop only stops on
    // this counter when it is exactly 0; if it becomes negative, the loop only
    // ends with the two tests below. Confirm that this is intended.
    long maxBlockSz=0xFFF0;
    while (maxBlockSz) {
      if (deflate.isEnd() || input->tell()>endPos) break;
      int const ind0=int(value>>16);
      if (ind0 & 0x8000) {
        // a literal byte
        auto const literal = static_cast<int>(vectors32K[0][size_t(ind0&0x7FFF)]);
        skipBits(originalValues[0][size_t(literal)]+1);

        deflate.push(static_cast<unsigned char>(literal));
        maxBlockSz-=2;
        continue;
      }

      // a copy: first the number of bytes to copy
      auto const numToCopy = static_cast<int>(vectors32K[1][size_t(ind0)]);
      skipBits(originalValues[1][size_t(numToCopy)]+1);
      // then the displacement: a symbol (high bits) followed by 6 raw bits (low bits)
      int depl = static_cast<int>(vectors32K[2][size_t(value>>17)]);
      skipBits(originalValues[2][size_t(depl)]);
      depl=int(value>>26) | (depl<<6);
      skipBits(6);

      deflate.sendDuplicated(numToCopy, -depl);
      maxBlockSz-=3;
    }
  }

  if (input->tell()!=endPos) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: unexpected end of data\n"));
    ascFile.addPos(input->tell());
    ascFile.addNote("CompressZone[after]");
  }
  bool res = deflate.getBinaryData(data);
  ascFile.skipZone(pos,input->tell()-1);
#if defined(DEBUG_WITH_FILES)
  if (res) {
    static int volatile cPictName = 0;
    libmwaw::DebugStream f2;
    f2 << "CPICT" << ++cPictName << ".pct";
    libmwaw::Debug::dumpFile(data, f2.str().c_str());
  }
#endif
  return res;
}
966
967
////////////////////////////////////////////////////////////
968
// read the header
969
////////////////////////////////////////////////////////////
970
/** resets the state and checks whether the file can be an eDoc document.
 *
 * \param header if not null, it is reset with the eDoc type and the version
 * \param strict if true, a font names resource (eDcF) must exist
 * \return false if an input is missing or, in strict mode, if no eDcF resource is found */
bool EDocParser::checkHeader(MWAWHeader *header, bool strict)
{
  *m_state = EDocParserInternal::State();

  MWAWInputStreamPtr input = getInput();
  if (!input || !getRSRCParser())
    return false;
  // a data fork is not expected; this is only reported
  if (input->hasDataFork()) {
    MWAW_DEBUG_MSG(("EDocParser::checkHeader: find a datafork, odd!!!\n"));
  }
  if (strict) {
    // check that the fontname zone exists
    auto const &entryMap = getRSRCParser()->getEntriesMap();
    if (entryMap.count("eDcF") == 0)
      return false;
  }

  if (header)
    header->reset(MWAWDocument::MWAW_T_EDOC, version());
  return true;
}
992
993
// vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: