Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmwaw/src/lib/EDocParser.cxx
Line
Count
Source
1
/* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2
3
/* libmwaw
4
* Version: MPL 2.0 / LGPLv2+
5
*
6
* The contents of this file are subject to the Mozilla Public License Version
7
* 2.0 (the "License"); you may not use this file except in compliance with
8
* the License or as specified alternatively below. You may obtain a copy of
9
* the License at http://www.mozilla.org/MPL/
10
*
11
* Software distributed under the License is distributed on an "AS IS" basis,
12
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13
* for the specific language governing rights and limitations under the
14
* License.
15
*
16
* Major Contributor(s):
17
* Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18
* Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19
* Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20
* Copyright (C) 2006, 2007 Andrew Ziem
21
* Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22
*
23
*
24
* All Rights Reserved.
25
*
26
* For minor contributions see the git repository.
27
*
28
* Alternatively, the contents of this file may be used under the terms of
29
* the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30
* in which case the provisions of the LGPLv2+ are applicable
31
* instead of those above.
32
*/
33
34
#include <algorithm>
35
#include <iomanip>
36
#include <iostream>
37
#include <limits>
38
#include <set>
39
#include <sstream>
40
41
#include <librevenge/librevenge.h>
42
43
#include "MWAWTextListener.hxx"
44
#include "MWAWFont.hxx"
45
#include "MWAWFontConverter.hxx"
46
#include "MWAWHeader.hxx"
47
#include "MWAWParagraph.hxx"
48
#include "MWAWPictData.hxx"
49
#include "MWAWPosition.hxx"
50
#include "MWAWPrinter.hxx"
51
#include "MWAWRSRCParser.hxx"
52
#include "MWAWSubDocument.hxx"
53
54
#include "libmwaw_internal.hxx"
55
56
#include "EDocParser.hxx"
57
58
/** Internal: the structures of a EDocParser */
59
namespace EDocParserInternal
60
{
61
////////////////////////////////////////
62
//! Internal: an index of a EDocParser
63
struct Index {
  //! constructor: an entry of level 0, with no text and no page number (-1)
  Index()
    : m_levelId(0)
    , m_text("")
    , m_page(-1)
    , m_extra("")
  {
  }
  /** operator<<: writes the fields which are set as a comma separated list
      (text, level id, page) followed by the extra data */
  friend std::ostream &operator<<(std::ostream &o, Index const &index)
  {
    if (!index.m_text.empty()) o << "text=\"" << index.m_text << "\",";
    if (index.m_levelId) o << "levelId=" << index.m_levelId << ",";
    // -1 is the "no page" default, so 0 is a value which must be printed
    if (index.m_page>=0) o << "page=" << index.m_page << ",";
    o << index.m_extra;
    return o;
  }
  //! the level id
  int m_levelId;
  //! the text
  std::string m_text;
  //! the page number (-1 if unset)
  int m_page;
  //! extra data (a debug string)
  std::string m_extra;
};
90
91
////////////////////////////////////////
92
//! Internal: the state of a EDocParser
93
struct State {
94
  //! constructor
95
  State()
96
8.16k
    : m_compressed(false)
97
8.16k
    , m_maxPictId(0)
98
8.16k
    , m_idCPICMap()
99
8.16k
    , m_idPICTMap()
100
8.16k
    , m_indexList()
101
8.16k
    , m_actPage(0)
102
8.16k
    , m_numPages(0)
103
8.16k
    , m_headerHeight(0)
104
8.16k
    , m_footerHeight(0)
105
8.16k
  {
106
8.16k
  }
107
  //! a flag to know if the data are compressed or not
108
  bool m_compressed;
109
  //! the maximum of picture to read
110
  int m_maxPictId;
111
  //! a map id -> cPIC zone
112
  std::map<int, MWAWEntry> m_idCPICMap;
113
  //! a map id -> PICT zone
114
  std::map<int, MWAWEntry> m_idPICTMap;
115
  //! the index list
116
  std::vector<Index> m_indexList;
117
  int m_actPage /** the actual page */, m_numPages /** the number of page of the final document */;
118
119
  int m_headerHeight /** the header height if known */,
120
      m_footerHeight /** the footer height if known */;
121
};
122
123
}
124
125
////////////////////////////////////////////////////////////
126
// constructor/destructor, ...
127
////////////////////////////////////////////////////////////
128
/** constructor: stores the data fork input, the resource fork parser and the header,
    then calls init() which creates the parser state */
EDocParser::EDocParser(MWAWInputStreamPtr const &input, MWAWRSRCParserPtr const &rsrcParser, MWAWHeader *header)
  : MWAWTextParser(input, rsrcParser, header), m_state()
{
  init();
}
134
135
//! destructor: nothing to release explicitly
EDocParser::~EDocParser() = default;
138
139
void EDocParser::init()
140
3.37k
{
141
3.37k
  resetTextListener();
142
143
3.37k
  m_state.reset(new EDocParserInternal::State);
144
145
  // no margins ( ie. the document is a set of picture corresponding to each page )
146
3.37k
  getPageSpan().setMargins(0.01);
147
3.37k
}
148
149
//! returns the input stream of the resource fork parser
MWAWInputStreamPtr EDocParser::rsrcInput()
{
  MWAWRSRCParserPtr const parser = getRSRCParser();
  return parser->getInput();
}
153
154
//! returns the debug file of the resource fork parser
libmwaw::DebugFile &EDocParser::rsrcAscii()
{
  MWAWRSRCParserPtr const parser = getRSRCParser();
  return parser->ascii();
}
158
159
////////////////////////////////////////////////////////////
160
// new page
161
////////////////////////////////////////////////////////////
162
void EDocParser::newPage(int number)
163
2.52k
{
164
2.52k
  if (number <= m_state->m_actPage || number > m_state->m_numPages)
165
0
    return;
166
167
5.04k
  while (m_state->m_actPage < number) {
168
2.52k
    m_state->m_actPage++;
169
2.52k
    if (!getTextListener() || m_state->m_actPage == 1)
170
1.25k
      continue;
171
1.26k
    getTextListener()->insertBreak(MWAWTextListener::PageBreak);
172
1.26k
  }
173
2.52k
}
174
175
////////////////////////////////////////////////////////////
176
// the parser
177
////////////////////////////////////////////////////////////
178
/** the main parsing function: creates the zones, then the document and sends the contents
    to \a docInterface.

    Throws a libmwaw::ParseException if the inputs are missing, if the header check fails,
    if the zones can not be created or if an exception is caught while parsing. */
void EDocParser::parse(librevenge::RVNGTextInterface *docInterface)
{
  bool const canParse = getInput().get() && getRSRCParser() && checkHeader(nullptr);
  if (!canParse)
    throw(libmwaw::ParseException());

  bool ok = false;
  try {
    checkHeader(nullptr);
    if (createZones()) {
      ok = true;
      createDocument(docInterface);
      sendContents();
#ifdef DEBUG
      flushExtra();
#endif
    }
    ascii().reset();
  }
  catch (...) {
    MWAW_DEBUG_MSG(("EDocParser::parse: exception catched when parsing\n"));
    ok = false;
  }

  // the listener is always released, even when the parsing fails
  resetTextListener();
  if (!ok)
    throw(libmwaw::ParseException());
}
202
203
////////////////////////////////////////////////////////////
204
// create the document
205
////////////////////////////////////////////////////////////
206
/** creates the text listener which sends data to \a documentInterface and starts the document.

    Does nothing if \a documentInterface is null or if a listener already exists. */
void EDocParser::createDocument(librevenge::RVNGTextInterface *documentInterface)
{
  if (!documentInterface)
    return;
  if (getTextListener()) {
    MWAW_DEBUG_MSG(("EDocParser::createDocument: listener already exist\n"));
    return;
  }

  auto &state = *m_state;
  // update the page
  state.m_actPage = 0;

  // create the page list: one page by picture, one more if there is an index, at least one page
  int const numIndexPages = state.m_indexList.empty() ? 0 : 1;
  int const numPages = std::max(1, state.m_maxPictId + numIndexPages);
  state.m_numPages = numPages;

  MWAWPageSpan span(getPageSpan());
  span.setPageSpan(numPages+1);
  std::vector<MWAWPageSpan> pages(1, span);

  MWAWTextListenerPtr listener(new MWAWTextListener(*getParserState(), pages, documentInterface));
  setTextListener(listener);
  listener->startDocument();
}
232
233
234
////////////////////////////////////////////////////////////
235
//
236
// Intermediate level
237
//
238
////////////////////////////////////////////////////////////
239
bool EDocParser::createZones()
240
1.41k
{
241
1.41k
  MWAWRSRCParserPtr rsrcParser = getRSRCParser();
242
1.41k
  auto const &entryMap = rsrcParser->getEntriesMap();
243
244
  // the 128 zone
245
1.41k
  auto it = entryMap.lower_bound("eDcF");
246
2.57k
  while (it != entryMap.end()) {
247
1.30k
    if (it->first != "eDcF")
248
141
      break;
249
1.16k
    MWAWEntry const &entry = it++->second;
250
1.16k
    readFontsName(entry);
251
1.16k
  }
252
1.41k
  it = entryMap.lower_bound("eIdx");
253
1.41k
  while (it != entryMap.end()) {
254
108
    if (it->first != "eIdx")
255
108
      break;
256
0
    MWAWEntry const &entry = it++->second;
257
0
    readIndex(entry);
258
0
  }
259
1.41k
  it = entryMap.lower_bound("Info");
260
3.77k
  while (it != entryMap.end()) {
261
3.71k
    if (it->first != "Info")
262
1.35k
      break;
263
2.36k
    MWAWEntry const &entry = it++->second;
264
2.36k
    readInfo(entry);
265
2.36k
  }
266
1.41k
  bool res=findContents();
267
#ifdef DEBUG_WITH_FILES
268
  // get rid of the default application resource
269
  libmwaw::DebugFile &ascFile = rsrcAscii();
270
  static char const* const appliRsrc[]= {
271
    // default, Dialog (3000: DLOG,DITL,DLGX,dctb","ictb","STR ")
272
    "ALRT","BNDL","CNTL","CURS","CDEF", "DLOG","DLGX","DITL","FREF","ICON",
273
    "ICN#","MENU","SIZE","WIND",
274
    "cicn","crsr","dctb","icl4","icl8", "ics4","ics8","ics#","ictb","mstr",
275
    "snd ",
276
    "eSRD"
277
  };
278
  for (int r=0; r < 14+11+1; r++) {
279
    it = entryMap.lower_bound(appliRsrc[r]);
280
    while (it != entryMap.end()) {
281
      if (it->first != appliRsrc[r])
282
        break;
283
      MWAWEntry const &entry = it++->second;
284
      if (entry.isParsed()) continue;
285
      entry.setParsed(true);
286
      ascFile.skipZone(entry.begin()-4,entry.end()-1);
287
    }
288
  }
289
#endif
290
1.41k
  return res;
291
1.41k
}
292
293
bool EDocParser::findContents()
294
1.41k
{
295
1.41k
  MWAWRSRCParserPtr rsrcParser = getRSRCParser();
296
1.41k
  auto const &entryMap = rsrcParser->getEntriesMap();
297
298
  /* if the data is compress, we must look for cPIC zone ; if not,
299
     we look for the first PICT zone.
300
     Note: maybe we can also find text in TEXT zone, but I never see that
301
   */
302
1.41k
  char const *wh[2]= {"cPIC", "PICT"};
303
4.23k
  for (int st = 0; st < 2; st++) {
304
2.82k
    std::map<int, MWAWEntry> &map=st==0 ? m_state->m_idCPICMap : m_state->m_idPICTMap;
305
2.82k
    std::set<int> seens;
306
2.82k
    auto it = entryMap.lower_bound(wh[st]);
307
4.16k
    while (it != entryMap.end()) {
308
3.93k
      if (it->first != wh[st])
309
2.59k
        break;
310
1.33k
      MWAWEntry const &entry = it++->second;
311
1.33k
      map[entry.id()]= entry;
312
1.33k
      seens.insert(entry.id());
313
1.33k
    }
314
2.82k
    if (seens.empty() || m_state->m_maxPictId)
315
1.50k
      continue;
316
1.31k
    auto sIt=seens.lower_bound(1);
317
1.31k
    if (sIt==seens.end()|| *sIt>10)
318
61
      continue;
319
1.25k
    int maxId=*sIt;
320
2.51k
    while (sIt!=seens.end() && *sIt<maxId+5)
321
1.25k
      maxId=*(sIt++);
322
1.25k
    m_state->m_maxPictId=maxId;
323
1.25k
    m_state->m_compressed=(st==0);
324
1.25k
  }
325
326
1.41k
  return true;
327
1.41k
}
328
329
bool EDocParser::sendContents()
330
1.41k
{
331
1.41k
  bool compressed=m_state->m_compressed;
332
1.41k
  int actPage=0;
333
3.93k
  for (int i=1; i <= m_state->m_maxPictId; i++) {
334
2.52k
    newPage(++actPage);
335
2.52k
    sendPicture(i, compressed);
336
2.52k
  }
337
1.41k
  if (!m_state->m_indexList.empty()) {
338
0
    newPage(++actPage);
339
0
    sendIndex();
340
0
  }
341
1.41k
  return true;
342
1.41k
}
343
344
bool EDocParser::sendPicture(int pictId, bool compressed)
345
2.52k
{
346
2.52k
  if (!getTextListener()) {
347
0
    MWAW_DEBUG_MSG(("EDocParser::sendPicture: can not find the listener\n"));
348
0
    return false;
349
0
  }
350
2.52k
  std::map<int, MWAWEntry>::const_iterator it;
351
2.52k
  librevenge::RVNGBinaryData data;
352
2.52k
  if (compressed) {
353
2.52k
    it = m_state->m_idCPICMap.find(pictId);
354
2.52k
    if (it==m_state->m_idCPICMap.end() || !decodeZone(it->second,data))
355
1.61k
      return false;
356
2.52k
  }
357
0
  else {
358
0
    it = m_state->m_idPICTMap.find(pictId);
359
0
    if (it==m_state->m_idPICTMap.end() ||
360
0
        !getRSRCParser()->parsePICT(it->second, data))
361
0
      return false;
362
0
  }
363
364
910
  auto dataSz=int(data.size());
365
910
  if (!dataSz)
366
0
    return false;
367
910
  MWAWInputStreamPtr pictInput=MWAWInputStream::get(data, false);
368
910
  if (!pictInput) {
369
0
    MWAW_DEBUG_MSG(("EDocParser::sendPicture: oops can not find an input\n"));
370
0
    return false;
371
0
  }
372
910
  MWAWBox2f box;
373
910
  auto res = MWAWPictData::check(pictInput, dataSz,box);
374
910
  if (res == MWAWPict::MWAW_R_BAD) {
375
314
    MWAW_DEBUG_MSG(("EDocParser::sendPicture: can not find the picture\n"));
376
314
    return false;
377
314
  }
378
596
  pictInput->seek(0,librevenge::RVNG_SEEK_SET);
379
596
  std::shared_ptr<MWAWPict> thePict(MWAWPictData::get(pictInput, dataSz));
380
596
  MWAWPosition pictPos=MWAWPosition(MWAWVec2f(0,0),box.size(), librevenge::RVNG_POINT);
381
596
  pictPos.setRelativePosition(MWAWPosition::Char);
382
596
  if (thePict) {
383
596
    MWAWEmbeddedObject picture;
384
596
    if (thePict->getBinary(picture))
385
596
      getTextListener()->insertPicture(pictPos, picture);
386
596
  }
387
596
  return true;
388
910
}
389
390
//! debugging function: sends all the cPIC and PICT zones which are not marked as parsed
void EDocParser::flushExtra()
{
#ifdef DEBUG
  auto sendUnparsed = [this](std::map<int, MWAWEntry> const &zones, bool compressed) {
    for (auto const &idEntry : zones) {
      MWAWEntry const &zone = idEntry.second;
      if (!zone.isParsed())
        sendPicture(zone.id(), compressed);
    }
  };
  sendUnparsed(m_state->m_idCPICMap, true);
  sendUnparsed(m_state->m_idPICTMap, false);
#endif
}
405
406
////////////////////////////////////////////////////////////
407
//
408
// Low level
409
//
410
////////////////////////////////////////////////////////////
411
412
413
// the font name
414
/** reads a font names zone: a 2 bytes counter N followed by N records of 0x100 bytes.

    The names are only written in the debug file. Returns false if the entry is not
    valid or if its length does not correspond to N records. */
bool EDocParser::readFontsName(MWAWEntry const &entry)
{
  long const zoneLength = entry.length();
  if (!entry.valid() || (zoneLength%0x100)!=2) {
    MWAW_DEBUG_MSG(("EDocParser::readFontsName: the entry seems very short\n"));
    return false;
  }

  entry.setParsed(true);
  long pos = entry.begin();
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  input->seek(pos, librevenge::RVNG_SEEK_SET);

  libmwaw::DebugStream f;
  f << "Entries(FontsName):";
  if (entry.id()!=128)
    f << "#id=" << entry.id() << ",";
  auto const numFonts=static_cast<int>(input->readULong(2));
  f << "N=" << numFonts << ",";
  ascFile.addPos(pos-4);
  ascFile.addNote(f.str().c_str());
  if (numFonts*0x100+2!=zoneLength) {
    MWAW_DEBUG_MSG(("EDocParser::readFontsName: the number of elements seems bad\n"));
    return false;
  }

  for (int i = 0; i < numFonts; ++i) {
    pos = input->tell();

    f.str("");
    f << "FontsName-" << i << ":";
    // the name: a size byte followed by the characters
    auto const nameLen=static_cast<int>(input->readULong(1));
    bool const nameLenOk = nameLen!=0 && nameLen<255;
    if (nameLenOk) {
      std::string name;
      for (int c=0; c < nameLen; ++c)
        name += char(input->readULong(1));
      f << "\"" << name << "\",";
    }
    else {
      f << "##" << nameLen << ",";
      MWAW_DEBUG_MSG(("EDocParser::readFontsName: the font name %d seems bad\n", i));
    }

    // the remaining 112 short values, which begin at offset 32
    input->seek(pos+32, librevenge::RVNG_SEEK_SET);
    for (int j = 0; j < 112; ++j) { // always 0 .
      auto const unkn = static_cast<int>(input->readLong(2));
      if (unkn)
        f << "f" << j << "=" << unkn << ",";
    }
    ascFile.addPos(pos);
    ascFile.addNote(f.str().c_str());
    // go to the next record
    input->seek(pos+0x100, librevenge::RVNG_SEEK_SET);
  }
  return true;
}
466
467
// the index
468
bool EDocParser::sendIndex()
469
0
{
470
0
  if (!getTextListener()) {
471
0
    MWAW_DEBUG_MSG(("EDocParser::sendIndex: can not find the listener\n"));
472
0
    return false;
473
0
  }
474
0
  if (!m_state->m_indexList.size())
475
0
    return true;
476
477
0
  double w = getPageWidth();
478
0
  MWAWParagraph para;
479
0
  MWAWTabStop tab;
480
0
  tab.m_alignment = MWAWTabStop::RIGHT;
481
0
  tab.m_leaderCharacter='.';
482
0
  tab.m_position = w-0.3;
483
484
0
  para.m_tabs->push_back(tab);
485
0
  para.m_marginsUnit=librevenge::RVNG_INCH;
486
487
0
  MWAWFont cFont(3,10);
488
0
  cFont.setFlags(MWAWFont::boldBit);
489
0
  MWAWFont actFont(3,12);
490
491
0
  getTextListener()->insertEOL();
492
0
  std::stringstream ss;
493
0
  for (auto const &index : m_state->m_indexList) {
494
0
    para.m_margins[0] = 0.3*double(index.m_levelId+1);
495
0
    getTextListener()->setParagraph(para);
496
0
    getTextListener()->setFont(actFont);
497
0
    for (char c : index.m_text)
498
0
      getTextListener()->insertCharacter(static_cast<unsigned char>(c));
499
500
0
    if (index.m_page >= 0) {
501
0
      getTextListener()->setFont(cFont);
502
0
      getTextListener()->insertTab();
503
0
      ss.str("");
504
0
      ss << index.m_page;
505
0
      getTextListener()->insertUnicodeString(librevenge::RVNGString(ss.str().c_str()));
506
0
    }
507
0
    getTextListener()->insertEOL();
508
0
  }
509
0
  return true;
510
0
}
511
512
/** reads an index zone: a 20 bytes header which contains the number of entries N,
    followed by N entries (13 bytes of data followed by the text, padded to an even size).

    The entries are appended to the state index list. Returns false if the entry is not
    valid or if an index entry does not fit in the zone. */
bool EDocParser::readIndex(MWAWEntry const &entry)
{
  long const zoneLength = entry.length();
  if (!entry.valid() || zoneLength < 20) {
    MWAW_DEBUG_MSG(("EDocParser::readIndex: the entry seems very short\n"));
    return false;
  }

  entry.setParsed(true);
  long pos = entry.begin();
  long const endPos = entry.end();
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  input->seek(pos, librevenge::RVNG_SEEK_SET);

  // the header
  libmwaw::DebugStream f;
  f << "Entries(Index):";
  if (entry.id()!=128)
    f << "#id=" << entry.id() << ",";
  auto const header0=static_cast<int>(input->readULong(2));
  if (header0) // 100 ?
    f << "f0=" << std::hex << header0 << std::dec << ",";
  auto const numEntries=static_cast<int>(input->readULong(2));
  f << "N=" << numEntries << ",";
  for (int i = 0; i < 8; ++i) { // always 0
    auto const unkn=static_cast<int>(input->readLong(2));
    if (unkn)
      f << "f" << i << "=" << unkn << ",";
  }
  ascFile.addPos(pos-4);
  ascFile.addNote(f.str().c_str());
  if (numEntries*14+20>zoneLength) {
    MWAW_DEBUG_MSG(("EDocParser::readIndex: the number of elements seems bad\n"));
    return false;
  }

  // the entries
  for (int i = 0; i < numEntries; ++i) {
    pos = input->tell();
    f.str("");
    if (pos+14 > endPos) {
      f << "Index-" << i << ":###";
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());

      MWAW_DEBUG_MSG(("EDocParser::readIndex: can not read index %d\n", i));
      return false;
    }

    EDocParserInternal::Index index;
    auto const flag = static_cast<int>(input->readULong(1)); // 0|80
    if (flag)
      f << "fl=" << std::hex << flag << std::dec << ",";
    index.m_levelId = static_cast<int>(input->readULong(1));
    index.m_page = static_cast<int>(input->readLong(2));
    // f1: y pos, other 0
    for (int j = 0; j < 4; ++j) {
      auto const unkn = static_cast<int>(input->readLong(2));
      if (unkn)
        f << "f" << j << "=" << unkn << ",";
    }

    // the text: a size byte followed by the characters
    auto const textLen = static_cast<int>(input->readULong(1));
    if (pos+13+textLen > endPos) {
      index.m_extra=f.str();
      f.str("");
      f << "Index-" << i << ":" << index << "###";
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());

      MWAW_DEBUG_MSG(("EDocParser::readIndex: can not read index %d text\n", i));
      return false;
    }
    for (int j = 0; j < textLen; ++j)
      index.m_text += char(input->readULong(1));
    index.m_extra=f.str();
    m_state->m_indexList.push_back(index);

    f.str("");
    f << "Index-" << i << ":" << index;
    // skip the padding byte when the text size is even
    if ((textLen%2)==0)
      input->seek(1,librevenge::RVNG_SEEK_CUR);
    ascFile.addPos(pos);
    ascFile.addNote(f.str().c_str());
  }
  return true;
}
595
596
// the document information
597
/** reads a document information zone (at least 0x68 bytes).

    Most fields are only written in the debug file; the two dimensions, if they are both
    in the range ]100,2000[, are used to set the page form length and width (in points).
    Returns false if the entry is not valid or too short. */
bool EDocParser::readInfo(MWAWEntry const &entry)
{
  long const zoneLength = entry.length();
  if (!entry.valid() || zoneLength < 0x68) {
    MWAW_DEBUG_MSG(("EDocParser::readInfo: the entry seems very short\n"));
    return false;
  }

  entry.setParsed(true);
  long const begPos = entry.begin();
  long const endPos = entry.end();
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  input->seek(begPos, librevenge::RVNG_SEEK_SET);

  libmwaw::DebugStream f;
  f << "Entries(Info):";
  if (entry.id()!=128)
    f << "#id=" << entry.id() << ",";
  for (int i = 0; i < 4; ++i) { // f0=0, other big number
    auto const unkn = static_cast<int>(input->readULong(2));
    if (unkn)
      f << "f" << i << "=" << std::hex << unkn << std::dec << ",";
  }
  // creator, file name: two strings stored in fields of 32 bytes
  for (int i = 0; i < 2; ++i) {
    auto const sz=static_cast<int>(input->readULong(1));
    if (sz <= 31) {
      std::string name;
      for (int c=0; c < sz; ++c)
        name += char(input->readULong(1));
      f << name << ",";
    }
    else {
      MWAW_DEBUG_MSG(("EDocParser::readInfo: can not read string %d\n", i));
      f << "###,";
    }
    input->seek(begPos+8+(i+1)*32, librevenge::RVNG_SEEK_SET);
  }
  for (int i = 0; i < 5; ++i) { // always 4, 0, 210, 0, 0 ?
    auto const unkn = static_cast<int>(input->readLong(2));
    if (unkn)
      f << "g" << i << "=" << unkn << ",";
  }

  // the page dimensions: the first value is used as the length, the second as the width
  int const pageLength = static_cast<int>(input->readLong(2));
  int const pageWidth = static_cast<int>(input->readLong(2));
  f << "dim?=" << pageWidth << "x" << pageLength << ",";
  bool const widthOk = pageWidth>100 && pageWidth<2000;
  bool const lengthOk = pageLength>100 && pageLength<2000;
  if (widthOk && lengthOk) {
    getPageSpan().setFormLength(double(pageLength)/72.);
    getPageSpan().setFormWidth(double(pageWidth)/72.);
  }
  else {
    MWAW_DEBUG_MSG(("EDocParser::readInfo: the page dimension seems bad\n"));
    f << "###,";
  }

  auto const numPict=static_cast<int>(input->readLong(2));
  f << "numPict?=" << numPict << ","; // seems ok in eDcR, but no in eSRD
  for (int i = 0; i < 2; ++i) { // fl0=hasIndex ?, fl1=0
    auto const flag = static_cast<int>(input->readLong(1));
    if (flag)
      f << "fl" << i << "=" << flag << ",";
  }
  auto const g5 = static_cast<int>(input->readLong(2)); // 0 or bf
  if (g5)
    f << "g5=" << g5 << ",";
  for (int i = 0; i < 3; ++i) { // 3 big number: some size?
    auto const unkn = static_cast<int>(input->readULong(4));
    if (unkn)
      f << "f" << i << "=" << std::hex << unkn << std::dec << ",";
  }
  ascFile.addDelimiter(input->tell(),'|');
  ascFile.addPos(begPos-4);
  ascFile.addNote(f.str().c_str());
  // mark the unread data (if any)
  if (input->tell() != endPos) {
    ascFile.addPos(input->tell());
    ascFile.addNote("Info(II)");
  }
  return true;
}
676
677
// code to uncompress data ( very low level)
678
namespace EDocParserInternal
679
{
680
//! very low structure to help uncompress data
681
struct DeflateStruct {
682
  //! constructor
683
  DeflateStruct(long size, long initSize)
684
1.18k
    : m_toWrite(size)
685
1.18k
    , m_data()
686
1.18k
    , m_circQueue(0x2000,0)
687
1.18k
    , m_circQueuePos(0)
688
1.18k
    , m_numDelayed(0)
689
1.18k
    , m_delayedChar('\0')
690
1.18k
  {
691
1.18k
    m_data.reserve(size_t(initSize));
692
1.18k
  }
693
  //! true if we have build of the data
694
  bool isEnd() const
695
315k
  {
696
315k
    return m_toWrite <= 0;
697
315k
  }
698
  //! push a new character
699
  bool push(unsigned char c)
700
929k
  {
701
929k
    if (m_toWrite <= 0) return false;
702
925k
    m_circQueue[m_circQueuePos++]=c;
703
925k
    if (m_circQueuePos==0x2000)
704
0
      m_circQueuePos=0;
705
925k
    if (m_numDelayed)
706
17.7k
      return treatDelayed(c);
707
908k
    if (c==0x81 && m_toWrite!=1) {
708
11.3k
      m_numDelayed++;
709
11.3k
      return true;
710
11.3k
    }
711
896k
    m_delayedChar=c;
712
896k
    m_data.push_back(c);
713
896k
    m_toWrite--;
714
896k
    return true;
715
908k
  }
716
  //! send a duplicated part of the data
717
  bool sendDuplicated(int num, int depl);
718
  //! check if there is delayed char, if so treat them
719
  bool treatDelayed(unsigned char c);
720
  //! return the content of the block in dt
721
  bool getBinaryData(librevenge::RVNGBinaryData &dt) const
722
910
  {
723
910
    dt.clear();
724
910
    if (m_data.empty()) return false;
725
910
    unsigned char const *firstPos=&m_data[0];
726
910
    dt.append(firstPos, static_cast<unsigned long>(m_data.size()));
727
910
    return true;
728
910
  }
729
protected:
730
  //! the number of data that we need to write
731
  long m_toWrite;
732
  //! the resulting data
733
  std::vector<unsigned char> m_data;
734
735
  //! a circular queue
736
  std::vector<unsigned char> m_circQueue;
737
  //! the position in the circular queue
738
  size_t m_circQueuePos;
739
  //! the number of character delayed
740
  int m_numDelayed;
741
  //! the delayed character
742
  unsigned char m_delayedChar;
743
private:
744
  DeflateStruct(DeflateStruct const &orig) = delete;
745
  DeflateStruct &operator=(DeflateStruct const &orig) = delete;
746
};
747
748
bool DeflateStruct::sendDuplicated(int num, int depl)
749
118k
{
750
118k
  int readPos=int(m_circQueuePos)+depl;
751
129k
  while (readPos < 0) readPos+=0x2000;
752
118k
  while (readPos >= 0x2000) readPos-=0x2000;
753
754
853k
  while (num-->0) {
755
735k
    push(m_circQueue[size_t(readPos++)]);
756
735k
    if (readPos==0x2000)
757
574
      readPos=0;
758
735k
  }
759
118k
  return true;
760
118k
}
761
bool DeflateStruct::treatDelayed(unsigned char c)
762
17.7k
{
763
17.7k
  if (m_toWrite <= 0)
764
0
    return false;
765
17.7k
  if (m_numDelayed==1) {
766
11.3k
    if (c==0x82) {
767
6.40k
      m_numDelayed++;
768
6.40k
      return true;
769
6.40k
    }
770
4.99k
    m_delayedChar=0x81;
771
4.99k
    m_data.push_back(m_delayedChar);
772
4.99k
    if (--m_toWrite==0) return true;
773
4.98k
    if (c==0x81 && m_toWrite==1)
774
5
      return true;
775
4.98k
    m_numDelayed=0;
776
4.98k
    m_delayedChar=c;
777
4.98k
    m_data.push_back(c);
778
4.98k
    m_toWrite--;
779
4.98k
    return true;
780
4.98k
  }
781
782
6.40k
  m_numDelayed=0;
783
6.40k
  if (c==0) {
784
1.92k
    m_data.push_back(0x81);
785
1.92k
    if (--m_toWrite==0) return true;
786
1.92k
    m_delayedChar=0x82;
787
1.92k
    m_data.push_back(m_delayedChar);
788
1.92k
    m_toWrite--;
789
1.92k
    return true;
790
1.92k
  }
791
4.47k
  if (c-1 > m_toWrite) return false;
792
56.6k
  for (int i = 0; i < int(c-1); i++)
793
52.2k
    m_data.push_back(m_delayedChar);
794
4.37k
  m_toWrite -= (c-1);
795
4.37k
  return true;
796
4.47k
}
797
}
798
799
/** decodes a compressed zone and stores the result in \a data.

    The zone begins with 12 bytes: the zone length (which must be equal to the entry
    length), the size of the decoded data and a value noted as a checksum (not verified
    here). It is followed by blocks of data; each block contains three tables (a counter
    followed by a list of 4 bits values) and a stream of bits. The 4 bits values are used
    as the number of bits of each code (presumably a prefix code, not confirmed here) to
    build three tables of 0x8000 values.

    \param entry the zone to decode (it is marked as parsed)
    \param data the decoded data (cleared first)
    \return true if some data have been decoded, false if the entry is not valid,
    too short or if its header or its tables are not coherent */
bool EDocParser::decodeZone(MWAWEntry const &entry, librevenge::RVNGBinaryData &data)
{
  data.clear();
  long length = entry.length();
  if (!entry.valid() || length<0x21+12) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: the entry seems very short\n"));
    return false;
  }
  entry.setParsed(true);
  long pos = entry.begin();
  long endPos = entry.end();
  MWAWInputStreamPtr input = rsrcInput();
  libmwaw::DebugFile &ascFile = rsrcAscii();
  input->seek(pos, librevenge::RVNG_SEEK_SET);

  libmwaw::DebugStream f;
  f << "Entries(CompressZone):";
  if (long(input->readULong(4))!=length) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: unexpected zone size\n"));
    return false;
  }
  auto zoneSize=long(input->readULong(4));
  f << "sz[final]=" << std::hex << zoneSize << std::dec << ",";

  // the size is read as an unsigned 32 bits value: if long is a 32 bits type, a big
  // value becomes negative and must not be used to reserve the final buffer
  if (zoneSize<=0) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: unexpected final zone size\n"));
    return false;
  }
  f << "checkSum=" << std::hex << input->readULong(4) << std::dec << ",";

  ascFile.addPos(pos-4);
  ascFile.addNote(f.str().c_str());

  // make an initial size estimate to avoid big allocation in case zoneSize is damaged
  const long maxInputSize = input->size() - input->tell();
  const long initSize = (zoneSize / 4 > maxInputSize) ? 4 * maxInputSize : zoneSize;
  EDocParserInternal::DeflateStruct deflate(zoneSize, initSize);
  // the maximum number of bytes of each table (each byte stores two values)
  int const maxData[]= {0x80, 0x20, 0x40};

  while (!deflate.isEnd() && input->tell() < endPos-3) {
    // only find a simple compress zone but seems ok to have more
    std::vector<unsigned char> vectors32K[3];
    std::vector<unsigned char> originalValues[3];
    for (int st=0; st < 3; st++) {
      pos=input->tell();
      f.str("");
      f << "CompressZone[data" << st << "]:";
      auto num=static_cast<int>(input->readULong(1));
      f << "num=" << num << ",";
      if (num > maxData[st] || pos+1+num > endPos) {
        MWAW_DEBUG_MSG(("EDocParser::decodeZone: find unexpected num of data : %d for zone %d\n", num, st));
        f << "###";

        ascFile.addPos(pos);
        ascFile.addNote(f.str().c_str());
        return false;
      }
      // read the 4 bits values: originalValues stores the value of each index,
      // mapData stores the indices with a not null value, sorted by value
      std::multimap<int, int> mapData;
      originalValues[st].resize(size_t(maxData[st])*2, 0);
      for (int i = 0; i < num; i++) {
        auto const val=static_cast<int>(input->readULong(1));
        for (int b=0; b < 2; b++) {
          int byte= b==0 ? (val>>4) : (val&0xF);
          originalValues[st][size_t(2*i+b)]=static_cast<unsigned char>(byte);
          if (byte==0)
            continue;
          mapData.insert(std::multimap<int, int>::value_type(byte,2*i+b));
        }
      }
      // build the table: an index with the value v fills 0x8000>>v consecutive positions
      vectors32K[st].resize(0x8000,0);
      int writePos=0;
      for (auto const &it : mapData) {
        int n=0x8000>>(it.first);
        if (writePos+n>0x8000) {
          MWAW_DEBUG_MSG(("EDocParser::decodeZone: find unexpected value writePos=%x for zone %d\n",static_cast<unsigned int>(writePos+n), st));

          f << "###";

          ascFile.addPos(pos);
          ascFile.addNote(f.str().c_str());
          return false;
        }
        for (int j = 0; j < n; j++)
          vectors32K[st][size_t(writePos++)]=static_cast<unsigned char>(it.second);
      }

      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());
    }

    // the stream of bits: the next bits to decode are the high bits of value,
    // byte counts the bits already read from the input which remain after them
    pos = input->tell();
    int byte=0;
    long maxBlockSz=0xFFF0;
    unsigned int value=(static_cast<unsigned int>(input->readULong(2)))<<16;
    // removes the numBits first bits of value, reading 16 new bits from the input
    // if there is not enough remaining bits
    auto skipBits=[&](int numBits) {
      if (byte<numBits) {
        value = (value<<byte);
        numBits -= byte;
        value |= static_cast<unsigned int>(input->readULong(2));
        byte=16;
      }
      value=(value<<numBits);
      byte-=numBits;
    };
    // NOTE(review): maxBlockSz is decreased by 2 or 3, so it can skip 0 and become
    // negative; the loop then only stops on the break below — confirm this is intended
    while (maxBlockSz) {
      if (deflate.isEnd() || input->tell()>endPos) break;
      int ind0=(value>>16);
      if (ind0 & 0x8000) {
        // a character: the first table gives its value
        auto ind1 = static_cast<int>(vectors32K[0][size_t(ind0&0x7FFF)]);
        skipBits(originalValues[0][size_t(ind1)]+1);

        deflate.push(static_cast<unsigned char>(ind1));
        maxBlockSz-=2;
        continue;
      }

      // a duplication: the second table gives the number of characters,
      // the third table and the 6 following bits give the position
      auto ind1 = static_cast<int>(vectors32K[1][size_t(ind0)]);
      skipBits(originalValues[1][size_t(ind1)]+1);
      auto ind2 = static_cast<int>(vectors32K[2][size_t(value>>17)]);
      skipBits(originalValues[2][size_t(ind2)]);

      ind2=int(value>>26) | (ind2<<6);
      skipBits(6);
      deflate.sendDuplicated(ind1, -ind2);
      maxBlockSz-=3;
    }
  }

  if (input->tell()!=endPos) {
    MWAW_DEBUG_MSG(("EDocParser::decodeZone: unexpected end of data\n"));
    ascFile.addPos(input->tell());
    ascFile.addNote("CompressZone[after]");
  }
  bool res = deflate.getBinaryData(data);
  ascFile.skipZone(pos,input->tell()-1);
#if defined(DEBUG_WITH_FILES)
  if (res) {
    static int volatile cPictName = 0;
    libmwaw::DebugStream f2;
    f2 << "CPICT" << ++cPictName << ".pct";
    libmwaw::Debug::dumpFile(data, f2.str().c_str());
  }
#endif
  return res;
}
966
967
////////////////////////////////////////////////////////////
968
// read the header
969
////////////////////////////////////////////////////////////
970
/** resets the state and checks if the file can be an eDoc document.

    Returns false if the input or the resource parser is missing, or if \a strict is set
    and no eDcF resource exists. If \a header is not null, it is reset with the eDoc type. */
bool EDocParser::checkHeader(MWAWHeader *header, bool strict)
{
  *m_state = EDocParserInternal::State();
  /** no data fork, may be ok, but this means
      that the file contains no text, so... */
  MWAWInputStreamPtr input = getInput();
  if (!input || !getRSRCParser())
    return false;
  if (input->hasDataFork()) {
    MWAW_DEBUG_MSG(("EDocParser::checkHeader: find a datafork, odd!!!\n"));
  }
  // in strict mode, check that the fontname zone exists
  if (strict && getRSRCParser()->getEntriesMap().count("eDcF") == 0)
    return false;

  if (header)
    header->reset(MWAWDocument::MWAW_T_EDOC, version());
  return true;
}
992
993
// vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: