Coverage Report

Created: 2025-07-07 10:01

/work/workdir/UnpackedTarball/mythes/mythes.cxx
Line
Count
Source (jump to first uncovered line)
1
#include "COPYING"
2
#include <stdio.h>
3
#include <string.h>
4
#include <stdlib.h>
5
#include <errno.h>
6
#include <limits>
7
#include <vector>
8
9
#include "mythes.hxx"
10
11
#ifdef _WIN32
12
#include <windows.h>
13
#include <wchar.h>
14
#endif
15
16
// Construct a thesaurus from an index file and a data file.
// Every member is reset before loading so that thCleanup() can safely run
// when thInitialize() reports a failure.
MyThes::MyThes(const char* idxpath, const char * datpath)
{
    pdfile = NULL;
    offst = NULL;
    list = NULL;
    encoding = NULL;
    nw = 0;

    const int loaded = thInitialize(idxpath, datpath);
    if (loaded == 1) {
        return;
    }

    // did not initialize properly - throw exception?
    fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
    fflush(stderr);
    thCleanup();
}
31
32
33
// Releases the data file handle and all index storage via thCleanup().
MyThes::~MyThes()
{
    this->thCleanup();
}
37
38
39
int MyThes::thInitialize(const char* idxpath, const char* datpath)
40
0
{
41
42
    // open the index file
43
0
    FILE * pifile = myfopen(idxpath,"r");
44
0
    if (!pifile) {
45
0
        return 0;
46
0
    } 
47
48
    // parse in encoding and index size
49
0
    std::vector<char> buffer(MAX_WD_LEN);
50
0
    char * wrd = &buffer[0];
51
0
    readLine(pifile,wrd,MAX_WD_LEN);
52
0
    encoding = mystrdup(wrd);
53
0
    readLine(pifile,wrd,MAX_WD_LEN);
54
0
    int idxsz = atoi(wrd); 
55
   
56
0
    if (idxsz <= 0 || static_cast<unsigned int>(idxsz) > std::numeric_limits<int>::max() / sizeof(char*)) {
57
0
       fprintf(stderr,"Error - bad index %d\n", idxsz);
58
0
       fclose(pifile);
59
0
       return 0;
60
0
    }
61
62
    // now allocate list, offst for the given size
63
0
    list = (char**)   calloc(idxsz,sizeof(char*));
64
0
    offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
65
66
0
    if ( (!(list)) || (!(offst)) ) {
67
0
       fprintf(stderr,"Error - bad memory allocation\n");
68
0
       fclose(pifile);
69
0
       return 0;
70
0
    }
71
72
    // now parse the remaining lines of the index
73
0
    int len = readLine(pifile,wrd,MAX_WD_LEN);
74
0
    while (len > 0)
75
0
    { 
76
0
        int np = mystr_indexOfChar(wrd,'|');
77
0
        if (nw < idxsz) {
78
0
            if (np >= 0) {          
79
0
                *(wrd+np) = '\0';
80
0
                list[nw] = (char *)calloc(1,(np+1));
81
0
                if (!list[nw]) {
82
0
                    fprintf(stderr,"Error - bad memory allocation\n");
83
0
                    fflush(stderr);
84
0
                    fclose(pifile);
85
0
                    return 0;
86
0
                }
87
0
                memcpy((list[nw]),wrd,np);
88
0
                offst[nw] = atoi(wrd+np+1);
89
0
                nw++;
90
0
            }
91
0
        }
92
0
        len = readLine(pifile,wrd,MAX_WD_LEN);
93
0
    }
94
95
0
    fclose(pifile);
96
97
    /* next open the data file */
98
0
    pdfile = myfopen(datpath,"r");
99
0
    if (!pdfile) {
100
0
        return 0;
101
0
    } 
102
        
103
0
    return 1;        
104
0
}
105
106
107
void MyThes::thCleanup()
108
0
{
109
    /* first close the data file */
110
0
    if (pdfile) {
111
0
        fclose(pdfile);
112
0
        pdfile=NULL;
113
0
    }
114
115
0
    if (list)
116
0
    {
117
        /* now free up all the allocated strings on the list */
118
0
        for (int i=0; i < nw; i++) 
119
0
        {
120
0
            if (list[i]) {
121
0
                free(list[i]);
122
0
                list[i] = 0;
123
0
            }
124
0
        }
125
0
        free((void*)list);
126
0
    }
127
128
0
    if (encoding) free((void*)encoding);
129
0
    if (offst) free((void*)offst);
130
131
0
    encoding = NULL;
132
0
    list = NULL;
133
0
    offst = NULL;
134
0
    nw = 0;
135
0
}
136
137
138
139
// lookup text in index and count of meanings and a list of meaning entries
140
// with each entry having a synonym count and pointer to an 
141
// array of char * (i.e the synonyms)
142
// 
143
// note: calling routine should call CleanUpAfterLookup with the original
144
// meaning point and count to properly deallocate memory
145
146
int MyThes::Lookup(const char * pText, int len, mentry** pme)
147
0
{ 
148
149
0
    *pme = NULL;
150
151
    // handle the case of missing file or file related errors
152
0
    if (! pdfile) return 0;
153
154
0
    long offset = 0;
155
156
    /* copy search word and make sure null terminated */
157
0
    std::vector<char> buffer(len+1);
158
0
    char * wrd = &buffer[0];
159
0
    memcpy(wrd,pText,len);
160
  
161
    /* find it in the list */
162
0
    int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
163
0
    if (idx < 0) return 0;
164
165
    // now seek to the offset
166
0
    offset = (long) offst[idx];
167
0
    int rc = fseek(pdfile,offset,SEEK_SET);
168
0
    if (rc) {
169
0
       return 0;
170
0
    }
171
172
    // grab the count of the number of meanings
173
    // and allocate a list of meaning entries
174
0
    char * buf = NULL;
175
0
    buf  = (char *) malloc( MAX_LN_LEN );
176
0
    if (!buf) return 0;
177
0
    readLine(pdfile, buf, (MAX_LN_LEN-1));
178
0
    int np = mystr_indexOfChar(buf,'|');
179
0
    if (np < 0) {
180
0
         free(buf);
181
0
         return 0;
182
0
    }          
183
0
    int nmeanings = atoi(buf+np+1);
184
0
    if (nmeanings < 0 || static_cast<unsigned int>(nmeanings) > std::numeric_limits<int>::max() / sizeof(mentry))
185
0
        nmeanings = 0;
186
0
    *pme = (mentry*)(nmeanings ? malloc(nmeanings * sizeof(mentry)) : NULL);
187
0
    if (!(*pme)) {
188
0
        free(buf);
189
0
        return 0;
190
0
    }
191
192
    // now read in each meaning and parse it to get defn, count and synonym lists
193
0
    mentry* pm = *(pme);
194
0
    char dfn[MAX_WD_LEN];
195
196
0
    for (int j = 0; j < nmeanings; j++) {
197
0
        readLine(pdfile, buf, (MAX_LN_LEN-1));
198
199
0
        pm->count = 0;
200
0
        pm->psyns = NULL;
201
0
        pm->defn = NULL;
202
203
        // store away the part of speech for later use
204
0
        char * p = buf;
205
0
        char * pos = NULL;
206
0
        np = mystr_indexOfChar(p,'|');
207
0
        if (np >= 0) {
208
0
           *(buf+np) = '\0';
209
0
     pos = mystrdup(p);
210
0
           p = p + np + 1;
211
0
  } else {
212
0
          pos = mystrdup("");
213
0
        }
214
        
215
        // count the number of fields in the remaining line
216
0
        int nf = 1;
217
0
        char * d = p;
218
0
        np = mystr_indexOfChar(d,'|');        
219
0
        while ( np >= 0 ) {
220
0
    nf++;
221
0
          d = d + np + 1;
222
0
          np = mystr_indexOfChar(d,'|');          
223
0
  }
224
0
  pm->count = nf;
225
0
        pm->psyns = (char **) malloc(nf*sizeof(char*)); 
226
        
227
        // fill in the synonym list
228
0
        d = p;
229
0
        for (int jj = 0; jj < nf; jj++) 
230
0
        {
231
0
            np = mystr_indexOfChar(d,'|');
232
0
            if (np > 0) 
233
0
            {
234
0
                *(d+np) = '\0';
235
0
                pm->psyns[jj] = mystrdup(d);
236
0
                d = d + np + 1;
237
0
            } 
238
0
            else 
239
0
            {
240
0
              pm->psyns[jj] = mystrdup(d);
241
0
            }            
242
0
        }
243
244
        // add pos to first synonym to create the definition
245
0
        if (pm->psyns[0])
246
0
  {
247
0
            int k = strlen(pos);
248
0
            int m = strlen(pm->psyns[0]);
249
0
            if ((k+m) < (MAX_WD_LEN - 1)) {
250
0
                 memcpy(dfn,pos,k);
251
0
                 *(dfn+k) = ' ';
252
0
                 memcpy((dfn+k+1),(pm->psyns[0]),m+1);
253
0
                 pm->defn = mystrdup(dfn);
254
0
            } else {
255
0
                pm->defn = mystrdup(pm->psyns[0]);
256
0
            }
257
0
        }
258
0
        free(pos);
259
0
        pm++;
260
261
0
    }
262
0
    free(buf);
263
   
264
0
    return nmeanings;
265
0
} 
266
267
268
269
// Free the meaning array produced by Lookup().
//
// pme        address of the pointer that Lookup() filled in; set to NULL
//            once the array has been freed
// nmeanings  the count returned by Lookup(); nothing is done when it is 0
void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
{
    if (nmeanings == 0) return;
    if (pme == NULL || (*pme) == NULL) return;

    mentry * pm = *pme;

    for (int i = 0; i < nmeanings; i++, pm++) {
        // check the synonym array before indexing into it
        if (pm->psyns) {
            for (int j = 0; j < pm->count; j++) {
                free(pm->psyns[j]);     // free(NULL) is a no-op
                pm->psyns[j] = NULL;
            }
            free(pm->psyns);
            pm->psyns = NULL;
        }
        free(pm->defn);
        pm->defn = NULL;
        pm->count = 0;
    }

    free(*pme);
    *pme = NULL;
}
295
296
297
// read a line of text from a text file stripping
298
// off the line terminator and replacing it with
299
// a null string terminator.
300
// returns:  -1 on error or the number of characters in
301
//             in the returning string
302
303
// A maximum of nc characters will be returned
304
305
int MyThes::readLine(FILE * pf, char * buf, int nc)
306
0
{
307
    
308
0
  if (fgets(buf,nc,pf)) {
309
0
    mychomp(buf);
310
0
    return strlen(buf);
311
0
  }
312
0
  return -1;
313
0
}
314
315
316
 
317
//  performs a binary search on null terminated character
318
//  strings
319
//
320
//  returns: -1 on not found
321
//           index of wrd in the list[]
322
323
int MyThes::binsearch(char * sw, char* _list[], int nlst) 
324
0
{
325
0
    int lp, up, mp, j, indx;
326
0
    lp = 0;
327
0
    up = nlst-1;
328
0
    indx = -1;
329
0
    if (strcmp(sw,_list[lp]) < 0) return -1;
330
0
    if (strcmp(sw,_list[up]) > 0) return -1;
331
0
    while (indx < 0 ) {
332
0
        mp = (int)((lp+up) >> 1);
333
0
        j = strcmp(sw,_list[mp]);
334
0
        if ( j > 0) {
335
0
            lp = mp + 1;
336
0
        } else if (j < 0 ) {
337
0
            up = mp - 1;
338
0
        } else {
339
0
            indx = mp;
340
0
        }
341
0
        if (lp > up) return -1;      
342
0
    }
343
0
    return indx;
344
0
}
345
346
char * MyThes::get_th_encoding()
347
0
{
348
0
  return encoding;
349
0
}
350
351
352
// string duplication routine
353
char * MyThes::mystrdup(const char * s)
354
0
{
355
0
  char * d = NULL;
356
0
  if (s) {
357
0
    int sl = strlen(s)+1;
358
0
    d = (char *) malloc(sl);
359
0
    if (d) memcpy(d,s,sl);
360
0
  }
361
0
  return d;
362
0
}
363
364
// remove cross-platform text line end characters
365
void MyThes::mychomp(char * s)
366
0
{
367
0
  int k = strlen(s);
368
0
  if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
369
0
  if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
370
0
}
371
372
373
// return index of char in string
374
int MyThes::mystr_indexOfChar(const char * d, int c)
375
0
{
376
0
  char * p = strchr((char *)d,c);
377
0
  if (p) return (int)(p-d);
378
0
  return -1;
379
0
}
380
381
0
// Open a file, with special handling for Windows long paths.
//
// On Windows, a UTF-8 path starting with the "\\?\" prefix is converted to
// wide characters, normalised with _wfullpath and opened with _wfopen;
// every other path (and every path on other platforms) goes to fopen().
//
// returns: the opened stream, or NULL on failure
FILE * MyThes::myfopen(const char * path, const char * mode) {
#ifdef _WIN32
#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
    if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
        FILE * f = NULL;

        // a result of 0 means the conversion failed (e.g. invalid UTF-8)
        const int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
        if (len <= 0) {
            return NULL;
        }

        wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t));
        wchar_t *buff2 = (wchar_t *) malloc(len * sizeof(wchar_t));

        // both buffers must exist: a NULL buff2 would make _wfullpath
        // allocate its own result, which would leak and never reach _wfopen
        if (buff && buff2 &&
            MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len) > 0 &&
            _wfullpath( buff2, buff, len ) != NULL) {
          f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
        }
        free(buff);
        free(buff2);
        return f;
    }
#endif
    return fopen(path, mode);
}