Coverage Report

Created: 2025-09-13 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/kcodecs/src/probers/nsSBCSGroupProber.cpp
Line
Count
Source
1
/*  -*- C++ -*-
2
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4
    SPDX-License-Identifier: MIT
5
*/
6
7
#include "nsSBCSGroupProber.h"
8
9
#include "UnicodeGroupProber.h"
10
#include "nsHebrewProber.h"
11
#include "nsSBCharSetProber.h"
12
13
#include <stdio.h>
14
#include <stdlib.h>
15
16
namespace kencodingprober
17
{
18
// Creates every single-byte charset prober owned by this group, wires the
// Hebrew prober to its logical/visual model probers, and finally calls
// Reset() so the group starts in its initial detecting state.
nsSBCSGroupProber::nsSBCSGroupProber()
{
    // Slots are handed out sequentially, so the order of the statements
    // below determines the index of each prober inside mProbers.
    unsigned int slot = 0;

    // Slots 0-9: plain language-model probers.
    mProbers[slot++] = new nsSingleByteCharSetProber(&Win1251Model);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Koi8rModel);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Latin5Model);
    mProbers[slot++] = new nsSingleByteCharSetProber(&MacCyrillicModel);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Ibm866Model);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Ibm855Model);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Latin7Model);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Win1253Model);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
    mProbers[slot++] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);

    // Slots 10-12: the Hebrew prober followed by the two Win1255 model
    // probers that are constructed with a pointer to it.
    nsHebrewProber *const hebrewProber = new nsHebrewProber();
    const unsigned int hebrewSlot = slot++;
    const unsigned int logicalSlot = slot++;
    const unsigned int visualSlot = slot++;
    mProbers[hebrewSlot] = hebrewProber;
    mProbers[logicalSlot] = new nsSingleByteCharSetProber(&Win1255Model, false, hebrewProber); // Logical Hebrew
    mProbers[visualSlot] = new nsSingleByteCharSetProber(&Win1255Model, true, hebrewProber); // Visual Hebrew

    // Slot 13: the Unicode group prober.
    mProbers[slot++] = new UnicodeGroupProber();

    // Hebrew probing needs all three probers; if any is missing, drop the
    // whole trio, otherwise tell the Hebrew prober about its model probers.
    const bool hebrewComplete = mProbers[hebrewSlot] != nullptr //
        && mProbers[logicalSlot] != nullptr //
        && mProbers[visualSlot] != nullptr;
    if (!hebrewComplete) {
        for (unsigned int i = hebrewSlot; i <= visualSlot; ++i) {
            delete mProbers[i];
            mProbers[i] = nullptr;
        }
    } else {
        hebrewProber->SetModelProbers(mProbers[logicalSlot], mProbers[visualSlot]);
    }

    // disable latin2 before latin1 is available, otherwise all latin1
    // will be detected as latin2 because of their similarity.
    // mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
    // mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);

    Reset();
}
56
57
// Releases every prober held in mProbers, walking the slots from first to
// last. Slots that hold nullptr are harmless: deleting a null pointer is a
// no-op.
nsSBCSGroupProber::~nsSBCSGroupProber()
{
    unsigned int slot = 0;
    while (slot < NUM_OF_SBCS_PROBERS) {
        delete mProbers[slot];
        ++slot;
    }
}
63
64
const char *nsSBCSGroupProber::GetCharSetName()
65
82
{
66
    // if we have no answer yet
67
82
    if (mBestGuess == -1) {
68
0
        GetConfidence();
69
        // no charset seems positive
70
0
        if (mBestGuess == -1)
71
        // we will use default.
72
0
        {
73
0
            mBestGuess = 0;
74
0
        }
75
0
    }
76
82
    return mProbers[mBestGuess]->GetCharSetName();
77
82
}
78
79
void nsSBCSGroupProber::Reset(void)
80
3.37k
{
81
3.37k
    mActiveNum = 0;
82
50.5k
    for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
83
47.2k
        if (mProbers[i]) { // not null
84
47.2k
            mProbers[i]->Reset();
85
47.2k
            mIsActive[i] = true;
86
47.2k
            ++mActiveNum;
87
47.2k
        } else {
88
0
            mIsActive[i] = false;
89
0
        }
90
47.2k
    }
91
3.37k
    mBestGuess = -1;
92
3.37k
    mState = eDetecting;
93
3.37k
}
94
95
// Feeds a chunk of input to every active prober.
//
// The input is first passed through FilterWithoutEnglishLetters(); the
// filtered buffer is what the individual probers see. This is done without
// any consideration to KeepEnglishLetters of each prober since, as of now,
// there are no probers here which recognize languages with English
// characters.
//
// aBuf / aLen: the raw input chunk and its length.
// Returns the group's state (mState) after processing: it becomes eFoundIt
// as soon as one prober reports eFoundIt, eNotMe once the last active prober
// has reported eNotMe, and is otherwise left unchanged.
nsProbingState nsSBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen)
{
    char *filtered = nullptr;
    unsigned int filteredLen = 0;

    // Only probe when filtering succeeded and left something to look at.
    if (FilterWithoutEnglishLetters(aBuf, aLen, &filtered, filteredLen) && filteredLen != 0) {
        for (unsigned int slot = 0; slot < NUM_OF_SBCS_PROBERS; ++slot) {
            if (!mIsActive[slot]) {
                continue;
            }

            const nsProbingState result = mProbers[slot]->HandleData(filtered, filteredLen);
            if (result == eFoundIt) {
                // One prober is certain: record it and stop probing.
                mBestGuess = slot;
                mState = eFoundIt;
                break;
            }
            if (result == eNotMe) {
                // This prober ruled itself out; retire it.
                mIsActive[slot] = false;
                if (--mActiveNum == 0) {
                    // Nobody is left, so the whole group gives up.
                    mState = eNotMe;
                    break;
                }
            }
        }
    }

    // The filtered buffer is released on every path; free(nullptr) is a no-op.
    free(filtered);

    return mState;
}
140
141
float nsSBCSGroupProber::GetConfidence(void)
142
0
{
143
0
    unsigned int i;
144
0
    float bestConf = 0.0;
145
0
    float cf;
146
147
0
    switch (mState) {
148
0
    case eFoundIt:
149
0
        return (float)0.99; // sure yes
150
0
    case eNotMe:
151
0
        return (float)0.01; // sure no
152
0
    default:
153
0
        for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) {
154
0
            if (!mIsActive[i]) {
155
0
                continue;
156
0
            }
157
0
            cf = mProbers[i]->GetConfidence();
158
0
            if (bestConf < cf) {
159
0
                bestConf = cf;
160
0
                mBestGuess = i;
161
0
            }
162
0
        }
163
0
    }
164
0
    return bestConf;
165
0
}
166
167
#ifdef DEBUG_PROBE
168
void nsSBCSGroupProber::DumpStatus()
169
{
170
    unsigned int i;
171
    float cf;
172
173
    cf = GetConfidence();
174
    printf(" SBCS Group Prober --------begin status \r\n");
175
    for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
176
        if (!mIsActive[i]) {
177
            printf("  inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
178
        } else {
179
            mProbers[i]->DumpStatus();
180
        }
181
    }
182
    printf(" SBCS Group found best match [%s] confidence %f.\r\n", mProbers[mBestGuess]->GetCharSetName(), cf);
183
}
184
#endif
185
}