Coverage Report

Created: 2026-06-30 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/kcodecs/src/probers/nsMBCSGroupProber.cpp
Line
Count
Source
1
/*  -*- C++ -*-
2
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4
    SPDX-License-Identifier: MIT
5
*/
6
7
#include "nsMBCSGroupProber.h"
8
9
#include "UnicodeGroupProber.h"
10
#include "nsBig5Prober.h"
11
#include "nsEUCJPProber.h"
12
#include "nsEUCKRProber.h"
13
#include "nsGB2312Prober.h"
14
#include "nsSJISProber.h"
15
16
#include <stdio.h>
17
#include <stdlib.h>
18
19
namespace kencodingprober
20
{
21
#ifdef DEBUG_PROBE
22
// Human-readable labels printed by DumpStatus(), indexed by prober slot.
// The order mirrors the order in which the constructor fills mProbers.
static const char *const ProberName[] = {
23
    "Unicode",
24
    "SJIS",
25
    "EUCJP",
26
    "GB18030",
27
    "EUCKR",
28
    "Big5",
29
};
30
31
#endif
32
33
namespace
34
{
35
using Prober = nsMBCSGroupProber::Prober;
36
/**
 * Converts a list of selected probers into a per-slot flag table.
 *
 * @param selected probers to enable; listing a prober more than once is harmless
 * @return a table indexed by the numeric value of Prober, in which an entry is
 *         true exactly when that prober occurs in @p selected
 *
 * Values whose numeric value does not address a slot of the table are ignored.
 */
constexpr std::array<bool, 6> fromSelectedList(std::span<const Prober> selected)
{
    std::array<bool, 6> isSelected{false};
    for (const auto p : selected) {
        // Converting to std::size_t maps negative values (possible when the
        // underlying type is signed) to very large ones, so the upper-bound
        // checks below reject them as well instead of indexing out of bounds.
        const auto slot = static_cast<std::size_t>(static_cast<std::underlying_type_t<Prober>>(p));
        if (slot >= static_cast<std::size_t>(NUM_OF_PROBERS) || slot >= isSelected.size()) {
            continue;
        }
        isSelected[slot] = true;
    }
    return isSelected;
}
48
// Compile-time checks of fromSelectedList(): an empty selection sets no flag,
// and each selected prober sets only its own slot (Unicode -> 0, SJIS -> 1, Big5 -> 5).
static_assert(fromSelectedList({})[0] == false);
49
static_assert(fromSelectedList({})[5] == false);
50
static_assert(fromSelectedList(std::array{Prober::Unicode})[0] == true);
51
static_assert(fromSelectedList(std::array{Prober::Unicode})[5] == false);
52
static_assert(fromSelectedList(std::array{Prober::SJIS, Prober::Big5})[0] == false);
53
static_assert(fromSelectedList(std::array{Prober::SJIS, Prober::Big5})[1] == true);
54
static_assert(fromSelectedList(std::array{Prober::SJIS, Prober::Big5})[2] == false);
55
static_assert(fromSelectedList(std::array{Prober::SJIS, Prober::Big5})[3] == false);
56
static_assert(fromSelectedList(std::array{Prober::SJIS, Prober::Big5})[4] == false);
57
static_assert(fromSelectedList(std::array{Prober::SJIS, Prober::Big5})[5] == true);
58
59
} // namespace <anonymous>
60
61
nsMBCSGroupProber::nsMBCSGroupProber(std::span<const Prober> selected)
62
14.0k
    : mProbers{std::make_unique<UnicodeGroupProber>(),
63
14.0k
               std::make_unique<nsSJISProber>(),
64
14.0k
               std::make_unique<nsEUCJPProber>(),
65
14.0k
               std::make_unique<nsGB18030Prober>(),
66
14.0k
               std::make_unique<nsEUCKRProber>(),
67
14.0k
               std::make_unique<nsBig5Prober>(),
68
14.0k
    }
69
14.0k
    , mIsSelected(fromSelectedList(selected))
70
14.0k
{
71
98.5k
    for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) {
72
84.5k
        if (mProbers[i] && mIsSelected[i]) {
73
59.2k
            mIsActive[i] = true;
74
59.2k
            ++mActiveNum;
75
59.2k
        }
76
84.5k
    }
77
14.0k
}
78
79
/**
 * Builds the group with all six sub-probers selected, by delegating to the
 * selecting constructor.
 */
nsMBCSGroupProber::nsMBCSGroupProber()
    : nsMBCSGroupProber(std::array{Prober::Unicode, Prober::SJIS, Prober::EUCJP, Prober::GB18030, Prober::EUCKR, Prober::Big5})
{
}
90
91
const char *nsMBCSGroupProber::GetCharSetName()
92
89
{
93
89
    if (mBestGuess == -1) {
94
0
        GetConfidence();
95
0
        if (mBestGuess == -1) {
96
0
            mBestGuess = 0;
97
0
        }
98
0
    }
99
89
    return mProbers[mBestGuess]->GetCharSetName();
100
89
}
101
102
/**
 * Feeds one chunk of input to every active sub-prober and updates the group state.
 *
 * To reduce the load on the sub-probers the chunk is filtered first: bytes with
 * the high bit set are kept, together with the single byte following each run of
 * them; all other bytes are dropped.
 *
 * @param aBuf input bytes; @p aLen bytes are read from it
 * @param aLen number of bytes in @p aBuf
 * @return the group state after the chunk: eFoundIt as soon as one sub-prober
 *         reports eFoundIt, eNotMe once the last active sub-prober has reported
 *         eNotMe, otherwise the state is left as it was. If the scratch buffer
 *         cannot be allocated the chunk is skipped and the current state is returned.
 */
nsProbingState nsMBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen)
{
    // Ties the malloc'ed scratch buffer to this scope so that it is released on
    // every exit path without a manual free().
    struct FreeDeleter {
        void operator()(char *p) const noexcept
        {
            free(p);
        }
    };
    const std::unique_ptr<char, FreeDeleter> highbyteBuf(static_cast<char *>(malloc(aLen)));
    if (!highbyteBuf) {
        return mState;
    }

    // do filtering to reduce load to probers
    char *hptr = highbyteBuf.get();
    bool keepNext = true; // assume previous is not ascii, it will do no harm except add some noise
    for (unsigned int i = 0; i < aLen; ++i) {
        const char c = aBuf[i];
        if (c & 0x80) {
            *hptr++ = c;
            keepNext = true;
        } else if (keepNext) {
            // if the previous byte was a high byte, keep this one even though it is ASCII
            *hptr++ = c;
            keepNext = false;
        }
    }
    // At most aLen bytes were written, so the length fits into unsigned int.
    const auto filteredLen = static_cast<unsigned int>(hptr - highbyteBuf.get());

    for (unsigned int i = 0; i < NUM_OF_PROBERS; ++i) {
        if (!mIsActive[i]) {
            continue;
        }
        const nsProbingState st = mProbers[i]->HandleData(highbyteBuf.get(), filteredLen);
        if (st == eFoundIt) {
            mBestGuess = static_cast<int>(i);
            mState = eFoundIt;
            break;
        } else if (st == eNotMe) {
            // This prober ruled itself out; give up once none is left.
            mIsActive[i] = false;
            mActiveNum--;
            if (mActiveNum == 0) {
                mState = eNotMe;
                break;
            }
        }
    }

    return mState;
}
148
149
float nsMBCSGroupProber::GetConfidence(void)
150
0
{
151
0
    float bestConf = 0.0;
152
153
0
    switch (mState) {
154
0
    case eFoundIt:
155
0
        return 0.99f;
156
0
    case eNotMe:
157
0
        return 0.01f;
158
0
    default:
159
0
        for (unsigned int i = 0; i < NUM_OF_PROBERS; ++i) {
160
0
            if (!mIsActive[i]) {
161
0
                continue;
162
0
            }
163
0
            float cf = mProbers[i]->GetConfidence();
164
0
            if (bestConf < cf) {
165
0
                bestConf = cf;
166
0
                mBestGuess = i;
167
0
            }
168
0
        }
169
0
    }
170
0
    return bestConf;
171
0
}
172
173
#ifdef DEBUG_PROBE
174
void nsMBCSGroupProber::DumpStatus()
175
{
176
    GetConfidence();
177
    for (size_t i = 0; i < NUM_OF_PROBERS; i++) {
178
        if (!mIsSelected[i]) {
179
            printf("  MBCS deselected: [%s][%s]\r\n", ProberName[i], mProbers[i]->GetCharSetName());
180
        } else if (!mIsActive[i]) {
181
            printf("  MBCS inactive: [%s][%s] (confidence is too low).\r\n", ProberName[i], mProbers[i]->GetCharSetName());
182
        } else {
183
            float cf = mProbers[i]->GetConfidence();
184
            printf("  MBCS %1.3f: [%s][%s]\r\n", cf, ProberName[i], mProbers[i]->GetCharSetName());
185
            mProbers[i]->DumpStatus();
186
        }
187
    }
188
}
189
#endif
190
}