/src/kcodecs/src/probers/nsSBCSGroupProber.cpp
Line | Count | Source |
1 | | /* -*- C++ -*- |
2 | | SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> |
3 | | |
4 | | SPDX-License-Identifier: MIT |
5 | | */ |
6 | | |
7 | | #include "nsSBCSGroupProber.h" |
8 | | |
9 | | #include "UnicodeGroupProber.h" |
10 | | #include "nsHebrewProber.h" |
11 | | #include "nsSBCharSetProber.h" |
12 | | |
13 | | #include <stdio.h> |
14 | | #include <stdlib.h> |
15 | | |
16 | | namespace kencodingprober |
17 | | { |
18 | | nsSBCSGroupProber::nsSBCSGroupProber() |
19 | 3.37k | { |
20 | 3.37k | mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model); |
21 | 3.37k | mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel); |
22 | 3.37k | mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model); |
23 | 3.37k | mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel); |
24 | 3.37k | mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model); |
25 | 3.37k | mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model); |
26 | 3.37k | mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model); |
27 | 3.37k | mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model); |
28 | 3.37k | mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel); |
29 | 3.37k | mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel); |
30 | | |
31 | 3.37k | nsHebrewProber *hebprober = new nsHebrewProber(); |
32 | | // Notice: Any change in these indexes - 10,11,12 must be reflected |
33 | | // in the code below as well. |
34 | 3.37k | mProbers[10] = hebprober; |
35 | 3.37k | mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew |
36 | 3.37k | mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew |
37 | 3.37k | mProbers[13] = new UnicodeGroupProber(); |
38 | | |
39 | | // Tell the Hebrew prober about the logical and visual probers |
40 | 3.37k | if (mProbers[10] && mProbers[11] && mProbers[12]) { // all are not null |
41 | 3.37k | hebprober->SetModelProbers(mProbers[11], mProbers[12]); |
42 | 3.37k | } else { // One or more is null. avoid any Hebrew probing, null them all |
43 | 0 | for (unsigned int i = 10; i <= 12; ++i) { |
44 | 0 | delete mProbers[i]; |
45 | 0 | mProbers[i] = nullptr; |
46 | 0 | } |
47 | 0 | } |
48 | | |
49 | | // disable latin2 before latin1 is available, otherwise all latin1 |
50 | | // will be detected as latin2 because of their similarity. |
51 | | // mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel); |
52 | | // mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel); |
53 | | |
54 | 3.37k | Reset(); |
55 | 3.37k | } |
56 | | |
57 | | nsSBCSGroupProber::~nsSBCSGroupProber() |
58 | 3.37k | { |
59 | 50.5k | for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) { |
60 | 47.2k | delete mProbers[i]; |
61 | 47.2k | } |
62 | 3.37k | } |
63 | | |
64 | | const char *nsSBCSGroupProber::GetCharSetName() |
65 | 82 | { |
66 | | // if we have no answer yet |
67 | 82 | if (mBestGuess == -1) { |
68 | 0 | GetConfidence(); |
69 | | // no charset seems positive |
70 | 0 | if (mBestGuess == -1) |
71 | | // we will use default. |
72 | 0 | { |
73 | 0 | mBestGuess = 0; |
74 | 0 | } |
75 | 0 | } |
76 | 82 | return mProbers[mBestGuess]->GetCharSetName(); |
77 | 82 | } |
78 | | |
79 | | void nsSBCSGroupProber::Reset(void) |
80 | 3.37k | { |
81 | 3.37k | mActiveNum = 0; |
82 | 50.5k | for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) { |
83 | 47.2k | if (mProbers[i]) { // not null |
84 | 47.2k | mProbers[i]->Reset(); |
85 | 47.2k | mIsActive[i] = true; |
86 | 47.2k | ++mActiveNum; |
87 | 47.2k | } else { |
88 | 0 | mIsActive[i] = false; |
89 | 0 | } |
90 | 47.2k | } |
91 | 3.37k | mBestGuess = -1; |
92 | 3.37k | mState = eDetecting; |
93 | 3.37k | } |
94 | | |
95 | | nsProbingState nsSBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen) |
96 | 3.34k | { |
97 | 3.34k | nsProbingState st; |
98 | 3.34k | unsigned int i; |
99 | 3.34k | char *newBuf1 = nullptr; |
100 | 3.34k | unsigned int newLen1 = 0; |
101 | | |
102 | | // apply filter to original buffer, and we got new buffer back |
103 | | // depend on what script it is, we will feed them the new buffer |
104 | | // we got after applying proper filter |
105 | | // this is done without any consideration to KeepEnglishLetters |
106 | | // of each prober since as of now, there are no probers here which |
107 | | // recognize languages with English characters. |
108 | 3.34k | if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) { |
109 | 0 | goto done; |
110 | 0 | } |
111 | | |
112 | 3.34k | if (newLen1 == 0) { |
113 | 578 | goto done; // Nothing to see here, move on. |
114 | 578 | } |
115 | | |
116 | 40.9k | for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) { |
117 | 38.3k | if (!mIsActive[i]) { |
118 | 0 | continue; |
119 | 0 | } |
120 | 38.3k | st = mProbers[i]->HandleData(newBuf1, newLen1); |
121 | 38.3k | if (st == eFoundIt) { |
122 | 164 | mBestGuess = i; |
123 | 164 | mState = eFoundIt; |
124 | 164 | break; |
125 | 38.1k | } else if (st == eNotMe) { |
126 | 1.85k | mIsActive[i] = false; |
127 | 1.85k | mActiveNum--; |
128 | 1.85k | if (mActiveNum == 0) { |
129 | 0 | mState = eNotMe; |
130 | 0 | break; |
131 | 0 | } |
132 | 1.85k | } |
133 | 38.3k | } |
134 | | |
135 | 3.34k | done: |
136 | 3.34k | free(newBuf1); |
137 | | |
138 | 3.34k | return mState; |
139 | 2.76k | } |
140 | | |
141 | | float nsSBCSGroupProber::GetConfidence(void) |
142 | 0 | { |
143 | 0 | unsigned int i; |
144 | 0 | float bestConf = 0.0; |
145 | 0 | float cf; |
146 | |
|
147 | 0 | switch (mState) { |
148 | 0 | case eFoundIt: |
149 | 0 | return (float)0.99; // sure yes |
150 | 0 | case eNotMe: |
151 | 0 | return (float)0.01; // sure no |
152 | 0 | default: |
153 | 0 | for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) { |
154 | 0 | if (!mIsActive[i]) { |
155 | 0 | continue; |
156 | 0 | } |
157 | 0 | cf = mProbers[i]->GetConfidence(); |
158 | 0 | if (bestConf < cf) { |
159 | 0 | bestConf = cf; |
160 | 0 | mBestGuess = i; |
161 | 0 | } |
162 | 0 | } |
163 | 0 | } |
164 | 0 | return bestConf; |
165 | 0 | } |
166 | | |
167 | | #ifdef DEBUG_PROBE |
168 | | void nsSBCSGroupProber::DumpStatus() |
169 | | { |
170 | | unsigned int i; |
171 | | float cf; |
172 | | |
173 | | cf = GetConfidence(); |
174 | | printf(" SBCS Group Prober --------begin status \r\n"); |
175 | | for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) { |
176 | | if (!mIsActive[i]) { |
177 | | printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName()); |
178 | | } else { |
179 | | mProbers[i]->DumpStatus(); |
180 | | } |
181 | | } |
182 | | printf(" SBCS Group found best match [%s] confidence %f.\r\n", mProbers[mBestGuess]->GetCharSetName(), cf); |
183 | | } |
184 | | #endif |
185 | | } |