/src/kcodecs/src/probers/UnicodeGroupProber.cpp
Line | Count | Source |
1 | | /* -*- C++ -*- |
2 | | SPDX-FileCopyrightText: 2008 Wang Kai <wkai@gmail.com> |
3 | | |
4 | | SPDX-License-Identifier: MIT |
5 | | */ |
6 | | |
7 | | #include "UnicodeGroupProber.h" |
8 | | |
9 | | #include <QChar> |
10 | | #include <math.h> |
11 | | |
12 | | namespace kencodingprober |
13 | | { |
14 | | UnicodeGroupProber::UnicodeGroupProber(void) |
15 | 12.3k | { |
16 | 12.3k | mCodingSM[0] = new nsCodingStateMachine(&UTF8SMModel); |
17 | 12.3k | mCodingSM[1] = new nsCodingStateMachine(&UCS2LESMModel); |
18 | 12.3k | mCodingSM[2] = new nsCodingStateMachine(&UCS2BESMModel); |
19 | 12.3k | mActiveSM = NUM_OF_UNICODE_CHARSETS; |
20 | 12.3k | mState = eDetecting; |
21 | 12.3k | mDetectedCharset = "UTF-8"; |
22 | 12.3k | } |
23 | | |
24 | | UnicodeGroupProber::~UnicodeGroupProber(void) |
25 | 12.3k | { |
26 | 49.4k | for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++) { |
27 | 37.0k | delete mCodingSM[i]; |
28 | 37.0k | } |
29 | 12.3k | } |
30 | | |
31 | | void UnicodeGroupProber::Reset(void) |
32 | 10.4k | { |
33 | 10.4k | mState = eDetecting; |
34 | 41.6k | for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++) { |
35 | 31.2k | mCodingSM[i]->Reset(); |
36 | 31.2k | } |
37 | 10.4k | mActiveSM = NUM_OF_UNICODE_CHARSETS; |
38 | 10.4k | mDetectedCharset = "UTF-8"; |
39 | 10.4k | } |
40 | | |
41 | | nsProbingState UnicodeGroupProber::HandleData(const char *aBuf, unsigned int aLen) |
42 | 12.2k | { |
43 | 12.2k | nsSMState codingState; |
44 | | |
45 | 12.2k | if (mActiveSM == 0) { |
46 | 0 | mState = eNotMe; |
47 | 0 | return mState; |
48 | 0 | } |
49 | | |
50 | 47.5k | for (int j = mActiveSM - 1; j >= 0; --j) { |
51 | 412M | for (uint i = 0; i < aLen; ++i) { |
52 | | // byte is feed to all active state machine |
53 | 412M | codingState = mCodingSM[j]->NextState(aBuf[i]); |
54 | 412M | if (codingState == eError) { |
55 | | // got negative answer for this state machine, make it inactive |
56 | 12.5k | mActiveSM--; |
57 | 12.5k | if (mActiveSM == 0) { |
58 | 1.45k | mState = eNotMe; |
59 | 1.45k | return mState; |
60 | 11.0k | } else if (j != (int)mActiveSM) { |
61 | 7.84k | nsCodingStateMachine *t; |
62 | 7.84k | t = mCodingSM[mActiveSM]; |
63 | 7.84k | mCodingSM[mActiveSM] = mCodingSM[j]; |
64 | 7.84k | mCodingSM[j] = t; |
65 | 7.84k | } |
66 | 11.0k | break; |
67 | 412M | } else if (codingState == eItsMe) { |
68 | 14 | mState = eFoundIt; |
69 | 14 | mDetectedCharset = mCodingSM[j]->GetCodingStateMachine(); |
70 | 14 | return mState; |
71 | 14 | } |
72 | 412M | } |
73 | 36.7k | } |
74 | 10.7k | mDetectedCharset = mCodingSM[0]->GetCodingStateMachine(); |
75 | 10.7k | return mState; |
76 | 12.2k | } |
77 | | |
78 | | float UnicodeGroupProber::GetConfidence() |
79 | 0 | { |
80 | 0 | if (mState == eFoundIt) { |
81 | 0 | return 0.99f; |
82 | 0 | } else { |
83 | 0 | return 0.0f; |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | | #ifdef DEBUG_PROBE |
88 | | void UnicodeGroupProber::DumpStatus() |
89 | | { |
90 | | GetConfidence(); |
91 | | for (uint i = 0; i < mActiveSM; i++) { |
92 | | qDebug() << "Unicode group" << mCodingSM[i]->DumpCurrentState() << mCodingSM[i]->GetCodingStateMachine(); |
93 | | } |
94 | | } |
95 | | #endif |
96 | | |
97 | | } |