/src/mozilla-central/extensions/universalchardet/src/base/nsEUCJPProber.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | // For Japanese encodings, observe these characteristics: |
7 | | // 1. kana characters (or hankaku?) often have a high frequency of appearance |
8 | | // 2. kana characters often exist in groups |
9 | | // 3. certain combinations of kana are never used in the Japanese language |
10 | | |
11 | | #include "nsEUCJPProber.h" |
12 | | #include "nsDebug.h" |
13 | | |
14 | | void nsEUCJPProber::Reset(void) |
15 | 0 | { |
16 | 0 | mCodingSM->Reset(); |
17 | 0 | mState = eDetecting; |
18 | 0 | mContextAnalyser.Reset(); |
19 | 0 | mDistributionAnalyser.Reset(); |
20 | 0 | } |
21 | | |
22 | | nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) |
23 | 0 | { |
24 | 0 | NS_ASSERTION(aLen, "HandleData called with empty buffer"); |
25 | 0 | uint32_t codingState; |
26 | 0 |
|
27 | 0 | for (uint32_t i = 0; i < aLen; i++) |
28 | 0 | { |
29 | 0 | codingState = mCodingSM->NextState(aBuf[i]); |
30 | 0 | if (codingState == eItsMe) |
31 | 0 | { |
32 | 0 | mState = eFoundIt; |
33 | 0 | break; |
34 | 0 | } |
35 | 0 | if (codingState == eStart) |
36 | 0 | { |
37 | 0 | uint32_t charLen = mCodingSM->GetCurrentCharLen(); |
38 | 0 |
|
39 | 0 | if (i == 0) |
40 | 0 | { |
41 | 0 | mLastChar[1] = aBuf[0]; |
42 | 0 | mContextAnalyser.HandleOneChar(mLastChar, charLen); |
43 | 0 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); |
44 | 0 | } |
45 | 0 | else |
46 | 0 | { |
47 | 0 | mContextAnalyser.HandleOneChar(aBuf+i-1, charLen); |
48 | 0 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); |
49 | 0 | } |
50 | 0 | } |
51 | 0 | } |
52 | 0 |
|
53 | 0 | mLastChar[0] = aBuf[aLen-1]; |
54 | 0 |
|
55 | 0 | if (mState == eDetecting) |
56 | 0 | if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) |
57 | 0 | mState = eFoundIt; |
58 | 0 |
|
59 | 0 | return mState; |
60 | 0 | } |
61 | | |
62 | | float nsEUCJPProber::GetConfidence(void) |
63 | 0 | { |
64 | 0 | float contxtCf = mContextAnalyser.GetConfidence(); |
65 | 0 | float distribCf = mDistributionAnalyser.GetConfidence(); |
66 | 0 |
|
67 | 0 | return (contxtCf > distribCf ? contxtCf : distribCf); |
68 | 0 | } |
69 | | |