Coverage Report

Created: 2025-07-23 06:54

/src/kcodecs/src/probers/nsLatin1Prober.cpp
Line
Count
Source (jump to first uncovered line)
1
/*  -*- C++ -*-
2
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4
    SPDX-License-Identifier: MIT
5
*/
6
7
#include "nsLatin1Prober.h"
8
#include <stdio.h>
9
#include <stdlib.h>
10
11
// Character classes: Latin1_CharToClass maps every byte value to one of these,
// and HandleData uses them as row/column indices into Latin1ClassModel.
#define UDF 0 // undefined byte; every transition to or from it is marked illegal in Latin1ClassModel
12
3.13k
#define OTH 1 // other: any byte that is not classified as a letter
13
#define ASC 2 // ASCII capital letter
14
#define ASS 3 // ASCII small letter
15
#define ACV 4 // accented capital vowel
16
#define ACO 5 // other accented capital letter
17
#define ASV 6 // accented small vowel
18
#define ASO 7 // other accented small letter
19
41.3M
#define CLASS_NUM 8 // total number of classes; also the row stride of Latin1ClassModel
20
21
namespace kencodingprober
22
{
23
// Maps each of the 256 possible byte values to its character class (UDF..ASO).
// HandleData indexes this table with the input byte cast to unsigned char.
// The trailing comment on each row gives the hexadecimal byte range it covers.
// NOTE(review): the 0x80-0x9F rows contain letter classes and UDF holes, which
// looks like the windows-1252 layout rather than strict ISO-8859-1 - confirm.
static const unsigned char Latin1_CharToClass[] = {
24
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
25
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
26
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17
27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F
28
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27
29
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F
30
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37
31
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F
32
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47
33
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F
34
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57
35
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F
36
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67
37
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F
38
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77
39
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F
40
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87
41
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F
42
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97
43
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F
44
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7
45
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF
46
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7
47
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF
48
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7
49
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF
50
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7
51
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF
52
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7
53
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF
54
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7
55
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
56
};
57
58
/* Likelihood of seeing a character of class <column> immediately after a
   character of class <row>:
     0 : illegal
     1 : very unlikely
     2 : normal
     3 : very likely
   The table is a flattened CLASS_NUM x CLASS_NUM matrix, read by HandleData as
   Latin1ClassModel[previousClass * CLASS_NUM + currentClass]. The value found
   there is also the index of the frequency counter that HandleData increments.
*/
63
static const unsigned char Latin1ClassModel[] = {
64
    /*      UDF OTH ASC ASS ACV ACO ASV ASO  */
65
    /*UDF*/ 0, 0, 0, 0, 0, 0, 0, 0,
66
    /*OTH*/ 0, 3, 3, 3, 3, 3, 3, 3,
67
    /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3,
68
    /*ASS*/ 0, 3, 3, 3, 1, 1, 3, 3,
69
    /*ACV*/ 0, 3, 3, 3, 1, 2, 1, 2,
70
    /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3,
71
    /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3,
72
    /*ASO*/ 0, 3, 1, 3, 1, 1, 3, 3,
73
};
74
75
void nsLatin1Prober::Reset(void)
76
3.13k
{
77
3.13k
    mState = eDetecting;
78
3.13k
    mLastCharClass = OTH;
79
15.6k
    for (int i = 0; i < FREQ_CAT_NUM; i++) {
80
12.5k
        mFreqCounter[i] = 0;
81
12.5k
    }
82
3.13k
}
83
84
// Feeds aLen bytes starting at aBuf into the class-transition model.
//
// The input is first passed through FilterWithEnglishLetters; if that call
// reports failure the raw input is scanned instead. For every byte the
// transition (previous class -> current class) is looked up in
// Latin1ClassModel: a value of 0 marks the transition as illegal, which sets
// the state to eNotMe and stops the scan; any other value bumps the matching
// frequency counter.
//
// Returns the prober state after the scan.
nsProbingState nsLatin1Prober::HandleData(const char *aBuf, unsigned int aLen)
{
    char *filtered = nullptr;
    unsigned int filteredLen = 0;
    const bool haveFiltered = FilterWithEnglishLetters(aBuf, aLen, &filtered, filteredLen);

    // Scan the filtered copy when there is one, otherwise the caller's bytes.
    const char *bytes = haveFiltered ? filtered : aBuf;
    const unsigned int byteCount = haveFiltered ? filteredLen : aLen;

    unsigned int pos = 0;
    while (pos < byteCount) {
        const unsigned char currentClass = Latin1_CharToClass[static_cast<unsigned char>(bytes[pos])];
        const unsigned char likelihood = Latin1ClassModel[mLastCharClass * CLASS_NUM + currentClass];
        if (likelihood == 0) {
            // Illegal transition: this cannot be our charset.
            mState = eNotMe;
            break;
        }
        ++mFreqCounter[likelihood];
        mLastCharClass = currentClass;
        ++pos;
    }

    // Release the filter's buffer (presumably malloc-allocated by the filter),
    // but never the caller's own buffer.
    if (haveFiltered && filtered != aBuf) {
        free(filtered);
    }

    return mState;
}
111
112
float nsLatin1Prober::GetConfidence(void)
113
0
{
114
0
    if (mState == eNotMe) {
115
0
        return 0.01f;
116
0
    }
117
118
0
    float confidence;
119
0
    unsigned int total = 0;
120
0
    for (int i = 0; i < FREQ_CAT_NUM; i++) {
121
0
        total += mFreqCounter[i];
122
0
    }
123
124
0
    if (!total) {
125
0
        confidence = 0.0f;
126
0
    } else {
127
0
        confidence = mFreqCounter[3] * 1.0f / total;
128
0
        confidence -= mFreqCounter[1] * 20.0f / total;
129
0
    }
130
131
0
    if (confidence < 0.0f) {
132
0
        confidence = 0.0f;
133
0
    }
134
135
    // lower the confidence of latin1 so that other more accurate detector
136
    // can take priority.
137
0
    confidence *= 0.50f;
138
139
0
    return confidence;
140
0
}
141
142
#ifdef DEBUG_PROBE
143
void nsLatin1Prober::DumpStatus()
144
{
145
    printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
146
}
147
#endif
148
}