Coverage Report

Created: 2025-12-31 10:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/lingucomponent/source/languageguessing/simpleguesser.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
 /**
21
  *
22
  *
23
  *
24
  *
25
  * TODO
26
  * - Add exception throwing when h == NULL
27
  * - Not init h when implicit constructor is launched
28
  */
29
30
#include <string.h>
31
32
#ifdef SYSTEM_LIBEXTTEXTCAT
33
#include <libexttextcat/textcat.h>
34
#include <libexttextcat/common.h>
35
#include <libexttextcat/constants.h>
36
#include <libexttextcat/fingerprint.h>
37
#else
38
#include <textcat.h>
39
#include <common.h>
40
#include <constants.h>
41
#include <fingerprint.h>
42
#endif
43
44
#include <sal/types.h>
45
46
#include<rtl/character.hxx>
47
#include "simpleguesser.hxx"
48
49
0
static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){
50
0
            size_t i;
51
0
            int ret = 0;
52
53
0
            size_t min = s1.length();
54
0
            if (min > s2.length())
55
0
                min = s2.length();
56
57
0
            for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){
58
0
                    ret = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[i]))
59
0
                        -  rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[i]));
60
0
                    if(s1[i] == '.' || s2[i] == '.') {ret = 0;}    //. is a neutral character
61
0
            }
62
0
            return ret;
63
0
 }
64
65
namespace {
66
67
/**
68
 * This following structure is from textcat.c
69
 */
70
typedef struct textcat_t{
71
72
    void **fprint;
73
    char *fprint_disable;
74
    uint4 size;
75
    uint4 maxsize;
76
77
    char output[MAXOUTPUTSIZE];
78
79
} textcat_t;
80
// end of the 3 structs
81
82
}
83
84
/** Creates a guesser that holds no textcat handle yet (h is null). */
SimpleGuesser::SimpleGuesser()
    : h(nullptr)
{
}
88
89
0
/**
 * Assignment: releases the handle currently held (if any) and takes over the
 * handle pointer stored in sg. Self-assignment is a no-op.
 *
 * NOTE(review): the pointer is copied as-is, so afterwards both objects refer
 * to the same handle and both destructors call textcat_Done() on it. This
 * looks like a double release -- confirm how callers use copies.
 */
SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){
    if (this != &sg)
    {
        if (h != nullptr)
        {
            textcat_Done(h);
        }
        h = sg.h;
    }
    return *this;
}
98
99
/** Releases the textcat handle through textcat_Done() when one is held. */
SimpleGuesser::~SimpleGuesser()
{
    if (h != nullptr)
    {
        textcat_Done(h);
    }
}
103
104
/*!
105
    \fn SimpleGuesser::GuessLanguage(char* text)
106
 */
107
std::vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
108
0
{
109
0
    std::vector<Guess> guesses;
110
111
0
    if (!h)
112
0
        return guesses;
113
114
0
    int len = strlen(text);
115
116
0
    if (len > MAX_STRING_LENGTH_TO_ANALYSE)
117
0
        len = MAX_STRING_LENGTH_TO_ANALYSE;
118
119
0
    const char *guess_list = textcat_Classify(h, text, len);
120
121
0
    if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
122
0
        return guesses;
123
124
0
    int current_pointer = 0;
125
126
0
    while(guess_list[current_pointer] != '\0')
127
0
    {
128
0
        while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0')
129
0
            current_pointer++;
130
0
        if(guess_list[current_pointer] != '\0')
131
0
        {
132
0
            guesses.emplace_back(guess_list + current_pointer);
133
0
            current_pointer++;
134
0
        }
135
0
    }
136
137
0
    return guesses;
138
0
}
139
140
/**
 * Returns the first entry produced by GuessLanguage() for text, or a
 * default-constructed Guess when GuessLanguage() returns nothing.
 */
Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
{
    std::vector<Guess> candidates = GuessLanguage(text);
    if (candidates.empty())
    {
        return Guess();
    }
    return candidates[0];
}
145
/**
146
 * Is used to know which language is available, unavailable or both
147
 * when mask = 0xF0, return only Available
148
 * when mask = 0x0F, return only Unavailable
149
 * when mask = 0xFF, return both Available and Unavailable
150
 */
151
std::vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
152
0
{
153
0
    textcat_t *tables = static_cast<textcat_t*>(h);
154
155
0
    std::vector<Guess> lang;
156
0
    if(!h){return lang;}
157
158
0
    for (size_t i=0; i<tables->size; ++i)
159
0
    {
160
0
        if (tables->fprint_disable[i] & mask)
161
0
        {
162
0
            std::string langStr = "[";
163
0
            langStr += fp_Name(tables->fprint[i]);
164
0
            lang.emplace_back(langStr.c_str());
165
0
        }
166
0
    }
167
168
0
    return lang;
169
0
}
170
171
std::vector<Guess> SimpleGuesser::GetAvailableLanguages()
172
0
{
173
0
    return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
174
0
}
175
176
std::vector<Guess> SimpleGuesser::GetUnavailableLanguages()
177
0
{
178
0
    return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
179
0
}
180
181
std::vector<Guess> SimpleGuesser::GetAllManagedLanguages()
182
0
{
183
0
    return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
184
0
}
185
186
void SimpleGuesser::XableLanguage(const std::string& lang, char mask)
187
0
{
188
0
    textcat_t *tables = static_cast<textcat_t*>(h);
189
190
0
    if(!h){return;}
191
192
0
    for (size_t i=0; i<tables->size; i++)
193
0
    {
194
0
        std::string language(fp_Name(tables->fprint[i]));
195
0
        if (startsAsciiCaseInsensitive(language,lang) == 0)
196
0
            tables->fprint_disable[i] = mask;
197
0
    }
198
0
}
199
200
void SimpleGuesser::EnableLanguage(const std::string& lang)
201
0
{
202
0
    XableLanguage(lang,  sal::static_int_cast< char >( 0xF0 ));
203
0
}
204
205
void SimpleGuesser::DisableLanguage(const std::string& lang)
206
0
{
207
0
    XableLanguage(lang,  sal::static_int_cast< char >( 0x0F ));
208
0
}
209
210
void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
211
0
{
212
0
    if (h)
213
0
        textcat_Done(h);
214
0
    h = special_textcat_Init(path, prefix);
215
0
}
216
217
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */