/src/libreoffice/lingucomponent/source/languageguessing/simpleguesser.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | /** |
21 | | * |
22 | | * |
23 | | * |
24 | | * |
25 | | * TODO |
26 | | * - Add exception throwing when h == NULL |
27 | | * - Not init h when implicit constructor is launched |
28 | | */ |
29 | | |
30 | | #include <string.h> |
31 | | |
32 | | #ifdef SYSTEM_LIBEXTTEXTCAT |
33 | | #include <libexttextcat/textcat.h> |
34 | | #include <libexttextcat/common.h> |
35 | | #include <libexttextcat/constants.h> |
36 | | #include <libexttextcat/fingerprint.h> |
37 | | #else |
38 | | #include <textcat.h> |
39 | | #include <common.h> |
40 | | #include <constants.h> |
41 | | #include <fingerprint.h> |
42 | | #endif |
43 | | |
44 | | #include <sal/types.h> |
45 | | |
46 | | #include<rtl/character.hxx> |
47 | | #include "simpleguesser.hxx" |
48 | | |
49 | 0 | static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){ |
50 | 0 | size_t i; |
51 | 0 | int ret = 0; |
52 | |
|
53 | 0 | size_t min = s1.length(); |
54 | 0 | if (min > s2.length()) |
55 | 0 | min = s2.length(); |
56 | |
|
57 | 0 | for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){ |
58 | 0 | ret = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[i])) |
59 | 0 | - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[i])); |
60 | 0 | if(s1[i] == '.' || s2[i] == '.') {ret = 0;} //. is a neutral character |
61 | 0 | } |
62 | 0 | return ret; |
63 | 0 | } |
64 | | |
65 | | namespace { |
66 | | |
67 | | /** |
68 | | * This following structure is from textcat.c |
69 | | */ |
70 | | typedef struct textcat_t{ |
71 | | |
72 | | void **fprint; |
73 | | char *fprint_disable; |
74 | | uint4 size; |
75 | | uint4 maxsize; |
76 | | |
77 | | char output[MAXOUTPUTSIZE]; |
78 | | |
79 | | } textcat_t; |
80 | | // end of the 3 structs |
81 | | |
82 | | } |
83 | | |
84 | | SimpleGuesser::SimpleGuesser() |
85 | 0 | { |
86 | 0 | h = nullptr; |
87 | 0 | } |
88 | | |
89 | 0 | SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){ |
90 | | // Check for self-assignment! |
91 | 0 | if (this == &sg) // Same object? |
92 | 0 | return *this; // Yes, so skip assignment, and just return *this. |
93 | | |
94 | 0 | if(h){textcat_Done(h);} |
95 | 0 | h = sg.h; |
96 | 0 | return *this; |
97 | 0 | } |
98 | | |
99 | | SimpleGuesser::~SimpleGuesser() |
100 | 0 | { |
101 | 0 | if(h){textcat_Done(h);} |
102 | 0 | } |
103 | | |
104 | | /*! |
105 | | \fn SimpleGuesser::GuessLanguage(char* text) |
106 | | */ |
107 | | std::vector<Guess> SimpleGuesser::GuessLanguage(const char* text) |
108 | 0 | { |
109 | 0 | std::vector<Guess> guesses; |
110 | |
|
111 | 0 | if (!h) |
112 | 0 | return guesses; |
113 | | |
114 | 0 | int len = strlen(text); |
115 | |
|
116 | 0 | if (len > MAX_STRING_LENGTH_TO_ANALYSE) |
117 | 0 | len = MAX_STRING_LENGTH_TO_ANALYSE; |
118 | |
|
119 | 0 | const char *guess_list = textcat_Classify(h, text, len); |
120 | |
|
121 | 0 | if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0) |
122 | 0 | return guesses; |
123 | | |
124 | 0 | int current_pointer = 0; |
125 | |
|
126 | 0 | while(guess_list[current_pointer] != '\0') |
127 | 0 | { |
128 | 0 | while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0') |
129 | 0 | current_pointer++; |
130 | 0 | if(guess_list[current_pointer] != '\0') |
131 | 0 | { |
132 | 0 | guesses.emplace_back(guess_list + current_pointer); |
133 | 0 | current_pointer++; |
134 | 0 | } |
135 | 0 | } |
136 | |
|
137 | 0 | return guesses; |
138 | 0 | } |
139 | | |
140 | | Guess SimpleGuesser::GuessPrimaryLanguage(const char* text) |
141 | 0 | { |
142 | 0 | std::vector<Guess> ret = GuessLanguage(text); |
143 | 0 | return ret.empty() ? Guess() : ret[0]; |
144 | 0 | } |
145 | | /** |
146 | | * Is used to know which language is available, unavailable or both |
147 | | * when mask = 0xF0, return only Available |
148 | | * when mask = 0x0F, return only Unavailable |
149 | | * when mask = 0xFF, return both Available and Unavailable |
150 | | */ |
151 | | std::vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask) |
152 | 0 | { |
153 | 0 | textcat_t *tables = static_cast<textcat_t*>(h); |
154 | |
|
155 | 0 | std::vector<Guess> lang; |
156 | 0 | if(!h){return lang;} |
157 | | |
158 | 0 | for (size_t i=0; i<tables->size; ++i) |
159 | 0 | { |
160 | 0 | if (tables->fprint_disable[i] & mask) |
161 | 0 | { |
162 | 0 | std::string langStr = "["; |
163 | 0 | langStr += fp_Name(tables->fprint[i]); |
164 | 0 | lang.emplace_back(langStr.c_str()); |
165 | 0 | } |
166 | 0 | } |
167 | |
|
168 | 0 | return lang; |
169 | 0 | } |
170 | | |
171 | | std::vector<Guess> SimpleGuesser::GetAvailableLanguages() |
172 | 0 | { |
173 | 0 | return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) ); |
174 | 0 | } |
175 | | |
176 | | std::vector<Guess> SimpleGuesser::GetUnavailableLanguages() |
177 | 0 | { |
178 | 0 | return GetManagedLanguages( sal::static_int_cast< char >( 0x0F )); |
179 | 0 | } |
180 | | |
181 | | std::vector<Guess> SimpleGuesser::GetAllManagedLanguages() |
182 | 0 | { |
183 | 0 | return GetManagedLanguages( sal::static_int_cast< char >( 0xFF )); |
184 | 0 | } |
185 | | |
186 | | void SimpleGuesser::XableLanguage(const std::string& lang, char mask) |
187 | 0 | { |
188 | 0 | textcat_t *tables = static_cast<textcat_t*>(h); |
189 | |
|
190 | 0 | if(!h){return;} |
191 | | |
192 | 0 | for (size_t i=0; i<tables->size; i++) |
193 | 0 | { |
194 | 0 | std::string language(fp_Name(tables->fprint[i])); |
195 | 0 | if (startsAsciiCaseInsensitive(language,lang) == 0) |
196 | 0 | tables->fprint_disable[i] = mask; |
197 | 0 | } |
198 | 0 | } |
199 | | |
200 | | void SimpleGuesser::EnableLanguage(const std::string& lang) |
201 | 0 | { |
202 | 0 | XableLanguage(lang, sal::static_int_cast< char >( 0xF0 )); |
203 | 0 | } |
204 | | |
205 | | void SimpleGuesser::DisableLanguage(const std::string& lang) |
206 | 0 | { |
207 | 0 | XableLanguage(lang, sal::static_int_cast< char >( 0x0F )); |
208 | 0 | } |
209 | | |
210 | | void SimpleGuesser::SetDBPath(const char* path, const char* prefix) |
211 | 0 | { |
212 | 0 | if (h) |
213 | 0 | textcat_Done(h); |
214 | 0 | h = special_textcat_Init(path, prefix); |
215 | 0 | } |
216 | | |
217 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |