Coverage Report

Created: 2025-10-10 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rust-stemmers-1.2.0/src/lib.rs
Line
Count
Source
1
//! This library provides Rust implementations for some stemmer algorithms
2
//! written in the [snowball language](https://snowballstem.org/).
3
//!
4
//!
5
//! All algorithms expect the input to already be lowercased.
6
//!
7
//! # Usage
8
//! ```toml
9
//! [dependencies]
10
//! rust-stemmers = "^1.0"
11
//! ```
12
//!
13
//! ```rust
14
//! extern crate rust_stemmers;
15
//!
16
//! use rust_stemmers::{Algorithm, Stemmer};
17
//!
18
//! fn main() {
19
//!    let en_stemmer = Stemmer::create(Algorithm::English);
20
//!    assert_eq!(en_stemmer.stem("fruitlessly"), "fruitless");
21
//! }
22
//! ```
23
extern crate serde;
24
#[macro_use]
25
extern crate serde_derive;
26
27
use std::borrow::Cow;
28
29
mod snowball;
30
31
use snowball::SnowballEnv;
32
use snowball::algorithms;
33
34
/// Enum of all supported algorithms.
35
/// Check the [Snowball-Website](https://snowballstem.org/) for details.
36
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
37
pub enum Algorithm {
38
    Arabic,
39
    Danish,
40
    Dutch,
41
    English,
42
    Finnish,
43
    French,
44
    German,
45
    Greek,
46
    Hungarian,
47
    Italian,
48
    Norwegian,
49
    Portuguese,
50
    Romanian,
51
    Russian,
52
    Spanish,
53
    Swedish,
54
    Tamil,
55
    Turkish
56
}
57
58
/// Wrapps a usable interface around the actual stemmer implementation
59
pub struct Stemmer {
60
    stemmer: fn(&mut SnowballEnv) -> bool,
61
}
62
63
impl Stemmer {
64
    /// Create a new stemmer from an algorithm
65
0
    pub fn create(lang: Algorithm) -> Self {
66
0
        match lang {
67
0
            Algorithm::Arabic => Stemmer { stemmer: algorithms::arabic::stem },
68
0
            Algorithm::Danish => Stemmer { stemmer: algorithms::danish::stem },
69
0
            Algorithm::Dutch => Stemmer { stemmer: algorithms::dutch::stem },
70
0
            Algorithm::English => Stemmer { stemmer: algorithms::english::stem },
71
0
            Algorithm::Finnish => Stemmer { stemmer: algorithms::finnish::stem },
72
0
            Algorithm::French => Stemmer { stemmer: algorithms::french::stem },
73
0
            Algorithm::German => Stemmer { stemmer: algorithms::german::stem },
74
0
            Algorithm::Greek => Stemmer { stemmer: algorithms::greek::stem },
75
0
            Algorithm::Hungarian => Stemmer { stemmer: algorithms::hungarian::stem },
76
0
            Algorithm::Italian => Stemmer { stemmer: algorithms::italian::stem },
77
0
            Algorithm::Norwegian => Stemmer { stemmer: algorithms::norwegian::stem },
78
0
            Algorithm::Portuguese => Stemmer { stemmer: algorithms::portuguese::stem },
79
0
            Algorithm::Romanian => Stemmer { stemmer: algorithms::romanian::stem },
80
0
            Algorithm::Russian => Stemmer { stemmer: algorithms::russian::stem },
81
0
            Algorithm::Spanish => Stemmer { stemmer: algorithms::spanish::stem },
82
0
            Algorithm::Swedish => Stemmer { stemmer: algorithms::swedish::stem },
83
0
            Algorithm::Tamil => Stemmer { stemmer: algorithms::tamil::stem },
84
0
            Algorithm::Turkish => Stemmer { stemmer: algorithms::turkish::stem },
85
        }
86
0
    }
87
88
    /// Stem a single word
89
    /// Please note, that the input is expected to be all lowercase (if that is applicable).
90
0
    pub fn stem<'a>(&self, input: &'a str) -> Cow<'a, str> {
91
0
        let mut env = SnowballEnv::create(input);
92
0
        (self.stemmer)(&mut env);
93
0
        env.get_current()
94
0
    }
95
}
96
97
98
99
#[cfg(test)]
mod tests {
    use super::{Algorithm, Stemmer};
    use std::fs::File;
    use std::io::{BufRead, BufReader};

    /// Asserts that stemming `lhs` with the given algorithm yields `rhs`.
    fn stemms_to(lhs: &str, rhs: &str, stemmer: Algorithm) {
        assert_eq!(Stemmer::create(stemmer).stem(lhs), rhs);
    }

    /// Reads every line of the file at `path`, panicking with the offending
    /// path in the message if the file cannot be opened or read.
    fn read_lines(path: &str) -> Vec<String> {
        let file = File::open(path)
            .unwrap_or_else(|err| panic!("cannot open {}: {}", path, err));
        BufReader::new(file)
            .lines()
            .map(|line| line.unwrap_or_else(|err| panic!("cannot read {}: {}", path, err)))
            .collect()
    }

    /// Runs the vocabulary/result file pair `test_data/voc_<suffix>.txt` and
    /// `test_data/res_<suffix>.txt` through the given algorithm.
    ///
    /// Line `i` of the vocabulary file must stem to line `i` of the result
    /// file. Both files must have the same number of lines: a plain `zip`
    /// would silently stop at the shorter file and leave words unchecked.
    fn check_vocabulary(suffix: &str, algorithm: Algorithm) {
        let voc_path = format!("test_data/voc_{}.txt", suffix);
        let res_path = format!("test_data/res_{}.txt", suffix);

        let vocab = read_lines(&voc_path);
        let expected = read_lines(&res_path);

        assert_eq!(
            vocab.len(),
            expected.len(),
            "{} and {} must contain the same number of lines",
            voc_path,
            res_path
        );

        for (word, stem) in vocab.iter().zip(expected.iter()) {
            stemms_to(word, stem, algorithm);
        }
    }

    #[test]
    fn german_test() {
        check_vocabulary("ger", Algorithm::German);
    }

    #[test]
    fn english_test() {
        check_vocabulary("en", Algorithm::English);
    }

    #[test]
    fn french_test() {
        check_vocabulary("fr", Algorithm::French);
    }

    #[test]
    fn spanish_test() {
        check_vocabulary("es", Algorithm::Spanish);
    }

    #[test]
    fn portuguese_test() {
        check_vocabulary("pt", Algorithm::Portuguese);
    }

    #[test]
    fn italian_test() {
        check_vocabulary("it", Algorithm::Italian);
    }

    #[test]
    fn romanian_test() {
        check_vocabulary("ro", Algorithm::Romanian);
    }

    #[test]
    fn russian_test() {
        check_vocabulary("ru", Algorithm::Russian);
    }

    #[test]
    fn arabic_test() {
        check_vocabulary("ar", Algorithm::Arabic);
    }

    #[test]
    fn finnish_test() {
        check_vocabulary("fi", Algorithm::Finnish);
    }

    #[test]
    fn greek_test() {
        check_vocabulary("el", Algorithm::Greek);
    }

    #[test]
    fn norwegian_test() {
        check_vocabulary("no", Algorithm::Norwegian);
    }
}