/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rust-stemmers-1.2.0/src/lib.rs

Source
//! This library provides Rust implementations of several stemming algorithms
//! originally written in the [Snowball language](https://snowballstem.org/).
//!
//!
//! All algorithms expect the input to already be lowercased.
//!
//! # Usage
//! ```toml
//! [dependencies]
//! rust-stemmers = "^1.0"
//! ```
//!
//! ```rust
//! extern crate rust_stemmers;
//!
//! use rust_stemmers::{Algorithm, Stemmer};
//!
//! fn main() {
//!    let en_stemmer = Stemmer::create(Algorithm::English);
//!    assert_eq!(en_stemmer.stem("fruitlessly"), "fruitless");
//! }
//! ```
extern crate serde;
#[macro_use]
extern crate serde_derive;

use std::borrow::Cow;

mod snowball;

use snowball::SnowballEnv;
use snowball::algorithms;

/// Enum of all supported algorithms.
/// Check the [Snowball-Website](https://snowballstem.org/) for details.
///
/// Each variant names one language; `Stemmer::create` maps the variant to the
/// stemming routine of the same name under `snowball::algorithms`.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
pub enum Algorithm {
    /// Selects the Arabic stemming routine.
    Arabic,
    /// Selects the Danish stemming routine.
    Danish,
    /// Selects the Dutch stemming routine.
    Dutch,
    /// Selects the English stemming routine.
    English,
    /// Selects the Finnish stemming routine.
    Finnish,
    /// Selects the French stemming routine.
    French,
    /// Selects the German stemming routine.
    German,
    /// Selects the Greek stemming routine.
    Greek,
    /// Selects the Hungarian stemming routine.
    Hungarian,
    /// Selects the Italian stemming routine.
    Italian,
    /// Selects the Norwegian stemming routine.
    Norwegian,
    /// Selects the Portuguese stemming routine.
    Portuguese,
    /// Selects the Romanian stemming routine.
    Romanian,
    /// Selects the Russian stemming routine.
    Russian,
    /// Selects the Spanish stemming routine.
    Spanish,
    /// Selects the Swedish stemming routine.
    Swedish,
    /// Selects the Tamil stemming routine.
    Tamil,
    /// Selects the Turkish stemming routine.
    Turkish
}

/// Wraps a usable interface around the actual stemmer implementation.
///
/// Build an instance with `Stemmer::create` and apply it to a word with
/// `Stemmer::stem`.
pub struct Stemmer {
    // Function pointer to the stemming routine chosen in `Stemmer::create`.
    // It is handed a mutable `SnowballEnv`; the `bool` it returns is not
    // inspected by `Stemmer::stem`.
    stemmer: fn(&mut SnowballEnv) -> bool,
}

impl Stemmer {
    /// Create a new stemmer from an algorithm
    pub fn create(lang: Algorithm) -> Self {
        match lang {
            Algorithm::Arabic => Stemmer { stemmer: algorithms::arabic::stem },
            Algorithm::Danish => Stemmer { stemmer: algorithms::danish::stem },
            Algorithm::Dutch => Stemmer { stemmer: algorithms::dutch::stem },
            Algorithm::English => Stemmer { stemmer: algorithms::english::stem },
            Algorithm::Finnish => Stemmer { stemmer: algorithms::finnish::stem },
            Algorithm::French => Stemmer { stemmer: algorithms::french::stem },
            Algorithm::German => Stemmer { stemmer: algorithms::german::stem },
            Algorithm::Greek => Stemmer { stemmer: algorithms::greek::stem },
            Algorithm::Hungarian => Stemmer { stemmer: algorithms::hungarian::stem },
            Algorithm::Italian => Stemmer { stemmer: algorithms::italian::stem },
            Algorithm::Norwegian => Stemmer { stemmer: algorithms::norwegian::stem },
            Algorithm::Portuguese => Stemmer { stemmer: algorithms::portuguese::stem },
            Algorithm::Romanian => Stemmer { stemmer: algorithms::romanian::stem },
            Algorithm::Russian => Stemmer { stemmer: algorithms::russian::stem },
            Algorithm::Spanish => Stemmer { stemmer: algorithms::spanish::stem },
            Algorithm::Swedish => Stemmer { stemmer: algorithms::swedish::stem },
            Algorithm::Tamil => Stemmer { stemmer: algorithms::tamil::stem },
            Algorithm::Turkish => Stemmer { stemmer: algorithms::turkish::stem },
        }
    }

    /// Stem a single word
    /// Please note, that the input is expected to be all lowercase (if that is applicable).
    pub fn stem<'a>(&self, input: &'a str) -> Cow<'a, str> {
        let mut env = SnowballEnv::create(input);
        (self.stemmer)(&mut env);
        env.get_current()
    }
}



#[cfg(test)]
mod tests {
    use super::{Algorithm, Stemmer};
    use std::fs::File;
    use std::io::{BufRead, BufReader};

    /// Opens a fixture file for buffered, line-wise reading.
    ///
    /// Panics with the offending path in the message when the file cannot be
    /// opened, so a missing fixture is immediately identifiable.
    fn open_fixture(path: &str) -> BufReader<File> {
        let file = File::open(path)
            .unwrap_or_else(|err| panic!("cannot open fixture {}: {}", path, err));
        BufReader::new(file)
    }

    /// Checks one language against its pair of fixture files.
    ///
    /// `suffix` is the language tag used in the fixture names:
    /// `test_data/voc_<suffix>.txt` holds the input words and
    /// `test_data/res_<suffix>.txt` the expected stems, matched line by line.
    fn assert_corpus(suffix: &str, algorithm: Algorithm) {
        let voc_path = format!("test_data/voc_{}.txt", suffix);
        let res_path = format!("test_data/res_{}.txt", suffix);

        let vocab = open_fixture(&voc_path);
        let expected = open_fixture(&res_path);

        // The stemmer does not depend on the word, so build it once per corpus.
        let stemmer = Stemmer::create(algorithm);

        // `zip` ends with the shorter file, so surplus lines in either fixture
        // are not compared (same as before the refactoring).
        for (index, (word, stem)) in vocab.lines().zip(expected.lines()).enumerate() {
            let line_no = index + 1;
            let word = word
                .unwrap_or_else(|err| panic!("{}:{}: read error: {}", voc_path, line_no, err));
            let stem = stem
                .unwrap_or_else(|err| panic!("{}:{}: read error: {}", res_path, line_no, err));

            assert_eq!(
                stemmer.stem(&word),
                stem.as_str(),
                "{:?} stemmer, {} line {}: wrong stem for {:?}",
                algorithm,
                voc_path,
                line_no,
                word
            );
        }
    }

    #[test]
    fn german_test() {
        assert_corpus("ger", Algorithm::German);
    }

    #[test]
    fn english_test() {
        assert_corpus("en", Algorithm::English);
    }

    #[test]
    fn french_test() {
        assert_corpus("fr", Algorithm::French);
    }

    #[test]
    fn spanish_test() {
        assert_corpus("es", Algorithm::Spanish);
    }

    #[test]
    fn portuguese_test() {
        assert_corpus("pt", Algorithm::Portuguese);
    }

    #[test]
    fn italian_test() {
        assert_corpus("it", Algorithm::Italian);
    }

    #[test]
    fn romanian_test() {
        assert_corpus("ro", Algorithm::Romanian);
    }

    #[test]
    fn russian_test() {
        assert_corpus("ru", Algorithm::Russian);
    }

    #[test]
    fn arabic_test() {
        assert_corpus("ar", Algorithm::Arabic);
    }

    #[test]
    fn finnish_test() {
        assert_corpus("fi", Algorithm::Finnish);
    }

    #[test]
    fn greek_test() {
        assert_corpus("el", Algorithm::Greek);
    }

    #[test]
    fn norwegian_test() {
        assert_corpus("no", Algorithm::Norwegian);
    }

}

Coverage Report

Created: 2025-10-10 07:11

Line	Count	Source
1		//! This library provides rust implementations for some stemmer algorithms
2		//! written in the [snowball language](https://snowballstem.org/).
3		//!
4		//!
5		//! All algorithms expect the input to already be lowercased.
6		//!
7		//! # Usage
8		//! ```toml
9		//! [dependencies]
10		//! rust-stemmers = "^1.0"
11		//! ```
12		//!
13		//! ```rust
14		//! extern crate rust_stemmers;
15		//!
16		//! use rust_stemmers::{Algorithm, Stemmer};
17		//!
18		//! fn main() {
19		//! let en_stemmer = Stemmer::create(Algorithm::English);
20		//! assert_eq!(en_stemmer.stem("fruitlessly"), "fruitless");
21		//! }
22		//! ```
23		extern crate serde;
24		#[macro_use]
25		extern crate serde_derive;
26
27		use std::borrow::Cow;
28
29		mod snowball;
30
31		use snowball::SnowballEnv;
32		use snowball::algorithms;
33
34		/// Enum of all supported algorithms.
35		/// Check the [Snowball-Website](https://snowballstem.org/) for details.
36		#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
37		pub enum Algorithm {
38		Arabic,
39		Danish,
40		Dutch,
41		English,
42		Finnish,
43		French,
44		German,
45		Greek,
46		Hungarian,
47		Italian,
48		Norwegian,
49		Portuguese,
50		Romanian,
51		Russian,
52		Spanish,
53		Swedish,
54		Tamil,
55		Turkish
56		}
57
58		/// Wrapps a usable interface around the actual stemmer implementation
59		pub struct Stemmer {
60		stemmer: fn(&mut SnowballEnv) -> bool,
61		}
62
63		impl Stemmer {
64		/// Create a new stemmer from an algorithm
65	0	pub fn create(lang: Algorithm) -> Self {
66	0	match lang {
67	0	Algorithm::Arabic => Stemmer { stemmer: algorithms::arabic::stem },
68	0	Algorithm::Danish => Stemmer { stemmer: algorithms::danish::stem },
69	0	Algorithm::Dutch => Stemmer { stemmer: algorithms::dutch::stem },
70	0	Algorithm::English => Stemmer { stemmer: algorithms::english::stem },
71	0	Algorithm::Finnish => Stemmer { stemmer: algorithms::finnish::stem },
72	0	Algorithm::French => Stemmer { stemmer: algorithms::french::stem },
73	0	Algorithm::German => Stemmer { stemmer: algorithms::german::stem },
74	0	Algorithm::Greek => Stemmer { stemmer: algorithms::greek::stem },
75	0	Algorithm::Hungarian => Stemmer { stemmer: algorithms::hungarian::stem },
76	0	Algorithm::Italian => Stemmer { stemmer: algorithms::italian::stem },
77	0	Algorithm::Norwegian => Stemmer { stemmer: algorithms::norwegian::stem },
78	0	Algorithm::Portuguese => Stemmer { stemmer: algorithms::portuguese::stem },
79	0	Algorithm::Romanian => Stemmer { stemmer: algorithms::romanian::stem },
80	0	Algorithm::Russian => Stemmer { stemmer: algorithms::russian::stem },
81	0	Algorithm::Spanish => Stemmer { stemmer: algorithms::spanish::stem },
82	0	Algorithm::Swedish => Stemmer { stemmer: algorithms::swedish::stem },
83	0	Algorithm::Tamil => Stemmer { stemmer: algorithms::tamil::stem },
84	0	Algorithm::Turkish => Stemmer { stemmer: algorithms::turkish::stem },
85		}
86	0	}
87
88		/// Stem a single word
89		/// Please note, that the input is expected to be all lowercase (if that is applicable).
90	0	pub fn stem<'a>(&self, input: &'a str) -> Cow<'a, str> {
91	0	let mut env = SnowballEnv::create(input);
92	0	(self.stemmer)(&mut env);
93	0	env.get_current()
94	0	}
95		}
96
97
98
99		#[cfg(test)]
100		mod tests {
101		use super::{Stemmer, Algorithm};
102
103		fn stemms_to(lhs: &str, rhs: &str, stemmer: Algorithm) {
104		assert_eq!(Stemmer::create(stemmer).stem(lhs), rhs);
105		}
106
107		#[test]
108		fn german_test() {
109		use std::fs;
110		use std::io;
111		use std::io::BufRead;
112
113		let vocab = io::BufReader::new(fs::File::open("test_data/voc_ger.txt").unwrap());
114		let result = io::BufReader::new(fs::File::open("test_data/res_ger.txt").unwrap());
115
116		let lines = vocab.lines().zip(result.lines());
117
118		for (voc, res) in lines {
119		stemms_to(voc.unwrap().as_str(),
120		res.unwrap().as_str(),
121		Algorithm::German);
122		}
123		}
124
125		#[test]
126		fn english_test() {
127		use std::fs;
128		use std::io;
129		use std::io::BufRead;
130
131		let vocab = io::BufReader::new(fs::File::open("test_data/voc_en.txt").unwrap());
132		let result = io::BufReader::new(fs::File::open("test_data/res_en.txt").unwrap());
133
134		let lines = vocab.lines().zip(result.lines());
135
136		for (voc, res) in lines {
137		stemms_to(voc.unwrap().as_str(),
138		res.unwrap().as_str(),
139		Algorithm::English);
140		}
141		}
142
143		#[test]
144		fn french_test() {
145		use std::fs;
146		use std::io;
147		use std::io::BufRead;
148
149		let vocab = io::BufReader::new(fs::File::open("test_data/voc_fr.txt").unwrap());
150		let result = io::BufReader::new(fs::File::open("test_data/res_fr.txt").unwrap());
151
152		let lines = vocab.lines().zip(result.lines());
153
154		for (voc, res) in lines {
155		stemms_to(voc.unwrap().as_str(),
156		res.unwrap().as_str(),
157		Algorithm::French);
158		}
159		}
160
161		#[test]
162		fn spanish_test() {
163		use std::fs;
164		use std::io;
165		use std::io::BufRead;
166
167		let vocab = io::BufReader::new(fs::File::open("test_data/voc_es.txt").unwrap());
168		let result = io::BufReader::new(fs::File::open("test_data/res_es.txt").unwrap());
169
170		let lines = vocab.lines().zip(result.lines());
171
172		for (voc, res) in lines {
173		stemms_to(voc.unwrap().as_str(),
174		res.unwrap().as_str(),
175		Algorithm::Spanish);
176		}
177		}
178
179		#[test]
180		fn portuguese_test() {
181		use std::fs;
182		use std::io;
183		use std::io::BufRead;
184
185		let vocab = io::BufReader::new(fs::File::open("test_data/voc_pt.txt").unwrap());
186		let result = io::BufReader::new(fs::File::open("test_data/res_pt.txt").unwrap());
187
188		let lines = vocab.lines().zip(result.lines());
189
190		for (voc, res) in lines {
191		stemms_to(voc.unwrap().as_str(),
192		res.unwrap().as_str(),
193		Algorithm::Portuguese);
194		}
195		}
196
197		#[test]
198		fn italian_test() {
199		use std::fs;
200		use std::io;
201		use std::io::BufRead;
202
203		let vocab = io::BufReader::new(fs::File::open("test_data/voc_it.txt").unwrap());
204		let result = io::BufReader::new(fs::File::open("test_data/res_it.txt").unwrap());
205
206		let lines = vocab.lines().zip(result.lines());
207
208		for (voc, res) in lines {
209		stemms_to(voc.unwrap().as_str(),
210		res.unwrap().as_str(),
211		Algorithm::Italian);
212		}
213		}
214
215		#[test]
216		fn romanian_test() {
217		use std::fs;
218		use std::io;
219		use std::io::BufRead;
220
221		let vocab = io::BufReader::new(fs::File::open("test_data/voc_ro.txt").unwrap());
222		let result = io::BufReader::new(fs::File::open("test_data/res_ro.txt").unwrap());
223
224		let lines = vocab.lines().zip(result.lines());
225
226		for (voc, res) in lines {
227		stemms_to(voc.unwrap().as_str(),
228		res.unwrap().as_str(),
229		Algorithm::Romanian);
230		}
231		}
232
233		#[test]
234		fn russian_test() {
235		use std::fs;
236		use std::io;
237		use std::io::BufRead;
238
239		let vocab = io::BufReader::new(fs::File::open("test_data/voc_ru.txt").unwrap());
240		let result = io::BufReader::new(fs::File::open("test_data/res_ru.txt").unwrap());
241
242		let lines = vocab.lines().zip(result.lines());
243
244		for (voc, res) in lines {
245		stemms_to(voc.unwrap().as_str(),
246		res.unwrap().as_str(),
247		Algorithm::Russian);
248		}
249		}
250
251		#[test]
252		fn arabic_test() {
253		use std::fs;
254		use std::io;
255		use std::io::BufRead;
256
257		let vocab = io::BufReader::new(fs::File::open("test_data/voc_ar.txt").unwrap());
258		let result = io::BufReader::new(fs::File::open("test_data/res_ar.txt").unwrap());
259
260		let lines = vocab.lines().zip(result.lines());
261
262		for (voc, res) in lines {
263		stemms_to(voc.unwrap().as_str(),
264		res.unwrap().as_str(),
265		Algorithm::Arabic);
266		}
267		}
268
269		#[test]
270		fn finnish_test() {
271		use std::fs;
272		use std::io;
273		use std::io::BufRead;
274
275		let vocab = io::BufReader::new(fs::File::open("test_data/voc_fi.txt").unwrap());
276		let result = io::BufReader::new(fs::File::open("test_data/res_fi.txt").unwrap());
277
278		let lines = vocab.lines().zip(result.lines());
279
280		for (voc, res) in lines {
281		stemms_to(voc.unwrap().as_str(),
282		res.unwrap().as_str(),
283		Algorithm::Finnish);
284		}
285		}
286
287		#[test]
288		fn greek_test() {
289		use std::fs;
290		use std::io;
291		use std::io::BufRead;
292
293		let vocab = io::BufReader::new(fs::File::open("test_data/voc_el.txt").unwrap());
294		let result = io::BufReader::new(fs::File::open("test_data/res_el.txt").unwrap());
295
296		let lines = vocab.lines().zip(result.lines());
297
298		for (voc, res) in lines {
299		stemms_to(voc.unwrap().as_str(),
300		res.unwrap().as_str(),
301		Algorithm::Greek);
302		}
303		}
304
305		#[test]
306		fn norwegian_test() {
307		use std::fs;
308		use std::io;
309		use std::io::BufRead;
310
311		let vocab = io::BufReader::new(fs::File::open("test_data/voc_no.txt").unwrap());
312		let result = io::BufReader::new(fs::File::open("test_data/res_no.txt").unwrap());
313
314		let lines = vocab.lines().zip(result.lines());
315
316		for (voc, res) in lines {
317		stemms_to(voc.unwrap().as_str(),
318		res.unwrap().as_str(),
319		Algorithm::Norwegian);
320		}
321		}
322
323		}