/src/tinysparql/subprojects/libstemmer_c-3.0.1/libstemmer/libstemmer_utf8.c
Line | Count | Source |
1 | | |
2 | | #include <stdlib.h> |
3 | | #include <string.h> |
4 | | #include "../include/libstemmer.h" |
5 | | #include "../runtime/api.h" |
6 | | #include "modules_utf8.h" |
7 | | |
/*
 * Handle for one stemmer instance: the three entry points copied from the
 * selected algorithm module, plus the Snowball environment they act on.
 */
struct sb_stemmer {
    /* Allocates a fresh environment for the algorithm. */
    struct SN_env *(*create)(void);
    /* Releases an environment obtained from create(). */
    void (*close)(struct SN_env *);
    /* Runs the stemming algorithm on the environment's current word. */
    int (*stem)(struct SN_env *);

    /* Environment owned by this handle. */
    struct SN_env *env;
};
15 | | |
16 | | extern const char ** |
17 | | sb_stemmer_list(void) |
18 | 0 | { |
19 | 0 | return algorithm_names; |
20 | 0 | } |
21 | | |
22 | | static stemmer_encoding_t |
23 | | sb_getenc(const char * charenc) |
24 | 1.63k | { |
25 | 1.63k | const struct stemmer_encoding * encoding; |
26 | 1.63k | if (charenc == NULL) return ENC_UTF_8; |
27 | 0 | for (encoding = encodings; encoding->name != 0; encoding++) { |
28 | 0 | if (strcmp(encoding->name, charenc) == 0) break; |
29 | 0 | } |
30 | 0 | if (encoding->name == NULL) return ENC_UNKNOWN; |
31 | 0 | return encoding->enc; |
32 | 0 | } |
33 | | |
34 | | extern struct sb_stemmer * |
35 | | sb_stemmer_new(const char * algorithm, const char * charenc) |
36 | 1.63k | { |
37 | 1.63k | stemmer_encoding_t enc; |
38 | 1.63k | const struct stemmer_modules * module; |
39 | 1.63k | struct sb_stemmer * stemmer; |
40 | | |
41 | 1.63k | enc = sb_getenc(charenc); |
42 | 1.63k | if (enc == ENC_UNKNOWN) return NULL; |
43 | | |
44 | 34.2k | for (module = modules; module->name != 0; module++) { |
45 | 34.2k | if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; |
46 | 34.2k | } |
47 | 1.63k | if (module->name == NULL) return NULL; |
48 | | |
49 | 1.63k | stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); |
50 | 1.63k | if (stemmer == NULL) return NULL; |
51 | | |
52 | 1.63k | stemmer->create = module->create; |
53 | 1.63k | stemmer->close = module->close; |
54 | 1.63k | stemmer->stem = module->stem; |
55 | | |
56 | 1.63k | stemmer->env = stemmer->create(); |
57 | 1.63k | if (stemmer->env == NULL) |
58 | 0 | { |
59 | 0 | sb_stemmer_delete(stemmer); |
60 | 0 | return NULL; |
61 | 0 | } |
62 | | |
63 | 1.63k | return stemmer; |
64 | 1.63k | } |
65 | | |
66 | | void |
67 | | sb_stemmer_delete(struct sb_stemmer * stemmer) |
68 | 1.63k | { |
69 | 1.63k | if (stemmer == 0) return; |
70 | 1.63k | if (stemmer->close) { |
71 | 1.63k | stemmer->close(stemmer->env); |
72 | 1.63k | stemmer->close = 0; |
73 | 1.63k | } |
74 | 1.63k | free(stemmer); |
75 | 1.63k | } |
76 | | |
77 | | const sb_symbol * |
78 | | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) |
79 | 0 | { |
80 | 0 | int ret; |
81 | 0 | if (SN_set_current(stemmer->env, size, (const symbol *)(word))) |
82 | 0 | { |
83 | 0 | stemmer->env->l = 0; |
84 | 0 | return NULL; |
85 | 0 | } |
86 | 0 | ret = stemmer->stem(stemmer->env); |
87 | 0 | if (ret < 0) return NULL; |
88 | 0 | stemmer->env->p[stemmer->env->l] = 0; |
89 | 0 | return (const sb_symbol *)(stemmer->env->p); |
90 | 0 | } |
91 | | |
92 | | int |
93 | | sb_stemmer_length(struct sb_stemmer * stemmer) |
94 | 0 | { |
95 | 0 | return stemmer->env->l; |
96 | 0 | } |