/src/tinysparql/src/common/tracker-language.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2006, Jamie McCracken <jamiemcc@gnome.org> |
3 | | * Copyright (C) 2008, Nokia <ivan.frade@nokia.com> |
4 | | * |
5 | | * This library is free software; you can redistribute it and/or |
6 | | * modify it under the terms of the GNU Lesser General Public |
7 | | * License as published by the Free Software Foundation; either |
8 | | * version 2.1 of the License, or (at your option) any later version. |
9 | | * |
10 | | * This library is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | | * Lesser General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU Lesser General Public |
16 | | * License along with this library; if not, write to the |
17 | | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
18 | | * Boston, MA 02110-1301, USA. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include <string.h> |
24 | | |
25 | | #include <glib.h> |
26 | | |
27 | | #include <libstemmer.h> |
28 | | |
29 | | #include "tracker-language.h" |
30 | | |
31 | | typedef struct _TrackerLanguagePrivate TrackerLanguagePrivate; |
32 | | |
33 | | struct _TrackerLanguagePrivate { |
34 | | gchar *language_code; |
35 | | gboolean lang_has_english; |
36 | | |
37 | | GMutex stemmer_mutex; |
38 | | gpointer stemmer; |
39 | | }; |
40 | | |
/* GObject property identifiers */
enum {
	PROP_0 = 0,

	/* Identifier of the "language-code" property */
	PROP_LANGUAGE_CODE = 1,
};
47 | | |
48 | | static void language_constructed (GObject *object); |
49 | | static void language_finalize (GObject *object); |
50 | | static void language_set_property (GObject *object, |
51 | | guint param_id, |
52 | | const GValue *value, |
53 | | GParamSpec *pspec); |
54 | | |
55 | 0 | G_DEFINE_TYPE_WITH_PRIVATE (TrackerLanguage, tracker_language, G_TYPE_OBJECT) |
56 | 0 |
|
57 | 0 | static void |
58 | 0 | tracker_language_class_init (TrackerLanguageClass *klass) |
59 | 0 | { |
60 | 0 | GObjectClass *object_class = G_OBJECT_CLASS (klass); |
61 | |
|
62 | 0 | object_class->constructed = language_constructed; |
63 | 0 | object_class->finalize = language_finalize; |
64 | 0 | object_class->set_property = language_set_property; |
65 | |
|
66 | 0 | g_object_class_install_property (object_class, |
67 | 0 | PROP_LANGUAGE_CODE, |
68 | 0 | g_param_spec_string ("language-code", |
69 | 0 | "Language code", |
70 | 0 | "Language code", |
71 | 0 | NULL, |
72 | 0 | G_PARAM_WRITABLE | |
73 | 0 | G_PARAM_CONSTRUCT_ONLY)); |
74 | 0 | } |
75 | | |
76 | | static void |
77 | | tracker_language_init (TrackerLanguage *language) |
78 | 0 | { |
79 | 0 | } |
80 | | |
81 | | static void |
82 | | language_finalize (GObject *object) |
83 | 0 | { |
84 | 0 | TrackerLanguagePrivate *priv; |
85 | |
|
86 | 0 | priv = tracker_language_get_instance_private (TRACKER_LANGUAGE (object)); |
87 | |
|
88 | 0 | if (priv->stemmer) { |
89 | 0 | g_mutex_lock (&priv->stemmer_mutex); |
90 | 0 | sb_stemmer_delete (priv->stemmer); |
91 | 0 | g_mutex_unlock (&priv->stemmer_mutex); |
92 | 0 | } |
93 | 0 | g_mutex_clear (&priv->stemmer_mutex); |
94 | |
|
95 | 0 | g_free (priv->language_code); |
96 | |
|
97 | 0 | (G_OBJECT_CLASS (tracker_language_parent_class)->finalize) (object); |
98 | 0 | } |
99 | | |
100 | | static void |
101 | | language_set_property (GObject *object, |
102 | | guint param_id, |
103 | | const GValue *value, |
104 | | GParamSpec *pspec) |
105 | 0 | { |
106 | 0 | TrackerLanguage *language = TRACKER_LANGUAGE (object); |
107 | 0 | TrackerLanguagePrivate *priv = |
108 | 0 | tracker_language_get_instance_private (language); |
109 | |
|
110 | 0 | switch (param_id) { |
111 | 0 | case PROP_LANGUAGE_CODE: |
112 | 0 | priv->language_code = g_value_dup_string (value); |
113 | 0 | break; |
114 | 0 | default: |
115 | 0 | G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec); |
116 | 0 | break; |
117 | 0 | }; |
118 | 0 | } |
119 | | |
120 | | static void |
121 | | ensure_language (TrackerLanguage *language) |
122 | 0 | { |
123 | 0 | TrackerLanguagePrivate *priv = |
124 | 0 | tracker_language_get_instance_private (language); |
125 | 0 | const gchar * const *langs; |
126 | 0 | gint i; |
127 | |
|
128 | 0 | if (priv->language_code) |
129 | 0 | return; |
130 | | |
131 | 0 | langs = g_get_language_names (); |
132 | |
|
133 | 0 | for (i = 0; langs[i]; i++) { |
134 | 0 | const gchar *sep; |
135 | 0 | gchar *code; |
136 | 0 | int len; |
137 | |
|
138 | 0 | if (strcmp (langs[i], "C") == 0 || |
139 | 0 | strncmp (langs[i], "C.", 2) == 0 || |
140 | 0 | strcmp (langs[i], "POSIX") == 0) |
141 | 0 | continue; |
142 | | |
143 | 0 | sep = strchr (langs[i], '_'); |
144 | 0 | len = sep ? (int) (sep - langs[i]) : (int) strlen(langs[i]); |
145 | 0 | code = g_strndup (langs[i], len); |
146 | |
|
147 | 0 | if (!priv->language_code) |
148 | 0 | priv->language_code = g_strdup (code); |
149 | |
|
150 | 0 | if (strcmp (code, "en") == 0) |
151 | 0 | priv->lang_has_english = TRUE; |
152 | |
|
153 | 0 | g_free (code); |
154 | 0 | } |
155 | |
|
156 | 0 | if (!priv->language_code) |
157 | 0 | priv->language_code = g_strdup ("en"); |
158 | 0 | } |
159 | | |
160 | | static void |
161 | | language_constructed (GObject *object) |
162 | 0 | { |
163 | 0 | TrackerLanguage *language = TRACKER_LANGUAGE (object); |
164 | 0 | TrackerLanguagePrivate *priv = |
165 | 0 | tracker_language_get_instance_private (language); |
166 | |
|
167 | 0 | G_OBJECT_CLASS (tracker_language_parent_class)->constructed (object); |
168 | |
|
169 | 0 | ensure_language (language); |
170 | |
|
171 | 0 | priv->stemmer = sb_stemmer_new (priv->language_code, NULL); |
172 | 0 | if (!priv->stemmer) { |
173 | 0 | g_debug ("No stemmer could be found for language:'%s'", |
174 | 0 | priv->language_code); |
175 | 0 | } |
176 | 0 | } |
177 | | |
178 | | /** |
179 | | * tracker_language_new: |
180 | | * @language_code: language code in ISO 639-1 format |
181 | | * |
182 | | * Creates a new #TrackerLanguage instance for the passed language code. |
183 | | * |
184 | | * Returns: a newly created #TrackerLanguage |
185 | | **/ |
186 | | TrackerLanguage * |
187 | | tracker_language_new (const gchar *language_code) |
188 | 0 | { |
189 | 0 | TrackerLanguage *language; |
190 | |
|
191 | 0 | language = g_object_new (TRACKER_TYPE_LANGUAGE, |
192 | 0 | "language-code", language_code, |
193 | 0 | NULL); |
194 | |
|
195 | 0 | return language; |
196 | 0 | } |
197 | | |
198 | | void |
199 | | tracker_language_stem_word (TrackerLanguage *language, |
200 | | gchar *buffer, |
201 | | gint *buffer_len, |
202 | | gint buffer_size) |
203 | 0 | { |
204 | 0 | TrackerLanguagePrivate *priv; |
205 | |
|
206 | 0 | g_return_if_fail (TRACKER_IS_LANGUAGE (language)); |
207 | 0 | g_return_if_fail (buffer != NULL); |
208 | 0 | g_return_if_fail (buffer_len != NULL); |
209 | 0 | g_return_if_fail (*buffer_len >= 0); |
210 | | |
211 | 0 | priv = tracker_language_get_instance_private (language); |
212 | |
|
213 | 0 | g_mutex_lock (&priv->stemmer_mutex); |
214 | |
|
215 | 0 | if (priv->stemmer) { |
216 | 0 | const sb_symbol *symbol; |
217 | 0 | int len; |
218 | |
|
219 | 0 | symbol = sb_stemmer_stem (priv->stemmer, |
220 | 0 | (const sb_symbol *) buffer, |
221 | 0 | *buffer_len); |
222 | 0 | len = sb_stemmer_length (priv->stemmer); |
223 | |
|
224 | 0 | if (len < buffer_size) { |
225 | 0 | memcpy (buffer, symbol, len + 1); |
226 | 0 | *buffer_len = len; |
227 | 0 | } |
228 | 0 | } |
229 | |
|
230 | 0 | g_mutex_unlock (&priv->stemmer_mutex); |
231 | 0 | } |