1# -*- coding: utf-8 -*-
2"""
3 inflection
4 ~~~~~~~~~~~~
5
6 A port of Ruby on Rails' inflector to Python.
7
8 :copyright: (c) 2012-2020 by Janne Vanhala
9
10 :license: MIT, see LICENSE for more details.
11"""
12import re
13import unicodedata
14
# Version string of this module.
__version__ = '0.5.1'

# Ordered ``(pattern, replacement)`` pairs used by :func:`pluralize`. The
# rules are tried from the top and only the first pattern that matches the
# word is applied, so the specific rules have to stay above the generic
# catch-alls that close the list.
PLURALS = [
    # Irregular and Latin/Greek-derived nouns.
    (r"(?i)(quiz)$", r'\1zes'),
    (r"(?i)^(oxen)$", r'\1'),
    (r"(?i)^(ox)$", r'\1en'),
    (r"(?i)(m|l)ice$", r'\1ice'),
    (r"(?i)(m|l)ouse$", r'\1ice'),
    (r"(?i)(passer)s?by$", r'\1sby'),
    (r"(?i)(matr|vert|ind)(?:ix|ex)$", r'\1ices'),

    # Suffix-driven rules.
    (r"(?i)(x|ch|ss|sh)$", r'\1es'),
    (r"(?i)([^aeiouy]|qu)y$", r'\1ies'),
    (r"(?i)(hive)$", r'\1s'),
    (r"(?i)([lr])f$", r'\1ves'),
    (r"(?i)([^f])fe$", r'\1ves'),
    (r"(?i)sis$", 'ses'),
    (r"(?i)([ti])a$", r'\1a'),
    (r"(?i)([ti])um$", r'\1a'),
    (r"(?i)(buffal|potat|tomat)o$", r'\1oes'),
    (r"(?i)(bu)s$", r'\1ses'),
    (r"(?i)(alias|status)$", r'\1es'),
    (r"(?i)(octop|vir)i$", r'\1i'),
    (r"(?i)(octop|vir)us$", r'\1i'),
    (r"(?i)^(ax|test)is$", r'\1es'),

    # Catch-alls: a word already ending in "s" is kept, anything else gets
    # an "s" appended.
    (r"(?i)s$", 's'),
    (r"$", 's'),
]
42
# Ordered ``(pattern, replacement)`` pairs used by :func:`singularize`. The
# rules are tried from the top and only the first pattern that matches the
# word is applied, so the specific rules have to stay above the generic
# catch-alls that close the list.
SINGULARS = [
    (r"(?i)(database)s$", r'\1'),
    (r"(?i)(quiz)zes$", r'\1'),
    (r"(?i)(matr)ices$", r'\1ix'),
    (r"(?i)(vert|ind)ices$", r'\1ex'),
    (r"(?i)(passer)sby$", r'\1by'),
    # Anchored at both ends, like the matching "ox"/"oxen" plural rules.
    # Without the trailing ``$`` every word that merely starts with "oxen"
    # was rewritten (e.g. "oxenford" became "oxford").
    (r"(?i)^(ox)en$", r'\1'),
    (r"(?i)(alias|status)(es)?$", r'\1'),
    (r"(?i)(octop|vir)(us|i)$", r'\1us'),
    (r"(?i)^(a)x[ie]s$", r'\1xis'),
    (r"(?i)(cris|test)(is|es)$", r'\1is'),
    (r"(?i)(shoe)s$", r'\1'),
    (r"(?i)(o)es$", r'\1'),
    (r"(?i)(bus)(es)?$", r'\1'),
    (r"(?i)(m|l)ice$", r'\1ouse'),
    (r"(?i)(x|ch|ss|sh)es$", r'\1'),
    (r"(?i)(m)ovies$", r'\1ovie'),
    (r"(?i)(s)eries$", r'\1eries'),
    (r"(?i)([^aeiouy]|qu)ies$", r'\1y'),
    (r"(?i)([lr])ves$", r'\1f'),
    (r"(?i)(tive)s$", r'\1'),
    (r"(?i)(hive)s$", r'\1'),
    (r"(?i)([^f])ves$", r'\1fe'),
    # Words ending in "-sis"/"-ses": the captured first letter keeps the
    # case the caller used.
    (r"(?i)(t)he(sis|ses)$", r"\1hesis"),
    (r"(?i)(s)ynop(sis|ses)$", r"\1ynopsis"),
    (r"(?i)(p)rogno(sis|ses)$", r"\1rognosis"),
    (r"(?i)(p)arenthe(sis|ses)$", r"\1arenthesis"),
    (r"(?i)(d)iagno(sis|ses)$", r"\1iagnosis"),
    (r"(?i)(b)a(sis|ses)$", r"\1asis"),
    (r"(?i)(a)naly(sis|ses)$", r"\1nalysis"),
    (r"(?i)([ti])a$", r'\1um'),
    (r"(?i)(n)ews$", r'\1ews'),
    # Catch-alls: a trailing "ss" is kept, any other trailing "s" is dropped.
    (r"(?i)(ss)$", r'\1'),
    (r"(?i)s$", ''),
]
78
# Lower-case words that have the same singular and plural form;
# :func:`pluralize` and :func:`singularize` hand them back unchanged.
UNCOUNTABLES = {
    'equipment',
    'fish',
    'information',
    'jeans',
    'money',
    'rice',
    'series',
    'sheep',
    'species',
}
89
90
def _irregular(singular: str, plural: str) -> None:
    """
    Register an irregular singular/plural pair.

    The new rules are inserted at the front of ``PLURALS`` and ``SINGULARS``,
    so they are tried before every rule that is already present.

    :param singular: irregular word in singular form
    :param plural: irregular word in plural form
    """
    def any_case(text: str) -> str:
        # "an" -> "[aA][nN]": each character may match in either case.
        return ''.join('[' + letter + letter.upper() + ']' for letter in text)

    singular_head, singular_tail = singular[0], singular[1:]
    plural_head, plural_tail = plural[0], plural[1:]

    if singular_head.upper() == plural_head.upper():
        # Same initial letter: capture it so the replacement can echo the
        # case used by the caller via ``\1``.
        singular_pattern = r"(?i)({}){}$".format(singular_head, singular_tail)
        plural_pattern = r"(?i)({}){}$".format(plural_head, plural_tail)
        plural_rules = [
            (singular_pattern, r'\1' + plural_tail),
            (plural_pattern, r'\1' + plural_tail),
        ]
        singular_rules = [
            (plural_pattern, r'\1' + singular_tail),
        ]
    else:
        # Different initial letters: nothing can be echoed back, so an
        # upper-case and a lower-case variant of every rule is registered.
        singular_rest = any_case(singular_tail)
        plural_rest = any_case(plural_tail)
        upper_plural_pattern = r"{}{}$".format(plural_head.upper(), plural_rest)
        lower_plural_pattern = r"{}{}$".format(plural_head.lower(), plural_rest)
        upper_plural = plural_head.upper() + plural_tail
        lower_plural = plural_head.lower() + plural_tail
        plural_rules = [
            (r"{}{}$".format(singular_head.upper(), singular_rest),
             upper_plural),
            (r"{}{}$".format(singular_head.lower(), singular_rest),
             lower_plural),
            (upper_plural_pattern, upper_plural),
            (lower_plural_pattern, lower_plural),
        ]
        singular_rules = [
            (upper_plural_pattern, singular_head.upper() + singular_tail),
            (lower_plural_pattern, singular_head.lower() + singular_tail),
        ]

    # Each rule goes to index 0, so the last rule listed ends up first.
    for rule in plural_rules:
        PLURALS.insert(0, rule)
    for rule in singular_rules:
        SINGULARS.insert(0, rule)
142
143
def camelize(string: str, uppercase_first_letter: bool = True) -> str:
    """
    Convert strings to CamelCase.

    Examples::

        >>> camelize("device_type")
        'DeviceType'
        >>> camelize("device_type", False)
        'deviceType'
        >>> camelize("", False)
        ''

    :func:`camelize` can be thought of as an inverse of :func:`underscore`,
    although there are some cases where that does not hold::

        >>> camelize(underscore("IOError"))
        'IoError'

    :param string: the string to convert.
    :param uppercase_first_letter: if set to `True` :func:`camelize` converts
        strings to UpperCamelCase. If set to `False` :func:`camelize` produces
        lowerCamelCase. Defaults to `True`.
    :returns: the camel-cased string; an empty string is returned unchanged.
    """
    if uppercase_first_letter:
        # Upper-case the character at the start of the string and the one
        # after each underscore; the underscore itself is dropped.
        return re.sub(r"(?:^|_)(.)", lambda m: m.group(1).upper(), string)
    # Slicing rather than indexing, so an empty string yields "" instead of
    # raising IndexError.
    return string[:1].lower() + camelize(string)[1:]
169
170
def dasherize(word: str) -> str:
    """Return *word* with every underscore turned into a dash.

    Example::

        >>> dasherize("puni_puni")
        'puni-puni'

    :param word: the string to convert.
    """
    pieces = word.split('_')
    return '-'.join(pieces)
181
182
def humanize(word: str) -> str:
    """
    Turn an identifier into readable text: a trailing ``"_id"`` is removed,
    underscores become spaces, letters are lower-cased and the first
    character is capitalized. Like :func:`titleize`, this is meant for
    creating pretty output.

    Examples::

        >>> humanize("employee_salary")
        'Employee salary'
        >>> humanize("author_id")
        'Author'

    :param word: the string to convert.
    """
    def to_lower(match):
        return match.group(1).lower()

    def to_upper(match):
        return match.group(0).upper()

    without_id = re.sub(r"_id$", "", word)
    spaced = without_id.replace('_', ' ')
    lowered = re.sub(r"(?i)([a-z\d]*)", to_lower, spaced)
    return re.sub(r"^\w", to_upper, lowered)
202
203
def ordinal(number: int) -> str:
    """
    Return the English ordinal suffix for a number, i.e. the ``st``, ``nd``,
    ``rd`` or ``th`` in 1st, 2nd, 3rd, 4th. The sign of the number is ignored.

    Examples::

        >>> ordinal(1)
        'st'
        >>> ordinal(2)
        'nd'
        >>> ordinal(1002)
        'nd'
        >>> ordinal(1003)
        'rd'
        >>> ordinal(-11)
        'th'
        >>> ordinal(-1021)
        'st'

    :param number: the number (anything accepted by ``int()``).
    """
    magnitude = abs(int(number))

    # 11, 12 and 13 take "th" even though they end in 1, 2 and 3.
    if magnitude % 100 in (11, 12, 13):
        return "th"

    last_digit = magnitude % 10
    if last_digit == 1:
        return "st"
    if last_digit == 2:
        return "nd"
    if last_digit == 3:
        return "rd"
    return "th"
234
235
def ordinalize(number: int) -> str:
    """
    Format a number as an ordinal string such as 1st, 2nd, 3rd or 4th, by
    appending the suffix computed by :func:`ordinal`.

    Examples::

        >>> ordinalize(1)
        '1st'
        >>> ordinalize(2)
        '2nd'
        >>> ordinalize(1002)
        '1002nd'
        >>> ordinalize(1003)
        '1003rd'
        >>> ordinalize(-11)
        '-11th'
        >>> ordinalize(-1021)
        '-1021st'

    :param number: the number to format.
    """
    suffix = ordinal(number)
    return "{}{}".format(number, suffix)
258
259
def parameterize(string: str, separator: str = '-') -> str:
    """
    Replace special characters in a string so that it may be used as part of a
    'pretty' URL.

    Example::

        >>> parameterize(u"Donald E. Knuth")
        'donald-e-knuth'

    :param string: the text to convert; it is transliterated to ASCII first.
    :param separator: text that replaces every run of characters other than
        ASCII letters, digits, ``-`` and ``_``. It is inserted literally, so
        it may contain backslashes. With an empty separator such runs are
        simply removed. Defaults to ``'-'``.
    :returns: the lower-cased, URL-friendly string.
    """
    def insert_separator(_match):
        # A callable replacement is used as-is. Passing ``separator`` as a
        # replacement *string* would make ``re.sub`` treat backslashes in it
        # as escapes (raising ``re.error`` for a lone backslash).
        return separator

    string = transliterate(string)
    # Turn unwanted chars into the separator
    string = re.sub(r"(?i)[^a-z0-9\-_]+", insert_separator, string)
    if separator:
        re_sep = re.escape(separator)
        # No more than one of the separator in a row. Note that for a
        # multi-character separator the ``{2,}`` quantifier only applies to
        # its last character.
        string = re.sub(r'%s{2,}' % re_sep, insert_separator, string)
        # Remove leading/trailing separator.
        string = re.sub(r"(?i)^{sep}|{sep}$".format(sep=re_sep), '', string)

    return string.lower()
282
283
def pluralize(word: str) -> str:
    """
    Return the plural form of a word.

    Empty strings and words listed in ``UNCOUNTABLES`` (compared in lower
    case) are returned unchanged. Otherwise the first rule in ``PLURALS``
    whose pattern matches is applied.

    Examples::

        >>> pluralize("posts")
        'posts'
        >>> pluralize("octopus")
        'octopi'
        >>> pluralize("sheep")
        'sheep'
        >>> pluralize("CamelOctopus")
        'CamelOctopi'

    :param word: the word to pluralize.
    """
    if word and word.lower() not in UNCOUNTABLES:
        matching_rules = (
            pair for pair in PLURALS if re.search(pair[0], word)
        )
        first_match = next(matching_rules, None)
        if first_match is not None:
            pattern, substitute = first_match
            return re.sub(pattern, substitute, word)
    return word
307
308
def singularize(word: str) -> str:
    """
    Return the singular form of a word, the reverse of :func:`pluralize`.

    A word whose final part is one of the ``UNCOUNTABLES`` (matched
    case-insensitively at a word boundary) is returned unchanged. Otherwise
    the first rule in ``SINGULARS`` whose pattern matches is applied.

    Examples::

        >>> singularize("posts")
        'post'
        >>> singularize("octopi")
        'octopus'
        >>> singularize("sheep")
        'sheep'
        >>> singularize("word")
        'word'
        >>> singularize("CamelOctopi")
        'CamelOctopus'

    :param word: the word to singularize.
    """
    ends_with_uncountable = any(
        re.search(r'(?i)\b(%s)\Z' % uncountable, word)
        for uncountable in UNCOUNTABLES
    )
    if ends_with_uncountable:
        return word

    for pattern, substitute in SINGULARS:
        if re.search(pattern, word) is not None:
            return re.sub(pattern, substitute, word)
    return word
335
336
def tableize(word: str) -> str:
    """
    Build a table name from a model name the way Rails does: the name is
    passed through :func:`underscore` and the result through
    :func:`pluralize`.

    Examples::

        >>> tableize('RawScaledScorer')
        'raw_scaled_scorers'
        >>> tableize('egg_and_ham')
        'egg_and_hams'
        >>> tableize('fancyCategory')
        'fancy_categories'

    :param word: the model name to convert.
    """
    snake_cased = underscore(word)
    return pluralize(snake_cased)
352
353
def titleize(word: str) -> str:
    """
    Produce a nicer looking title: the string is run through
    :func:`underscore` and :func:`humanize` and every word is capitalized.
    :func:`titleize` is meant for creating pretty output.

    Examples::

        >>> titleize("man from the boondocks")
        'Man From The Boondocks'
        >>> titleize("x-men: the last stand")
        'X Men: The Last Stand'
        >>> titleize("TheManWithoutAPast")
        'The Man Without A Past'
        >>> titleize("raiders_of_the_lost_ark")
        'Raiders Of The Lost Ark'

    :param word: the string to convert.
    """
    def capitalize_start(match):
        return match.group(1).capitalize()

    title_cased = humanize(underscore(word)).title()
    # Re-capitalize each word start, taking an optional leading apostrophe
    # together with the character that follows it.
    return re.sub(r"\b('?\w)", capitalize_start, title_cased)
377
378
def transliterate(string: str) -> str:
    """
    Reduce a string to ASCII. The string is NFKD-normalized and every
    character that is still outside ASCII afterwards is dropped, so
    characters without an ASCII approximation simply disappear. The string
    must be ``unicode``.

    Examples::

        >>> transliterate('älämölö')
        'alamolo'
        >>> transliterate('Ærøskøbing')
        'rskbing'

    :param string: the text to transliterate.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    # Keep only code points in the ASCII range (0-127).
    return ''.join(char for char in decomposed if ord(char) < 128)
395
396
def underscore(word: str) -> str:
    """
    Convert a CamelCase or dashed expression into its lowercase, underscored
    form.

    Example::

        >>> underscore("DeviceType")
        'device_type'

    As a rule of thumb you can think of :func:`underscore` as the inverse of
    :func:`camelize`, though there are cases where that does not hold::

        >>> camelize(underscore("IOError"))
        'IoError'

    :param word: the string to convert.
    """
    # Applied in this order: first split a run of capitals from a following
    # capitalized word ("IOError" -> "IO_Error"), then split a lower-case
    # letter or digit from a following capital.
    word_boundaries = (
        r"([A-Z]+)([A-Z][a-z])",
        r"([a-z\d])([A-Z])",
    )
    for boundary in word_boundaries:
        word = re.sub(boundary, r'\1_\2', word)
    return word.replace("-", "_").lower()
417
418
# Built-in irregular nouns. The order is significant: ``_irregular`` inserts
# its rules at the front of the rule lists, so pairs registered later are
# tried before the ones registered earlier.
for _singular, _plural in (
    ('person', 'people'),
    ('man', 'men'),
    ('human', 'humans'),
    ('child', 'children'),
    ('sex', 'sexes'),
    ('move', 'moves'),
    ('cow', 'kine'),
    ('zombie', 'zombies'),
):
    _irregular(_singular, _plural)
# Do not leave the loop variables behind in the module namespace.
del _singular, _plural