1# -*- coding: utf-8 -*-
2"""Python phone number parsing and formatting library
3
4If you use this library, and want to be notified about important changes,
5please sign up to the libphonenumber mailing list at
6https://groups.google.com/forum/#!aboutgroup/libphonenumber-discuss.
7
8NOTE: A lot of methods in this module require Region Code strings. These must
9be provided using CLDR two-letter region-code format. These should be in
10upper-case. The list of the codes can be found here:
11http://www.iso.org/iso/country_codes/iso_3166_code_lists/country_names_and_code_elements.htm
12"""
13# Based on original Java code:
14# java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java
15# Copyright (C) 2009-2011 The Libphonenumber Authors
16#
17# Licensed under the Apache License, Version 2.0 (the "License");
18# you may not use this file except in compliance with the License.
19# You may obtain a copy of the License at
20#
21# http://www.apache.org/licenses/LICENSE-2.0
22#
23# Unless required by applicable law or agreed to in writing, software
24# distributed under the License is distributed on an "AS IS" BASIS,
25# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26# See the License for the specific language governing permissions and
27# limitations under the License.
28import sys
29import re
30
31from .re_util import fullmatch # Extra regexp function; see README
32from .util import UnicodeMixin, u, unicod, prnt, to_long
33from .util import U_EMPTY_STRING, U_SPACE, U_DASH, U_TILDE, U_ZERO, U_SEMICOLON
34from .unicode_util import digit as unicode_digit
35
36# Data class definitions
37from .phonenumber import PhoneNumber, CountryCodeSource
38from .phonemetadata import NumberFormat, PhoneMetadata, REGION_CODE_FOR_NON_GEO_ENTITY
39
40# Import auto-generated data structures
try:
    from .data import _COUNTRY_CODE_TO_REGION_CODE
except ImportError:  # pragma no cover
    # Before the generated code exists, the data/ directory is empty.
    # The generation process imports this module, creating a circular
    # dependency. The hack below works around this: when the running script
    # is one of the two generator scripts, a minimal stub map is installed;
    # in every other situation the ImportError is re-raised unchanged.
    import os
    if (os.path.basename(sys.argv[0]) == "buildmetadatafromxml.py" or
        os.path.basename(sys.argv[0]) == "buildprefixdata.py"):
        prnt("Failed to import generated data (but OK as during autogeneration)", file=sys.stderr)
        # Note: _regenerate_derived_data() indexes the map with
        # _NANPA_COUNTRY_CODE (1), so the stub needs an entry for that key.
        _COUNTRY_CODE_TO_REGION_CODE = {1: ("US",)}
    else:
        raise

# Set the master map from country code to region code. The
# extra level of indirection allows the unit test to replace
# the map with test data.
COUNTRY_CODE_TO_REGION_CODE = _COUNTRY_CODE_TO_REGION_CODE
59
60# Naming convention for phone number arguments and variables:
61# - string arguments are named 'number'
62# - PhoneNumber objects are named 'numobj'
63
# Flags to use when compiling regular expressions for phone numbers:
# Unicode-aware character classes plus case-insensitive matching.
_REGEX_FLAGS = re.UNICODE | re.IGNORECASE
# The minimum length of the national significant number.
_MIN_LENGTH_FOR_NSN = 2
# The maximum length of the national significant number. The ITU says the
# maximum length should be 15, but we have found longer numbers in Germany.
_MAX_LENGTH_FOR_NSN = 17
# The maximum length of the country calling code.
_MAX_LENGTH_COUNTRY_CODE = 3
# We don't allow input strings for parsing to be longer than 250 chars. This
# prevents malicious input from overflowing the regular-expression engine.
_MAX_INPUT_STRING_LENGTH = 250
# Region-code for the unknown region.
UNKNOWN_REGION = u("ZZ")
# The country calling code (1) that is shared by several regions; the set of
# regions sharing it is collected in _NANPA_REGIONS.
_NANPA_COUNTRY_CODE = 1
# Map of country calling codes that use a mobile token before the area
# code. One example of when this is relevant is when determining the length of
# the national destination code, which should be the length of the area code
# plus the length of the mobile token.
_MOBILE_TOKEN_MAPPINGS = {54: u('9')}
# Set of country codes that have geographically assigned mobile numbers (see
# _GEO_MOBILE_COUNTRIES below) which are not based on *area codes*. For example,
# in China mobile numbers start with a carrier indicator, and beyond that are
# geographically assigned: this carrier indicator is not considered to be an
# area code.
_GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES = frozenset((
    86,))  # China
# Set of country codes that do not have a national prefix but do have area
# codes.
_COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES = frozenset((
    52,))  # Mexico

# Set of country calling codes that have geographically assigned mobile
# numbers. This may not be complete; we add calling codes case by case, as we
# find geographical mobile numbers or hear from user reports. Note that
# countries like the US, where we can't distinguish between fixed-line or
# mobile numbers, are not listed here, since we consider FIXED_LINE_OR_MOBILE
# to be a possibly geographically-related type anyway (like FIXED_LINE).
# The union below also pulls in every code from
# _GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES.
_GEO_MOBILE_COUNTRIES = _GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES | set((
    52,  # Mexico
    54,  # Argentina
    55,  # Brazil
    62))  # Indonesia: some prefixes only (fixed CDMA wireless)
# The PLUS_SIGN signifies the international prefix.
_PLUS_SIGN = u("+")
_STAR_SIGN = u('*')
# Literal markers used when reading and writing RFC 3966 ("tel:") style
# numbers: the extension prefix, the URI scheme prefix, and the
# phone-context and ISDN-subaddress parameter prefixes.
_RFC3966_EXTN_PREFIX = u(";ext=")
_RFC3966_PREFIX = u("tel:")
_RFC3966_PHONE_CONTEXT = u(";phone-context=")
_RFC3966_ISDN_SUBADDRESS = u(";isub=")
114
# Simple ASCII digits map (each digit maps to itself) used to populate
# _ALPHA_PHONE_MAPPINGS, _DIALLABLE_CHAR_MAPPINGS and
# _ALL_PLUS_NUMBER_GROUPING_SYMBOLS.
_ASCII_DIGITS_MAP = {u("0"): u("0"), u("1"): u("1"),
                     u("2"): u("2"), u("3"): u("3"),
                     u("4"): u("4"), u("5"): u("5"),
                     u("6"): u("6"), u("7"): u("7"),
                     u("8"): u("8"), u("9"): u("9")}

# Letter -> keypad digit mapping. Only upper-case variants of alpha
# characters are stored.
_ALPHA_MAPPINGS = {u("A"): u("2"),
                   u("B"): u("2"),
                   u("C"): u("2"),
                   u("D"): u("3"),
                   u("E"): u("3"),
                   u("F"): u("3"),
                   u("G"): u("4"),
                   u("H"): u("4"),
                   u("I"): u("4"),
                   u("J"): u("5"),
                   u("K"): u("5"),
                   u("L"): u("5"),
                   u("M"): u("6"),
                   u("N"): u("6"),
                   u("O"): u("6"),
                   u("P"): u("7"),
                   u("Q"): u("7"),
                   u("R"): u("7"),
                   u("S"): u("7"),
                   u("T"): u("8"),
                   u("U"): u("8"),
                   u("V"): u("8"),
                   u("W"): u("9"),
                   u("X"): u("9"),
                   u("Y"): u("9"),
                   u("Z"): u("9"), }
# For performance reasons, amalgamate both into one map (letters and digits).
_ALPHA_PHONE_MAPPINGS = dict(_ALPHA_MAPPINGS, **_ASCII_DIGITS_MAP)

# A map that contains characters that are essential when dialling. That means
# any of the characters in this map must not be removed from a number when
# dialling, otherwise the call will not reach the intended destination.
# Contents: "+", "*", "#" and the ASCII digits, each mapped to itself.
_DIALLABLE_CHAR_MAPPINGS = dict({_PLUS_SIGN: _PLUS_SIGN,
                                 u('*'): u('*'),
                                 u('#'): u('#')},
                                **_ASCII_DIGITS_MAP)

# Separate map of all symbols that we wish to retain when formatting alpha
# numbers. This includes digits, ASCII letters and number grouping symbols
# such as "-" and " ". Variant dash, slash, space and full-stop characters
# are mapped onto their plain ASCII counterpart.
_ALL_PLUS_NUMBER_GROUPING_SYMBOLS = dict({u("-"): u("-"),  # Add grouping symbols.
                                          u("\uFF0D"): u("-"),
                                          u("\u2010"): u("-"),
                                          u("\u2011"): u("-"),
                                          u("\u2012"): u("-"),
                                          u("\u2013"): u("-"),
                                          u("\u2014"): u("-"),
                                          u("\u2015"): u("-"),
                                          u("\u2212"): u("-"),
                                          u("/"): u("/"),
                                          u("\uFF0F"): u("/"),
                                          u(" "): u(" "),
                                          u("\u3000"): u(" "),
                                          u("\u2060"): u(" "),
                                          u("."): u("."),
                                          u("\uFF0E"): u(".")},
                                         # Put (lower letter -> upper letter) and
                                         # (upper letter -> upper letter) mappings,
                                         # plus the digit -> digit mappings.
                                         **dict([(_c.lower(), _c) for _c in _ALPHA_MAPPINGS.keys()] +
                                                [(_c, _c) for _c in _ALPHA_MAPPINGS.keys()],
                                                **_ASCII_DIGITS_MAP))
185
# Pattern that makes it easy to distinguish whether a region has a single international dialing
# prefix or not. If a region has a single international prefix (e.g. 011 in USA), it will be
# represented as a string that contains a sequence of ASCII digits, and possibly a tilde, which
# signals waiting for the tone. If there are multiple available international prefixes in a
# region, they will be represented as a regex string that always contains one or more characters
# that are not ASCII digits or a tilde.
_SINGLE_INTERNATIONAL_PREFIX = re.compile(u("[\\d]+(?:[~\u2053\u223C\uFF5E][\\d]+)?"))

# Regular expression of acceptable punctuation found in phone numbers, used to find numbers in
# text and to decide what is a viable phone number. This excludes diallable characters.
# This consists of dash characters, white space characters, full stops, slashes, square brackets,
# parentheses and tildes. It also includes the letter 'x' as that is found as a placeholder for
# carrier information in some phone numbers. Full-width variants are also present.
# The value is the *contents* of a character class (no surrounding brackets), so that it can be
# combined with other character sets below.
_VALID_PUNCTUATION = (u("-x\u2010-\u2015\u2212\u30FC\uFF0D-\uFF0F ") +
                      u("\u00A0\u00AD\u200B\u2060\u3000()\uFF08\uFF09\uFF3B\uFF3D.\\[\\]/~\u2053\u223C\uFF5E"))

_DIGITS = unicod('\\d')  # Java "\\p{Nd}", so need "(?u)" or re.UNICODE wherever this is used
# We accept alpha characters in phone numbers, ASCII only, upper and lower
# case.
_VALID_ALPHA = (U_EMPTY_STRING.join(_ALPHA_MAPPINGS.keys()) +
                U_EMPTY_STRING.join([_k.lower() for _k in _ALPHA_MAPPINGS.keys()]))
# Characters accepted as a plus sign: ASCII "+" and U+FF0B.
_PLUS_CHARS = u("+\uFF0B")
# One or more plus characters.
_PLUS_CHARS_PATTERN = re.compile(u("[") + _PLUS_CHARS + u("]+"))
# One or more characters from _VALID_PUNCTUATION.
_SEPARATOR_PATTERN = re.compile(u("[") + _VALID_PUNCTUATION + u("]+"))
# A single digit, captured as group 1.
_CAPTURING_DIGIT_PATTERN = re.compile(u("(") + _DIGITS + u(")"), re.UNICODE)

# Regular expression of acceptable characters that may start a phone number
# for the purposes of parsing. This allows us to strip away meaningless
# prefixes to phone numbers that may be mistakenly given to us. This consists
# of digits, the plus symbol and arabic-indic digits. This does not contain
# alpha characters, although they may be used later in the number. It also
# does not include other punctuation, as this will be stripped later during
# parsing and is of no information value when parsing a number.
_VALID_START_CHAR = u("[") + _PLUS_CHARS + _DIGITS + u("]")
_VALID_START_CHAR_PATTERN = re.compile(_VALID_START_CHAR, re.UNICODE)

# Regular expression of characters typically used to start a second phone
# number for the purposes of parsing. This allows us to strip off parts of the
# number that are actually the start of another number, such as for: (530)
# 583-6985 x302/x2303 -> the second extension here makes this actually two
# phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove the
# second extension so that the first number is parsed correctly.
# The pattern is: a backslash or forward slash, optional spaces, then "x".
_SECOND_NUMBER_START = u("[\\\\/] *x")
_SECOND_NUMBER_START_PATTERN = re.compile(_SECOND_NUMBER_START)

# Regular expression of trailing characters that we want to remove. We remove
# all characters that are not alpha or numerical characters. The hash
# character is retained here, as it may signify the previous block was an
# extension.
#
# The original Java regexp is:
#   [[\\P{N}&&\\P{L}]&&[^#]]+$
# which splits out as:
#   [      ]+$         : >=1 of the following chars at end of string
#    [    ]&&[  ]      : intersection of these two sets of chars
#    [ && ]            : intersection of these two sets of chars
#     \\P{N}           : characters without the "Number" Unicode property
#     \\P{L}           : characters without the "Letter" Unicode property
#     [^#]             : character other than hash
# which nets down to: >=1 non-Number, non-Letter, non-# characters at string end
# In Python Unicode regexp mode '(?u)', the class '[^#\w]' will match anything
# that is not # and is not alphanumeric and is not underscore; the explicit
# "_" alternative in the pattern adds the underscore back in.
_UNWANTED_END_CHARS = u(r"(?u)(?:_|[^#\w])+$")
_UNWANTED_END_CHAR_PATTERN = re.compile(_UNWANTED_END_CHARS)

# We use this pattern to check if the phone number has at least three letters
# in it - if so, then we treat it as a number where some phone-number digits
# are represented by letters.
_VALID_ALPHA_PHONE_PATTERN = re.compile(u("(?:.*?[A-Za-z]){3}.*"))

# Regular expression of viable phone numbers. This is location
# independent. Checks we have at least three leading digits, and only valid
# punctuation, alpha characters and digits in the phone number. Does not
# include extension data. The symbol 'x' is allowed here as valid punctuation
# since it is often used as a placeholder for carrier codes, for example in
# Brazilian phone numbers. We also allow multiple "+" characters at the start.
# Corresponds to the following:
# [digits]{minLengthNsn}|
# plus_sign*(([punctuation]|[star])*[digits]){3,}([punctuation]|[star]|[digits]|[alpha])*
#
# The first reg-ex is to allow short numbers (two digits long) to be parsed if
# they are entered as "15" etc, but only if there is no punctuation in
# them. The second expression restricts the number of digits to three or more,
# but then allows them to be in international form, and to have
# alpha-characters and punctuation.
#
# Note VALID_PUNCTUATION starts with a -, so must be the first in the range.
_VALID_PHONE_NUMBER = (_DIGITS + (u("{%d}") % _MIN_LENGTH_FOR_NSN) + u("|") +
                       u("[") + _PLUS_CHARS + u("]*(?:[") + _VALID_PUNCTUATION + _STAR_SIGN + u("]*") + _DIGITS + u("){3,}[") +
                       _VALID_PUNCTUATION + _STAR_SIGN + _VALID_ALPHA + _DIGITS + u("]*"))

# Default extension prefix to use when formatting. This will be put in front
# of any extension component of the number, after the main national number is
# formatted. For example, if you wish the default extension formatting to be
# " extn: 3456", then you should specify " extn: " here as the default
# extension prefix. This can be overridden by region-specific preferences.
_DEFAULT_EXTN_PREFIX = u(" ext. ")
286
287
# Helper for assembling the extension regular expressions used when parsing.
def _extn_digits(max_length):
    """Return a regex fragment capturing between 1 and max_length digits.

    Arguments:
    max_length -- the largest number of digits the capturing group accepts.

    Returns a string of the form "(<digit-class>{1,max_length})", i.e. exactly
    one capturing group.
    """
    repetition_and_close = u("{1,%d})") % max_length
    open_group = u("(")
    return open_group + _DIGITS + repetition_and_close
292
293
# Helper initialiser method to create the regular-expression pattern to match extensions.
# Note that there are currently six capturing groups for the extension itself. If this number is
# changed, _maybe_strip_extension needs to be updated.
def _create_extn_pattern(for_parsing):
    """Build the alternation of all supported ways of writing an extension.

    Arguments:
    for_parsing -- when true, two extra alternatives (auto-dialling ",," / ";"
              labels, and comma-only labels) are appended; these are supported
              when parsing extensions but not when matching.

    Returns the pattern as an (uncompiled) string. Each alternative contains
    exactly one capturing group, the one produced by _extn_digits, so the
    result has four capturing groups, or six when for_parsing is true.
    """
    # We cap the maximum length of an extension based on the ambiguity of the way the extension is
    # prefixed. As per ITU, the officially allowed length for extensions is actually 40, but we
    # don't support this since we haven't seen real examples and this introduces many false
    # interpretations as the extension labels are not standardized.
    max_digits_explicit_label = 20
    max_digits_likely_label = 15
    max_digits_ambiguous_char = 9
    max_digits_not_sure = 6

    alternation = u("|")
    # Spaces (incl. non-breaking), tabs and commas between the number and the label.
    separators_before_label = u("[ \u00A0\\t,]*")
    # Optional full stop (.) or colon, followed by zero or more spaces/tabs/commas/dashes.
    chars_after_label = u("[:\\.\uFF0E]?[ \u00A0\\t,-]*")
    # An optional trailing hash after the extension digits.
    trailing_hash = u("#?")

    # Here the extension is called out in more explicit way, i.e mentioning it obvious patterns
    # like "ext.". Canonical-equivalence doesn't seem to be an option with Android java, so we
    # allow two options for representing the accented o - the character itself, and one in the
    # unicode decomposed form with the combining acute accent.
    explicit_labels = u("(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|\u0434\u043E\u0431|anexo)")
    # One-character symbols that can be used to indicate an extension, and less commonly used
    # or more ambiguous extension labels.
    ambiguous_labels = u("(?:[x\uFF58#\uFF03~\uFF5E]|int|\uFF49\uFF4E\uFF54)")
    # When extension is not separated clearly.
    unclear_separator = u("[- ]+")

    # The first alternative covers RFC 3966 format, where the extension is added using
    # ";ext=". The second is more generic, where the extension is mentioned with explicit labels
    # like "ext:". In both these cases we allow more digits in the extension than for any other
    # label. The third one captures single character extension labels or less commonly used
    # labels. In such cases we capture fewer extension digits in order to reduce the
    # chance of falsely interpreting two numbers beside each other as a number + extension. The
    # fourth one covers the special case of American numbers where the extension is written with a
    # hash at the end, such as "- 503#".
    alternatives = [
        _RFC3966_EXTN_PREFIX + _extn_digits(max_digits_explicit_label),
        (separators_before_label + explicit_labels + chars_after_label +
         _extn_digits(max_digits_explicit_label) + trailing_hash),
        (separators_before_label + ambiguous_labels + chars_after_label +
         _extn_digits(max_digits_ambiguous_char) + trailing_hash),
        unclear_separator + _extn_digits(max_digits_not_sure) + u("#"),
    ]

    # Additional alternatives that are supported when parsing extensions, not when matching.
    if for_parsing:
        # Same as separators_before_label, but not matching comma as the
        # extension label itself may consist of commas.
        separators_no_comma = u("[ \u00A0\\t]*")
        # ",," is commonly used for auto dialling the extension when connected. Semi-colon
        # works in Iphone and Android also to pop up a button with the extension number following.
        auto_dialling_labels = u("(?:,{2}|;)")

        # This alternative is exclusively for extension autodialling formats which are used
        # when dialling, and in this case we accept longer extensions.
        alternatives.append(separators_no_comma + auto_dialling_labels + chars_after_label +
                            _extn_digits(max_digits_likely_label) + trailing_hash)
        # This alternative is more liberal on the number of commas that act as extension
        # labels, so we have a strict cap on the number of digits in such extensions.
        alternatives.append(separators_no_comma + u("(?:,)+") + chars_after_label +
                            _extn_digits(max_digits_ambiguous_char) + trailing_hash)

    return alternation.join(alternatives)
367
368
# Regexp of all possible ways to write extensions, for use when parsing. This
# will be run as a case-insensitive regexp match. Wide character versions are
# also provided after each ASCII version.
_EXTN_PATTERNS_FOR_PARSING = _create_extn_pattern(True)
# The same, without the parsing-only alternatives, for use when matching.
_EXTN_PATTERNS_FOR_MATCHING = _create_extn_pattern(False)

# Regular expression of valid global-number-digits for the phone-context
# parameter, following the syntax defined in RFC3966: a literal plus sign
# followed by digits and optional visual separators, with at least one digit.
_RFC3966_VISUAL_SEPARATOR = "[\\-\\.\\(\\)]?"
_RFC3966_PHONE_DIGIT = "(" + _DIGITS + "|" + _RFC3966_VISUAL_SEPARATOR + ")"
_RFC3966_GLOBAL_NUMBER_DIGITS = "^\\" + _PLUS_SIGN + _RFC3966_PHONE_DIGIT + "*" + _DIGITS + _RFC3966_PHONE_DIGIT + "*$"
_RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN = re.compile(_RFC3966_GLOBAL_NUMBER_DIGITS)

# Regular expression of valid domainname for the phone-context parameter,
# following the syntax defined in RFC3966: zero or more dot-terminated domain
# labels, then a top label (which must start with a letter) and an optional
# trailing dot.
_ALPHANUM = _VALID_ALPHA + _DIGITS
_RFC3966_DOMAINLABEL = "[" + _ALPHANUM + "]+((\\-)*[" + _ALPHANUM + "])*"
_RFC3966_TOPLABEL = "[" + _VALID_ALPHA + "]+((\\-)*[" + _ALPHANUM + "])*"
_RFC3966_DOMAINNAME = "^(" + _RFC3966_DOMAINLABEL + "\\.)*" + _RFC3966_TOPLABEL + "\\.?$"
_RFC3966_DOMAINNAME_PATTERN = re.compile(_RFC3966_DOMAINNAME)

# Regexp of all known extension prefixes used by different regions followed by
# 1 or more valid digits, for use when parsing. Anchored at the end of the
# string only.
_EXTN_PATTERN = re.compile(u("(?:") + _EXTN_PATTERNS_FOR_PARSING + u(")$"), _REGEX_FLAGS)

# We append optionally the extension pattern to the end here, as a valid phone
# number may have an extension prefix appended, followed by 1 or more digits.
_VALID_PHONE_NUMBER_PATTERN = re.compile(_VALID_PHONE_NUMBER + u("(?:") + _EXTN_PATTERNS_FOR_PARSING + u(")?"), _REGEX_FLAGS)

# We use a non-capturing group because Python's re.split() returns any capturing
# groups interspersed with the other results (unlike Java's Pattern.split()).
NON_DIGITS_PATTERN = re.compile(u("(?:\\D+)"))

# The FIRST_GROUP_PATTERN was originally set to \1 but there are some
# countries for which the first group is not used in the national pattern
# (e.g. Argentina) so the \1 group does not match correctly. Therefore, we
# use \d, so that the first group actually used in the pattern will be
# matched.
_FIRST_GROUP_PATTERN = re.compile(u(r"(\\\d)"))
# Constants used in the formatting rules to represent the national prefix, first group and
# carrier code respectively.
_NP_STRING = "$NP"
_FG_STRING = "$FG"
_CC_STRING = "$CC"

# A pattern that is used to determine if the national prefix formatting rule
# has the first group only, i.e., does not start with the national
# prefix. Note that the pattern explicitly allows for unbalanced parentheses.
_FIRST_GROUP_ONLY_PREFIX_PATTERN = re.compile("\\(?\\\\1\\)?")
418
419
class PhoneNumberFormat(object):
    """
    The output formats a phone number can be rendered in.

    INTERNATIONAL and NATIONAL follow the definition in ITU-T Recommendation
    E123, except that local conventions (such as '-' rather than whitespace
    as separator) are respected. As an example, the Google Switzerland office
    number is "+41 44 668 1800" in INTERNATIONAL format and "044 668 1800" in
    NATIONAL format. E164 is the INTERNATIONAL format with no formatting
    applied, e.g. "+41446681800". RFC3966 is the INTERNATIONAL format with all
    spaces and other separating symbols replaced by a hyphen, any extension
    appended with ";ext=", and a "tel:" prefix added,
    e.g. "tel:+41-44-668-1800".

    Note: If you are considering storing the number in a neutral format, you
    are highly advised to use the PhoneNumber class.
    """
    E164 = 0
    INTERNATIONAL = 1
    NATIONAL = 2
    RFC3966 = 3

    @classmethod
    def to_string(cls, val):
        """Return the symbolic name of a PhoneNumberFormat value.

        Any value that equals none of the constants is rendered as
        "INVALID (<val>)" using integer formatting.
        """
        # Each constant's name doubles as its printable label; candidates
        # are compared in declaration order.
        for label in ("E164", "INTERNATIONAL", "NATIONAL", "RFC3966"):
            if val == getattr(PhoneNumberFormat, label):
                return u(label)
        return u("INVALID (%d)" % val)
456
457
class PhoneNumberType(object):
    """The kinds of phone number that can be distinguished."""
    FIXED_LINE = 0
    MOBILE = 1
    # In some regions (e.g. the USA), it is impossible to distinguish between
    # fixed-line and mobile numbers by looking at the phone number itself.
    FIXED_LINE_OR_MOBILE = 2
    # Freephone lines
    TOLL_FREE = 3
    PREMIUM_RATE = 4
    # The cost of this call is shared between the caller and the recipient,
    # and is hence typically less than PREMIUM_RATE calls. See
    # http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
    SHARED_COST = 5
    # Voice over IP numbers. This includes TSoIP (Telephony Service over IP).
    VOIP = 6
    # A personal number is associated with a particular person, and may be
    # routed to either a MOBILE or FIXED_LINE number. Some more information
    # can be found here: http://en.wikipedia.org/wiki/Personal_Numbers
    PERSONAL_NUMBER = 7
    PAGER = 8
    # Used for "Universal Access Numbers" or "Company Numbers". They may be
    # further routed to specific offices, but allow one number to be used for
    # a company.
    UAN = 9
    # Used for "Voice Mail Access Numbers".
    VOICEMAIL = 10
    # A phone number is of type UNKNOWN when it does not fit any of the known
    # patterns for a specific region.
    UNKNOWN = 99

    @classmethod
    def values(cls):
        """Return a tuple of every PhoneNumberType constant, UNKNOWN last."""
        kinds = PhoneNumberType
        return (kinds.FIXED_LINE,
                kinds.MOBILE,
                kinds.FIXED_LINE_OR_MOBILE,
                kinds.TOLL_FREE,
                kinds.PREMIUM_RATE,
                kinds.SHARED_COST,
                kinds.VOIP,
                kinds.PERSONAL_NUMBER,
                kinds.PAGER,
                kinds.UAN,
                kinds.VOICEMAIL,
                kinds.UNKNOWN)

    @classmethod
    def to_string(cls, val):
        """Return the symbolic name of a PhoneNumberType value.

        Any value that equals none of the constants is rendered as
        "INVALID (<val>)" using integer formatting.
        """
        # Each constant's name doubles as its printable label; candidates
        # are compared in declaration order.
        labels = ("FIXED_LINE",
                  "MOBILE",
                  "FIXED_LINE_OR_MOBILE",
                  "TOLL_FREE",
                  "PREMIUM_RATE",
                  "SHARED_COST",
                  "VOIP",
                  "PERSONAL_NUMBER",
                  "PAGER",
                  "UAN",
                  "VOICEMAIL",
                  "UNKNOWN")
        for label in labels:
            if val == getattr(PhoneNumberType, label):
                return u(label)
        return u("INVALID (%d)" % val)
533
534
class MatchType(object):
    """The possible outcomes of comparing two phone numbers."""
    # Not a telephone number
    NOT_A_NUMBER = 0
    # None of the match types below apply
    NO_MATCH = 1
    # Returns SHORT_NSN_MATCH if either or both has no region specified, or
    # the region specified is the same, and one NSN could be a shorter version
    # of the other number. This includes the case where one has an extension
    # specified, and the other does not.
    SHORT_NSN_MATCH = 2
    # Either or both has no region specified, and the NSNs and extensions are
    # the same.
    NSN_MATCH = 3
    # The country_code, NSN, presence of a leading zero for Italian numbers
    # and any extension present are the same.
    EXACT_MATCH = 4

    @classmethod
    def to_string(cls, val):
        """Return the symbolic name of a MatchType value.

        Any value that equals none of the constants is rendered as
        "INVALID (<val>)" using integer formatting.
        """
        # Each constant's name doubles as its printable label; candidates
        # are compared in declaration order.
        labels = ("NOT_A_NUMBER",
                  "NO_MATCH",
                  "SHORT_NSN_MATCH",
                  "NSN_MATCH",
                  "EXACT_MATCH")
        for label in labels:
            if val == getattr(MatchType, label):
                return u(label)
        return u("INVALID (%d)" % val)
568
569
class ValidationResult(object):
    """The outcomes of testing whether a PhoneNumber is a possible number."""
    # The number length matches that of valid numbers for this region.
    IS_POSSIBLE = 0
    # The number length matches that of local numbers for this region only
    # (i.e. numbers that may be able to be dialled within an area, but do not
    # have all the information to be dialled from anywhere inside or outside
    # the country).
    IS_POSSIBLE_LOCAL_ONLY = 4
    # The number has an invalid country calling code.
    INVALID_COUNTRY_CODE = 1
    # The number is shorter than all valid numbers for this region.
    TOO_SHORT = 2
    # The number is longer than the shortest valid numbers for this region,
    # shorter than the longest valid numbers for this region, and does not
    # itself have a number length that matches valid numbers for this region.
    # This can also be returned in the case where
    # is_possible_number_for_type_with_reason was called, and there are no
    # numbers of this type at all for this region.
    INVALID_LENGTH = 5
    # The number is longer than all valid numbers for this region.
    TOO_LONG = 3

    @classmethod
    def to_string(cls, val):
        """Return the symbolic name of a ValidationResult value.

        Any value that equals none of the constants is rendered as
        "INVALID (<val>)" using integer formatting.
        """
        # Each constant's name doubles as its printable label; candidates
        # are compared in declaration order (not numeric order).
        labels = ("IS_POSSIBLE",
                  "IS_POSSIBLE_LOCAL_ONLY",
                  "INVALID_COUNTRY_CODE",
                  "TOO_SHORT",
                  "INVALID_LENGTH",
                  "TOO_LONG")
        for label in labels:
            if val == getattr(ValidationResult, label):
                return u(label)
        return u("INVALID (%d)" % val)
610
611
# Derived data structures, filled in by _regenerate_derived_data() from
# COUNTRY_CODE_TO_REGION_CODE.
SUPPORTED_REGIONS = set()
COUNTRY_CODES_FOR_NON_GEO_REGIONS = set()
_NANPA_REGIONS = set()


def _regenerate_derived_data():
    """Rebuild the derived sets from COUNTRY_CODE_TO_REGION_CODE.

    SUPPORTED_REGIONS receives every region code except the non-geographical
    entity marker, COUNTRY_CODES_FOR_NON_GEO_REGIONS receives each country
    code whose only region is that marker, and _NANPA_REGIONS receives the
    regions listed under _NANPA_COUNTRY_CODE.

    The three sets are emptied and refilled in place rather than rebound, so
    existing references to the set objects remain valid.

    Raises KeyError if the map has no entry for _NANPA_COUNTRY_CODE.
    """
    SUPPORTED_REGIONS.clear()
    COUNTRY_CODES_FOR_NON_GEO_REGIONS.clear()
    for calling_code, regions in COUNTRY_CODE_TO_REGION_CODE.items():
        only_non_geo = (len(regions) == 1 and
                        regions[0] == REGION_CODE_FOR_NON_GEO_ENTITY)
        if only_non_geo:
            COUNTRY_CODES_FOR_NON_GEO_REGIONS.add(calling_code)
            continue
        SUPPORTED_REGIONS.update(regions)
    # The marker can still arrive via a multi-region entry; drop it if present.
    SUPPORTED_REGIONS.discard(REGION_CODE_FOR_NON_GEO_ENTITY)
    _NANPA_REGIONS.clear()
    _NANPA_REGIONS.update(COUNTRY_CODE_TO_REGION_CODE[_NANPA_COUNTRY_CODE])


# Populate the derived sets once at import time.
_regenerate_derived_data()
634
635
def _copy_number_format(other):
    """Build a new NumberFormat with the same field values as other.

    Arguments:
    other -- the NumberFormat object to duplicate.

    Returns a new NumberFormat whose _mutable flag is set to True. The
    leading_digits_pattern sequence is copied into a fresh list, so the
    duplicate does not share that list with the source object.
    """
    field_values = {
        "pattern": other.pattern,
        "format": other.format,
        "leading_digits_pattern": list(other.leading_digits_pattern),
        "national_prefix_formatting_rule": other.national_prefix_formatting_rule,
        "national_prefix_optional_when_formatting": other.national_prefix_optional_when_formatting,
        "domestic_carrier_code_formatting_rule": other.domestic_carrier_code_formatting_rule,
    }
    duplicate = NumberFormat(**field_values)
    duplicate._mutable = True
    return duplicate
646
647
def _extract_possible_number(number):
    """Pull the substring that could be a phone number out of the input.

    Everything before the first character matching _VALID_START_CHAR_PATTERN
    (a plus sign or a digit) is discarded, which removes prefixes such as
    "Tel:". The remainder is then shortened twice, in this order:
      1. trailing characters matched by _UNWANTED_END_CHAR_PATTERN are
         removed;
      2. anything from the start of a second number or extension onwards is
         removed. For example, in "(530) 583-6985 x302/x2303" the "/x2303"
         is a second extension, making this really two phone numbers; it is
         cut off so that the first number is parsed correctly.

    Arguments:
    number -- The string that might contain a phone number.

    Returns the extracted candidate, or an empty string if the input holds
    no character that can start a phone number.
    """
    start_match = _VALID_START_CHAR_PATTERN.search(number)
    if not start_match:
        return U_EMPTY_STRING

    candidate = number[start_match.start():]
    # Apply the two truncation rules one after the other; the second sees
    # the result of the first.
    for cutoff_pattern in (_UNWANTED_END_CHAR_PATTERN,
                           _SECOND_NUMBER_START_PATTERN):
        cutoff = cutoff_pattern.search(candidate)
        if cutoff:
            candidate = candidate[:cutoff.start()]
    return candidate
682
683
def _is_viable_phone_number(number):
    """Decide whether a string could plausibly be a phone number.

    The string must be at least _MIN_LENGTH_FOR_NSN characters long and must
    match _VALID_PHONE_NUMBER_PATTERN in its entirety. The number does not
    need to be normalized beforehand, but leading non-number symbols are
    expected to have been removed already, for instance by
    _extract_possible_number.

    Arguments:
    number -- string to be checked for viability as a phone number

    Returns True if the number could be a phone number of some sort, otherwise
    False.
    """
    long_enough = len(number) >= _MIN_LENGTH_FOR_NSN
    return long_enough and bool(fullmatch(_VALID_PHONE_NUMBER_PATTERN, number))
703
704
def _normalize(number):
    """Normalize a string of characters representing a phone number.

    Two cases are distinguished:
     - If the whole string matches _VALID_ALPHA_PHONE_PATTERN it is treated
       as an alpha/vanity number: every character is translated through
       _ALPHA_PHONE_MAPPINGS (the telephone keypad mapping) and characters
       without a mapping, such as punctuation, are dropped.
     - Otherwise the string is handed to normalize_digits_only, which
       converts digits to their plain form and strips everything else.

    Arguments:
    number -- string representing a phone number

    Returns the normalized string version of the phone number.
    """
    if fullmatch(_VALID_ALPHA_PHONE_PATTERN, number):
        return _normalize_helper(number, _ALPHA_PHONE_MAPPINGS, True)
    return normalize_digits_only(number)
731
732
def normalize_digits_only(number, keep_non_digits=False):
    """Normalizes a string of characters representing a phone number.

    Every character for which unicode_digit reports a digit value (for
    example wide-ascii or arabic-indic numerals) is replaced by the string
    form of that value. All other characters, such as punctuation and
    letters, are stripped unless keep_non_digits is set, in which case they
    are passed through unchanged.

    Arguments:
    number -- a string representing a phone number
    keep_non_digits -- whether to keep non-digits

    Returns the normalized string version of the phone number.
    """
    # Collect the pieces and join once at the end, instead of growing a
    # string with += for every character.
    pieces = []
    for char in unicod(number):
        digit = unicode_digit(char, -1)
        if digit != -1:
            pieces.append(unicod(digit))
        elif keep_non_digits:
            pieces.append(char)
    return U_EMPTY_STRING.join(pieces)
755
756
def normalize_diallable_chars_only(number):
    """Reduce a phone number string to its diallable characters.

    Each character is looked up in _DIALLABLE_CHAR_MAPPINGS; characters that
    have no entry there (i.e. that are not diallable on a mobile phone
    keypad, including all non-ASCII digits) are removed.

    Arguments:
    number -- a string of characters representing a phone number

    Returns the normalized string version of the phone number.
    """
    return _normalize_helper(number,
                             _DIALLABLE_CHAR_MAPPINGS,
                             remove_non_matches=True)
769
770
def convert_alpha_characters_in_number(number):
    """Replace letters in a number with their keypad digits.

    Characters found in _ALPHA_PHONE_MAPPINGS are replaced by the mapped
    value; every other character is kept, so existing formatting survives.

    Arguments:
    number -- a string of characters representing a phone number

    Returns the converted string.
    """
    return _normalize_helper(number,
                             _ALPHA_PHONE_MAPPINGS,
                             remove_non_matches=False)
775
776
def length_of_geographical_area_code(numobj):
    """Return length of the geographical area code for a number.

    The length is derived from the PhoneNumber object passed in, so that
    clients can split a national significant number into geographical area
    code and subscriber number. The split is made in such a way that the
    resulting subscriber number should be diallable, at least on some
    devices. An example of how this could be used:

    >>> import phonenumbers
    >>> numobj = phonenumbers.parse("16502530000", "US")
    >>> nsn = phonenumbers.national_significant_number(numobj)
    >>> ac_len = phonenumbers.length_of_geographical_area_code(numobj)
    >>> if ac_len > 0:
    ...     area_code = nsn[:ac_len]
    ...     subscriber_number = nsn[ac_len:]
    ... else:
    ...     area_code = ""
    ...     subscriber_number = nsn

    N.B.: area code is a very ambiguous concept, so the I18N team generally
    recommends against using it for most purposes, and recommends the more
    general national_number instead. Read the following carefully before
    deciding to use this method:

     - geographical area codes change over time, and this method honors those
       changes; therefore, it doesn't guarantee the stability of the result
       it produces.
     - subscriber numbers may not be diallable from all devices (notably
       mobile devices, which typically require the full national_number to
       be dialled in most countries).
     - most non-geographical numbers have no area codes, including numbers
       from non-geographical entities.
     - some geographical numbers have no area codes.

    Arguments:
    numobj -- The PhoneNumber object to find the length of the area code from.

    Returns the length of area code of the PhoneNumber object passed in, or 0
    when no area code applies.
    """
    region = region_code_for_number(numobj)
    region_metadata = PhoneMetadata.metadata_for_region(region, None)
    if region_metadata is None:
        return 0

    kind = number_type(numobj)
    calling_code = numobj.country_code

    # A country without a national prefix, for a number without an Italian
    # leading zero, is assumed to use a closed dialling plan with no area
    # codes. The known exceptions to this general assumption are listed in
    # _COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES.
    lacks_prefix_and_leading_zero = (region_metadata.national_prefix is None and
                                     not numobj.italian_leading_zero)
    if (lacks_prefix_and_leading_zero and
            calling_code not in _COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES):
        return 0

    # This is only a rough heuristic: it doesn't cover Indonesia well, for
    # example, where some mobile numbers carry area codes and others don't.
    # The metadata offers no better way of representing this at this point.
    if (kind == PhoneNumberType.MOBILE and
            calling_code in _GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES):
        return 0

    if is_number_type_geographical(kind, calling_code):
        return length_of_national_destination_code(numobj)
    return 0
844
845
def length_of_national_destination_code(numobj):
    """Return length of the national destination code for a number.

    The length of the national destination code (NDC) is derived from the
    PhoneNumber object passed in, so that clients can split a national
    significant number into NDC and subscriber number. The NDC of a phone
    number is normally the first group of digit(s) right after the country
    calling code when the number is formatted in the international format,
    provided a subscriber number part follows it.

    N.B.: similar to an area code, not all numbers have an NDC!

    An example of how this could be used:

    >>> import phonenumbers
    >>> numobj = phonenumbers.parse("18002530000", "US")
    >>> nsn = phonenumbers.national_significant_number(numobj)
    >>> ndc_len = phonenumbers.length_of_national_destination_code(numobj)
    >>> if ndc_len > 0:
    ...     national_destination_code = nsn[:ndc_len]
    ...     subscriber_number = nsn[ndc_len:]
    ... else:
    ...     national_destination_code = ""
    ...     subscriber_number = nsn

    Refer to the unittests to see the difference between this function and
    length_of_geographical_area_code.

    Arguments:
    numobj -- The PhoneNumber object to find the length of the NDC from.

    Returns the length of NDC of the PhoneNumber object passed in, which
    could be zero.
    """
    # The extension must not show up in the formatted output, but the
    # caller's object must not be modified either, so work on a copy
    # whenever an extension is present.
    number_without_ext = numobj
    if numobj.extension is not None:
        number_without_ext = PhoneNumber()
        number_without_ext.merge_from(numobj)
        number_without_ext.extension = None

    international = format_number(number_without_ext, PhoneNumberFormat.INTERNATIONAL)
    groups = re.split(NON_DIGITS_PATTERN, international)

    # The formatted number starts with "+COUNTRY_CODE ", so groups[0] is the
    # empty string before the + symbol and groups[1] is the country calling
    # code. groups[2] is the NDC only if further groups follow it.
    if len(groups) <= 3:
        return 0

    ndc_length = len(groups[2])
    # Argentinian mobile numbers, for example, are formatted internationally
    # as +54 9 NDC XXXX..., i.e. the mobile token comes first in groups[2]
    # and the NDC follows in groups[3]. The token is part of the national
    # significant number too, so both lengths are added together. This
    # assumes that the mobile token is always formatted separately from the
    # rest of the phone number.
    if (number_type(numobj) == PhoneNumberType.MOBILE and
            country_mobile_token(numobj.country_code) != U_EMPTY_STRING):
        ndc_length += len(groups[3])
    return ndc_length
912
913
def country_mobile_token(country_code):
    """Look up the mobile token for a country calling code.

    A mobile token is a number inserted before the area code when dialing a
    mobile number from that country from abroad.

    Arguments:
    country_code -- the country calling code for which we want the mobile
              token

    Returns the mobile token, as a string, for the given country calling
    code, or an empty string if _MOBILE_TOKEN_MAPPINGS has no entry for it.
    """
    fallback = U_EMPTY_STRING
    return _MOBILE_TOKEN_MAPPINGS.get(country_code, fallback)
924
925
def _normalize_helper(number, replacements, remove_non_matches):
    """Translate a phone number string character by character.

    Each character is upper-cased and looked up in replacements. A character
    with a mapping is replaced by the mapped value. A character without one
    is either dropped (remove_non_matches is true) or kept exactly as it
    appeared in the input (remove_non_matches is false).

    Arguments:
    number -- a string representing a phone number
    replacements -- a mapping of characters to what they should be replaced
              by in the normalized version of the phone number
    remove_non_matches -- indicates whether characters that are not able to be
              replaced should be stripped from the number. If this is False,
              they will be left unchanged in the number.

    Returns the normalized string version of the phone number.
    """
    kept = []
    for original_char in number:
        mapped = replacements.get(original_char.upper())
        if mapped is None:
            if remove_non_matches:
                # No replacement available and the caller wants it gone.
                continue
            mapped = original_char
        kept.append(mapped)
    return U_EMPTY_STRING.join(kept)
950
951
def supported_calling_codes():
    """Returns all country calling codes the library has metadata for.

    This covers both non-geographical entities (global network calling
    codes) and geographical entities. It could be used to populate a
    drop-down box of country calling codes for a phone-number widget, for
    instance.

    Returns an unordered set of the country calling codes for every
    geographical and non-geographical entity the library supports.
    """
    calling_codes = COUNTRY_CODE_TO_REGION_CODE.keys()
    return set(calling_codes)
962
963
964def _desc_has_possible_number_data(desc):
965
966 """Returns true if there is any possible number data set for a particular PhoneNumberDesc."""
967 # If this is empty, it means numbers of this type inherit from the "general desc" -> the value
968 # "-1" means that no numbers exist for this type.
969 if desc is None:
970 return False
971 return len(desc.possible_length) != 1 or desc.possible_length[0] != -1
972
973
# Note: _desc_has_data must account for any of MetadataFilter's excludableChildFields potentially
# being absent from the metadata. It must check them all. For any changes in _desc_has_data, ensure
# that all the excludableChildFields are still being checked. If your change is safe simply
# mention why during a review without needing to change MetadataFilter.
978def _desc_has_data(desc):
979 """Returns true if there is any data set for a particular PhoneNumberDesc."""
980 if desc is None:
981 return False
982 # Checking most properties since we don't know what's present, since a custom build may have
983 # stripped just one of them (e.g. liteBuild strips exampleNumber). We don't bother checking the
984 # possibleLengthsLocalOnly, since if this is the only thing that's present we don't really
985 # support the type at all: no type-specific methods will work with only this data.
986 return ((desc.example_number is not None) or
987 _desc_has_possible_number_data(desc) or
988 (desc.national_number_pattern is not None))
989
990
def _supported_types_for_metadata(metadata):
    """Returns the types we have metadata for based on the PhoneMetadata object passed in, which must be non-None.

    Arguments:
    metadata -- The PhoneMetadata object to inspect.

    Returns the set of PhoneNumberType values whose description in metadata
    holds any data.
    """
    # FIXED_LINE_OR_MOBILE is never reported (it is a convenience type, and
    # represents that a particular number type can't be determined), and
    # neither is UNKNOWN (the non-type).
    never_reported = (PhoneNumberType.FIXED_LINE_OR_MOBILE, PhoneNumberType.UNKNOWN)
    return set(numtype for numtype in PhoneNumberType.values()
               if numtype not in never_reported and
               _desc_has_data(_number_desc_by_type(metadata, numtype)))
1002
1003
def supported_types_for_region(region_code):
    """Returns the types for a given region which the library has metadata for.

    FIXED_LINE_OR_MOBILE is never included (if numbers in this region could
    be classified as FIXED_LINE_OR_MOBILE, both FIXED_LINE and MOBILE would
    be present), and neither is UNKNOWN.

    Arguments:
    region_code -- The region code to look up.

    Returns a set of PhoneNumberType values; the set is empty for invalid or
    unknown region codes.
    """
    if _is_valid_region_code(region_code):
        metadata = PhoneMetadata.metadata_for_region(region_code.upper())
        assert metadata is not None  # due to _is_valid_region_code() check
        return _supported_types_for_metadata(metadata)
    return set()
1018
1019
def supported_types_for_non_geo_entity(country_code):
    """Returns the types for a country-code belonging to a non-geographical
    entity which the library has metadata for.

    FIXED_LINE_OR_MOBILE is never included (if numbers for this
    non-geographical entity could be classified as FIXED_LINE_OR_MOBILE, both
    FIXED_LINE and MOBILE would be present), and neither is UNKNOWN.

    Arguments:
    country_code -- The country calling code of the non-geographical entity.

    Returns a set of PhoneNumberType values; the set is empty for country
    calling codes that do not map to a known non-geographical entity.
    """
    metadata = PhoneMetadata.metadata_for_nongeo_region(country_code, None)
    if metadata is not None:
        return _supported_types_for_metadata(metadata)
    return set()
1034
1035
1036def _formatting_rule_has_first_group_only(national_prefix_formatting_rule):
1037 """Helper function to check if the national prefix formatting rule has the
1038 first group only, i.e., does not start with the national prefix.
1039 """
1040 if national_prefix_formatting_rule is None:
1041 return True
1042 return bool(fullmatch(_FIRST_GROUP_ONLY_PREFIX_PATTERN,
1043 national_prefix_formatting_rule))
1044
1045
def is_number_geographical(numobj):
    """Tests whether a phone number has a geographical association.

    It checks if the number is associated with a certain region in the country
    to which it belongs. Note that this doesn't verify if the number is
    actually in use.

    Arguments:
    numobj -- The PhoneNumber object to test.

    Returns the result of is_number_type_geographical for the number's type
    and country calling code.
    """
    num_type = number_type(numobj)
    return is_number_type_geographical(num_type, numobj.country_code)
1055
1056
def is_number_type_geographical(num_type, country_code):
    """Tests whether a phone number has a geographical association,
    as represented by its type and the country it belongs to.

    This variant of is_number_geographical exists because calculating the
    phone number type is expensive; a caller that has already done so can
    pass the type in rather than have it computed again.

    Arguments:
    num_type -- The PhoneNumberType of the number.
    country_code -- The country calling code of the number.

    Returns True for FIXED_LINE and FIXED_LINE_OR_MOBILE numbers, and for
    MOBILE numbers whose country calling code is in _GEO_MOBILE_COUNTRIES;
    False otherwise.
    """
    if num_type == PhoneNumberType.FIXED_LINE:
        return True
    if num_type == PhoneNumberType.FIXED_LINE_OR_MOBILE:
        return True
    return ((country_code in _GEO_MOBILE_COUNTRIES) and
            num_type == PhoneNumberType.MOBILE)
1069
1070
1071def _is_valid_region_code(region_code):
1072 """Helper function to check region code is not unknown or None"""
1073 if region_code is None:
1074 return False
1075 return (region_code in SUPPORTED_REGIONS)
1076
1077
def _has_valid_country_calling_code(country_calling_code):
    """Helper function to check that a country calling code is known.

    Arguments:
    country_calling_code -- The country calling code to check.

    Returns True if the code is a key of COUNTRY_CODE_TO_REGION_CODE.
    """
    known_codes = COUNTRY_CODE_TO_REGION_CODE
    return country_calling_code in known_codes
1080
1081
def format_number(numobj, num_format):
    """Formats a phone number in the specified format using default rules.

    Note that this does not promise to produce a phone number that the user
    can dial from where they are - although we do format in either 'national'
    or 'international' format depending on what the client asks for, we do not
    currently support a more abbreviated format, such as for users in the same
    "area" who could potentially dial the number without area code. Note that
    if the phone number has a country calling code of 0 or an otherwise
    invalid country calling code, we cannot work out which formatting rules to
    apply so we return the national significant number with no formatting
    applied.

    A number whose national_number is 0 is treated as unparseable: its raw
    input is returned as-is (or prefixed with the country calling code for
    E164 when the country code came from the default country). If such a
    number has neither raw input nor a country code, the empty string is
    returned.

    Arguments:
    numobj -- The phone number to be formatted.
    num_format -- The format the phone number should be formatted into

    Returns the formatted phone number, always as a string.
    """
    if numobj.national_number == 0:
        # Unparseable numbers that kept their raw input just use that, unless default country was
        # specified and the format is E164. In that case, we prepend the raw input with the country
        # code
        raw_input = numobj.raw_input or ""
        if (len(raw_input) > 0 and
            numobj.country_code is not None and
            numobj.country_code_source == CountryCodeSource.FROM_DEFAULT_COUNTRY and
            num_format == PhoneNumberFormat.E164):
            return _prefix_number_with_country_calling_code(numobj.country_code, num_format, raw_input)
        elif len(raw_input) > 0 or numobj.country_code is None:
            # Return the normalized local value rather than numobj.raw_input:
            # when neither raw input nor country code is set, the attribute
            # is None, and callers expect a string (they split or iterate
            # over the result).
            return raw_input
    country_calling_code = numobj.country_code
    nsn = national_significant_number(numobj)
    if num_format == PhoneNumberFormat.E164:
        # Early exit for E164 case (even if the country calling code is
        # invalid) since no formatting of the national number needs to be
        # applied.  Extensions are not formatted.
        return _prefix_number_with_country_calling_code(country_calling_code, num_format, nsn)
    if not _has_valid_country_calling_code(country_calling_code):
        return nsn
    # Note region_code_for_country_code() is used because formatting
    # information for regions which share a country calling code is contained
    # by only one region for performance reasons. For example, for NANPA
    # regions it will be contained in the metadata for US.
    region_code = region_code_for_country_code(country_calling_code)
    # Metadata cannot be None because the country calling code is valid (which
    # means that the region code cannot be ZZ and must be one of our supported
    # region codes).
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_calling_code, region_code.upper())
    formatted_number = _format_nsn(nsn, metadata, num_format)
    formatted_number = _maybe_append_formatted_extension(numobj,
                                                         metadata,
                                                         num_format,
                                                         formatted_number)
    return _prefix_number_with_country_calling_code(country_calling_code,
                                                    num_format,
                                                    formatted_number)
1139
1140
def format_by_pattern(numobj, number_format, user_defined_formats):
    """Formats a phone number using client-defined formatting rules.

    Note that if the phone number has a country calling code of zero or an
    otherwise invalid country calling code, we cannot work out things like
    whether there should be a national prefix applied, or how to format
    extensions, so we return the national significant number with no
    formatting applied.

    Arguments:
    numobj -- The phone number to be formatted
    number_format -- The format the phone number should be formatted into,
              as a PhoneNumberFormat value.
    user_defined_formats -- formatting rules specified by clients, as a list
              of NumberFormat objects.

    Returns the formatted phone number.
    """
    calling_code = numobj.country_code
    nsn = national_significant_number(numobj)
    if not _has_valid_country_calling_code(calling_code):
        return nsn
    # region_code_for_country_code() is used because formatting information
    # for regions which share a country calling code is contained by only one
    # region for performance reasons. For example, for NANPA regions it will
    # be contained in the metadata for US.
    region = region_code_for_country_code(calling_code)
    # Metadata cannot be None because the country calling code is valid.
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code, region)

    chosen_pattern = _choose_formatting_pattern_for_number(user_defined_formats, nsn)
    if chosen_pattern is None:
        # No client pattern matched, so the number is formatted as a whole.
        result = nsn
    else:
        # The $NP placeholder is substituted on a copy of the rule, so that
        # the client's pattern stays untouched and later calls for other
        # numbers still get the appropriate national prefix.
        working_copy = _copy_number_format(chosen_pattern)
        prefix_rule = chosen_pattern.national_prefix_formatting_rule
        if prefix_rule:
            national_prefix = metadata.national_prefix
            if not national_prefix:
                # Without a national prefix there must be no rule for how
                # to format one.
                working_copy.national_prefix_formatting_rule = None
            else:
                # Replace $NP with national prefix and $FG with the first
                # group (\1) matcher.
                prefix_rule = prefix_rule.replace(_NP_STRING, national_prefix)
                prefix_rule = prefix_rule.replace(_FG_STRING, unicod("\\1"))
                working_copy.national_prefix_formatting_rule = prefix_rule
        result = _format_nsn_using_pattern(nsn, working_copy, number_format)

    with_extension = _maybe_append_formatted_extension(numobj,
                                                       metadata,
                                                       number_format,
                                                       result)
    return _prefix_number_with_country_calling_code(calling_code,
                                                    number_format,
                                                    with_extension)
1204
1205
def format_national_number_with_carrier_code(numobj, carrier_code):
    """Format a number in national format for dialing using the specified carrier.

    The carrier_code passed in is always used, regardless of whether the
    phone number already has a preferred domestic carrier code stored. If
    carrier_code contains an empty string, the number is returned in national
    format without any carrier code. If the number's country calling code is
    invalid, the national significant number is returned with no formatting
    applied.

    Arguments:
    numobj -- The phone number to be formatted
    carrier_code -- The carrier selection code to be used

    Returns the formatted phone number in national format for dialing using
    the carrier as specified in the carrier_code.
    """
    calling_code = numobj.country_code
    nsn = national_significant_number(numobj)
    if _has_valid_country_calling_code(calling_code):
        # region_code_for_country_code() is used because formatting
        # information for regions which share a country calling code is
        # contained by only one region for performance reasons. For example,
        # for NANPA regions it will be contained in the metadata for US.
        region = region_code_for_country_code(calling_code)
        # Metadata cannot be None because the country calling code is valid
        metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code, region)
        national = PhoneNumberFormat.NATIONAL
        formatted = _format_nsn(nsn, metadata, national, carrier_code)
        formatted = _maybe_append_formatted_extension(numobj, metadata, national, formatted)
        return _prefix_number_with_country_calling_code(calling_code, national, formatted)
    return nsn
1244
1245
def format_national_number_with_preferred_carrier_code(numobj, fallback_carrier_code):
    """Formats a phone number in national format for dialing using the carrier
    stored in the preferred_domestic_carrier_code field of the PhoneNumber
    object passed in.

    If that field is missing or empty, fallback_carrier_code is used instead.
    If there is no preferred_domestic_carrier_code and fallback_carrier_code
    contains an empty string, the number is returned in national format
    without any carrier code.

    Use format_national_number_with_carrier_code instead if the carrier code
    passed in should take precedence over the number's
    preferred_domestic_carrier_code when formatting.

    Arguments:
    numobj -- The phone number to be formatted
    fallback_carrier_code -- The carrier selection code to be used, if none is
              found in the phone number itself.

    Returns the formatted phone number in national format for dialing using
    the number's preferred_domestic_carrier_code, or the fallback_carrier_code
    passed in if none is found.
    """
    preferred = numobj.preferred_domestic_carrier_code
    # Historically, this field was set to an empty string when parsing with
    # raw input if no carrier code was found in the input string. That does
    # not give a number we can dial, so the empty string is treated the same
    # as the field not being set at all.
    has_preferred = (preferred is not None and len(preferred) > 0)
    if has_preferred:
        carrier_code = preferred
    else:
        carrier_code = fallback_carrier_code
    return format_national_number_with_carrier_code(numobj, carrier_code)
1278
1279
def format_number_for_mobile_dialing(numobj, region_calling_from, with_formatting):
    """Returns a number formatted in such a way that it can be dialed from a
    mobile phone in a specific region.

    If the number cannot be reached from the region (e.g. some countries block
    toll-free numbers from being called outside of the country), the method
    returns an empty string.

    If the number's country calling code is invalid, the number's raw input
    is returned unchanged (or an empty string if there is none). The
    extension of the number is never part of the result.

    Arguments:
    numobj -- The phone number to be formatted
    region_calling_from -- The region where the call is being placed.
    with_formatting -- whether the number should be returned with formatting
              symbols, such as spaces and dashes.

    Returns the formatted phone number. When with_formatting is False, the
    result is either the E164 form (number dialed from a different region) or
    the formatted number reduced to its diallable characters.
    """
    country_calling_code = numobj.country_code
    if not _has_valid_country_calling_code(country_calling_code):
        # Nothing can be formatted without a known calling code; fall back to
        # whatever raw input the number carries.
        if numobj.raw_input is None:
            return U_EMPTY_STRING
        else:
            return numobj.raw_input
    formatted_number = U_EMPTY_STRING
    # Clear the extension, as that part cannot normally be dialed together with the main number.
    # A copy is used so that the caller's object is left untouched.
    numobj_no_ext = PhoneNumber()
    numobj_no_ext.merge_from(numobj)
    numobj_no_ext.extension = None
    region_code = region_code_for_country_code(country_calling_code)
    numobj_type = number_type(numobj_no_ext)
    is_valid_number = (numobj_type != PhoneNumberType.UNKNOWN)
    if region_calling_from == region_code:
        # The call is placed from the main region of the number's own
        # country calling code.
        is_fixed_line_or_mobile = ((numobj_type == PhoneNumberType.FIXED_LINE) or
                                   (numobj_type == PhoneNumberType.MOBILE) or
                                   (numobj_type == PhoneNumberType.FIXED_LINE_OR_MOBILE))
        # Carrier codes may be needed in some countries. We handle this here.
        if region_code == "BR" and is_fixed_line_or_mobile:
            # Historically, we set this to an empty string when parsing with
            # raw input if none was found in the input string. However, this
            # doesn't result in a number we can dial. For this reason, we
            # treat the empty string the same as if it isn't set at all.
            if (numobj_no_ext.preferred_domestic_carrier_code is not None and
                len(numobj_no_ext.preferred_domestic_carrier_code) > 0):
                formatted_number = format_national_number_with_preferred_carrier_code(numobj_no_ext, "")
            else:
                # Brazilian fixed line and mobile numbers need to be dialed with a
                # carrier code when called within Brazil. Without that, most of
                # the carriers won't connect the call.  Because of that, we return
                # an empty string here.
                formatted_number = U_EMPTY_STRING
        elif country_calling_code == _NANPA_COUNTRY_CODE:
            # For NANPA countries, we output international format for numbers
            # that can be dialed internationally, since that always works,
            # except for numbers which might potentially be short numbers,
            # which are always dialled in national format.
            metadata = PhoneMetadata.metadata_for_region(region_calling_from)
            assert metadata is not None  # due to _has_valid_country_calling_code() check
            if (can_be_internationally_dialled(numobj_no_ext) and
                _test_number_length(national_significant_number(numobj_no_ext),
                                    metadata) != ValidationResult.TOO_SHORT):
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL)
            else:
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL)
        else:
            # For non-geographical countries, and Mexican, Chilean, and Uzbek
            # fixed line and mobile numbers, we output international format for
            # numbers that can be dialed internationally as that always works.
            if ((region_code == REGION_CODE_FOR_NON_GEO_ENTITY or
                 ((region_code == unicod("MX") or region_code == unicod("CL") or
                   region_code == unicod("UZ")) and
                  is_fixed_line_or_mobile)) and
                can_be_internationally_dialled(numobj_no_ext)):
                # MX fixed line and mobile numbers should always be formatted
                # in international format, even when dialed within MX. For
                # national format to work, a carrier code needs to be used,
                # and the correct carrier code depends on if the caller and
                # callee are from the same local area. It is trickier to get
                # that to work correctly than using international format,
                # which is tested to work fine on all carriers.
                # CL fixed line numbers need the national prefix when dialing
                # in the national format, but don't have it when used for
                # display. The reverse is true for mobile numbers.  As a
                # result, we output them in the international format to make
                # it work.
                # UZ mobile and fixed-line numbers have to be formatted in
                # international format or prefixed with special codes like 03,
                # 04 (for fixed-line) and 05 (for mobile) for dialling
                # successfully from mobile devices. As we do not have complete
                # information on special codes and to be consistent with
                # formatting across all phone types we return the number in
                # international format here.
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL)
            else:
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL)
    elif is_valid_number and can_be_internationally_dialled(numobj_no_ext):
        # We assume that short numbers are not diallable from outside their
        # region, so if a number is not a valid regular length phone number,
        # we treat it as if it cannot be internationally dialled.
        # Note that this branch returns directly: the unformatted variant is
        # the E164 form rather than a stripped-down INTERNATIONAL form.
        if with_formatting:
            return format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL)
        else:
            return format_number(numobj_no_ext, PhoneNumberFormat.E164)

    # Reached for calls within the number's own region, and with an empty
    # formatted_number when the number cannot be dialled from
    # region_calling_from.
    if with_formatting:
        return formatted_number
    else:
        return normalize_diallable_chars_only(formatted_number)
1387
1388
def format_out_of_country_calling_number(numobj, region_calling_from):
    """Formats a phone number so it can be dialled from another region.

    When region_calling_from is not a valid region code, the number is
    returned in INTERNATIONAL format. When the number's country calling code
    is the same as that of region_calling_from, NATIONAL formatting is used.

    When the number's country calling code is not valid (for example zero),
    the national significant number is returned with no formatting applied.

    Calls inside NANPA, and calls between regions sharing a country calling
    code (such as Russia and Kazakhstan), get no international prefix. For a
    calling region with several international prefixes and no preferred one,
    the INTERNATIONAL format of the number is returned instead.

    Arguments:
    numobj -- The phone number to be formatted
    region_calling_from -- The region where the call is being placed

    Returns the formatted phone number
    """
    if not _is_valid_region_code(region_calling_from):
        return format_number(numobj, PhoneNumberFormat.INTERNATIONAL)

    country_code = numobj.country_code
    nsn = national_significant_number(numobj)
    if not _has_valid_country_calling_code(country_code):
        return nsn

    if country_code == _NANPA_COUNTRY_CODE:
        if is_nanpa_country(region_calling_from):
            # Within NANPA we use the national format, prefixed with the
            # country calling code.
            return (unicod(country_code) + U_SPACE +
                    format_number(numobj, PhoneNumberFormat.NATIONAL))
    elif country_code == country_code_for_valid_region(region_calling_from):
        # Regions sharing a country calling code do not need to dial it, and
        # the same holds when dialling within a single region, so this branch
        # handles both. Strictly this applies when dialling from La Reunion
        # to other French overseas departments (French Guiana, Martinique,
        # Guadeloupe) but not the other way round; that edge case is not
        # covered here, and for it we return the version that includes the
        # country calling code. Details here:
        # http://www.petitfute.com/voyage/225-info-pratiques-reunion
        return format_number(numobj, PhoneNumberFormat.NATIONAL)

    # The calling region was validated above, so its metadata is not None.
    caller_metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code,
                                                                        region_calling_from.upper())
    caller_idd = caller_metadata.international_prefix

    # Use the preferred international prefix when one exists. Otherwise only
    # use the international prefix if it is a single unambiguous prefix; for
    # regions with several prefixes we cannot know which to pick, so the
    # prefix stays empty and the plain international format is produced.
    if caller_metadata.preferred_international_prefix is not None:
        idd_for_formatting = caller_metadata.preferred_international_prefix
    elif fullmatch(_SINGLE_INTERNATIONAL_PREFIX, caller_idd):
        idd_for_formatting = caller_idd
    else:
        idd_for_formatting = U_EMPTY_STRING

    number_region = region_code_for_country_code(country_code)
    # The country calling code is valid, so this metadata is not None.
    number_metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code,
                                                                        number_region.upper())
    national_part = _format_nsn(nsn, number_metadata, PhoneNumberFormat.INTERNATIONAL)
    with_extension = _maybe_append_formatted_extension(numobj,
                                                       number_metadata,
                                                       PhoneNumberFormat.INTERNATIONAL,
                                                       national_part)
    if len(idd_for_formatting) > 0:
        return (idd_for_formatting + U_SPACE +
                unicod(country_code) + U_SPACE + with_extension)
    return _prefix_number_with_country_calling_code(country_code,
                                                    PhoneNumberFormat.INTERNATIONAL,
                                                    with_extension)
1468
1469
def format_in_original_format(numobj, region_calling_from):
    """Formats a phone number using the original phone number format
    (e.g. INTERNATIONAL or NATIONAL) that the number is parsed from, provided
    that the number has been parsed with parse(.., keep_raw_input=True).
    Otherwise the number will be formatted in NATIONAL format.

    The original format is embedded in the country_code_source field of the
    PhoneNumber object passed in, which is only set when parsing keeps the raw
    input. When we don't have a formatting pattern for the number, the method
    falls back to returning the raw input.

    Note this method guarantees no digit will be inserted, removed or modified
    as a result of formatting.

    Arguments:
    numobj -- The phone number that needs to be formatted in its original
              number format
    region_calling_from -- The region whose IDD needs to be prefixed if the
              original number has one.

    Returns the formatted phone number in its original number format.
    """
    format_rule = _choose_formatting_pattern_for_numobj(numobj)
    if numobj.raw_input is not None and format_rule is None:
        # We check if we have the formatting pattern because without that, we
        # might format the number as a group without national prefix.
        return numobj.raw_input
    # Compare by value: the country code source is a plain constant, so an
    # identity check would depend on interpreter object caching. This also
    # matches the equality checks in _format_original_allow_mods.
    if numobj.country_code_source == CountryCodeSource.UNSPECIFIED:
        return format_number(numobj, PhoneNumberFormat.NATIONAL)

    formatted_number = _format_original_allow_mods(numobj, region_calling_from, format_rule)
    num_raw_input = numobj.raw_input
    # If no digit is inserted/removed/modified as a result of our formatting,
    # we return the formatted phone number; otherwise we return the raw input
    # the user entered.
    if formatted_number is not None and num_raw_input:
        normalized_formatted_number = normalize_diallable_chars_only(formatted_number)
        normalized_raw_input = normalize_diallable_chars_only(num_raw_input)
        if normalized_formatted_number != normalized_raw_input:
            formatted_number = num_raw_input
    return formatted_number
1511
1512
def _format_original_allow_mods(numobj, region_calling_from, format_rule):
    """Format numobj according to how its country code was originally given.

    The result may still differ from the raw input in its digits; the caller
    is expected to compare the two.

    Arguments:
    numobj -- The phone number to be formatted
    region_calling_from -- The region used when the number was written with
              an IDD prefix
    format_rule -- The formatting pattern already chosen for numobj, or None
              if there is none

    Returns the formatted phone number.
    """
    source = numobj.country_code_source
    if source == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN:
        return format_number(numobj, PhoneNumberFormat.INTERNATIONAL)
    if source == CountryCodeSource.FROM_NUMBER_WITH_IDD:
        return format_out_of_country_calling_number(numobj, region_calling_from)
    if source == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN:
        # International format with its first character dropped.
        return format_number(numobj, PhoneNumberFormat.INTERNATIONAL)[1:]

    region_code = region_code_for_country_code(numobj.country_code)
    # Non-digits are stripped from the NDD here, and from the raw input
    # later, so the two can be compared easily.
    national_prefix = ndd_prefix_for_region(region_code, True)
    national_format = format_number(numobj, PhoneNumberFormat.NATIONAL)

    # A region with no national prefix at all cannot have one added by the
    # national format, so it is safe to return as is.
    if national_prefix is None or len(national_prefix) == 0:
        return national_format
    # Likewise if the number was originally entered with the national prefix.
    if _raw_input_contains_national_prefix(numobj.raw_input, national_prefix, region_code):
        return national_format
    # The format rule could still be None here if the national number was 0
    # and there was no raw input (this should not be possible for numbers
    # generated by the phonenumber library as they would also not have a
    # country calling code and we would have exited earlier).
    if format_rule is None:
        return national_format

    # When the format applied to this number does not include a national
    # prefix, the national format can be returned unchanged.
    # TODO: Refactor the code below with the code in isNationalPrefixPresentIfRequired.
    prefix_rule = format_rule.national_prefix_formatting_rule
    if prefix_rule is None:
        return national_format
    # We assume that the first-group symbol will never be _before_ the
    # national prefix.
    first_group_at = prefix_rule.find("\\1")
    if first_group_at <= 0:
        return national_format
    prefix_digits = normalize_digits_only(prefix_rule[:first_group_at])
    if len(prefix_digits) == 0:
        # National prefix not used when formatting this number.
        return national_format

    # Otherwise the national prefix has to be removed from the output, so
    # format with a copy of the rule that has no national prefix rule.
    rule_without_prefix = _copy_number_format(format_rule)
    rule_without_prefix.national_prefix_formatting_rule = None
    return format_by_pattern(numobj, PhoneNumberFormat.NATIONAL, [rule_without_prefix])
1560
1561
def _raw_input_contains_national_prefix(raw_input, national_prefix, region_code):
    """Return whether raw_input, assumed to be in national format, starts
    with the national prefix.

    national_prefix is assumed to be in digits-only form.
    """
    digits = normalize_digits_only(raw_input)
    if not digits.startswith(national_prefix):
        return False
    # Plain prefix matching is not enough: some Japanese numbers
    # (e.g. 00777123) might be mistaken to contain the national prefix when
    # written without it (e.g. 0777123). So we also require that the number
    # with the assumed prefix removed is valid (777123 won't be valid in
    # Japan).
    remainder = digits[len(national_prefix):]
    try:
        return is_valid_number(parse(remainder, region_code))
    except NumberParseException:
        return False
1578
1579
def _choose_formatting_pattern_for_numobj(numobj):
    """Return the formatting pattern that applies to numobj, or None.

    None is returned when no metadata is found for the number's country
    calling code, or when none of the metadata's number formats match.
    """
    calling_code = numobj.country_code
    region = region_code_for_country_code(calling_code)
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code, region)
    if metadata is not None:
        nsn = national_significant_number(numobj)
        return _choose_formatting_pattern_for_number(metadata.number_format, nsn)
    return None
1588
1589
def _choose_formatting_pattern_for_number(available_formats, national_number):
    """Return the first format in available_formats that fits national_number.

    A format fits when its last leading-digits pattern (if it has any)
    matches the start of national_number and its pattern matches the whole
    of national_number. Returns None when no format fits.
    """
    for candidate in available_formats:
        leading_patterns = candidate.leading_digits_pattern
        if len(leading_patterns) > 0:
            # Only the last leading-digits pattern is consulted, as it is the
            # most detailed one.
            most_detailed = re.compile(leading_patterns[-1])
            if not most_detailed.match(national_number):
                continue
        if fullmatch(re.compile(candidate.pattern), national_number):
            return candidate
    return None
1602
1603
def format_out_of_country_keeping_alpha_chars(numobj, region_calling_from):
    """Formats a phone number for out-of-country dialing purposes.

    Note that in this version, if the number was entered originally using
    alpha characters and this version of the number is stored in raw_input,
    this representation of the number will be used rather than the digit
    representation. Grouping information, as specified by characters such as
    "-" and " ", will be retained.

    Caveats:

     - This will not produce good results if the country calling code is both
       present in the raw input _and_ is the start of the national
       number. This is not a problem in the regions which typically use alpha
       numbers.

     - This will also not produce good results if the raw input has any
       grouping information within the first three digits of the national
       number, and if the function needs to strip preceding digits/words in
       the raw input before these digits. Normally people group the first
       three digits together so this is not a huge problem - and will be fixed
       if it proves to be so.

    Arguments:
    numobj -- The phone number that needs to be formatted.
    region_calling_from -- The region where the call is being placed. None is
              handled like a region for which no metadata is found.

    Returns the formatted phone number
    """
    num_raw_input = numobj.raw_input
    # If there is no raw input, then we can't keep alpha characters because there aren't any.
    # In this case, we return format_out_of_country_calling_number.
    if num_raw_input is None or len(num_raw_input) == 0:
        return format_out_of_country_calling_number(numobj, region_calling_from)
    country_code = numobj.country_code
    if not _has_valid_country_calling_code(country_code):
        return num_raw_input
    # Strip any prefix such as country calling code, IDD, that was present. We
    # do this by comparing the number in raw_input with the parsed number. To
    # do this, first we normalize punctuation. We retain number grouping
    # symbols such as " " only.
    num_raw_input = _normalize_helper(num_raw_input,
                                      _ALL_PLUS_NUMBER_GROUPING_SYMBOLS,
                                      True)
    # Now we trim everything before the first three digits in the parsed
    # number. We choose three because all valid alpha numbers have 3 digits at
    # the start - if it does not, then we don't trim anything at
    # all. Similarly, if the national number has three digits or fewer, we
    # don't trim anything at all.
    national_number = national_significant_number(numobj)
    if len(national_number) > 3:
        first_national_number_digit = num_raw_input.find(national_number[:3])
        if first_national_number_digit != -1:
            num_raw_input = num_raw_input[first_national_number_digit:]

    # A missing calling region has no metadata. Guard it explicitly so that
    # None does not fail on .upper() and instead falls through to the
    # international formatting at the end, like any unsupported region.
    if region_calling_from is None:
        metadata_for_region_calling_from = None
    else:
        metadata_for_region_calling_from = PhoneMetadata.metadata_for_region(region_calling_from.upper(), None)
    if country_code == _NANPA_COUNTRY_CODE:
        if region_calling_from is not None and is_nanpa_country(region_calling_from):
            return unicod(country_code) + U_SPACE + num_raw_input
    elif (metadata_for_region_calling_from is not None and
          country_code == country_code_for_region(region_calling_from)):
        formatting_pattern = _choose_formatting_pattern_for_number(metadata_for_region_calling_from.number_format,
                                                                   national_number)
        if formatting_pattern is None:
            # If no pattern above is matched, we format the original input
            return num_raw_input
        new_format = _copy_number_format(formatting_pattern)
        # The first group is the first group of digits that the user
        # wrote together.
        new_format.pattern = u("(\\d+)(.*)")
        # Here we just concatenate them back together after the national
        # prefix has been fixed.
        new_format.format = u(r"\1\2")
        # Now we format using this pattern instead of the default pattern,
        # but with the national prefix prefixed if necessary.
        # This will not work in the cases where the pattern (and not the
        # leading digits) decide whether a national prefix needs to be used,
        # since we have overridden the pattern to match anything, but that is
        # not the case in the metadata to date.
        return _format_nsn_using_pattern(num_raw_input,
                                         new_format,
                                         PhoneNumberFormat.NATIONAL)
    i18n_prefix_for_formatting = U_EMPTY_STRING
    # If an unsupported region-calling-from is entered, or a country with
    # multiple international prefixes, the international format of the number
    # is returned, unless there is a preferred international prefix.
    if metadata_for_region_calling_from is not None:
        international_prefix = metadata_for_region_calling_from.international_prefix
        i18n_match = fullmatch(_SINGLE_INTERNATIONAL_PREFIX, international_prefix)
        if i18n_match:
            i18n_prefix_for_formatting = international_prefix
        else:
            # May be None, which the truthiness check below treats as "no prefix".
            i18n_prefix_for_formatting = metadata_for_region_calling_from.preferred_international_prefix

    region_code = region_code_for_country_code(country_code)
    # Metadata cannot be None because the country calling code is valid.
    metadata_for_region = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code)
    # Strip any extension; only the remaining number part is used here, since
    # the extension is re-added from numobj below.
    _, stripped_number = _maybe_strip_extension(num_raw_input)
    # Append the formatted extension
    formatted_number = _maybe_append_formatted_extension(numobj,
                                                         metadata_for_region,
                                                         PhoneNumberFormat.INTERNATIONAL,
                                                         stripped_number)
    if i18n_prefix_for_formatting:
        formatted_number = (i18n_prefix_for_formatting + U_SPACE +
                            unicod(country_code) + U_SPACE + formatted_number)
    else:
        # Invalid region entered as country-calling-from (so no metadata was
        # found for it) or the region chosen has multiple international
        # dialling prefixes.
        formatted_number = _prefix_number_with_country_calling_code(country_code,
                                                                    PhoneNumberFormat.INTERNATIONAL,
                                                                    formatted_number)
    return formatted_number
1719
1720
def national_significant_number(numobj):
    """Gets the national significant number of a phone number.

    The national significant number contains neither a national prefix nor
    any formatting.

    Arguments:
    numobj -- The PhoneNumber object for which the national significant number
              is needed.

    Returns the national significant number of the PhoneNumber object passed
    in.
    """
    # Leading zero(s) recorded on the number are part of the national
    # significant number; they are not a national prefix.
    leading_zeros = U_EMPTY_STRING
    if numobj.italian_leading_zero:
        zero_count = numobj.number_of_leading_zeros
        if zero_count is None:
            # The flag is set without an explicit count: use a single zero.
            zero_count = 1
        if zero_count > 0:
            leading_zeros = U_ZERO * zero_count
    return leading_zeros + str(numobj.national_number)
1745
1746
def _prefix_number_with_country_calling_code(country_code, num_format, formatted_number):
    """Prepend the country calling code in the way num_format requires.

    Helper used by format_number and format_by_pattern. For any format other
    than E164, INTERNATIONAL or RFC3966, formatted_number is returned
    unchanged.
    """
    if num_format == PhoneNumberFormat.E164:
        return _PLUS_SIGN + unicod(country_code) + formatted_number
    if num_format == PhoneNumberFormat.INTERNATIONAL:
        return _PLUS_SIGN + unicod(country_code) + U_SPACE + formatted_number
    if num_format == PhoneNumberFormat.RFC3966:
        return _RFC3966_PREFIX + _PLUS_SIGN + unicod(country_code) + U_DASH + formatted_number
    return formatted_number
1757
1758
def _format_nsn(number, metadata, num_format, carrier_code=None):
    """Format a national number.

    In some regions the national number is written in two completely
    different ways depending on whether it is part of the NATIONAL or the
    INTERNATIONAL format; num_format selects which one to use. carrier_code
    is passed on to the pattern-based formatter.

    Returns number unchanged when no formatting pattern fits it.
    """
    intl_formats = metadata.intl_number_format
    # Dedicated international formats, when present, take precedence over
    # the general number formats for everything except NATIONAL formatting.
    prefer_general_formats = (len(intl_formats) == 0 or
                              num_format == PhoneNumberFormat.NATIONAL)
    if prefer_general_formats:
        candidates = metadata.number_format
    else:
        candidates = intl_formats
    chosen = _choose_formatting_pattern_for_number(candidates, number)
    if chosen is None:
        return number
    return _format_nsn_using_pattern(number, chosen, num_format, carrier_code)
1782
1783
def _format_nsn_using_pattern(national_number, formatting_pattern, number_format,
                              carrier_code=None):
    """Format national_number with the given formatting pattern.

    carrier_code is optional: if it is None or an empty string, no carrier
    code replacement takes place.

    Returns the formatted national number.
    """
    replacement = formatting_pattern.format
    number_re = re.compile(formatting_pattern.pattern)

    if (number_format == PhoneNumberFormat.NATIONAL and carrier_code and
            formatting_pattern.domestic_carrier_code_formatting_rule):
        # Put the requested carrier code into the carrier code rule ...
        carrier_rule = formatting_pattern.domestic_carrier_code_formatting_rule
        carrier_rule = carrier_rule.replace(_CC_STRING, carrier_code)
        # ... and let that rule take the place of the first group in the
        # format, so the first group and carrier code are combined in the
        # appropriate way.
        replacement = re.sub(_FIRST_GROUP_PATTERN, carrier_rule, replacement, count=1)
    else:
        # No carrier code to insert: apply the national prefix formatting
        # rule instead, for NATIONAL formatting only.
        prefix_rule = formatting_pattern.national_prefix_formatting_rule
        if number_format == PhoneNumberFormat.NATIONAL and prefix_rule:
            replacement = re.sub(_FIRST_GROUP_PATTERN, prefix_rule, replacement, count=1)

    formatted = re.sub(number_re, replacement, national_number)

    if number_format == PhoneNumberFormat.RFC3966:
        # Strip any leading punctuation.
        if _SEPARATOR_PATTERN.match(formatted):
            formatted = re.sub(_SEPARATOR_PATTERN, U_EMPTY_STRING, formatted, count=1)
        # Replace the rest with a dash between each number group
        formatted = re.sub(_SEPARATOR_PATTERN, U_DASH, formatted)

    return formatted
1829
1830
def example_number(region_code):
    """Gets a valid fixed-line number for the specified region.

    Arguments:
    region_code -- The region for which an example number is needed.

    Returns a valid fixed-line number for the specified region. Returns None
    when the metadata does not contain such information, or the region 001 is
    passed in. For 001 (representing non-geographical numbers), call
    example_number_for_non_geo_entity instead.
    """
    wanted_type = PhoneNumberType.FIXED_LINE
    return example_number_for_type(region_code, wanted_type)
1843
1844
def invalid_example_number(region_code):
    """Gets an invalid number for the specified region.

    Intended for unit tests that need to see what happens with an invalid
    number. The number returned can always be parsed and has the correct
    country code. It may also be a valid *short* number/code for this
    region. Validity checking such numbers is handled with shortnumberinfo.

    Arguments:
    region_code -- The region for which an example number is needed.

    Returns an invalid number for the specified region. Returns None when an
    unsupported region or the region 001 (Earth) is passed in.
    """
    if not _is_valid_region_code(region_code):
        return None
    # Start from a valid fixed-line number since every country supports
    # this. A different number type could be used instead, since fixed-line
    # numbers typically have a wide breadth of valid number lengths and we
    # may have to make the number very short before it becomes invalid.
    metadata = PhoneMetadata.metadata_for_region(region_code.upper())
    assert metadata is not None  # due to _is_valid_region_code() check
    desc = _number_desc_by_type(metadata, PhoneNumberType.FIXED_LINE)
    if desc is None or desc.example_number is None:
        # This shouldn't happen; we have a test for this.
        return None  # pragma no cover
    valid_digits = desc.example_number
    # Make the number invalid by shortening it, since currently no region
    # has a number that is the same length as MIN_LENGTH_FOR_NSN. This is
    # probably quicker than lengthening it, which is another alternative.
    # The possible number pattern could give the possible lengths and make
    # this faster, but this function is only for unit-testing so simplicity
    # is preferred to performance. The result must still be parseable, so we
    # never go below the minimum length. Every length is tried because phone
    # number plans often have overlapping prefixes: 123456 might be valid as
    # a fixed-line number, and 12345 as a mobile number. Going from longest
    # to shortest is slower than the reverse, but gives numbers that look
    # closer to real numbers and a variety of lengths (otherwise they would
    # all be MIN_LENGTH_FOR_NSN digits long).
    for candidate_length in range(len(valid_digits) - 1, _MIN_LENGTH_FOR_NSN - 1, -1):
        truncated = valid_digits[:candidate_length]
        try:
            candidate = parse(truncated, region_code)
            if not is_valid_number(candidate):
                return candidate
        except NumberParseException:  # pragma no cover
            # Shouldn't happen: we have already checked the length, we know
            # example numbers have only valid digits, and we know the region
            # code is fine.
            pass

    # We have a test to check that this doesn't happen for any of our
    # supported regions.
    return None  # pragma no cover
1907
1908
def example_number_for_type(region_code, num_type):
    """Gets a valid number for the specified region and number type.

    When region_code is None, the returned number object may belong to any
    country.

    Arguments:
    region_code -- The region for which an example number is needed, or None.
    num_type -- The type of number that is needed.

    Returns a valid number for the specified region and type. Returns None
    when the metadata does not contain such information or if an invalid
    region or region 001 was specified. For 001 (representing
    non-geographical numbers), call example_number_for_non_geo_entity instead.
    """
    if region_code is None:
        return _example_number_anywhere_for_type(num_type)
    # Reject region codes that are not valid.
    if not _is_valid_region_code(region_code):
        return None
    metadata = PhoneMetadata.metadata_for_region(region_code.upper())
    assert metadata is not None  # due to _is_valid_region_code() check
    desc = _number_desc_by_type(metadata, num_type)
    if desc is None or desc.example_number is None:
        return None
    try:
        return parse(desc.example_number, region_code)
    except NumberParseException:  # pragma no cover
        return None
1938
1939
def _example_number_anywhere_for_type(num_type):
    """Gets a valid number for the specified number type (it may belong to any country).

    Arguments:
    num_type -- The type of number that is needed.

    Returns a valid number for the specified type. Returns None when the
    metadata does not contain such information. This should only happen when
    no numbers of this type are allocated anywhere in the world anymore.
    """
    # Supported regions are searched first.
    for region_code in SUPPORTED_REGIONS:
        found = example_number_for_type(region_code, num_type)
        if found is not None:
            return found
    # No region had an example number, so try the non-geographical entities.
    for calling_code in COUNTRY_CODES_FOR_NON_GEO_REGIONS:
        metadata = PhoneMetadata.metadata_for_nongeo_region(calling_code, None)
        desc = _number_desc_by_type(metadata, num_type)
        if desc is None or desc.example_number is None:
            continue
        try:
            return parse(_PLUS_SIGN + unicod(calling_code) + desc.example_number, UNKNOWN_REGION)
        except NumberParseException:  # pragma no cover
            continue

    # There are no example numbers of this type for any country in the library.
    return None  # pragma no cover
1966
1967
def example_number_for_non_geo_entity(country_calling_code):
    """Gets a valid number for the specified country calling code for a non-geographical entity.

    Arguments:
    country_calling_code -- The country calling code for a non-geographical entity.

    Returns a valid number for the non-geographical entity. Returns None when
    the metadata does not contain such information, or the country calling
    code passed in does not belong to a non-geographical entity.
    """
    metadata = PhoneMetadata.metadata_for_nongeo_region(country_calling_code, None)
    if metadata is None:
        return None
    # For geographical entities, fixed-line data is always present. That is
    # not so for non-geographical entities, so several types are tried in
    # turn. Fixed-line and personal number are not checked since they aren't
    # used by non-geographical entities (if this changes, a unit-test will
    # catch this.)
    descs_to_try = (metadata.mobile, metadata.toll_free, metadata.shared_cost, metadata.voip,
                    metadata.voicemail, metadata.uan, metadata.premium_rate)
    for desc in descs_to_try:
        if desc is None or desc.example_number is None:
            continue
        try:
            return parse(_PLUS_SIGN + unicod(country_calling_code) + desc.example_number, UNKNOWN_REGION)
        except NumberParseException:
            continue
    return None
1992
1993
def _maybe_append_formatted_extension(numobj, metadata, num_format, number):
    """Return number with the formatted extension of numobj appended, or
    number unchanged when numobj has no extension.

    For RFC3966 the RFC3966 extension prefix is used; otherwise the
    metadata's preferred extension prefix if it has one, else the default
    extension prefix.
    """
    if not numobj.extension:
        return number
    if num_format == PhoneNumberFormat.RFC3966:
        extn_prefix = _RFC3966_EXTN_PREFIX
    elif metadata.preferred_extn_prefix is not None:
        extn_prefix = metadata.preferred_extn_prefix
    else:
        extn_prefix = _DEFAULT_EXTN_PREFIX
    return number + extn_prefix + numobj.extension
2007
2008
def _number_desc_by_type(metadata, num_type):
    """Return the PhoneNumberDesc of the metadata for the given number type.

    FIXED_LINE and FIXED_LINE_OR_MOBILE both give the fixed-line description;
    a type not listed below gives the general description.
    """
    # Checked in order, each entry pairing a number type with the name of
    # the metadata attribute holding its description.
    desc_attr_by_type = (
        (PhoneNumberType.PREMIUM_RATE, "premium_rate"),
        (PhoneNumberType.TOLL_FREE, "toll_free"),
        (PhoneNumberType.MOBILE, "mobile"),
        (PhoneNumberType.FIXED_LINE, "fixed_line"),
        (PhoneNumberType.FIXED_LINE_OR_MOBILE, "fixed_line"),
        (PhoneNumberType.SHARED_COST, "shared_cost"),
        (PhoneNumberType.VOIP, "voip"),
        (PhoneNumberType.PERSONAL_NUMBER, "personal_number"),
        (PhoneNumberType.PAGER, "pager"),
        (PhoneNumberType.UAN, "uan"),
        (PhoneNumberType.VOICEMAIL, "voicemail"),
    )
    for known_type, attr_name in desc_attr_by_type:
        if num_type == known_type:
            return getattr(metadata, attr_name)
    return metadata.general_desc
2034
2035
def number_type(numobj):
    """Gets the type of a valid phone number.

    Arguments:
    numobj -- The PhoneNumber object that we want to know the type of.

    Returns the type of the phone number, as a PhoneNumberType value;
    returns PhoneNumberType.UNKNOWN if it is invalid.
    """
    region_code = region_code_for_number(numobj)
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(numobj.country_code, region_code)
    if metadata is not None:
        nsn = national_significant_number(numobj)
        return _number_type_helper(nsn, metadata)
    # Without metadata the number cannot be classified.
    return PhoneNumberType.UNKNOWN
2051
2052
def _number_type_helper(national_number, metadata):
    """Return the type of the given number against the metadata.

    The number must first match the general description; the specific
    descriptions are then tried in a fixed order and the first match wins.
    Returns PhoneNumberType.UNKNOWN when nothing matches.
    """
    if not _is_number_matching_desc(national_number, metadata.general_desc):
        return PhoneNumberType.UNKNOWN

    # Special-purpose types, checked in priority order before fixed/mobile.
    special_descs = (
        ("premium_rate", PhoneNumberType.PREMIUM_RATE),
        ("toll_free", PhoneNumberType.TOLL_FREE),
        ("shared_cost", PhoneNumberType.SHARED_COST),
        ("voip", PhoneNumberType.VOIP),
        ("personal_number", PhoneNumberType.PERSONAL_NUMBER),
        ("pager", PhoneNumberType.PAGER),
        ("uan", PhoneNumberType.UAN),
        ("voicemail", PhoneNumberType.VOICEMAIL),
    )
    for attr_name, matched_type in special_descs:
        if _is_number_matching_desc(national_number, getattr(metadata, attr_name)):
            return matched_type

    if _is_number_matching_desc(national_number, metadata.fixed_line):
        # A fixed-line match is ambiguous when the region uses one pattern
        # for both kinds, or when the mobile pattern matches as well.
        if (metadata.same_mobile_and_fixed_line_pattern or
            _is_number_matching_desc(national_number, metadata.mobile)):
            return PhoneNumberType.FIXED_LINE_OR_MOBILE
        return PhoneNumberType.FIXED_LINE

    # Otherwise, test to see if the number is mobile. Only do this if certain
    # that the patterns for mobile and fixed line aren't the same.
    if metadata.same_mobile_and_fixed_line_pattern:
        return PhoneNumberType.UNKNOWN
    if _is_number_matching_desc(national_number, metadata.mobile):
        return PhoneNumberType.MOBILE
    return PhoneNumberType.UNKNOWN
2087
2088
def _is_number_matching_desc(national_number, number_desc):
    """Determine if the number matches the given PhoneNumberDesc.

    Returns False when number_desc is None or when the description lists
    possible lengths and the number's length is not among them; otherwise
    returns the result of matching the number against the description.
    """
    if number_desc is None:
        return False
    digit_count = len(national_number)
    allowed_lengths = number_desc.possible_length
    # An empty set of possible lengths means "same as the general
    # description", which has been checked before any specific type, so
    # only a non-empty set can rule the number out here.
    length_ok = len(allowed_lengths) == 0 or digit_count in allowed_lengths
    if not length_ok:
        return False
    return _match_national_number(national_number, number_desc, False)
2101
2102
def is_valid_number(numobj):
    """Tests whether a phone number matches a valid pattern.

    This does not verify that the number is actually in use, which cannot
    be determined from the number alone. It only checks that the parsed,
    canonicalised number is valid, not that the exact series of digits the
    user entered is diallable from the region given when parsing. For
    example, +41 (0) 78 927 2696 parses to country code "41" and national
    significant number "789272696"; that is valid even though the original
    string is not diallable.

    Arguments:
    numobj -- The phone number object that we want to validate

    Returns a boolean that indicates whether the number is of a valid pattern.
    """
    # Validate against whichever region the number itself resolves to.
    return is_valid_number_for_region(numobj, region_code_for_number(numobj))
2122
2123
def is_valid_number_for_region(numobj, region_code):
    """Tests whether a phone number is valid for a certain region.

    This does not verify that the number is actually in use. If the
    number's country calling code differs from the region's country calling
    code, the result is immediately False. Otherwise the region's specific
    number patterns are examined. This is useful for deciding, for example,
    whether a number is valid for Canada rather than merely a valid NANPA
    number.

    Warning: In most cases, you want to use is_valid_number instead. For
    example, this method will mark numbers from British Crown dependencies
    such as the Isle of Man as invalid for the region "GB" (United Kingdom),
    since it has its own region code, "IM", which may be undesirable.

    Arguments:
    numobj -- The phone number object that we want to validate.
    region_code -- The region that we want to validate the phone number for.

    Returns a boolean that indicates whether the number is of a valid pattern.
    """
    country_code = numobj.country_code
    if region_code is None:
        return False
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code.upper())
    if metadata is None:
        # The region code was invalid.
        return False
    if (region_code != REGION_CODE_FOR_NON_GEO_ENTITY and
        country_code != country_code_for_valid_region(region_code)):
        # The country calling code for this number does not match that of
        # the region code.
        return False
    detected_type = _number_type_helper(national_significant_number(numobj), metadata)
    return (detected_type != PhoneNumberType.UNKNOWN)
2158
2159
def region_code_for_number(numobj):
    """Returns the region where a phone number is from.

    This could be used for geocoding at the region level. Only guarantees
    correct results for valid, full numbers (not short-codes, or invalid
    numbers).

    Arguments:
    numobj -- The phone number object whose origin we want to know

    Returns the region where the phone number is from, or None if no region
    matches this calling code.
    """
    candidates = COUNTRY_CODE_TO_REGION_CODE.get(numobj.country_code, None)
    if candidates is None:
        return None
    if len(candidates) != 1:
        # Zero or several regions are listed for this calling code; let the
        # number itself decide.
        return _region_code_for_number_from_list(numobj, candidates)
    return candidates[0]
2183
2184
def _region_code_for_number_from_list(numobj, regions):
    """Find the region in a list that matches a number.

    Returns the first region code in regions whose metadata accepts the
    number, or None if none does.
    """
    nsn = national_significant_number(numobj)
    for candidate in regions:
        metadata = PhoneMetadata.metadata_for_region(candidate.upper(), None)
        if metadata is None:
            # No metadata available for this region; nothing to test against.
            continue
        leading_digits = metadata.leading_digits
        if leading_digits is None:
            # No leading-digits shortcut for this region: do full validation.
            if _number_type_helper(nsn, metadata) != PhoneNumberType.UNKNOWN:
                return candidate
        elif re.compile(leading_digits).match(nsn):
            # The region declares leading digits and the number starts with them.
            return candidate
    return None
2204
2205
def region_code_for_country_code(country_code):
    """Returns the region code that matches a specific country calling code.

    If no region is found, UNKNOWN_REGION ('ZZ') is returned. When several
    regions share the calling code, the one designated in the metadata as
    the "main" region for that calling code is returned. If country_code is
    valid but does not correspond to a specific region (as with
    non-geographical calling codes like 800), the value "001" is returned
    (corresponding to the value for World in the UN M.49 schema).
    """
    regions = COUNTRY_CODE_TO_REGION_CODE.get(country_code, None)
    # The first listed region is the one returned for a shared calling code.
    return UNKNOWN_REGION if regions is None else regions[0]
2222
2223
def region_codes_for_country_code(country_code):
    """Returns the region codes that match the specific country calling code.

    For non-geographical country calling codes, the region code 001 is
    returned. If no region code is found, an empty tuple is returned.
    """
    regions = COUNTRY_CODE_TO_REGION_CODE.get(country_code, None)
    if regions is not None:
        return regions
    return ()
2236
2237
def country_code_for_region(region_code):
    """Returns the country calling code for a specific region.

    For example, this would be 1 for the United States, and 64 for New
    Zealand.

    Arguments:
    region_code -- The region that we want to get the country calling code for.

    Returns the country calling code for the region denoted by region_code,
    or 0 if region_code is not a valid region code.
    """
    if _is_valid_region_code(region_code):
        return country_code_for_valid_region(region_code)
    return 0
2252
2253
def country_code_for_valid_region(region_code):
    """Returns the country calling code for a specific region.

    For example, this would be 1 for the United States, and 64 for New
    Zealand. Assumes the region is already valid.

    Arguments:
    region_code -- The region that we want to get the country calling code for.

    Returns the country calling code for the region denoted by region_code.
    Raises Exception if no metadata is found for the region.
    """
    metadata = PhoneMetadata.metadata_for_region(region_code.upper(), None)
    if metadata is not None:
        return metadata.country_code
    raise Exception("Invalid region code %s" % region_code)
2269
2270
def ndd_prefix_for_region(region_code, strip_non_digits):
    """Returns the national dialling prefix for a specific region.

    For example, this would be 1 for the United States, and 0 for New
    Zealand. Set strip_non_digits to True to strip symbols like "~" (which
    indicates a wait for a dialling tone) from the prefix returned. If no
    national prefix is present, we return None.

    Warning: Do not use this method for do-your-own formatting - for some
    regions, the national dialling prefix is used only for certain types of
    numbers. Use the library's formatting functions to prefix the national
    prefix when required.

    Arguments:
    region_code -- The region that we want to get the dialling prefix for.
    strip_non_digits -- whether to strip non-digits from the national
              dialling prefix.

    Returns the dialling prefix for the region denoted by region_code, or
    None if the region is None, has no metadata, or has no national prefix.
    """
    metadata = None
    if region_code is not None:
        metadata = PhoneMetadata.metadata_for_region(region_code.upper(), None)
    if metadata is None:
        return None
    prefix = metadata.national_prefix
    if prefix is None or len(prefix) == 0:
        return None
    if not strip_non_digits:
        return prefix
    # Note: if any other non-numeric symbols are ever used in national
    # prefixes, these would have to be removed here as well.
    return re.sub(U_TILDE, U_EMPTY_STRING, prefix)
2304
2305
def is_nanpa_country(region_code):
    """Checks if this region is a NANPA region.

    Returns True if region_code is one of the regions under the North American
    Numbering Plan Administration (NANPA), and False otherwise.
    """
    is_member = region_code in _NANPA_REGIONS
    return is_member
2313
2314
def is_alpha_number(number):
    """Checks if the number is a valid vanity (alpha) number such as 800
    MICROSOFT.

    A valid vanity number will start with at least 3 digits and will have
    three or more alpha characters. This does not do region-specific checks;
    to work out if this number is actually valid for a region, it should be
    parsed and methods such as is_possible_number_with_reason() and
    is_valid_number() should be used.

    Arguments:
    number -- the number that needs to be checked

    Returns True if the number is a valid vanity number
    """
    if _is_viable_phone_number(number):
        # Only the part before any extension has to match the alpha pattern.
        _, without_extension = _maybe_strip_extension(number)
        return bool(fullmatch(_VALID_ALPHA_PHONE_PATTERN, without_extension))
    # Number is too short, or doesn't match the basic phone number pattern.
    return False
2333
2334
def is_possible_number(numobj):
    """Convenience wrapper around is_possible_number_with_reason.

    Rather than reporting why a number is not possible, this returns True
    when the number is either a possible fully-qualified number (containing
    the area code and country code) or a possible local number (with a
    country code, but missing an area code). Local numbers count as possible
    only if they could be dialled in that form: if the area code is needed
    for a call to connect, the number is not considered possible without it.

    Arguments:
    numobj -- the number object that needs to be checked

    Returns True if the number is possible
    """
    acceptable = (ValidationResult.IS_POSSIBLE,
                  ValidationResult.IS_POSSIBLE_LOCAL_ONLY)
    return is_possible_number_with_reason(numobj) in acceptable
2355
2356
def is_possible_number_for_type(numobj, numtype):
    """Convenience wrapper around is_possible_number_for_type_with_reason.

    Rather than reporting why a number is not possible, this returns True
    when the number is either a possible fully-qualified number (containing
    the area code and country code) or a possible local number (with a
    country code, but missing an area code). Local numbers count as possible
    only if they could be dialled in that form: if the area code is needed
    for a call to connect, the number is not considered possible without it.

    Arguments:
    numobj -- the number object that needs to be checked
    numtype -- the type we are interested in

    Returns True if the number is possible
    """
    acceptable = (ValidationResult.IS_POSSIBLE,
                  ValidationResult.IS_POSSIBLE_LOCAL_ONLY)
    return is_possible_number_for_type_with_reason(numobj, numtype) in acceptable
2378
2379
def _test_number_length(national_number, metadata, numtype=PhoneNumberType.UNKNOWN):
    """Helper method to check a number against possible lengths for this number,
    and determine whether it matches, or is too short or too long.

    Arguments:
    national_number -- The national number string whose length is tested.
    metadata -- The metadata supplying the possible lengths.
    numtype -- The PhoneNumberType whose description is consulted.

    Returns a ValidationResult value.
    """
    desc_for_type = _number_desc_by_type(metadata, numtype)
    if desc_for_type is not None:
        # There should always be "possibleLengths" set for every element. This is declared in the XML
        # schema which is verified by PhoneNumberMetadataSchemaTest.
        # For size efficiency, where a sub-description (e.g. fixed-line) has the same possibleLengths
        # as the parent, this is missing, so we fall back to the general desc (where no numbers of the
        # type exist at all, there is one possible length (-1) which is guaranteed not to match the
        # length of any real phone number).
        possible_lengths = desc_for_type.possible_length
        if len(possible_lengths) == 0:  # pragma no cover: Python sub-descs all have possible_length
            possible_lengths = metadata.general_desc.possible_length
        local_lengths = desc_for_type.possible_length_local_only
    else:
        possible_lengths = metadata.general_desc.possible_length
        local_lengths = ()

    if numtype == PhoneNumberType.FIXED_LINE_OR_MOBILE:
        fixed_desc = _number_desc_by_type(metadata, PhoneNumberType.FIXED_LINE)
        if not _desc_has_possible_number_data(fixed_desc):
            # The rare case has been encountered where no fixedLine data is available (true for some
            # non-geographical entities), so we just check mobile.
            return _test_number_length(national_number, metadata, PhoneNumberType.MOBILE)
        mobile_desc = _number_desc_by_type(metadata, PhoneNumberType.MOBILE)
        if _desc_has_possible_number_data(mobile_desc):
            # Merge the mobile data in if there was any. An empty mobile length set indicates the
            # lengths are the same as the general desc and should be obtained from there.
            mobile_lengths = mobile_desc.possible_length
            if len(mobile_lengths) == 0:  # pragma no cover: Python sub-descs all have possible_length
                mobile_lengths = metadata.general_desc.possible_length
            # Build a fresh, sorted list (duplicates are okay); the inputs are very small.
            possible_lengths = sorted(list(possible_lengths) + list(mobile_lengths))

            mobile_local_lengths = mobile_desc.possible_length_local_only
            if len(local_lengths) == 0:
                local_lengths = mobile_local_lengths
            else:
                local_lengths = sorted(list(local_lengths) + list(mobile_local_lengths))

    # If the type is not supported at all (indicated by a missing PhoneNumberDesc) we return invalid length.
    if desc_for_type is None:
        return ValidationResult.INVALID_LENGTH

    actual_length = len(national_number)
    # This is safe because there is never an overlap between the possible lengths and the local-only
    # lengths; this is checked at build time.
    if actual_length in local_lengths:
        return ValidationResult.IS_POSSIBLE_LOCAL_ONLY

    shortest = possible_lengths[0]
    if shortest == actual_length:
        return ValidationResult.IS_POSSIBLE
    if shortest > actual_length:
        return ValidationResult.TOO_SHORT
    if possible_lengths[-1] < actual_length:
        return ValidationResult.TOO_LONG
    # The first element has already been checked, so only the rest remain.
    if actual_length in possible_lengths[1:]:
        return ValidationResult.IS_POSSIBLE
    return ValidationResult.INVALID_LENGTH
2450
2451
def is_possible_number_with_reason(numobj):
    """Check whether a phone number is possible, without restricting the type.

    Delegates to is_possible_number_for_type_with_reason with
    PhoneNumberType.UNKNOWN; see that function for details.
    """
    unrestricted = PhoneNumberType.UNKNOWN
    return is_possible_number_for_type_with_reason(numobj, unrestricted)
2455
2456
def is_possible_number_for_type_with_reason(numobj, numtype):
    """Check whether a phone number is a possible number of a particular type.

    For types that don't exist in a particular region, this will return a
    result that isn't so useful; it is recommended that you use
    supported_types_for_region or supported_types_for_non_geo_entity
    respectively before calling this method to determine whether you should
    call it for this number at all.

    This provides a more lenient check than is_valid_number in the following
    sense:

     - It only checks the length of phone numbers. In particular, it doesn't
       check starting digits of the number.

     - For some numbers (particularly fixed-line), many regions have the
       concept of area code, which together with subscriber number constitute
       the national significant number. It is sometimes okay to dial only the
       subscriber number when dialing in the same area. This function will
       return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is
       passed in. On the other hand, because is_valid_number validates using
       information on both starting digits (for fixed line numbers, that
       would most likely be area codes) and length (obviously includes the
       length of area codes for fixed line numbers), it will return false for
       the subscriber-number-only version.

    Known issue (https://issuetracker.google.com/issues/335892662): if a
    number is possible only in a certain region among several regions that
    share the same country calling code, this method will consider only the
    "main" region. For example, +1310xxxx are valid numbers in Canada.
    However, they are not possible in the US. As a result, this method will
    return IS_POSSIBLE_LOCAL_ONLY for +1310xxxx.

    Arguments:
    numobj -- The number object that needs to be checked
    numtype -- The type we are interested in

    Returns a value from ValidationResult which indicates whether the number
    is possible
    """
    nsn = national_significant_number(numobj)
    calling_code = numobj.country_code
    # Note: For regions that share a country calling code, like NANPA numbers,
    # we just use the rules from the default region (US in this case) since the
    # region_code_for_number will not work if the number is possible but not
    # valid. There is in fact one country calling code (290) where the possible
    # number pattern differs between various regions (Saint Helena and Tristan
    # da Cuñha), but this is handled by putting all possible lengths for any
    # country with this country calling code in the metadata for the default
    # region in this case.
    if _has_valid_country_calling_code(calling_code):
        main_region = region_code_for_country_code(calling_code)
        # Metadata cannot be None because the country calling code is valid.
        metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code, main_region)
        return _test_number_length(nsn, metadata, numtype)
    return ValidationResult.INVALID_COUNTRY_CODE
2513
2514
def is_possible_number_string(number, region_dialing_from):
    """Check whether a phone number string is a possible number.

    Takes a number in the form of a string, and the region where the number
    could be dialed from. It provides a more lenient check than
    is_valid_number; see is_possible_number_with_reason() for details.

    This method first parses the number, then invokes is_possible_number with
    the resultant PhoneNumber object.

    Arguments:
    number -- The number that needs to be checked, in the form of a string.
    region_dialing_from -- The region that we are expecting the number to be
              dialed from.  Note this is different from the region where the
              number belongs.  For example, the number +1 650 253 0000 is a
              number that belongs to US. When written in this form, it can be
              dialed from any region. When it is written as 00 1 650 253 0000,
              it can be dialed from any region which uses an international
              dialling prefix of 00. When it is written as 650 253 0000, it
              can only be dialed from within the US, and when written as 253
              0000, it can only be dialed from within a smaller area in the US
              (Mountain View, CA, to be more specific).

    Returns True if the number is possible; False if it is not possible or
    could not be parsed.
    """
    try:
        parsed = parse(number, region_dialing_from)
        return is_possible_number(parsed)
    except NumberParseException:
        # An unparseable string is simply not a possible number.
        return False
2544
2545
def truncate_too_long_number(numobj):
    """Truncate a number object that is too long.

    Tries to extract a valid number from a phone number that is too long to
    be valid by repeatedly dropping its last digit. On success the national
    number of the PhoneNumber object passed in is reset to the valid
    version; if no valid number could be extracted, the object passed in is
    not modified.

    Arguments:
    numobj -- A PhoneNumber object which contains a number that is too long to
              be valid.

    Returns True if a valid phone number can be successfully extracted.
    """
    if is_valid_number(numobj):
        return True
    # Work on a copy so the caller's object is untouched unless we succeed.
    candidate = PhoneNumber()
    candidate.merge_from(numobj)
    shortened = numobj.national_number

    while not is_valid_number(candidate):
        # Drop the last (right-most) digit.
        shortened = shortened // 10
        candidate.national_number = shortened
        reason = is_possible_number_with_reason(candidate)
        if reason == ValidationResult.TOO_SHORT or shortened == 0:
            # Ran out of digits before reaching a valid number.
            return False
    # The loop only exits normally once the copy is valid; publish the result.
    numobj.national_number = shortened
    return True
2577
2578
def _extract_country_code(number):
    """Extracts country calling code from number.

    Assumes that the leading plus sign or IDD has already been removed.

    Returns a 2-tuple of (country_calling_code, rest_of_number), or
    (0, number) if number doesn't start with a valid country calling code.
    """
    if len(number) == 0 or number[0] == U_ZERO:
        # Country codes do not begin with a '0'.
        return (0, number)
    longest_prefix = min(len(number), _MAX_LENGTH_COUNTRY_CODE)
    # Try successively longer prefixes; the shortest known calling code wins.
    for prefix_len in range(1, longest_prefix + 1):
        try:
            candidate = int(number[:prefix_len])
            if candidate in COUNTRY_CODE_TO_REGION_CODE:
                return (candidate, number[prefix_len:])
        except Exception:
            # Best effort: a prefix that cannot be interpreted is skipped.
            continue
    return (0, number)
2598
2599
def _maybe_extract_country_code(number, metadata, keep_raw_input, numobj):
    """Tries to extract a country calling code from a number.

    This method will return zero if no country calling code is considered to
    be present. Country calling codes are extracted in the following ways:

     - by stripping the international dialing prefix of the region the person
       is dialing from, if this is present in the number, and looking at the
       next digits

     - by stripping the '+' sign if present and then looking at the next
       digits

     - by comparing the start of the number and the country calling code of
       the default region.  If the number is not considered possible for the
       numbering plan of the default region initially, but starts with the
       country calling code of this region, validation will be reattempted
       after stripping this country calling code. If this number is considered
       a possible number, then the first digits will be considered the country
       calling code and removed as such.

    It will raise a NumberParseException if the number starts with a '+' but
    the country calling code supplied after this does not match that of any
    known region.

    Arguments:
    number -- non-normalized telephone number that we wish to extract a
              country calling code from; may begin with '+'
    metadata -- metadata about the region this number may be from, or None
    keep_raw_input -- True if the country_code_source and
              preferred_carrier_code fields of numobj should be populated.
    numobj -- The PhoneNumber object where the country_code and
              country_code_source need to be populated. Note the country_code
              is always populated, whereas country_code_source is only
              populated when keep_raw_input is True.

    Returns a 2-tuple containing:
      - the country calling code extracted or 0 if none could be extracted
      - a string holding the national significant number, in the case
        that a country calling code was extracted. If no country calling code
        was extracted, this will be empty.
    """
    if len(number) == 0:
        return (0, U_EMPTY_STRING)

    if metadata is not None and metadata.international_prefix is not None:
        idd_prefix = metadata.international_prefix
    else:
        # No usable IDD: use a prefix that will never match.
        idd_prefix = unicod("NonMatch")

    country_code_source, full_number = _maybe_strip_i18n_prefix_and_normalize(number, idd_prefix)
    if keep_raw_input:
        numobj.country_code_source = country_code_source

    if country_code_source != CountryCodeSource.FROM_DEFAULT_COUNTRY:
        # The number was written in international form ('+' or IDD).
        if len(full_number) <= _MIN_LENGTH_FOR_NSN:
            raise NumberParseException(NumberParseException.TOO_SHORT_AFTER_IDD,
                                       "Phone number had an IDD, but after this was not " +
                                       "long enough to be a viable phone number.")
        extracted_code, remainder = _extract_country_code(full_number)
        if extracted_code == 0:
            # They must be using a strange country calling code that we
            # don't recognize, or that doesn't exist.
            raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE,
                                       "Country calling code supplied was not recognised.")
        numobj.country_code = extracted_code
        return (extracted_code, remainder)

    if metadata is not None:
        # Check to see if the number starts with the country calling code for
        # the default region. If so, we remove the country calling code, and
        # do some checks on the validity of the number before and after.
        default_code = metadata.country_code
        default_code_str = str(metadata.country_code)
        if full_number.startswith(default_code_str):
            candidate_nsn = full_number[len(default_code_str):]
            general_desc = metadata.general_desc
            _, candidate_nsn, _ = _maybe_strip_national_prefix_carrier_code(candidate_nsn,
                                                                            metadata)

            # If the number was not valid before but is valid now, or if it
            # was too long before, we consider the number with the country
            # calling code stripped to be a better result and keep that
            # instead.
            became_valid = (not _match_national_number(full_number, general_desc, False) and
                            _match_national_number(candidate_nsn, general_desc, False))
            if became_valid or _test_number_length(full_number, metadata) == ValidationResult.TOO_LONG:
                if keep_raw_input:
                    numobj.country_code_source = CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN
                numobj.country_code = default_code
                return (default_code, candidate_nsn)

    # No country calling code present.
    numobj.country_code = 0
    return (0, U_EMPTY_STRING)
2697
2698
def _parse_prefix_as_idd(idd_pattern, number):
    """Strips the IDD from the start of the number if present.

    Helper function used by _maybe_strip_i18n_prefix_and_normalize().

    Arguments:
    idd_pattern -- Compiled pattern for the international dialing prefix.
    number -- The number string to examine.

    Returns a 2-tuple:
      - Boolean indicating if IDD was stripped
      - Number with IDD stripped
    """
    idd_match = idd_pattern.match(number)
    if not idd_match:
        return (False, number)
    remainder = number[idd_match.end():]
    # Only strip the prefix if the first digit after it is not a 0, since
    # country calling codes cannot begin with 0.
    digit_match = _CAPTURING_DIGIT_PATTERN.search(remainder)
    if digit_match and normalize_digits_only(digit_match.group(1)) == U_ZERO:
        return (False, number)
    return (True, remainder)
2720
2721
def _maybe_strip_i18n_prefix_and_normalize(number, possible_idd_prefix):
    """Strips any international prefix (such as +, 00, 011) present in the
    number provided, normalizes the resulting number, and indicates if an
    international prefix was present.

    Arguments:
    number -- The non-normalized telephone number that we wish to strip any international
              dialing prefix from.
    possible_idd_prefix -- The international direct dialing prefix from the region we
              think this number may be dialed in.

    Returns a 2-tuple containing:
      - The corresponding CountryCodeSource if an international dialing prefix
        could be removed from the number, otherwise
        CountryCodeSource.FROM_DEFAULT_COUNTRY if the number did not seem to
        be in international format.
      - The number with the prefix stripped.
    """
    if len(number) == 0:
        return (CountryCodeSource.FROM_DEFAULT_COUNTRY, number)

    # Check to see if the number begins with one or more plus signs.
    plus_match = _PLUS_CHARS_PATTERN.match(number)
    if plus_match:
        # The "+" has been consumed, so the rest can be normalized directly.
        return (CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN,
                _normalize(number[plus_match.end():]))

    # Attempt to parse the first digits as an international prefix.
    idd_pattern = re.compile(possible_idd_prefix)
    stripped, remainder = _parse_prefix_as_idd(idd_pattern, _normalize(number))
    if stripped:
        source = CountryCodeSource.FROM_NUMBER_WITH_IDD
    else:
        source = CountryCodeSource.FROM_DEFAULT_COUNTRY
    return (source, remainder)
2759
2760
def _maybe_strip_national_prefix_carrier_code(number, metadata):
    """Strips any national prefix (such as 0, 1) present in a number.

    The prefix is only removed when doing so does not turn a number that
    matched the region's general description into one that does not.

    Arguments:
    number -- The normalized telephone number that we wish to strip any
              national dialing prefix from
    metadata -- The metadata for the region that we think this number
              is from.

    Returns a 3-tuple of
     - The carrier code extracted if it is present, otherwise an empty string.
     - The number with the prefix stripped.
     - Boolean indicating if a national prefix or carrier code (or both) could be extracted.
    """
    carrier_code = U_EMPTY_STRING
    possible_national_prefix = metadata.national_prefix_for_parsing
    if (len(number) == 0 or
        possible_national_prefix is None or
        len(possible_national_prefix) == 0):
        # Early return for numbers of zero length, or when the region has no
        # national prefix to look for.
        return (U_EMPTY_STRING, number, False)

    # Attempt to parse the first digits as a national prefix.
    prefix_pattern = re.compile(possible_national_prefix)
    prefix_match = prefix_pattern.match(number)
    if not prefix_match:
        return (carrier_code, number, False)

    general_desc = metadata.general_desc
    # Check if the original number is viable.
    is_viable_original_number = _match_national_number(number, general_desc, False)
    num_groups = len(prefix_match.groups())
    # Value captured by the last capturing group.  group(0) is the whole
    # match, so this is also well defined (and never None) for a pattern
    # without capturing groups, where indexing groups() would raise.
    last_group = prefix_match.group(num_groups)
    transform_rule = metadata.national_prefix_transform_rule
    if (transform_rule is None or
        len(transform_rule) == 0 or
        last_group is None):
        # No transformation is necessary (no rule, or the last capturing
        # group did not take part in the match); just remove the prefix.
        stripped_number = number[prefix_match.end():]
        # If the original number was viable, and the resultant number is
        # not, we return without stripping anything.
        national_number_match = _match_national_number(stripped_number, general_desc, False)
        if is_viable_original_number and not national_number_match:
            return (U_EMPTY_STRING, number, False)

        # Note: Match.groups(n) merely sets the default for unmatched groups
        # and always returns a tuple; the participation test must look at
        # the group itself.
        if num_groups > 0 and last_group is not None:
            first_group = prefix_match.group(1)
            if first_group is not None:
                carrier_code = first_group
        return (carrier_code, stripped_number, True)

    # Check that the resultant number is still viable. If not, return.
    # Check this by making the transformation on a copy first.
    transformed_number = re.sub(prefix_pattern, transform_rule, number, count=1)
    national_number_match = _match_national_number(transformed_number, general_desc, False)
    if is_viable_original_number and not national_number_match:
        return (U_EMPTY_STRING, number, False)
    if num_groups > 1:
        first_group = prefix_match.group(1)
        # Keep the documented contract: never hand back None as the carrier
        # code when the first group did not take part in the match.
        if first_group is not None:
            carrier_code = first_group
    return (carrier_code, transformed_number, True)
2821
2822
def _maybe_strip_extension(number):
    """Split a trailing extension off a number string.

    An extension is the part of the number dialled after the call is
    connected, usually indicated with extn, ext, x or similar.  It is only
    split off when the text in front of it is itself a viable phone number.

    Arguments:
    number -- the non-normalized telephone number that we wish to strip the
              extension from.

    Returns a 2-tuple of:
     - the phone extension (or "" if not present)
     - the number before the extension (the unchanged input if no extension
       was stripped).
    """
    extn_match = _EXTN_PATTERN.search(number)
    if not extn_match:
        return ("", number)

    number_part = number[:extn_match.start()]
    # A potential extension only counts if what precedes it is viable.
    if not _is_viable_phone_number(number_part):
        return ("", number)

    # The extension is held by whichever capturing group took part in the
    # match; take the first such group.
    for captured in extn_match.groups():
        if captured is not None:
            return (captured, number_part)
    # No group captured anything, so report no extension.
    return ("", number)
2849
2850
def _check_region_for_parsing(number, default_region):
    """Decide whether parsing can proceed with the given region.

    Returns True if default_region is a valid region code, or if it is not
    but number begins with plus characters so that the region can be
    inferred from the number itself.  Returns False if the region cannot be
    used and cannot be inferred (including when number is None or empty).
    """
    if _is_valid_region_code(default_region):
        return True
    # Without a usable region, a None or empty number gives nothing to infer
    # the region from.
    if number is None or len(number) == 0:
        return False
    return _PLUS_CHARS_PATTERN.match(number) is not None
2865
2866
def _set_italian_leading_zeros_for_phone_number(national_number, numobj):
    """Record leading-zero information from national_number on numobj.

    Sets numobj.italian_leading_zero when the number is longer than one
    character and starts with a zero, and sets numobj.number_of_leading_zeros
    only when more than one leading zero is present.  numobj is left
    untouched otherwise.
    """
    if len(national_number) <= 1 or national_number[0] != U_ZERO:
        return
    numobj.italian_leading_zero = True
    zero_count = 1
    # The final character is never inspected, so for a number made up
    # entirely of "0"s the last "0" is not counted as a leading zero.
    for digit in national_number[1:-1]:
        if digit != U_ZERO:
            break
        zero_count += 1
    if zero_count != 1:
        numobj.number_of_leading_zeros = zero_count
2880
2881
def parse(number, region=None, keep_raw_input=False,
          numobj=None, _check_region=True):
    """Parse a string and return a corresponding PhoneNumber object.

    The method is quite lenient and looks for a number in the input text
    (raw input) and does not check whether the string is definitely only a
    phone number. To do this, it ignores punctuation and white-space, as
    well as any text before the number (e.g. a leading "Tel: ") and trims
    the non-number bits.  It will accept a number in any format (E164,
    national, international etc), assuming it can be interpreted with the
    region supplied. It also attempts to convert any alpha characters
    into digits if it thinks this is a vanity number of the type "1800
    MICROSOFT".

    This method will raise a NumberParseException if the number is not
    considered to be a possible number. Note that validation of whether the
    number is actually a valid number for a particular region is not
    performed. This can be done separately with is_valid_number.

    Note this method canonicalizes the phone number such that different
    representations can be easily compared, no matter what form it was
    originally entered in (e.g. national, international). If you want to
    record context about the number being parsed, such as the raw input that
    was entered, how the country code was derived etc. then ensure
    keep_raw_input is set.

    Note if any new field is added to this method that should always be filled
    in, even when keep_raw_input is False, it should also be handled in the
    _copy_core_fields_only() function.

    Arguments:
    number -- The number that we are attempting to parse. This can
              contain formatting such as +, ( and -, as well as a phone
              number extension. It can also be provided in RFC3966 format.
    region -- The region that we are expecting the number to be from. This
              is only used if the number being parsed is not written in
              international format. The country_code for the number in
              this case would be stored as that of the default region
              supplied. If the number is guaranteed to start with a '+'
              followed by the country calling code, then None or
              UNKNOWN_REGION can be supplied.
    keep_raw_input -- Whether to populate the raw_input field of the
              PhoneNumber object with number (as well as the
              country_code_source field).
    numobj -- An optional existing PhoneNumber object to receive the
              parsing results; it is updated in place and returned. If
              parsing fails part-way, fields set before the failure remain
              set on it.
    _check_region -- Whether to check the supplied region parameter;
              should always be True for external callers.

    Returns a PhoneNumber object filled with the parsed number.

    Raises:
    NumberParseException if the string is not considered to be a viable
    phone number (e.g.  too few or too many digits) or if no default
    region was supplied and the number is not in international format
    (does not start with +).

    """
    if numobj is None:
        numobj = PhoneNumber()
    if number is None:
        raise NumberParseException(NumberParseException.NOT_A_NUMBER,
                                   "The phone number supplied was None.")
    elif len(number) > _MAX_INPUT_STRING_LENGTH:
        raise NumberParseException(NumberParseException.TOO_LONG,
                                   "The string supplied was too long to parse.")

    # Despite the name, national_number may still include a leading "+" and
    # country calling code at this point; it is the candidate text to parse.
    national_number = _build_national_number_for_parsing(number)

    if not _is_viable_phone_number(national_number):
        raise NumberParseException(NumberParseException.NOT_A_NUMBER,
                                   "The string supplied did not seem to be a phone number.")

    # Check the region supplied is valid, or that the extracted number starts
    # with some sort of + sign so the number's region can be determined.
    if _check_region and not _check_region_for_parsing(national_number, region):
        raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE,
                                   "Missing or invalid default region.")
    if keep_raw_input:
        numobj.raw_input = number

    # Attempt to parse extension first, since it doesn't require
    # region-specific data and we want to have the non-normalised number here.
    extension, national_number = _maybe_strip_extension(national_number)
    if len(extension) > 0:
        numobj.extension = extension
    if region is None:
        metadata = None
    else:
        # May be None when the region has no metadata (None is the default
        # passed to the lookup).
        metadata = PhoneMetadata.metadata_for_region(region.upper(), None)

    country_code = 0
    try:
        country_code, normalized_national_number = _maybe_extract_country_code(national_number,
                                                                               metadata,
                                                                               keep_raw_input,
                                                                               numobj)
    except NumberParseException:
        # Fetch the active exception via sys.exc_info() rather than binding
        # it in the except clause.
        _, e, _ = sys.exc_info()
        matchobj = _PLUS_CHARS_PATTERN.match(national_number)
        if (e.error_type == NumberParseException.INVALID_COUNTRY_CODE and
            matchobj is not None):
            # Strip the plus-char, and try again.
            country_code, normalized_national_number = _maybe_extract_country_code(national_number[matchobj.end():],
                                                                                   metadata,
                                                                                   keep_raw_input,
                                                                                   numobj)
            if country_code == 0:
                raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE,
                                           "Could not interpret numbers after plus-sign.")
        else:
            # Any other failure is propagated unchanged.
            raise

    if country_code != 0:
        number_region = region_code_for_country_code(country_code)
        if number_region != region:
            # Metadata cannot be None because the country calling code is valid.
            metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, number_region)
            assert metadata is not None
    else:
        # If no extracted country calling code, use the region supplied
        # instead. The national number is just the normalized version of the
        # number we were given to parse.
        normalized_national_number += _normalize(national_number)
        if region is not None:
            country_code = metadata.country_code
            numobj.country_code = country_code
        elif keep_raw_input:
            numobj.country_code_source = CountryCodeSource.UNSPECIFIED

    if len(normalized_national_number) < _MIN_LENGTH_FOR_NSN:
        raise NumberParseException(NumberParseException.TOO_SHORT_NSN,
                                   "The string supplied is too short to be a phone number.")
    if metadata is not None:
        potential_national_number = normalized_national_number
        carrier_code, potential_national_number, _ = _maybe_strip_national_prefix_carrier_code(potential_national_number,
                                                                                                metadata)
        # We require that the NSN remaining after stripping the national
        # prefix and carrier code be long enough to be a possible length for
        # the region. Otherwise, we don't do the stripping, since the original
        # number could be a valid short number.
        validation_result = _test_number_length(potential_national_number, metadata)
        if validation_result not in (ValidationResult.TOO_SHORT,
                                     ValidationResult.IS_POSSIBLE_LOCAL_ONLY,
                                     ValidationResult.INVALID_LENGTH):
            normalized_national_number = potential_national_number
            # The carrier code is context about how the number was written,
            # so it is only recorded when raw input is being kept.
            if keep_raw_input and carrier_code is not None and len(carrier_code) > 0:
                numobj.preferred_domestic_carrier_code = carrier_code
    len_national_number = len(normalized_national_number)
    if len_national_number < _MIN_LENGTH_FOR_NSN:  # pragma no cover
        # Check of _is_viable_phone_number() at the top of this function makes
        # this effectively unhittable.
        raise NumberParseException(NumberParseException.TOO_SHORT_NSN,
                                   "The string supplied is too short to be a phone number.")
    if len_national_number > _MAX_LENGTH_FOR_NSN:
        raise NumberParseException(NumberParseException.TOO_LONG,
                                   "The string supplied is too long to be a phone number.")
    # Leading zeros would be lost by the numeric conversion below, so they
    # are recorded on numobj first.
    _set_italian_leading_zeros_for_phone_number(normalized_national_number, numobj)
    numobj.national_number = to_long(normalized_national_number)
    return numobj
3042
3043
def _extract_phone_context(number_to_extract_from, index_of_phone_context):
    """Extract the value of the RFC3966 phone-context parameter.

    Arguments:
    number_to_extract_from -- The string holding the parameter.
    index_of_phone_context -- Index of the ";phone-context=" marker in
              number_to_extract_from, or -1 if the marker is absent.

    Returns the extracted string (possibly empty), or None if no
    phone-context parameter is found.
    """
    # No phone-context parameter is present.
    if index_of_phone_context == -1:
        return None

    value_start = index_of_phone_context + len(_RFC3966_PHONE_CONTEXT)
    # The marker is the last thing in the string: the value is empty.
    if value_start >= len(number_to_extract_from):
        return U_EMPTY_STRING

    # The value runs up to the next parameter separator, or to the end of
    # the string when phone-context is the last parameter.
    value_end = number_to_extract_from.find(';', value_start)
    if value_end == -1:
        return number_to_extract_from[value_start:]
    return number_to_extract_from[value_start:value_end]
3065
3066
def _is_phone_context_valid(phone_context):
    """Returns whether the value of phone-context follows the syntax defined in RFC3966.

    Arguments:
    phone_context -- The extracted phone-context value, or None if the
              parameter was absent.

    Returns True if the parameter is absent (None), False if it is present
    but empty, and otherwise True only if the whole value matches the
    global-number-digits or the domainname pattern.  The result is always a
    bool rather than a regular expression match object.
    """
    # An absent parameter is acceptable; a present but empty one is not.
    if phone_context is None:
        return True
    if len(phone_context) == 0:
        return False

    # Does phone-context value match pattern of global-number-digits or domainname
    return bool(fullmatch(_RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN, phone_context) or
                fullmatch(_RFC3966_DOMAINNAME_PATTERN, phone_context))
3077
3078
def _build_national_number_for_parsing(number):
    """Return the part of number that should be handed to the parser.

    If number carries an RFC3966 phone-context parameter, the result is
    assembled from that context (when it is a "+" number prefix) and the
    text preceding the parameter; otherwise a possible number is extracted
    from the string.  Any isdn-subaddress and what follows it is removed.

    Raises NumberParseException (NOT_A_NUMBER) if a phone-context parameter
    is present but its value is invalid.
    """
    context_index = number.find(_RFC3966_PHONE_CONTEXT)
    phone_context = _extract_phone_context(number, context_index)
    if not _is_phone_context_valid(phone_context):
        raise NumberParseException(NumberParseException.NOT_A_NUMBER, "The phone-context value is invalid")

    if phone_context is None:
        # Extract a possible number from the string passed in (this strips
        # leading characters that could not be the start of a phone number).
        candidate = _extract_possible_number(number)
    else:
        # A phone context holding a phone number prefix must be captured,
        # whereas a domain is ignored.  Parameters following the phone
        # context are dropped as they do not matter for parsing.
        if phone_context[0] == _PLUS_SIGN:
            leading_part = phone_context
        else:
            leading_part = U_EMPTY_STRING

        # Append everything between the "tel:" prefix and the
        # phone-context: the national number plus an optional extension or
        # isdn-subaddress component.  Inputs lacking "tel:" have been seen
        # in practice; for those, everything from the beginning is used.
        tel_prefix_index = number.find(_RFC3966_PREFIX)
        if tel_prefix_index >= 0:
            number_start = tel_prefix_index + len(_RFC3966_PREFIX)
        else:
            number_start = 0
        candidate = leading_part + number[number_start:context_index]

    # Delete the isdn-subaddress and everything after it if it is present.
    # Note extension won't appear at the same time with isdn-subaddress
    # according to paragraph 5.3 of the RFC3966 spec.
    isdn_index = candidate.find(_RFC3966_ISDN_SUBADDRESS)
    if isdn_index > 0:
        candidate = candidate[:isdn_index]
    # If both phone context and isdn-subaddress are absent but other
    # parameters are present, the parameters are left in the result: content
    # is not deleted from a potential number string without strong evidence
    # that it is actually written in RFC3966.
    return candidate
3124
3125
def _copy_core_fields_only(inobj):
    """Build a new PhoneNumber holding only the identifying fields of inobj.

    Copied: country_code, national_number, a non-empty extension, and the
    leading-zero information.  Fields that capture the context in which the
    phone number was created are not copied.
    """
    core = PhoneNumber()
    core.country_code = inobj.country_code
    core.national_number = inobj.national_number

    extension = inobj.extension
    if extension is not None and len(extension) > 0:
        core.extension = extension

    if inobj.italian_leading_zero:
        core.italian_leading_zero = True
        # The count is only relevant if there are leading zeros at all.  A
        # missing count implicitly means 1; make that explicit.
        zero_count = inobj.number_of_leading_zeros
        if zero_count is None:
            zero_count = 1
        core.number_of_leading_zeros = zero_count
    return core
3144
3145
def _is_number_match_OO(numobj1_in, numobj2_in):
    """Compare two phone number objects and return the MatchType found."""
    # Only the fields that uniquely define a number matter, so work on
    # stripped-down copies; the inputs are not modified.
    first = _copy_core_fields_only(numobj1_in)
    second = _copy_core_fields_only(numobj2_in)

    # Early exit if both had extensions and these are different.
    both_have_extension = (first.extension is not None and
                           second.extension is not None)
    if both_have_extension and first.extension != second.extension:
        return MatchType.NO_MATCH

    code1 = first.country_code
    code2 = second.country_code
    if code1 == 0 or code2 == 0:
        # One or both country_code fields were not specified.  To make the
        # equality check easier, first make the country_code fields equal.
        first.country_code = code2
        # If all else was the same, then this is an NSN_MATCH.
        if first == second:
            return MatchType.NSN_MATCH
        if _is_national_number_suffix_of_other(first, second):
            return MatchType.SHORT_NSN_MATCH
        return MatchType.NO_MATCH

    # Both had country_code specified.
    if first == second:
        return MatchType.EXACT_MATCH
    if code1 == code2 and _is_national_number_suffix_of_other(first, second):
        # A SHORT_NSN_MATCH occurs if there is a difference because of the
        # presence or absence of an 'Italian leading zero', the presence or
        # absence of an extension, or one NSN being a shorter variant of the
        # other.
        return MatchType.SHORT_NSN_MATCH
    # This is not a match.
    return MatchType.NO_MATCH
3184
3185
def _is_national_number_suffix_of_other(numobj1, numobj2):
    """Returns True when the decimal text of one national number ends with
    that of the other, which includes the case where both are the same.
    """
    digits1 = str(numobj1.national_number)
    digits2 = str(numobj2.national_number)
    # Only the longer string can end with the other one; for equal lengths
    # endswith() amounts to an equality test.
    if len(digits1) >= len(digits2):
        return digits1.endswith(digits2)
    return digits2.endswith(digits1)
3194
3195
def _is_number_match_SS(number1, number2):
    """Compare two phone numbers given as strings; return the MatchType.

    This is a convenience wrapper for _is_number_match_OO/_is_number_match_OS.
    No default region is known, so up to three attempts are made; a later
    attempt only runs when the previous one failed with
    INVALID_COUNTRY_CODE.  Any other parse failure yields
    MatchType.NOT_A_NUMBER.
    """
    # Attempt 1: number1 carries its own country calling code.
    try:
        parsed1 = parse(number1, UNKNOWN_REGION)
        return _is_number_match_OS(parsed1, number2)
    except NumberParseException:
        _, failure, _ = sys.exc_info()
        if failure.error_type != NumberParseException.INVALID_COUNTRY_CODE:
            # One or more of the phone numbers we are trying to match is not
            # a viable phone number.
            return MatchType.NOT_A_NUMBER

    # Attempt 2: number2 carries its own country calling code.
    try:
        parsed2 = parse(number2, UNKNOWN_REGION)
        return _is_number_match_OS(parsed2, number1)
    except NumberParseException:
        _, failure, _ = sys.exc_info()
        if failure.error_type != NumberParseException.INVALID_COUNTRY_CODE:
            return MatchType.NOT_A_NUMBER

    # Attempt 3: neither has a country calling code; parse both without a
    # region and without the region check.
    try:
        parsed1 = parse(number1, None, keep_raw_input=False,
                        _check_region=False, numobj=None)
        parsed2 = parse(number2, None, keep_raw_input=False,
                        _check_region=False, numobj=None)
        return _is_number_match_OO(parsed1, parsed2)
    except NumberParseException:
        return MatchType.NOT_A_NUMBER
3226
3227
def _is_number_match_OS(numobj1, number2):
    """Compare a PhoneNumber object with a number string; return the
    MatchType.

    Wrapper variant of _is_number_match_OO that parses number2 first.
    """
    # First see if the second number has an implicit country calling code,
    # by attempting to parse it.
    try:
        parsed2 = parse(number2, UNKNOWN_REGION)
        return _is_number_match_OO(numobj1, parsed2)
    except NumberParseException:
        _, failure, _ = sys.exc_info()
        if failure.error_type != NumberParseException.INVALID_COUNTRY_CODE:
            # One or more of the phone numbers we are trying to match is not
            # a viable phone number.
            return MatchType.NOT_A_NUMBER

    # The second number has no country calling code, so EXACT_MATCH is no
    # longer possible.
    region1 = region_code_for_country_code(numobj1.country_code)
    try:
        if region1 == UNKNOWN_REGION:
            # The first number didn't have a valid country calling code
            # either, so the second number is parsed without one as well.
            parsed2 = parse(number2, None, keep_raw_input=False,
                            _check_region=False, numobj=None)
            return _is_number_match_OO(numobj1, parsed2)

        # Parse as if the region was the same as that of the first number,
        # and downgrade an EXACT_MATCH result to NSN_MATCH.
        parsed2 = parse(number2, region1)
        outcome = _is_number_match_OO(numobj1, parsed2)
        if outcome == MatchType.EXACT_MATCH:
            return MatchType.NSN_MATCH
        return outcome
    except NumberParseException:
        return MatchType.NOT_A_NUMBER
3264
3265
def is_number_match(num1, num2):
    """Takes two phone numbers and compares them for equality.

    For example, the numbers +1 345 657 1234 and 657 1234 are a SHORT_NSN_MATCH.
    The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.

    Arguments
    num1 -- First number object or string to compare. Can contain formatting,
              and can have country calling code specified with + at the start.
    num2 -- Second number object or string to compare. Can contain formatting,
              and can have country calling code specified with + at the start.

    Returns:
     - EXACT_MATCH if the country_code, NSN, presence of a leading zero for
       Italian numbers and any extension present are the same.
     - NSN_MATCH if either or both has no region specified, and the NSNs and
       extensions are the same.
     - SHORT_NSN_MATCH if either or both has no region specified, or the
       region specified is the same, and one NSN could be a shorter version of
       the other number. This includes the case where one has an extension
       specified, and the other does not.
     - NOT_A_NUMBER if a number supplied as a string could not be parsed.
     - NO_MATCH otherwise.
    """
    # Dispatch on which arguments are already PhoneNumber objects; when only
    # one is, it is always passed first to the object/string comparison.
    first_is_object = isinstance(num1, PhoneNumber)
    second_is_object = isinstance(num2, PhoneNumber)
    if first_is_object and second_is_object:
        return _is_number_match_OO(num1, num2)
    if first_is_object:
        return _is_number_match_OS(num1, num2)
    if second_is_object:
        return _is_number_match_OS(num2, num1)
    return _is_number_match_SS(num1, num2)
3297
3298
def can_be_internationally_dialled(numobj):
    """Returns True if the number can be dialled from outside the region, or
    if this is unknown.

    Returns False only when the number matches the region's
    no-international-dialling description.  Does not check the number is a
    valid number.  Note that, at the moment, this method does not handle
    short numbers (which are currently all presumed to not be diallable from
    outside their country).

    Arguments:
    numobj -- the phone number object for which we want to know whether it is
              diallable from outside the region.
    """
    region_code = region_code_for_number(numobj)
    metadata = PhoneMetadata.metadata_for_region(region_code, None)
    if metadata is None:
        # Note numbers belonging to non-geographical entities (e.g. +800
        # numbers) are always internationally diallable, and will be caught
        # here.
        return True
    nsn = national_significant_number(numobj)
    restricted = _is_number_matching_desc(nsn, metadata.no_international_dialling)
    return not restricted
3320
3321
def is_mobile_number_portable_region(region_code):
    """Returns whether the supplied region supports mobile number portability.

    Returns False when no metadata is found for region_code (e.g. invalid or
    unknown regions); otherwise returns the metadata's
    mobile_number_portable_region value.

    Arguments:
    region_code -- the region for which we want to know whether it supports
              mobile number portability or not.
    """
    region_metadata = PhoneMetadata.metadata_for_region(region_code, None)
    if region_metadata is not None:
        return region_metadata.mobile_number_portable_region
    return False
3335
3336
class NumberParseException(UnicodeMixin, Exception):
    """Exception when attempting to parse a putative phone number.

    The error_type attribute holds one of the class-level reason codes
    defined below.
    """

    # The reason a string could not be interpreted as a phone number.

    # The country code supplied did not belong to a supported country or
    # non-geographical entity.
    INVALID_COUNTRY_CODE = 0

    # This indicates the string passed is not a valid number. Either the string
    # had less than 3 digits in it or had an invalid phone-context
    # parameter. More specifically, the number failed to match the regular
    # expression _VALID_PHONE_NUMBER, _RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN, or
    # _RFC3966_DOMAINNAME_PATTERN in phonenumberutil.py.
    NOT_A_NUMBER = 1

    # This indicates the string started with an international dialing prefix,
    # but after this was removed, it had fewer digits than any valid phone
    # number (including country code) could have.
    TOO_SHORT_AFTER_IDD = 2

    # This indicates the string, after any country code has been stripped,
    # had fewer digits than any valid phone number could have.
    TOO_SHORT_NSN = 3

    # This indicates the string had more digits than any valid phone number
    # could have
    TOO_LONG = 4

    def __init__(self, error_type, msg):
        """Create the exception.

        Arguments:
        error_type -- One of the reason codes defined on this class.
        msg -- Human-readable description of the failure.
        """
        Exception.__init__(self, msg)
        self.error_type = error_type
        self._msg = msg

    def __reduce__(self):
        # Pickle support: rebuild the instance by calling the class with both
        # constructor arguments. Only msg is passed to Exception.__init__
        # above, so the inherited args alone would not match the
        # two-argument constructor.
        return (type(self), (self.error_type, self._msg))

    def __unicode__(self):
        # Text form is "(<error_type>) <message>".
        return unicod("(%s) %s") % (self.error_type, self._msg)
3380
3381
def _match_national_number(number, number_desc, allow_prefix_match):
    """Returns whether the given national number (a string containing only
    decimal digits) matches the national number pattern defined in the given
    PhoneNumberDesc object.

    Arguments:
    number -- The national number string to test.
    number_desc -- The description holding national_number_pattern; may be
              None.
    allow_prefix_match -- Value to return when the pattern matches at the
              start of number without matching all of it.
    """
    if number_desc is None:
        return False
    national_pattern = number_desc.national_number_pattern
    # We don't want to consider it a prefix match when matching non-empty
    # input against an empty pattern.
    if national_pattern is None or len(national_pattern) == 0:
        return False
    return _match(number, re.compile(national_pattern), allow_prefix_match)
3391
3392
def _match(number, pattern, allow_prefix_match):
    """Test number against the compiled regular expression pattern.

    Returns False if pattern does not match at the start of number, True if
    it matches the whole of number, and allow_prefix_match if it matches
    only a leading part of it.
    """
    if not pattern.match(number):
        return False
    return True if fullmatch(pattern, number) else allow_prefix_match