1# $Id$
2# Author: Adam Turner.
3# Copyright: This module is placed in the public domain
4# or under the `Zero Clause BSD licence`_,
5# whichever is more permissive.
6#
7# .. _Zero Clause BSD licence: https://opensource.org/license/0BSD
8
9"""Conversion between integers and roman numerals."""
10
11from __future__ import annotations
12
13import sys
14from typing import TYPE_CHECKING, final
15
16if TYPE_CHECKING:
17 from typing import Final, Self
18
# Public API of this module: the range constants, the numeral type,
# and the two exception classes it raises.
__all__ = (
    'MIN', 'MAX',
    'RomanNumeral',
    'OutOfRangeError', 'InvalidRomanNumeralError',
)
26
MIN: Final = 1
"""The value of the smallest well-formed roman numeral."""

# Note that 4,999 (MMMMCMXCIX) breaks one of the rules of Roman numerals,
# that the same character may not appear more than thrice consecutively,
# meaning the largest 'well-formed' Roman numeral is 3,999 (MMMCMXCIX).
# We use 4,999 for backwards compatibility reasons.
MAX: Final = 4_999
"""The value of the largest supported roman numeral.

Strictly speaking the largest well-formed numeral is 3,999 (MMMCMXCIX);
values up to 4,999 (MMMMCMXCIX) are accepted for backwards compatibility.
"""
36
37
class OutOfRangeError(TypeError):
    """Raised for an integer outside the supported range of 1 to 4,999.

    This is a ``TypeError`` subclass, so ``except TypeError`` handlers
    also catch it.
    """
40
41
class InvalidRomanNumeralError(ValueError):
    """Raised when a value cannot be read as a Roman numeral."""

    def __init__(self, value: object, *args: object) -> None:
        """Build the exception with a message that names *value*.

        The formatted message becomes the first exception argument;
        any extra positional *args* are appended after it unchanged.
        """
        super().__init__(f'Invalid Roman numeral: {value}', *args)
48
49
@final
class RomanNumeral:
    """A Roman numeral.

    Only values between 1 and 4,999 are valid.
    Stores the value internally as an ``int``.

    Instances are hashable, reject re-assignment of their value, and
    support all six comparison operators against other ``RomanNumeral``
    instances (comparisons with any other type return
    ``NotImplemented``).

    >>> answer = RomanNumeral(42)
    >>> print(answer.to_uppercase())
    XLII
    """
    __slots__ = ('_value',)
    _value: int

    def __init__(self, value: int, /) -> None:
        """Validate and store *value*.

        Raises ``TypeError`` if *value* is not an ``int`` and
        ``OutOfRangeError`` if it lies outside ``MIN``..``MAX``.
        """
        if not isinstance(value, int):
            msg = 'RomanNumeral: an integer is required, not %r'
            raise TypeError(msg % type(value).__qualname__)
        if value < MIN or value > MAX:
            msg = 'Number out of range (must be between 1 and 4,999). Got %s.'
            raise OutOfRangeError(msg % value)
        # Go through object.__setattr__: our own __setattr__ refuses
        # every write to '_value'.
        super().__setattr__('_value', value)

    def __int__(self) -> int:
        """Return the stored integer value."""
        return self._value

    def __str__(self) -> str:
        """Return the uppercase numeral, same as ``to_uppercase()``."""
        return self.to_uppercase()

    def __repr__(self) -> str:
        """Return ``ClassName(value)``."""
        return f'{self.__class__.__name__}({self._value!r})'

    def __eq__(self, other: object) -> bool:
        """Compare by value with another ``RomanNumeral``."""
        if isinstance(other, RomanNumeral):
            return self._value == other._value
        return NotImplemented

    def __lt__(self, other: object) -> bool:
        """Order by value against another ``RomanNumeral``."""
        if isinstance(other, RomanNumeral):
            return self._value < other._value
        return NotImplemented

    # Python does not derive ``<=`` / ``>=`` from ``<`` and ``==``, so the
    # remaining ordering methods are spelled out explicitly.

    def __le__(self, other: object) -> bool:
        """Order by value against another ``RomanNumeral``."""
        if isinstance(other, RomanNumeral):
            return self._value <= other._value
        return NotImplemented

    def __gt__(self, other: object) -> bool:
        """Order by value against another ``RomanNumeral``."""
        if isinstance(other, RomanNumeral):
            return self._value > other._value
        return NotImplemented

    def __ge__(self, other: object) -> bool:
        """Order by value against another ``RomanNumeral``."""
        if isinstance(other, RomanNumeral):
            return self._value >= other._value
        return NotImplemented

    def __hash__(self) -> int:
        """Hash by value, consistent with ``__eq__``."""
        return hash(self._value)

    def __setattr__(self, key: str, value: object) -> None:
        """Refuse to rebind ``_value``; defer everything else to ``object``."""
        if key == '_value':
            raise AttributeError('Cannot set the value attribute.')
        super().__setattr__(key, value)

    def to_uppercase(self) -> str:
        """Converts a ``RomanNumeral`` to an uppercase string.

        >>> answer = RomanNumeral(42)
        >>> assert answer.to_uppercase() == 'XLII'
        """
        out = []
        n = int(self)
        # Greedy conversion: the table is ordered from largest to smallest
        # value, so repeatedly take the largest prefix that still fits.
        for value, name, _ in _ROMAN_NUMERAL_PREFIXES:
            while n >= value:
                n -= value
                out.append(name)
        return ''.join(out)

    def to_lowercase(self) -> str:
        """Converts a ``RomanNumeral`` to a lowercase string.

        >>> answer = RomanNumeral(42)
        >>> assert answer.to_lowercase() == 'xlii'
        """
        out = []
        n = int(self)
        # Same greedy walk as ``to_uppercase``, using the lowercase column.
        for value, _, name in _ROMAN_NUMERAL_PREFIXES:
            while n >= value:
                n -= value
                out.append(name)
        return ''.join(out)

    @classmethod
    def from_string(cls, string: str, /) -> Self:
        """Creates a ``RomanNumeral`` from a well-formed string representation.

        The string must be non-empty ASCII, written entirely in uppercase
        or entirely in lowercase, and use only the letters I, V, X, L, C,
        D and M in canonical order (up to four leading ``M`` characters
        are accepted).

        Returns ``RomanNumeral`` or raises ``InvalidRomanNumeralError``.

        >>> answer = RomanNumeral.from_string('XLII')
        >>> assert int(answer) == 42
        """
        # Check the type first so that truthiness is only evaluated on str.
        if not isinstance(string, str) or not string:
            raise InvalidRomanNumeralError(string)

        # Either all-uppercase or all-lowercase ASCII is accepted;
        # non-ASCII and mixed-case input is rejected.
        if not string.isascii() or not (string.isupper() or string.islower()):
            raise InvalidRomanNumeralError(string)
        chars = string.upper().encode('ascii')

        # ASCII-only uppercase string only containing I, V, X, L, C, D, M.
        if not frozenset(b'IVXLCDM').issuperset(chars):
            raise InvalidRomanNumeralError(string)

        result: int = 0
        idx: int = 0

        # Thousands: between 0 and 4 "M" characters at the start
        for _ in range(4):
            if chars[idx:idx + 1] == b'M':
                result += 1000
                idx += 1
            else:
                break
        if len(chars) == idx:
            return cls(result)

        # Hundreds, tens and ones all follow the same grammar. With
        # ``one``/``five``/``ten`` being the symbols for 1x, 5x and 10x the
        # unit, a place is one of:
        #   9 units (one + ten), 4 units (one + five),
        #   0-3 units (0 to 3 ``one`` chars), or
        #   5-8 units (``five`` followed by 0 to 3 ``one`` chars).
        places = (
            (100, b'C', b'D', b'M'),
            (10, b'X', b'L', b'C'),
            (1, b'I', b'V', b'X'),
        )
        for unit, one, five, ten in places:
            pair = chars[idx:idx + 2]
            if pair == one + ten:
                result += 9 * unit
                idx += 2
            elif pair == one + five:
                result += 4 * unit
                idx += 2
            else:
                if chars[idx:idx + 1] == five:
                    result += 5 * unit
                    idx += 1
                for _ in range(3):
                    if chars[idx:idx + 1] == one:
                        result += unit
                        idx += 1
                    else:
                        break
            # Stop as soon as the whole input has been consumed.
            if len(chars) == idx:
                return cls(result)

        # Leftover characters mean the numeral is not in canonical form.
        raise InvalidRomanNumeralError(string)
230
231
# Conversion table, ordered from the largest value to the smallest.
# Each row is (value, uppercase numeral, lowercase numeral); the lowercase
# form is derived from the uppercase one and both strings are interned.
_ROMAN_NUMERAL_PREFIXES: Final = [
    (value, sys.intern(upper), sys.intern(upper.lower()))
    for value, upper in (
        (1000, 'M'),
        (900, 'CM'),
        (500, 'D'),
        (400, 'CD'),
        (100, 'C'),
        (90, 'XC'),
        (50, 'L'),
        (40, 'XL'),
        (10, 'X'),
        (9, 'IX'),
        (5, 'V'),
        (4, 'IV'),
        (1, 'I'),
    )
]
"""Numeral value, uppercase character, and lowercase character."""