1# $Id$
2# Author: Adam Turner.
3# Copyright: This module is placed in the public domain
4# or under the `Zero Clause BSD licence`_,
5# whichever is more permissive.
6#
7# .. _Zero Clause BSD licence: https://opensource.org/license/0BSD
8
9"""Conversion between integers and roman numerals."""
10
11from __future__ import annotations
12
13import sys
14
15TYPE_CHECKING = False
16if TYPE_CHECKING:
17 from typing import Final, final
18
19 from docutils.utils._typing import Self
20else:
21 from docutils.utils._typing import final
22
23
# Public API of this module: the two range bounds, the two exception
# types, and the numeral class itself.
__all__: Final = (
    'MAX', 'MIN',
    'InvalidRomanNumeralError', 'OutOfRangeError',
    'RomanNumeral',
)
31
MIN: Final = 1
"""The value of the smallest well-formed roman numeral."""

# Note that 4,999 (MMMMCMXCIX) breaks one of the rules of Roman numerals,
# that the same character may not appear more than thrice consecutively,
# meaning the largest 'well-formed' Roman numeral is 3,999 (MMMCMXCIX).
# We use 4,999 for backwards compatibility reasons.
MAX: Final = 4_999
"""The largest supported value; exceeds the well-formed limit of 3,999."""
41
42
@final
class OutOfRangeError(TypeError):
    """Raised for a number outside the supported range of 1 to 4,999."""
46
47
@final
class InvalidRomanNumeralError(ValueError):
    """Raised when the given input is not a valid Roman numeral."""

    def __init__(self, value: str, *args: object) -> None:
        # The offending input is embedded in the message, which becomes
        # the first exception argument; any extra arguments follow it.
        super().__init__(f'Invalid Roman numeral: {value}', *args)
55
56
@final
class RomanNumeral:
    """A Roman numeral.

    Only values between 1 and 4,999 are valid.
    Stores the value internally as an ``int``.

    Instances compare equal, hash, and order by that integer value, but
    only against other ``RomanNumeral`` instances. The stored value
    cannot be reassigned after construction.

    >>> answer = RomanNumeral(42)
    >>> print(answer.to_uppercase())
    XLII
    """

    __slots__ = ('_value',)
    _value: int

    def __init__(self, value: int, /) -> None:
        """Create a numeral from an integer.

        Raises ``TypeError`` if *value* is not an ``int`` and
        ``OutOfRangeError`` if it lies outside ``MIN``..``MAX``.
        """
        if not isinstance(value, int):
            value_qualname = type(value).__qualname__
            msg = f'RomanNumeral: an integer is required, not {value_qualname!r}'  # NoQA: E501
            raise TypeError(msg)
        if value < MIN or value > MAX:
            msg = f'Number out of range (must be between 1 and 4,999). Got {value}.'  # NoQA: E501
            raise OutOfRangeError(msg)
        # Bypass our own __setattr__, which refuses to set '_value'.
        super().__setattr__('_value', value)

    def __int__(self) -> int:
        """Return the integer value of this numeral."""
        return self._value

    def __str__(self) -> str:
        """Return the well-formed (uppercase) string for this numeral."""
        return self.to_uppercase()

    def __repr__(self) -> str:
        """Return the string representation of this numeral."""
        return f'{self.__class__.__name__}({self._value!r})'

    # Comparisons are only defined between RomanNumeral instances; for any
    # other operand NotImplemented is returned so Python can try the
    # reflected operation or raise TypeError itself.

    def __eq__(self, other: object) -> bool:
        """Return self == other."""
        if isinstance(other, RomanNumeral):
            return self._value == other._value
        return NotImplemented

    def __lt__(self, other: object) -> bool:
        """Return self < other."""
        if isinstance(other, RomanNumeral):
            return self._value < other._value
        return NotImplemented

    def __le__(self, other: object) -> bool:
        """Return self <= other."""
        if isinstance(other, RomanNumeral):
            return self._value <= other._value
        return NotImplemented

    def __gt__(self, other: object) -> bool:
        """Return self > other."""
        if isinstance(other, RomanNumeral):
            return self._value > other._value
        return NotImplemented

    def __ge__(self, other: object) -> bool:
        """Return self >= other."""
        if isinstance(other, RomanNumeral):
            return self._value >= other._value
        return NotImplemented

    def __hash__(self) -> int:
        """Return the hashed value of this numeral."""
        return hash(self._value)

    def __setattr__(self, key: str, value: object) -> None:
        """Implement setattr(self, name, value).

        Assigning to ``_value`` always raises ``AttributeError``.
        """
        if key == '_value':
            msg = f'Cannot set the {key!r} attribute.'
            raise AttributeError(msg)
        super().__setattr__(key, value)

    def to_uppercase(self) -> str:
        """Convert a ``RomanNumeral`` to an uppercase string.

        >>> answer = RomanNumeral(42)
        >>> assert answer.to_uppercase() == 'XLII'
        """
        out: list[str] = []
        n = self._value
        # Greedily subtract each table value for as long as it fits.
        for value, name, _ in _ROMAN_NUMERAL_PREFIXES:
            while n >= value:
                n -= value
                out.append(name)
        return ''.join(out)

    def to_lowercase(self) -> str:
        """Convert a ``RomanNumeral`` to a lowercase string.

        >>> answer = RomanNumeral(42)
        >>> assert answer.to_lowercase() == 'xlii'
        """
        out: list[str] = []
        n = self._value
        # Same greedy pass as to_uppercase, using the lowercase column.
        for value, _, name in _ROMAN_NUMERAL_PREFIXES:
            while n >= value:
                n -= value
                out.append(name)
        return ''.join(out)

    @classmethod
    def from_string(cls, string: str, /) -> Self:
        """Create a ``RomanNumeral`` from a well-formed string representation.

        The string must be ASCII and either entirely uppercase or
        entirely lowercase; mixed case is rejected. Up to four leading
        "M" characters are accepted.

        Returns ``RomanNumeral`` or raises ``InvalidRomanNumeralError``.

        >>> answer = RomanNumeral.from_string('XLII')
        >>> assert int(answer) == 42
        """
        # Check the type first so truth-testing never runs on an arbitrary
        # object, then reject the empty string.
        if not isinstance(string, str) or not string:
            raise InvalidRomanNumeralError(string)

        # ASCII-only, and either all uppercase or all lowercase.
        if not string.isascii():
            raise InvalidRomanNumeralError(string)
        if string.isupper():
            chars = string.encode('ascii')
        elif string.islower():
            chars = string.upper().encode('ascii')
        else:
            # Mixed-case (or caseless) ASCII.
            raise InvalidRomanNumeralError(string)

        # ASCII-only uppercase string only containing I, V, X, L, C, D, M.
        if not frozenset(b'IVXLCDM').issuperset(chars):
            raise InvalidRomanNumeralError(string)

        result: int = 0
        idx: int = 0

        # Thousands: between 0 and 4 "M" characters at the start
        for _ in range(4):
            if chars[idx:idx + 1] != b'M':
                break
            result += 1000
            idx += 1
        if len(chars) == idx:
            return cls(result)

        # Hundreds, tens, and ones all follow the same shape:
        # 9 units (e.g. "CM"), 4 units (e.g. "CD"), or an optional
        # 5-unit character (e.g. "D") followed by 0 to 3 unit characters
        # (e.g. "C").
        for nine, four, five, one, unit in (
            (b'CM', b'CD', b'D', b'C', 100),
            (b'XC', b'XL', b'L', b'X', 10),
            (b'IX', b'IV', b'V', b'I', 1),
        ):
            pair = chars[idx:idx + 2]
            if pair == nine:
                result += 9 * unit
                idx += 2
            elif pair == four:
                result += 4 * unit
                idx += 2
            else:
                if chars[idx:idx + 1] == five:
                    result += 5 * unit
                    idx += 1
                for _ in range(3):
                    if chars[idx:idx + 1] != one:
                        break
                    result += unit
                    idx += 1
            # All input consumed: the numeral is complete.
            if len(chars) == idx:
                return cls(result)

        # Trailing characters remain that fit no position.
        raise InvalidRomanNumeralError(string)
247
248
# Listed from the largest value to the smallest. The lowercase spelling
# of each entry is derived from its uppercase spelling, and both strings
# are interned.
_ROMAN_NUMERAL_PREFIXES: Final = [
    (number, sys.intern(symbol), sys.intern(symbol.lower()))
    for number, symbol in (
        (1000, 'M'),
        (900, 'CM'),
        (500, 'D'),
        (400, 'CD'),
        (100, 'C'),
        (90, 'XC'),
        (50, 'L'),
        (40, 'XL'),
        (10, 'X'),
        (9, 'IX'),
        (5, 'V'),
        (4, 'IV'),
        (1, 'I'),
    )
]
"""Numeral value, uppercase character, and lowercase character."""