1"""
2Plotting of string "category" data: ``plot(['d', 'f', 'a'], [1, 2, 3])`` will
3plot three points with x-axis values of 'd', 'f', 'a'.
4
5See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an
6example.
7
8The module uses Matplotlib's `matplotlib.units` mechanism to convert from
9strings to integers and provides a tick locator, a tick formatter, and the
10`.UnitData` class that creates and stores the string-to-integer mapping.
11"""
12
13from collections import OrderedDict
14import dateutil.parser
15import itertools
16import logging
17
18import numpy as np
19
20from matplotlib import _api, ticker, units
21
22
# Module-level logger named after this module; used by `UnitData.update` to
# emit an informational hint about number-like or date-like category strings.
_log = logging.getLogger(__name__)
24
25
class StrCategoryConverter(units.ConversionInterface):
    """Unit converter translating category strings into their integer ids."""

    @staticmethod
    def convert(value, unit, axis):
        """
        Translate the strings in *value* into floats, using the mapping held
        by the *unit* object.

        Parameters
        ----------
        value : str or iterable
            Value or list of values to be converted.
        unit : `.UnitData`
            An object mapping strings to integers.
        axis : `~matplotlib.axis.Axis`
            The axis on which the converted value is plotted.

            .. note:: *axis* is unused.

        Returns
        -------
        `~numpy.ndarray` of float
            The result is always at least one-dimensional, including when
            *value* is a single string.

        Raises
        ------
        ValueError
            If *unit* is None or has no ``_mapping`` attribute.
        """
        if unit is None:
            raise ValueError(
                'Missing category information for StrCategoryConverter; '
                'this might be caused by unintendedly mixing categorical and '
                'numeric data')
        StrCategoryConverter._validate_unit(unit)
        # dtype=object stores the elements as Python objects instead of
        # coercing them to a common NumPy dtype.
        categories = np.atleast_1d(np.array(value, dtype=object))
        # Updating first registers any unseen category and type-checks the
        # input, so the lookup below cannot miss.
        unit.update(categories)
        lookup = np.vectorize(unit._mapping.__getitem__, otypes=[float])
        return lookup(categories)

    @staticmethod
    def axisinfo(unit, axis):
        """
        Build the default tick locator and formatter for categorical data.

        Parameters
        ----------
        unit : `.UnitData`
            Object holding the string-to-integer mapping.
        axis : `~matplotlib.axis.Axis`
            Axis for which the information is being set.

            .. note:: *axis* is not used.

        Returns
        -------
        `~matplotlib.units.AxisInfo`
            Information to support default tick labeling.

        Raises
        ------
        ValueError
            If *unit* has no ``_mapping`` attribute.
        """
        StrCategoryConverter._validate_unit(unit)
        # The locator and the formatter receive the mapping dict itself (not
        # a copy), so later updates of the mapping are visible to both.
        mapping = unit._mapping
        return units.AxisInfo(
            majloc=StrCategoryLocator(mapping),
            majfmt=StrCategoryFormatter(mapping))

    @staticmethod
    def default_units(data, axis):
        """
        Set, or update, the units of the `~matplotlib.axis.Axis`.

        Parameters
        ----------
        data : str or iterable of str
            Categories to register.
        axis : `~matplotlib.axis.Axis`
            Axis on which the data is plotted.

        Returns
        -------
        `.UnitData`
            Object storing the string-to-integer mapping.
        """
        # The conversion call stack is default_units -> axisinfo -> convert.
        if axis.units is not None:
            # Units already exist: only add the categories not seen so far.
            axis.units.update(data)
        else:
            axis.set_units(UnitData(data))
        return axis.units

    @staticmethod
    def _validate_unit(unit):
        """Raise `ValueError` unless *unit* has a ``_mapping`` attribute."""
        if hasattr(unit, '_mapping'):
            return
        raise ValueError(
            f'Provided unit "{unit}" is not valid for a categorical '
            'converter, as it does not have a _mapping attribute.')
116
117
class StrCategoryLocator(ticker.Locator):
    """Place a tick at every integer that a category string is mapped to."""

    def __init__(self, units_mapping):
        """
        Parameters
        ----------
        units_mapping : dict
            Mapping of category names (str) to indices (int).
        """
        # Keep a reference to the mapping itself rather than a copy, so the
        # ticks follow whatever the mapping contains when they are requested.
        self._units = units_mapping

    def __call__(self):
        # docstring inherited
        return [*self._units.values()]

    def tick_values(self, vmin, vmax):
        # docstring inherited
        # *vmin* and *vmax* are not consulted: all mapped indices are ticks.
        return self()
136
137
class StrCategoryFormatter(ticker.Formatter):
    """Label every tick with the category string mapped to its position."""

    def __init__(self, units_mapping):
        """
        Parameters
        ----------
        units_mapping : dict
            Mapping of category names (str) to indices (int).
        """
        # Keep a reference to the mapping itself rather than a copy, so the
        # labels follow whatever the mapping contains when they are requested.
        self._units = units_mapping

    def __call__(self, x, pos=None):
        # docstring inherited
        labels = self.format_ticks([x])
        return labels[0]

    def format_ticks(self, values):
        # docstring inherited
        # Invert the name -> index mapping so labels can be found by index.
        label_by_index = {}
        for name, index in self._units.items():
            label_by_index[index] = self._text(name)
        # Positions are rounded to the nearest integer; a position without a
        # category gets an empty label.
        return [label_by_index.get(round(tick), '') for tick in values]

    @staticmethod
    def _text(value):
        """Return *value* as a `str`, decoding `bytes` as UTF-8."""
        if isinstance(value, bytes):
            return value.decode(encoding='utf-8')
        return value if isinstance(value, str) else str(value)
166
167
class UnitData:
    """Ordered mapping between unique categorical values and integer ids."""

    def __init__(self, data=None):
        """
        Create mapping between unique categorical values and integer ids.

        Parameters
        ----------
        data : iterable
            sequence of string values
        """
        # Categories are stored in first-seen order; each new one receives
        # the next value of the counter.
        self._mapping = OrderedDict()
        self._counter = itertools.count()
        if data is not None:
            self.update(data)

    @staticmethod
    def _str_is_convertible(val):
        """
        Helper method to check whether a string can be parsed as float or date.

        Returns
        -------
        bool
            True if ``float(val)`` succeeds or, failing that, if
            ``dateutil.parser.parse(val)`` succeeds; False otherwise.
        """
        try:
            float(val)
        except ValueError:
            try:
                dateutil.parser.parse(val)
            except (ValueError, TypeError, OverflowError):
                # TypeError if dateutil >= 2.8.1 else ValueError.
                # OverflowError is documented by dateutil for dates whose
                # fields exceed the largest valid C integer; such a string is
                # not a usable date, and letting the error escape would abort
                # `update` from what is only a logging heuristic.
                return False
        return True

    def update(self, data):
        """
        Map new values to integer identifiers.

        Values already present in the mapping keep their identifier.

        Parameters
        ----------
        data : iterable of str or bytes

        Raises
        ------
        TypeError
            If elements in *data* are neither str nor bytes.
        """
        data = np.atleast_1d(np.array(data, dtype=object))
        # check if convertible to number:
        convertible = True
        for val in OrderedDict.fromkeys(data):
            # OrderedDict just iterates over unique values in data.
            _api.check_isinstance((str, bytes), value=val)
            if convertible:
                # this will only be called so long as convertible is True.
                convertible = self._str_is_convertible(val)
            if val not in self._mapping:
                self._mapping[val] = next(self._counter)
        # Only hint when there was data and every unique value looked like a
        # number or a date.
        if data.size and convertible:
            _log.info('Using categorical units to plot a list of strings '
                      'that are all parsable as floats or dates. If these '
                      'strings should be plotted as numbers, cast to the '
                      'appropriate data type before plotting.')
227
228
# Register the converter with Matplotlib's unit framework: each supported
# string-like type gets its own converter instance.
for _category_type in (str, np.str_, bytes, np.bytes_):
    units.registry[_category_type] = StrCategoryConverter()
del _category_type  # do not leave the loop variable in the module namespace