/src/openvswitch/lib/classifier.c
/*
 * Copyright (c) 2009-2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "classifier.h"
#include "classifier-private.h"
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "byte-order.h"
#include "openvswitch/dynamic-string.h"
#include "odp-util.h"
#include "packets.h"
#include "util.h"

struct trie_ctx;

/* A collection of "struct cls_conjunction"s currently embedded into a
 * cls_match. */
struct cls_conjunction_set {
    /* Link back to the cls_match.
     *
     * cls_conjunction_set is mostly used during classifier lookup, and, in
     * turn, during classifier lookup the most used member of
     * cls_conjunction_set is the rule's priority, so we cache it here for fast
     * access. */
    struct cls_match *match;
    int priority;               /* Cached copy of match->priority. */

    /* Conjunction information.
     *
     * 'min_n_clauses' allows some optimization during classifier lookup. */
    unsigned int n;             /* Number of elements in 'conj'. */
    unsigned int min_n_clauses; /* Smallest 'n' among elements of 'conj'. */
    struct cls_conjunction conj[];
};

/* Ports trie depends on both ports sharing the same ovs_be32. */
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)

static size_t
cls_conjunction_set_size(size_t n)
{
    return (sizeof(struct cls_conjunction_set)
            + n * sizeof(struct cls_conjunction));
}

static struct cls_conjunction_set *
cls_conjunction_set_alloc(struct cls_match *match,
                          const struct cls_conjunction conj[], size_t n)
{
    if (n) {
        size_t min_n_clauses = conj[0].n_clauses;
        for (size_t i = 1; i < n; i++) {
            min_n_clauses = MIN(min_n_clauses, conj[i].n_clauses);
        }

        struct cls_conjunction_set *set = xmalloc(cls_conjunction_set_size(n));
        set->match = match;
        set->priority = match->priority;
        set->n = n;
        set->min_n_clauses = min_n_clauses;
        memcpy(set->conj, conj, n * sizeof *conj);
        return set;
    } else {
        return NULL;
    }
}

static struct cls_match *
cls_match_alloc(const struct cls_rule *rule, ovs_version_t version,
                const struct cls_conjunction conj[], size_t n)
{
    size_t count = miniflow_n_values(rule->match.flow);

    struct cls_match *cls_match
        = xmalloc(sizeof *cls_match + MINIFLOW_VALUES_SIZE(count));

    ovsrcu_init(&cls_match->next, NULL);
    *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule;
    *CONST_CAST(int *, &cls_match->priority) = rule->priority;
    /* Make rule initially invisible. */
    cls_match->versions = VERSIONS_INITIALIZER(version, version);
    miniflow_clone(CONST_CAST(struct miniflow *, &cls_match->flow),
                   rule->match.flow, count);
    ovsrcu_set_hidden(&cls_match->conj_set,
                      cls_conjunction_set_alloc(cls_match, conj, n));

    return cls_match;
}

static struct cls_subtable *find_subtable(const struct classifier *cls,
                                          const struct minimask *);
static struct cls_subtable *insert_subtable(struct classifier *cls,
                                            const struct minimask *);
static void destroy_subtable(struct classifier *cls, struct cls_subtable *);

static const struct cls_match *find_match_wc(const struct cls_subtable *,
                                             ovs_version_t version,
                                             const struct flow *,
                                             struct trie_ctx *,
                                             uint32_t n_tries,
                                             struct flow_wildcards *);
static struct cls_match *find_equal(const struct cls_subtable *,
                                    const struct miniflow *, uint32_t hash);

/* Return the next visible (lower-priority) rule in the list.  Multiple
 * identical rules with the same priority may exist transitionally, but when
 * versioning is used at most one of them is ever visible for lookups on any
 * given 'version'. */
static inline const struct cls_match *
next_visible_rule_in_list(const struct cls_match *rule, ovs_version_t version)
{
    do {
        rule = cls_match_next(rule);
    } while (rule && !cls_match_visible_in_version(rule, version));

    return rule;
}

/* Type with maximum supported prefix length. */
union trie_prefix {
    struct in6_addr ipv6;  /* For sizing. */
    ovs_be32 be32;         /* For access. */
};

static unsigned int minimask_get_prefix_len(const struct minimask *,
                                            const struct mf_field *);
static void trie_init(struct classifier *cls, int trie_idx,
                      const struct mf_field *);
static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
                                union trie_prefix *plens);
static unsigned int trie_lookup_value(const rcu_trie_ptr *,
                                      const ovs_be32 value[], ovs_be32 plens[],
                                      unsigned int value_bits);
static void trie_destroy(struct cls_trie *);
static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
                                 unsigned int n_bits);
static bool mask_prefix_bits_set(const struct flow_wildcards *,
                                 uint8_t be32ofs, unsigned int n_bits);

/* cls_rule. */

static inline void
cls_rule_init__(struct cls_rule *rule, unsigned int priority)
{
    rculist_init(&rule->node);
    *CONST_CAST(int *, &rule->priority) = priority;
    ovsrcu_init(&rule->cls_match, NULL);
}

/* Initializes 'rule' to match packets specified by 'match' at the given
 * 'priority'.  'match' must satisfy the invariant described in the comment at
 * the definition of struct match.
 *
 * The caller must eventually destroy 'rule' with cls_rule_destroy().
 *
 * Clients should not use priority INT_MIN.  (OpenFlow uses priorities between
 * 0 and UINT16_MAX, inclusive.) */
void
cls_rule_init(struct cls_rule *rule, const struct match *match, int priority)
{
    cls_rule_init__(rule, priority);
    minimatch_init(CONST_CAST(struct minimatch *, &rule->match), match);
}
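
/* Usage sketch (illustrative, not part of this file's API): initializing and
 * destroying a standalone rule, assuming the caller builds the 'struct match'
 * with the usual match_set_* helpers from lib/match.h:
 *
 *     struct match m;
 *     struct cls_rule r;
 *
 *     match_init_catchall(&m);
 *     match_set_nw_proto(&m, IPPROTO_TCP);
 *     cls_rule_init(&r, &m, 32768);
 *     ...
 *     cls_rule_destroy(&r);
 *
 * The rule may only be destroyed after it has been removed from any
 * classifier it was inserted into. */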

/* Same as cls_rule_init() for initialization from a "struct minimatch". */
void
cls_rule_init_from_minimatch(struct cls_rule *rule,
                             const struct minimatch *match, int priority)
{
    cls_rule_init__(rule, priority);
    minimatch_clone(CONST_CAST(struct minimatch *, &rule->match), match);
}

/* Initializes 'dst' as a copy of 'src'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
{
    cls_rule_init__(dst, src->priority);
    minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match);
}

/* Initializes 'dst' with the data in 'src', destroying 'src'.
 *
 * 'src' must be a cls_rule NOT in a classifier.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
{
    cls_rule_init__(dst, src->priority);
    minimatch_move(CONST_CAST(struct minimatch *, &dst->match),
                   CONST_CAST(struct minimatch *, &src->match));
}

/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    /* Must not be in a classifier. */
    ovs_assert(!get_cls_match_protected(rule));

    /* Check that the rule has been properly removed from the classifier. */
    ovs_assert(rule->node.prev == RCULIST_POISON
               || rculist_is_empty(&rule->node));
    rculist_poison__(&rule->node);   /* Poisons also the next pointer. */

    minimatch_destroy(CONST_CAST(struct minimatch *, &rule->match));
}

/* This may only be called by the exclusive writer. */
void
cls_rule_set_conjunctions(struct cls_rule *cr,
                          const struct cls_conjunction *conj, size_t n)
{
    struct cls_match *match = get_cls_match_protected(cr);
    struct cls_conjunction_set *old
        = ovsrcu_get_protected(struct cls_conjunction_set *, &match->conj_set);
    struct cls_conjunction *old_conj = old ? old->conj : NULL;
    unsigned int old_n = old ? old->n : 0;

    if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof *conj))) {
        if (old) {
            ovsrcu_postpone(free, old);
        }
        ovsrcu_set(&match->conj_set,
                   cls_conjunction_set_alloc(match, conj, n));
    }
}

/* Returns true if 'a' and 'b' match the same packets at the same priority,
 * false if they differ in some way. */
bool
cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b)
{
    return a->priority == b->priority && minimatch_equal(&a->match, &b->match);
}

/* Appends a string describing 'rule' to 's'. */
void
cls_rule_format(const struct cls_rule *rule, const struct tun_table *tun_table,
                const struct ofputil_port_map *port_map, struct ds *s)
{
    minimatch_format(&rule->match, tun_table, port_map, s, rule->priority);
}

/* Returns true if 'rule' matches every packet, false otherwise. */
bool
cls_rule_is_catchall(const struct cls_rule *rule)
{
    return minimask_is_catchall(rule->match.mask);
}

/* Makes 'rule' invisible in 'remove_version'.  Once that version is used in
 * lookups, the caller should remove 'rule' via ovsrcu_postpone().
 *
 * 'rule' must be in a classifier.
 * This may only be called by the exclusive writer. */
void
cls_rule_make_invisible_in_version(const struct cls_rule *rule,
                                   ovs_version_t remove_version)
{
    struct cls_match *cls_match = get_cls_match_protected(rule);

    ovs_assert(remove_version >= cls_match->versions.add_version);

    cls_match_set_remove_version(cls_match, remove_version);
}

/* This undoes the change made by cls_rule_make_invisible_in_version().
 *
 * 'rule' must be in a classifier.
 * This may only be called by the exclusive writer. */
void
cls_rule_restore_visibility(const struct cls_rule *rule)
{
    cls_match_set_remove_version(get_cls_match_protected(rule),
                                 OVS_VERSION_NOT_REMOVED);
}

/* Return true if 'rule' is visible in 'version'.
 *
 * 'rule' must be in a classifier. */
bool
cls_rule_visible_in_version(const struct cls_rule *rule, ovs_version_t version)
{
    struct cls_match *cls_match = get_cls_match(rule);

    return cls_match && cls_match_visible_in_version(cls_match, version);
}
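
/* Versioning sketch (illustrative): a writer removing a rule in two steps so
 * that lookups still in flight on the old version keep seeing it.  Here
 * 'next_version' and 'rule_destroy_cb' are hypothetical caller-side names;
 * 'rule_destroy_cb' would call cls_rule_destroy() and free the enclosing
 * object:
 *
 *     cls_rule_make_invisible_in_version(rule, next_version);
 *     ... start using 'next_version' for lookups, then quiesce ...
 *     classifier_remove_assert(cls, rule);
 *     ovsrcu_postpone(rule_destroy_cb, rule);
 */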

/* Initializes 'cls' as a classifier that initially contains no classification
 * rules. */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
{
    cls->n_rules = 0;
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
    cls->n_flow_segments = 0;
    if (flow_segments) {
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U64S) {
            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    memset(cls->tries, 0, sizeof cls->tries);
    atomic_store_explicit(&cls->n_tries, 0, memory_order_release);
    cls->publish = true;
}

/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility.
 * May only be called after all the readers have been terminated. */
void
classifier_destroy(struct classifier *cls)
{
    if (cls) {
        struct cls_subtable *subtable;
        uint32_t i, n_tries;

        atomic_read_relaxed(&cls->n_tries, &n_tries);
        for (i = 0; i < n_tries; i++) {
            trie_destroy(&cls->tries[i]);
        }

        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            destroy_subtable(cls, subtable);
        }
        cmap_destroy(&cls->subtables_map);

        pvector_destroy(&cls->subtables);
    }
}

/* Set the fields for which prefix lookup should be performed. */
bool
classifier_set_prefix_fields(struct classifier *cls,
                             const enum mf_field_id *trie_fields,
                             unsigned int n_fields)
{
    const struct mf_field *new_fields[CLS_MAX_TRIES];
    struct mf_bitmap fields = MF_BITMAP_INITIALIZER;
    uint32_t i, n_tries = 0, old_n_tries;
    uint32_t first_changed = 0;
    bool changed = false;

    atomic_read_relaxed(&cls->n_tries, &old_n_tries);

    for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) {
        const struct mf_field *field = mf_from_id(trie_fields[i]);
        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
            /* Incompatible field.  This is the only place where we
             * enforce these requirements, but the rest of the trie code
             * depends on the flow_be32ofs to be non-negative and the
             * field length to be a multiple of 32 bits. */
            continue;
        }

        if (bitmap_is_set(fields.bm, trie_fields[i])) {
            /* Duplicate field, there is no need to build more than
             * one index for any one field. */
            continue;
        }
        bitmap_set1(fields.bm, trie_fields[i]);

        new_fields[n_tries] = NULL;
        if (n_tries >= old_n_tries || field != cls->tries[n_tries].field) {
            new_fields[n_tries] = field;
            if (!changed) {
                first_changed = n_tries;
            }
            changed = true;
        }
        n_tries++;
    }

    if (changed || n_tries < old_n_tries) {
        if (!changed) {
            /* There are no new or changed fields, only removing a few. */
            first_changed = n_tries;
        }
        /* Trie configuration needs to change.  Disable trie lookups and wait
         * for all the current readers to be done with the old configuration.
         * The readers may temporarily function without the trie lookup based
         * optimizations.  The first few entries that did not change remain
         * accessible throughout.
         *
         * This store can be relaxed because ovsrcu_synchronize() functions as
         * a memory barrier. */
        atomic_store_relaxed(&cls->n_tries, first_changed);
        ovsrcu_synchronize();

        /* Now set up the tries for new and changed fields. */
        for (i = first_changed; i < n_tries; i++) {
            if (new_fields[i]) {
                trie_destroy(&cls->tries[i]);
                trie_init(cls, i, new_fields[i]);
            }
        }
        /* Destroy the rest, if any. */
        for (; i < old_n_tries; i++) {
            trie_destroy(&cls->tries[i]);
        }

        /* Re-enable trie lookups.  Using release memory order, so all the
         * previous stores are visible in the classifier_lookup(). */
        atomic_store_explicit(&cls->n_tries, n_tries, memory_order_release);
        return true;
    }

    return false;   /* No change. */
}
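
/* Configuration sketch (illustrative): enabling prefix tries on the IPv4
 * source and destination fields, which is roughly what ovs-vswitchd's default
 * "prefixes" flow-table setting amounts to:
 *
 *     static const enum mf_field_id trie_fields[] = {
 *         MFF_IPV4_DST, MFF_IPV4_SRC,
 *     };
 *
 *     classifier_set_prefix_fields(cls, trie_fields,
 *                                  ARRAY_SIZE(trie_fields));
 *
 * The call returns true only when the trie configuration actually changed. */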

static void
trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;

    ovs_assert(field);
    ovs_assert(!trie->field);

    trie->field = field;
    ovsrcu_set_hidden(&trie->root, NULL);

    /* Add existing rules to the new trie. */
    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        unsigned int plen;

        plen = minimask_get_prefix_len(&subtable->mask, field);
        if (plen) {
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                trie_insert(trie, head->cls_rule, plen);
            }
        }
        /* Initialize subtable's prefix length on this field. */
        subtable->trie_plen[trie_idx] = plen;
    }
}

/* Returns true if 'cls' contains no classification rules, false otherwise.
 * Checking the cmap requires no locking. */
bool
classifier_is_empty(const struct classifier *cls)
{
    return cmap_is_empty(&cls->subtables_map);
}

/* Returns the number of rules in 'cls'. */
int
classifier_count(const struct classifier *cls)
{
    /* n_rules is an int, so in the presence of concurrent writers this will
     * return either the old or a new value. */
    return cls->n_rules;
}

static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
{
    /* Could optimize to use the same map if needed for fast path. */
    return (miniflow_get_ports(match->flow)
            & miniflow_get_ports(&match->mask->masks));
}

/* Inserts 'rule' into 'cls' in 'version'.  Until 'rule' is removed from 'cls',
 * the caller must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority) that is visible in 'version', replaces the old
 * rule by 'rule' and returns the rule that was replaced.  The caller takes
 * ownership of the returned rule and is thus responsible for destroying it
 * with cls_rule_destroy(), after RCU grace period has passed (see
 * ovsrcu_postpone()).
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority.
 */
const struct cls_rule *
classifier_replace(struct classifier *cls, const struct cls_rule *rule,
                   ovs_version_t version,
                   const struct cls_conjunction *conjs, size_t n_conjs)
{
    struct cls_match *new;
    struct cls_subtable *subtable;
    uint32_t ihash[CLS_MAX_INDICES];
    struct cls_match *head;
    unsigned int mask_offset;
    size_t n_rules = 0;
    uint32_t i, n_tries;
    uint8_t n_indices;
    uint32_t basis;
    uint32_t hash;

    /* 'new' is initially invisible to lookups. */
    new = cls_match_alloc(rule, version, conjs, n_conjs);
    ovsrcu_set(&CONST_CAST(struct cls_rule *, rule)->cls_match, new);

    subtable = find_subtable(cls, rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, rule->match.mask);
    }

    /* Compute hashes in segments. */
    basis = 0;
    mask_offset = 0;
    n_indices = subtable->n_indices;
    for (i = 0; i < n_indices; i++) {
        ihash[i] = minimatch_hash_range(&rule->match, subtable->index_maps[i],
                                        &mask_offset, &basis);
    }
    hash = minimatch_hash_range(&rule->match, subtable->index_maps[i],
                                &mask_offset, &basis);

    head = find_equal(subtable, rule->match.flow, hash);
    if (!head) {
        /* Add rule to tries.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        atomic_read_relaxed(&cls->n_tries, &n_tries);
        for (i = 0; i < n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }

        /* Add rule to ports trie. */
        if (subtable->ports_mask_len) {
            /* We mask the value to be inserted to always have the wildcarded
             * bits in known (zero) state, so we can include them in comparison
             * and they will always match (== their original value does not
             * matter). */
            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
                               subtable->ports_mask_len);
        }

        /* Add new node to segment indices. */
        for (i = 0; i < n_indices; i++) {
            ccmap_inc(&subtable->indices[i], ihash[i]);
        }
        n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
    } else {   /* Equal rules exist in the classifier already. */
        struct cls_match *prev, *iter;

        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority.  Insert after rules marked invisible
         * in any version of the same priority. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule->priority > iter->priority
                || (rule->priority == iter->priority
                    && !cls_match_is_eventually_invisible(iter))) {
                break;
            }
        }

        /* Replace 'iter' with 'new' or insert 'new' between 'prev' and
         * 'iter'. */
        if (iter) {
            struct cls_rule *old;

            if (rule->priority == iter->priority) {
                cls_match_replace(prev, iter, new);
                old = CONST_CAST(struct cls_rule *, iter->cls_rule);
            } else {
                cls_match_insert(prev, iter, new);
                old = NULL;
            }

            /* Replace the existing head in data structures, if rule is the new
             * head. */
            if (iter == head) {
                cmap_replace(&subtable->rules, &head->cmap_node,
                             &new->cmap_node, hash);
            }

            if (old) {
                struct cls_conjunction_set *conj_set;

                conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                                &iter->conj_set);
                if (conj_set) {
                    ovsrcu_postpone(free, conj_set);
                }

                ovsrcu_set(&old->cls_match, NULL);  /* Marks old rule as
                                                     * removed from the
                                                     * classifier. */
                ovsrcu_postpone(cls_match_free_cb, iter);

                /* No change in subtable's max priority or max count. */

                /* Make 'new' visible to lookups in the appropriate version. */
                cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);

                /* Make rule visible to iterators (immediately). */
                rculist_replace(CONST_CAST(struct rculist *, &rule->node),
                                &old->node);

                /* Return displaced rule.  Caller is responsible for keeping it
                 * around until all threads quiesce. */
                return old;
            }
        } else {
            /* 'new' is new node after 'prev'. */
            cls_match_insert(prev, iter, new);
        }
    }

    /* Make 'new' visible to lookups in the appropriate version. */
    cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);

    /* Make rule visible to iterators (immediately). */
    rculist_push_back(&subtable->rules_list,
                      CONST_CAST(struct rculist *, &rule->node));

    /* Rule was added, not replaced.  Update 'subtable's 'max_priority' and
     * 'max_count', if necessary.
     *
     * The rule was already inserted, but concurrent readers may not see the
     * rule yet as the subtables vector is not updated yet.  This will have to
     * be fixed by revalidation later. */
    if (n_rules == 1) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_insert(&cls->subtables, subtable, rule->priority);
    } else if (rule->priority == subtable->max_priority) {
        ++subtable->max_count;
    } else if (rule->priority > subtable->max_priority) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_change_priority(&cls->subtables, subtable, rule->priority);
    }

    /* Nothing was replaced. */
    cls->n_rules++;

    if (cls->publish) {
        pvector_publish(&cls->subtables);
    }

    return NULL;
}

/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule. */
void
classifier_insert(struct classifier *cls, const struct cls_rule *rule,
                  ovs_version_t version, const struct cls_conjunction conj[],
                  size_t n_conj)
{
    const struct cls_rule *displaced_rule
        = classifier_replace(cls, rule, version, conj, n_conj);
    ovs_assert(!displaced_rule);
}
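
/* Ownership sketch (illustrative): handling the rule displaced by
 * classifier_replace().  'rule_destroy_cb' is a hypothetical caller-supplied
 * callback that calls cls_rule_destroy() and frees the enclosing object:
 *
 *     const struct cls_rule *displaced
 *         = classifier_replace(cls, rule, version, NULL, 0);
 *
 *     if (displaced) {
 *         ovsrcu_postpone(rule_destroy_cb,
 *                         CONST_CAST(struct cls_rule *, displaced));
 *     }
 */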

/* If 'rule' is in 'cls', removes 'rule' from 'cls' and returns true.  It is
 * the caller's responsibility to destroy 'rule' with cls_rule_destroy(),
 * freeing the memory block in which 'rule' resides, etc., as necessary.
 *
 * If 'rule' is not in any classifier, returns false without making any
 * changes.
 *
 * 'rule' must not be in some classifier other than 'cls'.
 */
bool
classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule)
{
    struct cls_match *rule, *prev, *next, *head;
    struct cls_conjunction_set *conj_set;
    struct cls_subtable *subtable;
    uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
    unsigned int mask_offset;
    uint32_t i, n_tries;
    uint8_t n_indices;
    size_t n_rules;

    rule = get_cls_match_protected(cls_rule);
    if (!rule) {
        return false;
    }
    /* Mark as removed. */
    ovsrcu_set(&CONST_CAST(struct cls_rule *, cls_rule)->cls_match, NULL);

    /* Remove 'cls_rule' from the subtable's rules list. */
    rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node));

    subtable = find_subtable(cls, cls_rule->match.mask);
    ovs_assert(subtable);

    mask_offset = 0;
    n_indices = subtable->n_indices;
    for (i = 0; i < n_indices; i++) {
        ihash[i] = minimatch_hash_range(&cls_rule->match,
                                        subtable->index_maps[i],
                                        &mask_offset, &basis);
    }
    hash = minimatch_hash_range(&cls_rule->match, subtable->index_maps[i],
                                &mask_offset, &basis);

    head = find_equal(subtable, cls_rule->match.flow, hash);

    /* Check if the rule is not the head rule. */
    if (rule != head) {
        struct cls_match *iter;

        /* Not the head rule, but potentially one with the same priority. */
        /* Remove from the list of equal rules. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule == iter) {
                break;
            }
        }
        ovs_assert(iter == rule);

        cls_match_remove(prev, rule);

        goto check_priority;
    }

    /* 'rule' is the head rule.  Check if there is another rule to
     * replace 'rule' in the data structures. */
    next = cls_match_next_protected(rule);
    if (next) {
        cmap_replace(&subtable->rules, &rule->cmap_node, &next->cmap_node,
                     hash);
        goto check_priority;
    }

    /* 'rule' is last of the kind in the classifier, must remove from all the
     * data structures. */

    if (subtable->ports_mask_len) {
        ovs_be32 masked_ports = minimatch_get_ports(&cls_rule->match);

        trie_remove_prefix(&subtable->ports_trie,
                           &masked_ports, subtable->ports_mask_len);
    }
    atomic_read_relaxed(&cls->n_tries, &n_tries);
    for (i = 0; i < n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], cls_rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < n_indices; i++) {
        ccmap_dec(&subtable->indices[i], ihash[i]);
    }
    n_rules = cmap_remove(&subtable->rules, &rule->cmap_node, hash);

    if (n_rules == 0) {
        destroy_subtable(cls, subtable);
    } else {
check_priority:
        if (subtable->max_priority == rule->priority
            && --subtable->max_count == 0) {
            /* Find the new 'max_priority' and 'max_count'. */
            int max_priority = INT_MIN;
            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                if (head->priority > max_priority) {
                    max_priority = head->priority;
                    subtable->max_count = 1;
                } else if (head->priority == max_priority) {
                    ++subtable->max_count;
                }
            }
            subtable->max_priority = max_priority;
            pvector_change_priority(&cls->subtables, subtable, max_priority);
        }
    }

    if (cls->publish) {
        pvector_publish(&cls->subtables);
    }

    /* Free the rule. */
    conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                    &rule->conj_set);
    if (conj_set) {
        ovsrcu_postpone(free, conj_set);
    }
    ovsrcu_postpone(cls_match_free_cb, rule);
    cls->n_rules--;

    return true;
}

void
classifier_remove_assert(struct classifier *cls,
                         const struct cls_rule *cls_rule)
{
    ovs_assert(classifier_remove(cls, cls_rule));
}

/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
 * subtables which have a prefix match on the trie field, but whose prefix
 * length is not indicated in 'match_plens'.  For example, a subtable that
 * has an 8-bit trie field prefix match can be skipped if
 * !be_get_bit_at(&match_plens, 8 - 1).  If skipped, 'maskbits' prefix bits
 * must be unwildcarded to make datapath flow only match packets it should. */
struct trie_ctx {
    const struct cls_trie *trie;
    bool lookup_done;       /* Status of the lookup. */
    uint8_t be32ofs;        /* U32 offset of the field in question. */
    unsigned int maskbits;  /* Prefix length needed to avoid false matches. */
    union trie_prefix match_plens;  /* Bitmask of prefix lengths with possible
                                     * matches. */
};

static void
trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
{
    ctx->trie = trie;
    ctx->be32ofs = trie->field->flow_be32ofs;
    ctx->lookup_done = false;
}

static void
insert_conj_flows(struct hmapx *conj_flows, uint32_t id, int priority,
                  struct cls_conjunction_set **soft, size_t n_soft)
{
    struct cls_conjunction_set *conj_set;

    if (!conj_flows) {
        return;
    }

    for (size_t i = 0; i < n_soft; i++) {
        conj_set = soft[i];

        if (conj_set->priority != priority) {
            continue;
        }

        for (size_t j = 0; j < conj_set->n; j++) {
            if (conj_set->conj[j].id == id) {
                hmapx_add(conj_flows, (void *) (conj_set->match->cls_rule));
                break;
            }
        }
    }
}

struct conjunctive_match {
    struct hmap_node hmap_node;
    uint32_t id;
    uint64_t clauses;
};
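
/* Conjunctive match sketch (illustrative): with OpenFlow flows such as
 *
 *     priority=5, tcp, tp_dst=80,        actions=conjunction(7, 1/2)
 *     priority=5, ip, nw_src=10.0.0.0/8, actions=conjunction(7, 2/2)
 *     priority=5, conj_id=7,             actions=...
 *
 * each dimension becomes a "soft match" whose cls_conjunction carries id=7,
 * n_clauses=2, and a 0-based clause number.  A TCP packet from 10.1.2.3 to
 * port 80 satisfies both clauses, so the lookup below sets flow->conj_id = 7
 * and recurses to find the conj_id=7 rule as the final match. */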

static struct conjunctive_match *
find_conjunctive_match__(struct hmap *matches, uint64_t id, uint32_t hash)
{
    struct conjunctive_match *m;

    HMAP_FOR_EACH_IN_BUCKET (m, hmap_node, hash, matches) {
        if (m->id == id) {
            return m;
        }
    }
    return NULL;
}

static bool
find_conjunctive_match(const struct cls_conjunction_set *set,
                       unsigned int max_n_clauses, struct hmap *matches,
                       struct conjunctive_match *cm_stubs, size_t n_cm_stubs,
                       uint32_t *idp)
{
    const struct cls_conjunction *c;

    if (max_n_clauses < set->min_n_clauses) {
        return false;
    }

    for (c = set->conj; c < &set->conj[set->n]; c++) {
        struct conjunctive_match *cm;
        uint32_t hash;

        if (c->n_clauses > max_n_clauses) {
            continue;
        }

        hash = hash_int(c->id, 0);
        cm = find_conjunctive_match__(matches, c->id, hash);
        if (!cm) {
            size_t n = hmap_count(matches);

            cm = n < n_cm_stubs ? &cm_stubs[n] : xmalloc(sizeof *cm);
            hmap_insert(matches, &cm->hmap_node, hash);
            cm->id = c->id;
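            /* Start with all bits at and above 'n_clauses' already set, so
             * that 'clauses' reaches UINT64_MAX exactly when each of the low
             * 'n_clauses' clause bits has been recorded below.  The "& 63"
             * keeps the shift count within the width of uint64_t. */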
            cm->clauses = UINT64_MAX << (c->n_clauses & 63);
        }
        cm->clauses |= UINT64_C(1) << c->clause;
        if (cm->clauses == UINT64_MAX) {
            *idp = cm->id;
            return true;
        }
    }
    return false;
}

static void
free_conjunctive_matches(struct hmap *matches,
                         struct conjunctive_match *cm_stubs, size_t n_cm_stubs)
{
    if (hmap_count(matches) > n_cm_stubs) {
        struct conjunctive_match *cm;

        HMAP_FOR_EACH_SAFE (cm, hmap_node, matches) {
            if (!(cm >= cm_stubs && cm < &cm_stubs[n_cm_stubs])) {
                free(cm);
            }
        }
    }
    hmap_destroy(matches);
}

/* Like classifier_lookup(), except that support for conjunctive matches can be
 * configured with 'allow_conjunctive_matches'.  That feature is not exposed
 * externally because turning off conjunctive matches is only useful to avoid
 * recursion within this function itself.
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning.
 *
 * 'conj_flows' is an optional parameter.  If it is non-null, the matching
 * conjunctive flows are inserted. */
static const struct cls_rule *
classifier_lookup__(const struct classifier *cls, ovs_version_t version,
                    struct flow *flow, struct flow_wildcards *wc,
                    bool allow_conjunctive_matches,
                    struct hmapx *conj_flows)
{
    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    const struct cls_match *match;
    /* Highest-priority flow in 'cls' that certainly matches 'flow'. */
    const struct cls_match *hard = NULL;
    int hard_pri = INT_MIN;   /* hard ? hard->priority : INT_MIN. */

    /* Highest-priority conjunctive flows in 'cls' matching 'flow'.  Since
     * these are (components of) conjunctive flows, we can only know whether
     * the full conjunctive flow matches after seeing multiple of them.  Thus,
     * we refer to these as "soft matches". */
    struct cls_conjunction_set *soft_stub[64];
    struct cls_conjunction_set **soft = soft_stub;
    size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);
    int soft_pri = INT_MIN;   /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */

    uint32_t n_tries;

    /* Using memory_order_acquire on cls->n_tries to make sure that all the
     * configuration changes for these tries are fully visible after the read.
     *
     * Trie configuration changes typically happen on startup, but can also
     * happen in runtime. */
    atomic_read_explicit(&CONST_CAST(struct classifier *, cls)->n_tries,
                         &n_tries, memory_order_acquire);

    /* Initialize trie contexts for find_match_wc(). */
    for (uint32_t i = 0; i < n_tries; i++) {
        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    /* Main loop. */
    struct cls_subtable *subtable;
    PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri + 1, 2, sizeof *subtable,
                               &cls->subtables) {
        struct cls_conjunction_set *conj_set;

        /* Skip subtables with no match, or where the match is lower-priority
         * than some certain match we've already found. */
        match = find_match_wc(subtable, version, flow, trie_ctx, n_tries, wc);
        if (!match || match->priority <= hard_pri) {
            continue;
        }

        conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
        if (!conj_set) {
            /* 'match' isn't part of a conjunctive match.  It's the best
             * certain match we've got so far, since we know that it's
             * higher-priority than hard_pri.
             *
             * (There might be a higher-priority conjunctive match.  We can't
             * tell yet.) */
            hard = match;
            hard_pri = hard->priority;
        } else if (allow_conjunctive_matches) {
            /* 'match' is part of a conjunctive match.  Add it to the list. */
            if (OVS_UNLIKELY(n_soft >= allocated_soft)) {
                struct cls_conjunction_set **old_soft = soft;

                allocated_soft *= 2;
                soft = xmalloc(allocated_soft * sizeof *soft);
                memcpy(soft, old_soft, n_soft * sizeof *soft);
                if (old_soft != soft_stub) {
                    free(old_soft);
                }
            }
            soft[n_soft++] = conj_set;

            /* Keep track of the highest-priority soft match. */
            if (soft_pri < match->priority) {
                soft_pri = match->priority;
            }
        }
    }

    /* In the common case, at this point we have no soft matches and we can
     * return immediately.  (We do the same thing if we have potential soft
     * matches but none of them are higher-priority than our hard match.) */
    if (hard_pri >= soft_pri) {
        if (soft != soft_stub) {
            free(soft);
        }
        return hard ? hard->cls_rule : NULL;
    }

    /* At this point, we have some soft matches.  We might also have a hard
     * match; if so, its priority is lower than the highest-priority soft
     * match. */

    /* Soft match loop.
     *
     * Check whether soft matches are real matches. */
    for (;;) {
        /* Delete soft matches that are null.  This only happens in second and
         * subsequent iterations of the soft match loop, when we drop back from
         * a high-priority soft match to a lower-priority one.
         *
         * Also, delete soft matches whose priority is less than or equal to
         * the hard match's priority.  In the first iteration of the soft
         * match, these can be in 'soft' because the earlier main loop found
         * the soft match before the hard match.  In second and later iteration
         * of the soft match loop, these can be in 'soft' because we dropped
         * back from a high-priority soft match to a lower-priority soft match.
         *
         * It is tempting to delete soft matches that cannot be satisfied
         * because there are fewer soft matches than required to satisfy any of
         * their conjunctions, but we cannot do that because there might be
         * lower priority soft or hard matches with otherwise identical
         * matches.  (We could special case those here, but there's no
         * need--we'll do so at the bottom of the soft match loop anyway and
         * this duplicates less code.)
         *
         * It's also tempting to break out of the soft match loop if 'n_soft ==
         * 1' but that would also miss lower-priority hard matches.  We could
         * special case that also but again there's no need. */
        for (int i = 0; i < n_soft; ) {
            if (!soft[i] || soft[i]->priority <= hard_pri) {
                soft[i] = soft[--n_soft];
            } else {
                i++;
            }
        }
        if (!n_soft) {
            break;
        }

        /* Find the highest priority among the soft matches.  (We know this
         * must be higher than the hard match's priority; otherwise we would
         * have deleted all of the soft matches in the previous loop.)  Count
         * the number of soft matches that have that priority. */
        soft_pri = INT_MIN;
        int n_soft_pri = 0;
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority > soft_pri) {
                soft_pri = soft[i]->priority;
                n_soft_pri = 1;
            } else if (soft[i]->priority == soft_pri) {
                n_soft_pri++;
            }
        }
        ovs_assert(soft_pri > hard_pri);

        /* Look for a real match among the highest-priority soft matches.
         *
         * It's unusual to have many conjunctive matches, so we use stubs to
         * avoid calling malloc() in the common case.  An hmap has a built-in
         * stub for up to 2 hmap_nodes; possibly, we would benefit from a
         * variant with a bigger stub. */
        struct conjunctive_match cm_stubs[16];
        struct hmap matches;

        hmap_init(&matches);
        for (int i = 0; i < n_soft; i++) {
            uint32_t id;

            if (soft[i]->priority == soft_pri
                && find_conjunctive_match(soft[i], n_soft_pri, &matches,
                                          cm_stubs, ARRAY_SIZE(cm_stubs),
                                          &id)) {
                uint32_t saved_conj_id = flow->conj_id;
                const struct cls_rule *rule;

                flow->conj_id = id;
                rule = classifier_lookup__(cls, version, flow, wc, false,
                                           NULL);
                flow->conj_id = saved_conj_id;

                if (rule) {
                    if (allow_conjunctive_matches) {
                        insert_conj_flows(conj_flows, id, soft_pri, soft,
                                          n_soft);
                    }
                    free_conjunctive_matches(&matches,
                                             cm_stubs, ARRAY_SIZE(cm_stubs));
                    if (soft != soft_stub) {
                        free(soft);
                    }
                    return rule;
                }
            }
        }
        free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));

        /* There's no real match among the highest-priority soft matches.
         * However, if any of those soft matches has a lower-priority but
         * otherwise identical flow match, then we need to consider those for
         * soft or hard matches.
         *
         * The next iteration of the soft match loop will delete any null
         * pointers we put into 'soft' (and some others too). */
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority != soft_pri) {
                continue;
            }

            /* Find next-lower-priority flow with identical flow match. */
            match = next_visible_rule_in_list(soft[i]->match, version);
            if (match) {
                soft[i] = ovsrcu_get(struct cls_conjunction_set *,
                                     &match->conj_set);
                if (!soft[i]) {
                    /* The flow is a hard match; don't treat as a soft
                     * match. */
                    if (match->priority > hard_pri) {
                        hard = match;
                        hard_pri = hard->priority;
                    }
                }
            } else {
                /* No such lower-priority flow (probably the common case). */
                soft[i] = NULL;
            }
        }
    }

    if (soft != soft_stub) {
        free(soft);
    }
    return hard ? hard->cls_rule : NULL;
}

/* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and
 * that is visible in 'version'.  Returns a null pointer if no rules in 'cls'
 * match 'flow'.  If multiple rules of equal priority match 'flow', returns one
 * arbitrarily.
 *
 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
 * set of bits that were significant in the lookup.  At some point
 * earlier, 'wc' should have been initialized (e.g., by
 * flow_wildcards_init_catchall()).
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning.
 *
 * 'conj_flows' is an optional parameter.  If it is non-null, the matching
 * conjunctive flows are inserted. */
const struct cls_rule *
classifier_lookup(const struct classifier *cls, ovs_version_t version,
                  struct flow *flow, struct flow_wildcards *wc,
                  struct hmapx *conj_flows)
{
    return classifier_lookup__(cls, version, flow, wc, true, conj_flows);
}

/* Finds and returns a rule in 'cls' with exactly the same priority and
 * matching criteria as 'target', and that is visible in 'version'.
 * Only one such rule may ever exist.  Returns a null pointer if 'cls' doesn't
 * contain an exact match. */
const struct cls_rule *
classifier_find_rule_exactly(const struct classifier *cls,
                             const struct cls_rule *target,
                             ovs_version_t version)
{
    const struct cls_match *head, *rule;
    const struct cls_subtable *subtable;

    subtable = find_subtable(cls, target->match.mask);
    if (!subtable) {
        return NULL;
    }

    head = find_equal(subtable, target->match.flow,
                      miniflow_hash_in_minimask(target->match.flow,
                                                target->match.mask, 0));
    if (!head) {
        return NULL;
    }
    CLS_MATCH_FOR_EACH (rule, head) {
        if (rule->priority < target->priority) {
            break;   /* Not found. */
        }
        if (rule->priority == target->priority
            && cls_match_visible_in_version(rule, version)) {
            return rule->cls_rule;
        }
    }
    return NULL;
}

/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
 * same matching criteria as 'target', and that is visible in 'version'.
 * Returns a null pointer if 'cls' doesn't contain an exact match visible in
 * 'version'. */
const struct cls_rule *
classifier_find_match_exactly(const struct classifier *cls,
                              const struct match *target, int priority,
                              ovs_version_t version)
{
    const struct cls_rule *retval;
    struct cls_rule cr;

    cls_rule_init(&cr, target, priority);
    retval = classifier_find_rule_exactly(cls, &cr, version);
    cls_rule_destroy(&cr);

    return retval;
}

/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
 * same matching criteria as 'target', and that is visible in 'version'.
 * Returns a null pointer if 'cls' doesn't contain an exact match visible in
 * 'version'. */
const struct cls_rule *
classifier_find_minimatch_exactly(const struct classifier *cls,
                                  const struct minimatch *target, int priority,
                                  ovs_version_t version)
{
    const struct cls_rule *retval;
    struct cls_rule cr;

    cls_rule_init_from_minimatch(&cr, target, priority);
    retval = classifier_find_rule_exactly(cls, &cr, version);
    cls_rule_destroy(&cr);

    return retval;
}

/* Checks if 'target' would overlap any other rule in 'cls' in 'version'.  Two
 * rules are considered to overlap if both rules have the same priority and a
 * packet could match both, and if both rules are visible in the same version.
 *
 * A trivial example of overlapping rules is two rules matching disjoint sets
 * of fields.  E.g., if one rule matches only on port number, while another only
 * on dl_type, any packet from that specific port and with that specific
 * dl_type could match both, if the rules also have the same priority. */
bool
classifier_rule_overlaps(const struct classifier *cls,
                         const struct cls_rule *target, ovs_version_t version)
{
    struct cls_subtable *subtable;

    /* Iterate subtables in the descending max priority order. */
    PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority, 2,
                               sizeof(struct cls_subtable), &cls->subtables) {
        struct {
            struct minimask mask;
            uint64_t storage[FLOW_U64S];
        } m;
        const struct cls_rule *rule;

        minimask_combine(&m.mask, target->match.mask, &subtable->mask,
                         m.storage);

        RCULIST_FOR_EACH (rule, node, &subtable->rules_list) {
            if (rule->priority == target->priority
                && miniflow_equal_in_minimask(target->match.flow,
                                              rule->match.flow, &m.mask)
                && cls_rule_visible_in_version(rule, version)) {
                return true;
            }
        }
    }
    return false;
}
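
/* Overlap sketch (illustrative): given the two same-priority rules
 *
 *     priority=5, tcp, tp_dst=80
 *     priority=5, ip, nw_src=10.1.1.1
 *
 * classifier_rule_overlaps() reports an overlap for either one against a
 * classifier containing the other, because a TCP packet from 10.1.1.1 to
 * destination port 80 would match both. */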
1327 | | |
1328 | | /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more |
1329 | | * specific than 'criteria'. That is, 'rule' matches 'criteria' and this |
1330 | | * function returns true if, for every field: |
1331 | | * |
1332 | | * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the |
1333 | | * field, or |
1334 | | * |
1335 | | * - 'criteria' wildcards the field, |
1336 | | * |
1337 | | * Conversely, 'rule' does not match 'criteria' and this function returns false |
1338 | | * if, for at least one field: |
1339 | | * |
1340 | | * - 'criteria' and 'rule' specify different values for the field, or |
1341 | | * |
1342 | | * - 'criteria' specifies a value for the field but 'rule' wildcards it. |
1343 | | * |
1344 | | * Equivalently, the truth table for whether a field matches is: |
1345 | | * |
1346 | | * rule |
1347 | | * |
1348 | | * c wildcard exact |
1349 | | * r +---------+---------+ |
1350 | | * i wild | yes | yes | |
1351 | | * t card | | | |
1352 | | * e +---------+---------+ |
1353 | | * r exact | no |if values| |
1354 | | * i | |are equal| |
1355 | | * a +---------+---------+ |
1356 | | * |
1357 | | * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD |
1358 | | * commands and by OpenFlow 1.0 aggregate and flow stats. |
1359 | | * |
1360 | | * Ignores rule->priority. */ |
1361 | | bool |
1362 | | cls_rule_is_loose_match(const struct cls_rule *rule, |
1363 | | const struct minimatch *criteria) |
1364 | 0 | { |
1365 | 0 | return (!minimask_has_extra(rule->match.mask, criteria->mask) |
1366 | 0 | && miniflow_equal_in_minimask(rule->match.flow, criteria->flow, |
1367 | 0 | criteria->mask)); |
1368 | 0 | } |
1369 | | |
1370 | | /* Iteration. */ |
1371 | | |
1372 | | static bool |
1373 | | rule_matches(const struct cls_rule *rule, const struct cls_rule *target, |
1374 | | ovs_version_t version) |
1375 | 0 | { |
1376 | | /* Rule may only match a target if it is visible in target's version. */ |
1377 | 0 | return cls_rule_visible_in_version(rule, version) |
1378 | 0 | && (!target || miniflow_equal_in_minimask(rule->match.flow, |
1379 | 0 | target->match.flow, |
1380 | 0 | target->match.mask)); |
1381 | 0 | } |
1382 | | |
1383 | | static const struct cls_rule * |
1384 | | search_subtable(const struct cls_subtable *subtable, |
1385 | | struct cls_cursor *cursor) |
1386 | 0 | { |
1387 | 0 | if (!cursor->target |
1388 | 0 | || !minimask_has_extra(&subtable->mask, cursor->target->match.mask)) { |
1389 | 0 | const struct cls_rule *rule; |
1390 | |
1391 | 0 | RCULIST_FOR_EACH (rule, node, &subtable->rules_list) { |
1392 | 0 | if (rule_matches(rule, cursor->target, cursor->version)) { |
1393 | 0 | return rule; |
1394 | 0 | } |
1395 | 0 | } |
1396 | 0 | } |
1397 | 0 | return NULL; |
1398 | 0 | } |
1399 | | |
1400 | | /* Initializes 'cursor' for iterating through rules in 'cls', and returns the |
1401 | | * cursor. |
1402 | | * |
1403 | | * - If 'target' is null, or if 'target' is a catchall target, the
1404 | | * cursor will visit every rule in 'cls' that is visible in 'version'. |
1405 | | * |
1406 | | * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls' |
1407 | | * such that cls_rule_is_loose_match(rule, target) returns true and
1408 | | * the rule is visible in 'version'.
1409 | | * |
1410 | | * Ignores target->priority. */ |
1411 | | struct cls_cursor |
1412 | | cls_cursor_start(const struct classifier *cls, const struct cls_rule *target, |
1413 | | ovs_version_t version) |
1414 | 0 | { |
1415 | 0 | struct cls_cursor cursor; |
1416 | 0 | struct cls_subtable *subtable; |
1417 | |
1418 | 0 | memset(&cursor, 0x0, sizeof cursor); |
1419 | 0 | cursor.cls = cls; |
1420 | 0 | cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL; |
1421 | 0 | cursor.version = version; |
1422 | 0 | cursor.rule = NULL; |
1423 | | |
1424 | | /* Find first rule. */ |
1425 | 0 | PVECTOR_CURSOR_FOR_EACH (subtable, &cursor.subtables, |
1426 | 0 | &cursor.cls->subtables) { |
1427 | 0 | const struct cls_rule *rule = search_subtable(subtable, &cursor); |
1428 | |
1429 | 0 | if (rule) { |
1430 | 0 | cursor.subtable = subtable; |
1431 | 0 | cursor.rule = rule; |
1432 | 0 | break; |
1433 | 0 | } |
1434 | 0 | } |
1435 | |
1436 | 0 | return cursor; |
1437 | 0 | } |
1438 | | |
1439 | | static const struct cls_rule * |
1440 | | cls_cursor_next(struct cls_cursor *cursor) |
1441 | 0 | { |
1442 | 0 | const struct cls_rule *rule; |
1443 | 0 | const struct cls_subtable *subtable; |
1444 | |
1445 | 0 | rule = cursor->rule; |
1446 | 0 | subtable = cursor->subtable; |
1447 | 0 | RCULIST_FOR_EACH_CONTINUE (rule, node, &subtable->rules_list) { |
1448 | 0 | if (rule_matches(rule, cursor->target, cursor->version)) { |
1449 | 0 | return rule; |
1450 | 0 | } |
1451 | 0 | } |
1452 | | |
1453 | 0 | PVECTOR_CURSOR_FOR_EACH_CONTINUE (subtable, &cursor->subtables) { |
1454 | 0 | rule = search_subtable(subtable, cursor); |
1455 | 0 | if (rule) { |
1456 | 0 | cursor->subtable = subtable; |
1457 | 0 | return rule; |
1458 | 0 | } |
1459 | 0 | } |
1460 | | |
1461 | 0 | return NULL; |
1462 | 0 | } |
1463 | | |
1464 | | /* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration, |
1465 | | * or to null if all matching rules have been visited. */ |
1466 | | void |
1467 | | cls_cursor_advance(struct cls_cursor *cursor) |
1468 | 0 | { |
1469 | 0 | cursor->rule = cls_cursor_next(cursor); |
1470 | 0 | } |
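/* A sketch of typical cursor usage (illustrative only; the CLS_FOR_EACH
 * macros in classifier.h wrap this same pattern): */
static void
visit_all_rules(const struct classifier *cls, ovs_version_t version)
{
    struct cls_cursor cursor = cls_cursor_start(cls, NULL, version);

    while (cursor.rule) {
        /* ... use 'cursor.rule' ... */
        cls_cursor_advance(&cursor);
    }
}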
1471 | | |
1472 | | static struct cls_subtable * |
1473 | | find_subtable(const struct classifier *cls, const struct minimask *mask) |
1474 | 0 | { |
1475 | 0 | struct cls_subtable *subtable; |
1476 | |
|
1477 | 0 | CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0), |
1478 | 0 | &cls->subtables_map) { |
1479 | 0 | if (minimask_equal(mask, &subtable->mask)) { |
1480 | 0 | return subtable; |
1481 | 0 | } |
1482 | 0 | } |
1483 | 0 | return NULL; |
1484 | 0 | } |
1485 | | |
1486 | | /* Initializes 'map' with a subset of 'miniflow''s maps that includes only the |
1487 | | * portions with u64-offset 'i' such that 'start' <= i < 'end'. Does not copy |
1488 | | * any data from 'miniflow' to 'map'. */ |
1489 | | static struct flowmap |
1490 | | miniflow_get_map_in_range(const struct miniflow *miniflow, uint8_t start, |
1491 | | uint8_t end) |
1492 | 0 | { |
1493 | 0 | struct flowmap map; |
1494 | 0 | size_t ofs = 0; |
1495 | |
1496 | 0 | map = miniflow->map; |
1497 | | |
1498 | | /* Clear the bits before 'start'. */ |
1499 | 0 | while (start >= MAP_T_BITS) { |
1500 | 0 | start -= MAP_T_BITS; |
1501 | 0 | ofs += MAP_T_BITS; |
1502 | 0 | map.bits[start / MAP_T_BITS] = 0; |
1503 | 0 | } |
1504 | 0 | if (start > 0) { |
1505 | 0 | flowmap_clear(&map, ofs, start); |
1506 | 0 | } |
1507 | | |
1508 | | /* Clear the bits starting at 'end'. */ |
1509 | 0 | if (end < FLOW_U64S) { |
1510 | | /* flowmap_clear() can handle at most MAP_T_BITS at a time. */ |
1511 | 0 | ovs_assert(FLOW_U64S - end <= MAP_T_BITS); |
1512 | 0 | flowmap_clear(&map, end, FLOW_U64S - end); |
1513 | 0 | } |
1514 | 0 | return map; |
1515 | 0 | } |
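/* Worked example, assuming MAP_T_BITS == 64: for start == 70 and end == 80,
 * the loop above zeroes bits[0] (the whole map unit below 'start'),
 * flowmap_clear(&map, 64, 6) then clears bits 64..69, and the tail call
 * clears bits 80..FLOW_U64S-1, leaving only the bits for u64 offsets
 * 70..79 that were set in 'miniflow'. */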
1516 | | |
1517 | | static void |
1518 | | subtable_destroy_cb(struct cls_subtable *subtable) |
1519 | 0 | { |
1520 | 0 | int i; |
1521 | |
1522 | 0 | ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie) |
1523 | 0 | == NULL); |
1524 | 0 | ovs_assert(cmap_is_empty(&subtable->rules)); |
1525 | 0 | ovs_assert(rculist_is_empty(&subtable->rules_list)); |
1526 | |
1527 | 0 | for (i = 0; i < subtable->n_indices; i++) { |
1528 | 0 | ccmap_destroy(&subtable->indices[i]); |
1529 | 0 | } |
1530 | 0 | cmap_destroy(&subtable->rules); |
1531 | |
1532 | 0 | ovsrcu_postpone(free, subtable); |
1533 | 0 | } |
1534 | | |
1535 | | /* The new subtable becomes visible to readers only after the final cmap_insert(). */
1536 | | static struct cls_subtable * |
1537 | | insert_subtable(struct classifier *cls, const struct minimask *mask) |
1538 | 0 | { |
1539 | 0 | uint32_t hash = minimask_hash(mask, 0); |
1540 | 0 | uint32_t i, n_tries, index = 0; |
1541 | 0 | struct cls_subtable *subtable; |
1542 | 0 | struct flowmap stage_map; |
1543 | 0 | uint8_t prev; |
1544 | 0 | size_t count = miniflow_n_values(&mask->masks); |
1545 | |
1546 | 0 | subtable = xzalloc(sizeof *subtable + MINIFLOW_VALUES_SIZE(count)); |
1547 | 0 | cmap_init(&subtable->rules); |
1548 | 0 | miniflow_clone(CONST_CAST(struct miniflow *, &subtable->mask.masks), |
1549 | 0 | &mask->masks, count); |
1550 | | |
1551 | | /* Init indices for segmented lookup, if any. */ |
1552 | 0 | prev = 0; |
1553 | 0 | for (i = 0; i < cls->n_flow_segments; i++) { |
1554 | 0 | stage_map = miniflow_get_map_in_range(&mask->masks, prev, |
1555 | 0 | cls->flow_segments[i]); |
1556 | | /* Add an index if it adds mask bits. */ |
1557 | 0 | if (!flowmap_is_empty(stage_map)) { |
1558 | 0 | ccmap_init(&subtable->indices[index]); |
1559 | 0 | *CONST_CAST(struct flowmap *, &subtable->index_maps[index]) |
1560 | 0 | = stage_map; |
1561 | 0 | index++; |
1562 | 0 | } |
1563 | 0 | prev = cls->flow_segments[i]; |
1564 | 0 | } |
1565 | | /* Map for the final stage. */ |
1566 | 0 | *CONST_CAST(struct flowmap *, &subtable->index_maps[index]) |
1567 | 0 | = miniflow_get_map_in_range(&mask->masks, prev, FLOW_U64S); |
1568 | | /* Check if the final stage adds any bits. */ |
1569 | 0 | if (index > 0) { |
1570 | 0 | if (flowmap_is_empty(subtable->index_maps[index])) { |
1571 | | /* Remove the last index, as it has the same fields as the rules |
1572 | | * map. */ |
1573 | 0 | --index; |
1574 | 0 | ccmap_destroy(&subtable->indices[index]); |
1575 | 0 | } |
1576 | 0 | } |
1577 | 0 | *CONST_CAST(uint8_t *, &subtable->n_indices) = index; |
1578 | |
1579 | 0 | atomic_read_relaxed(&cls->n_tries, &n_tries); |
1580 | 0 | for (i = 0; i < n_tries; i++) { |
1581 | 0 | subtable->trie_plen[i] = minimask_get_prefix_len(mask, |
1582 | 0 | cls->tries[i].field); |
1583 | 0 | } |
1584 | | |
1585 | | /* Ports trie. */ |
1586 | 0 | ovsrcu_set_hidden(&subtable->ports_trie, NULL); |
1587 | 0 | *CONST_CAST(int *, &subtable->ports_mask_len) |
1588 | 0 | = 32 - ctz32(ntohl(miniflow_get_ports(&mask->masks))); |
1589 | | |
1590 | | /* List of rules. */ |
1591 | 0 | rculist_init(&subtable->rules_list); |
1592 | |
1593 | 0 | cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash); |
1594 | |
1595 | 0 | return subtable; |
1596 | 0 | } |
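/* Worked example for 'ports_mask_len' above: a subtable that matches
 * tp_src exactly but wildcards tp_dst has htonl(0xffff0000) as its ports
 * mask, so ntohl() yields 0xffff0000, ctz32() yields 16, and the ports
 * trie only needs to cover 32 - 16 = 16 prefix bits. */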
1597 | | |
1598 | | /* RCU readers may still access the subtable before it is actually freed. */ |
1599 | | static void |
1600 | | destroy_subtable(struct classifier *cls, struct cls_subtable *subtable) |
1601 | 0 | { |
1602 | 0 | pvector_remove(&cls->subtables, subtable); |
1603 | 0 | cmap_remove(&cls->subtables_map, &subtable->cmap_node, |
1604 | 0 | minimask_hash(&subtable->mask, 0)); |
1605 | |
1606 | 0 | ovsrcu_postpone(subtable_destroy_cb, subtable); |
1607 | 0 | } |
1608 | | |
1609 | | static unsigned int be_get_bit_at(const ovs_be32 value[], unsigned int ofs); |
1610 | | |
1611 | | /* Returns true if the rest of the subtable can be skipped based on the
1612 | | * prefix trie lookup results. */
1613 | | static inline bool |
1614 | | check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], uint32_t n_tries, |
1615 | | const unsigned int field_plen[CLS_MAX_TRIES], |
1616 | | const struct flowmap range_map, const struct flow *flow, |
1617 | | struct flow_wildcards *wc) |
1618 | 0 | { |
1619 | 0 | uint32_t j; |
1620 | | |
1621 | | /* Check if we could avoid fully unwildcarding the next level of |
1622 | | * fields using the prefix tries. The trie checks are done only as |
1623 | | * needed to avoid folding in additional bits to the wildcards mask. */ |
1624 | 0 | for (j = 0; j < n_tries; j++) { |
1625 | 0 | struct trie_ctx *ctx = &trie_ctx[j]; |
1626 | | |
1627 | | /* Is the trie field relevant for this subtable, and |
1628 | | * is the trie field within the current range of fields? */ |
1629 | 0 | if (field_plen[j] && flowmap_is_set(&range_map, ctx->be32ofs / 2)) { |
1630 | | /* On-demand trie lookup. */ |
1631 | 0 | if (!ctx->lookup_done) { |
1632 | 0 | memset(&ctx->match_plens, 0, sizeof ctx->match_plens); |
1633 | 0 | ctx->maskbits = trie_lookup(ctx->trie, flow, &ctx->match_plens); |
1634 | 0 | ctx->lookup_done = true; |
1635 | 0 | } |
1636 | | /* Possible to skip the rest of the subtable if subtable's |
1637 | | * prefix on the field is not included in the lookup result. */ |
1638 | 0 | if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) { |
1639 | | /* We want the trie lookup to never result in unwildcarding |
1640 | | * any bits that would not be unwildcarded otherwise. |
1641 | | * Since the trie is shared by the whole classifier, it is |
1642 | | * possible that the 'maskbits' contain bits that are |
1643 | | * irrelevant for the partition relevant for the current |
1644 | | * packet. Hence the checks below. */ |
1645 | | |
1646 | | /* Check that the trie result will not unwildcard more bits |
1647 | | * than this subtable would otherwise. */ |
1648 | 0 | if (ctx->maskbits <= field_plen[j]) { |
1649 | | /* Unwildcard the bits and skip the rest. */ |
1650 | 0 | mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits); |
1651 | | /* Note: Prerequisite already unwildcarded, as the only |
1652 | | * prerequisite of the supported trie lookup fields is |
1653 | | * the ethertype, which is always unwildcarded. */ |
1654 | 0 | return true; |
1655 | 0 | } |
1656 | | /* Can skip if the field is already unwildcarded. */ |
1657 | 0 | if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) { |
1658 | 0 | return true; |
1659 | 0 | } |
1660 | 0 | } |
1661 | 0 | } |
1662 | 0 | } |
1663 | 0 | return false; |
1664 | 0 | } |
1665 | | |
1666 | | /* Returns true if 'target' satisfies 'flow'/'mask', that is, if 'flow'
1667 | | * and 'target' have the same value for every bit for which 'mask' has a
1668 | | * bit set.
1669 | | *
1670 | | * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
1671 | | * target, mask), but is faster because of the invariant that flow->map
1672 | | * and mask->masks.map are the same, so the packed values can be walked
1673 | | * in lockstep. */
1674 | | static inline bool |
1675 | | miniflow_and_mask_matches_flow(const struct miniflow *flow, |
1676 | | const struct minimask *mask, |
1677 | | const struct flow *target) |
1678 | 0 | { |
1679 | 0 | const uint64_t *flowp = miniflow_get_values(flow); |
1680 | 0 | const uint64_t *maskp = miniflow_get_values(&mask->masks); |
1681 | 0 | const uint64_t *target_u64 = (const uint64_t *)target; |
1682 | 0 | map_t map; |
1683 | |
1684 | 0 | FLOWMAP_FOR_EACH_MAP (map, mask->masks.map) { |
1685 | 0 | size_t idx; |
1686 | |
1687 | 0 | MAP_FOR_EACH_INDEX (idx, map) { |
1688 | 0 | if ((*flowp++ ^ target_u64[idx]) & *maskp++) { |
1689 | 0 | return false; |
1690 | 0 | } |
1691 | 0 | } |
1692 | 0 | target_u64 += MAP_T_BITS; |
1693 | 0 | } |
1694 | 0 | return true; |
1695 | 0 | } |
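/* The per-word test above is the usual "equal under mask" idiom:
 * (a ^ b) & mask is nonzero iff 'a' and 'b' differ in a bit that 'mask'
 * keeps. For example, a = 0x12, b = 0x1f, mask = 0xf0: the XOR is 0x0d,
 * and 0x0d & 0xf0 == 0, so the masked parts are equal. */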
1696 | | |
1697 | | static inline const struct cls_match * |
1698 | | find_match(const struct cls_subtable *subtable, ovs_version_t version, |
1699 | | const struct flow *flow, uint32_t hash) |
1700 | 0 | { |
1701 | 0 | const struct cls_match *head, *rule; |
1702 | |
1703 | 0 | CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) { |
1704 | 0 | if (OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow, |
1705 | 0 | &subtable->mask, |
1706 | 0 | flow))) { |
1707 | | /* Return highest priority rule that is visible. */ |
1708 | 0 | CLS_MATCH_FOR_EACH (rule, head) { |
1709 | 0 | if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) { |
1710 | 0 | return rule; |
1711 | 0 | } |
1712 | 0 | } |
1713 | 0 | } |
1714 | 0 | } |
1715 | | |
1716 | 0 | return NULL; |
1717 | 0 | } |
1718 | | |
1719 | | static const struct cls_match * |
1720 | | find_match_wc(const struct cls_subtable *subtable, ovs_version_t version, |
1721 | | const struct flow *flow, struct trie_ctx *trie_ctx, |
1722 | | uint32_t n_tries, struct flow_wildcards *wc) |
1723 | 0 | { |
1724 | 0 | if (OVS_UNLIKELY(!wc)) { |
1725 | 0 | return find_match(subtable, version, flow, |
1726 | 0 | flow_hash_in_minimask(flow, &subtable->mask, 0)); |
1727 | 0 | } |
1728 | | |
1729 | 0 | uint32_t basis = 0, hash; |
1730 | 0 | const struct cls_match *rule = NULL; |
1731 | 0 | struct flowmap stages_map = FLOWMAP_EMPTY_INITIALIZER; |
1732 | 0 | unsigned int mask_offset = 0; |
1733 | 0 | bool adjust_ports_mask = false; |
1734 | 0 | ovs_be32 ports_mask; |
1735 | 0 | uint32_t i; |
1736 | | |
1737 | | /* Try to finish early by checking fields in segments. */ |
1738 | 0 | for (i = 0; i < subtable->n_indices; i++) { |
1739 | 0 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, |
1740 | 0 | subtable->index_maps[i], flow, wc)) { |
1741 | | /* 'wc' bits for the trie field set, now unwildcard the preceding |
1742 | | * bits used so far. */ |
1743 | 0 | goto no_match; |
1744 | 0 | } |
1745 | | |
1746 | | /* Accumulate the map used so far. */ |
1747 | 0 | stages_map = flowmap_or(stages_map, subtable->index_maps[i]); |
1748 | |
1749 | 0 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, |
1750 | 0 | subtable->index_maps[i], |
1751 | 0 | &mask_offset, &basis); |
1752 | |
1753 | 0 | if (!ccmap_find(&subtable->indices[i], hash)) { |
1754 | 0 | goto no_match; |
1755 | 0 | } |
1756 | 0 | } |
1757 | | /* Trie check for the final range. */ |
1758 | 0 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, |
1759 | 0 | subtable->index_maps[i], flow, wc)) { |
1760 | 0 | goto no_match; |
1761 | 0 | } |
1762 | | /* Accumulate the map used so far. */ |
1763 | 0 | stages_map = flowmap_or(stages_map, subtable->index_maps[i]); |
1764 | |
1765 | 0 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, |
1766 | 0 | subtable->index_maps[i], |
1767 | 0 | &mask_offset, &basis); |
1768 | 0 | rule = find_match(subtable, version, flow, hash); |
1769 | 0 | if (!rule && subtable->ports_mask_len) { |
1770 | | /* The final stage had ports, but there was no match. Instead of |
1771 | | * unwildcarding all the ports bits, use the ports trie to figure out a |
1772 | | * smaller set of bits to unwildcard. */ |
1773 | 0 | unsigned int mbits; |
1774 | 0 | ovs_be32 value, plens; |
1775 | |
1776 | 0 | ports_mask = miniflow_get_ports(&subtable->mask.masks); |
1777 | 0 | value = ((OVS_FORCE ovs_be32 *) flow)[TP_PORTS_OFS32] & ports_mask; |
1778 | 0 | mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32); |
1779 | |
1780 | 0 | ports_mask &= be32_prefix_mask(mbits); |
1781 | 0 | ports_mask |= ((OVS_FORCE ovs_be32 *) &wc->masks)[TP_PORTS_OFS32]; |
1782 | |
1783 | 0 | adjust_ports_mask = true; |
1784 | |
1785 | 0 | goto no_match; |
1786 | 0 | } |
1787 | | |
1788 | | /* Must unwildcard all the fields, as they were looked at. */ |
1789 | 0 | flow_wildcards_fold_minimask(wc, &subtable->mask); |
1790 | 0 | return rule; |
1791 | | |
1792 | 0 | no_match: |
1793 | | /* Unwildcard the bits in stages so far, as they were used in determining |
1794 | | * there is no match. */ |
1795 | 0 | flow_wildcards_fold_minimask_in_map(wc, &subtable->mask, stages_map); |
1796 | 0 | if (adjust_ports_mask) { |
1797 | | /* This has to be done after updating the flow wildcards, so that the
1798 | | * narrower ports mask computed above overwrites the one folded in. We
1799 | | * can't simply disable the corresponding bit in the stages map, because
1800 | | * it has 64-bit resolution, i.e., one bit covers not only tp_src/dst
1801 | | * but also ct_tp_src/dst, which are not covered by the trie. */
1802 | 0 | ((OVS_FORCE ovs_be32 *) &wc->masks)[TP_PORTS_OFS32] = ports_mask; |
1803 | 0 | } |
1804 | 0 | return NULL; |
1805 | 0 | } |
1806 | | |
1807 | | static struct cls_match * |
1808 | | find_equal(const struct cls_subtable *subtable, const struct miniflow *flow, |
1809 | | uint32_t hash) |
1810 | 0 | { |
1811 | 0 | struct cls_match *head; |
1812 | |
1813 | 0 | CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) { |
1814 | 0 | if (miniflow_equal(&head->flow, flow)) { |
1815 | 0 | return head; |
1816 | 0 | } |
1817 | 0 | } |
1818 | 0 | return NULL; |
1819 | 0 | } |
1820 | | |
1821 | | /* A longest-prefix match tree. */ |
1822 | | |
1823 | | /* Returns at least 'plen' bits of the 'prefix', starting at bit offset
1824 | | * 'ofs'. Prefixes are in network byte order, and offset 0 corresponds to
1825 | | * the most significant bit of the first byte. The offset can be read as
1826 | | * "how many bits to skip from the start of the prefix starting at 'pr'". */
1827 | | static uint32_t |
1828 | | raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) |
1829 | 0 | { |
1830 | 0 | uint32_t prefix; |
1831 | |
1832 | 0 | pr += ofs / 32; /* Where to start. */ |
1833 | 0 | ofs %= 32; /* How many bits to skip at 'pr'. */ |
1834 | |
1835 | 0 | prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */ |
1836 | 0 | if (plen > 32 - ofs) { /* Need more than we have already? */ |
1837 | 0 | prefix |= ntohl(*++pr) >> (32 - ofs); |
1838 | 0 | } |
1839 | | /* Return with possible unwanted bits at the end. */ |
1840 | 0 | return prefix; |
1841 | 0 | } |
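/* Example: with pr[] = { htonl(0x12345678), htonl(0x9abcdef0) },
 * raw_get_prefix(pr, 4, 32) shifts the first word left by four bits
 * (0x23456780) and fills the low four bits from the next word, returning
 * 0x23456789. */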
1842 | | |
1843 | | /* Returns min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit
1844 | | * offset 'ofs'. Prefixes are in network byte order, and offset 0
1845 | | * corresponds to the most significant bit of the first byte. The offset can
1846 | | * be read as "how many bits to skip from the start of the prefix starting at
1847 | | * 'pr'". */
1848 | | static uint32_t |
1849 | | trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) |
1850 | 0 | { |
1851 | 0 | if (!plen) { |
1852 | 0 | return 0; |
1853 | 0 | } |
1854 | 0 | if (plen > TRIE_PREFIX_BITS) { |
1855 | 0 | plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */ |
1856 | 0 | } |
1857 | | /* Return with unwanted bits cleared. */ |
1858 | 0 | return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen); |
1859 | 0 | } |
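/* Example: trie_get_prefix(pr, 0, 8) with pr[0] = htonl(0x12345678)
 * returns 0x12000000: raw_get_prefix() yields 0x12345678, and the final
 * '& ~0u << 24' clears everything past the first eight bits. */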
1860 | | |
1861 | | /* Returns the number of equal bits between the 'n_bits' most significant
1862 | | * bits of 'prefix' and the bits of 'value' at "MSB 0"-based offset 'ofs'. */
1863 | | static unsigned int |
1864 | | prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[], |
1865 | | unsigned int ofs) |
1866 | 0 | { |
1867 | 0 | uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits); |
1868 | | /* Set the bit after the relevant bits to limit the result. */ |
1869 | 0 | return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits)); |
1870 | 0 | } |
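/* Example: prefix = 0xc0a80000, n_bits = 16, value[] = { htonl(0xc0a90000) },
 * ofs = 0: 0xc0a8 and 0xc0a9 first differ at "MSB 0" bit 15, and the
 * sentinel bit planted at position 'n_bits' keeps raw_clz64() from
 * counting past the compared span, so the result is 15 equal bits. */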
1871 | | |
1872 | | /* Returns the number of equal bits between 'node''s prefix and a 'prefix'
1873 | | * of length 'plen', starting at "MSB 0"-based offset 'ofs'. */
1874 | | static unsigned int |
1875 | | trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[], |
1876 | | unsigned int ofs, unsigned int plen) |
1877 | 0 | { |
1878 | 0 | return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs), |
1879 | 0 | prefix, ofs); |
1880 | 0 | } |
1881 | | |
1882 | | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can |
1883 | | * be greater than 31. */ |
1884 | | static unsigned int |
1885 | | be_get_bit_at(const ovs_be32 value[], unsigned int ofs) |
1886 | 0 | { |
1887 | 0 | return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u; |
1888 | 0 | } |
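/* Example: for value[] = { htonl(0x80000000), htonl(0x40000000) },
 * be_get_bit_at(value, 0) == 1 and be_get_bit_at(value, 33) == 1: the
 * offset keeps counting across word boundaries in big-endian bit order. */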
1889 | | |
1890 | | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' must |
1891 | | * be between 0 and 31, inclusive. */ |
1892 | | static unsigned int |
1893 | | get_bit_at(const uint32_t prefix, unsigned int ofs) |
1894 | 0 | { |
1895 | 0 | return (prefix >> (31 - ofs)) & 1u; |
1896 | 0 | } |
1897 | | |
1898 | | /* Create new branch. */ |
1899 | | static struct trie_node * |
1900 | | trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen, |
1901 | | unsigned int n_rules) |
1902 | 0 | { |
1903 | 0 | struct trie_node *node = xmalloc(sizeof *node); |
1904 | |
1905 | 0 | node->prefix = trie_get_prefix(prefix, ofs, plen); |
1906 | |
1907 | 0 | if (plen <= TRIE_PREFIX_BITS) { |
1908 | 0 | node->n_bits = plen; |
1909 | 0 | ovsrcu_set_hidden(&node->edges[0], NULL); |
1910 | 0 | ovsrcu_set_hidden(&node->edges[1], NULL); |
1911 | 0 | node->n_rules = n_rules; |
1912 | 0 | } else { /* Need intermediate nodes. */ |
1913 | 0 | struct trie_node *subnode = trie_branch_create(prefix, |
1914 | 0 | ofs + TRIE_PREFIX_BITS, |
1915 | 0 | plen - TRIE_PREFIX_BITS, |
1916 | 0 | n_rules); |
1917 | 0 | int bit = get_bit_at(subnode->prefix, 0); |
1918 | 0 | node->n_bits = TRIE_PREFIX_BITS; |
1919 | 0 | ovsrcu_set_hidden(&node->edges[bit], subnode); |
1920 | 0 | ovsrcu_set_hidden(&node->edges[!bit], NULL); |
1921 | 0 | node->n_rules = 0; |
1922 | 0 | } |
1923 | 0 | return node; |
1924 | 0 | } |
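/* Example, assuming TRIE_PREFIX_BITS == 32: trie_branch_create(prefix, 0,
 * 40, 1) builds a 32-bit intermediate node with n_rules == 0 plus a single
 * 8-bit child carrying the rule, hung on the edge selected by bit 32 of
 * 'prefix'. */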
1925 | | |
1926 | | static void |
1927 | | trie_node_destroy(const struct trie_node *node) |
1928 | 0 | { |
1929 | 0 | ovsrcu_postpone(free, CONST_CAST(struct trie_node *, node)); |
1930 | 0 | } |
1931 | | |
1932 | | /* Copies a trie node for modification and postpones deleting the old one. */
1933 | | static struct trie_node * |
1934 | | trie_node_rcu_realloc(const struct trie_node *node) |
1935 | 0 | { |
1936 | 0 | struct trie_node *new_node = xmalloc(sizeof *node); |
1937 | |
1938 | 0 | *new_node = *node; |
1939 | 0 | trie_node_destroy(node); |
1940 | |
1941 | 0 | return new_node; |
1942 | 0 | } |
1943 | | |
1944 | | static void |
1945 | | trie_destroy__(rcu_trie_ptr *trie) |
1946 | 0 | { |
1947 | 0 | struct trie_node *node = ovsrcu_get_protected(struct trie_node *, trie); |
1948 | |
1949 | 0 | if (node) { |
1950 | 0 | ovsrcu_set_hidden(trie, NULL); |
1951 | 0 | trie_destroy__(&node->edges[0]); |
1952 | 0 | trie_destroy__(&node->edges[1]); |
1953 | 0 | trie_node_destroy(node); |
1954 | 0 | } |
1955 | 0 | } |
1956 | | |
1957 | | static void |
1958 | | trie_destroy(struct cls_trie *trie) |
1959 | 0 | { |
1960 | 0 | if (!trie) { |
1961 | 0 | return; |
1962 | 0 | } |
1963 | | |
1964 | 0 | trie_destroy__(&trie->root); |
1965 | 0 | trie->field = NULL; |
1966 | 0 | } |
1967 | | |
1968 | | static bool |
1969 | | trie_is_leaf(const struct trie_node *trie) |
1970 | 0 | { |
1971 | | /* No children? */ |
1972 | 0 | return !ovsrcu_get(struct trie_node *, &trie->edges[0]) |
1973 | 0 | && !ovsrcu_get(struct trie_node *, &trie->edges[1]); |
1974 | 0 | } |
1975 | | |
1976 | | static void |
1977 | | mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs, |
1978 | | unsigned int n_bits) |
1979 | 0 | { |
1980 | 0 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; |
1981 | 0 | unsigned int i; |
1982 | |
1983 | 0 | for (i = 0; i < n_bits / 32; i++) { |
1984 | 0 | mask[i] = OVS_BE32_MAX; |
1985 | 0 | } |
1986 | 0 | if (n_bits % 32) { |
1987 | 0 | mask[i] |= htonl(~0u << (32 - n_bits % 32)); |
1988 | 0 | } |
1989 | 0 | } |
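/* Example: mask_set_prefix_bits(wc, ofs, 20): 20 / 32 == 0, so the loop
 * writes no whole word, and the remainder ORs htonl(~0u << 12), i.e.,
 * htonl(0xfffff000), into the first word, unwildcarding exactly the 20
 * most significant bits. */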
1990 | | |
1991 | | static bool |
1992 | | mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs, |
1993 | | unsigned int n_bits) |
1994 | 0 | { |
1995 | 0 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; |
1996 | 0 | unsigned int i; |
1997 | 0 | ovs_be32 zeroes = 0; |
1998 | |
1999 | 0 | for (i = 0; i < n_bits / 32; i++) { |
2000 | 0 | zeroes |= ~mask[i]; |
2001 | 0 | } |
2002 | 0 | if (n_bits % 32) { |
2003 | 0 | zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32)); |
2004 | 0 | } |
2005 | |
2006 | 0 | return !zeroes; /* All 'n_bits' bits set. */ |
2007 | 0 | } |
2008 | | |
2009 | | static rcu_trie_ptr * |
2010 | | trie_next_edge(struct trie_node *node, const ovs_be32 value[], |
2011 | | unsigned int ofs) |
2012 | 0 | { |
2013 | 0 | return node->edges + be_get_bit_at(value, ofs); |
2014 | 0 | } |
2015 | | |
2016 | | static const struct trie_node * |
2017 | | trie_next_node(const struct trie_node *node, const ovs_be32 value[], |
2018 | | unsigned int ofs) |
2019 | 0 | { |
2020 | 0 | return ovsrcu_get(struct trie_node *, |
2021 | 0 | &node->edges[be_get_bit_at(value, ofs)]); |
2022 | 0 | } |
2023 | | |
2024 | | /* Set the bit at ("MSB 0"-based) offset 'ofs'. 'ofs' can be greater than 31. |
2025 | | */ |
2026 | | static void |
2027 | | be_set_bit_at(ovs_be32 value[], unsigned int ofs) |
2028 | 0 | { |
2029 | 0 | ((uint8_t *)value)[ofs / 8] |= 1u << (7 - ofs % 8); |
2030 | 0 | } |
2031 | | |
2032 | | /* Returns the number of bits in the prefix mask necessary to determine a |
2033 | | * mismatch, in case there are longer prefixes in the tree below the one that |
2034 | | * matched. |
2035 | | * '*plens' will have a bit set for each prefix length that may have matching |
2036 | | * rules. The caller is responsible for clearing the '*plens' prior to |
2037 | | * calling this. |
2038 | | */ |
2039 | | static unsigned int |
2040 | | trie_lookup_value(const rcu_trie_ptr *trie, const ovs_be32 value[], |
2041 | | ovs_be32 plens[], unsigned int n_bits) |
2042 | 0 | { |
2043 | 0 | const struct trie_node *prev = NULL; |
2044 | 0 | const struct trie_node *node = ovsrcu_get(struct trie_node *, trie); |
2045 | 0 | unsigned int match_len = 0; /* Number of matching bits. */ |
2046 | |
|
2047 | 0 | for (; node; prev = node, node = trie_next_node(node, value, match_len)) { |
2048 | 0 | unsigned int eqbits; |
2049 | | /* Check if this edge can be followed. */ |
2050 | 0 | eqbits = prefix_equal_bits(node->prefix, node->n_bits, value, |
2051 | 0 | match_len); |
2052 | 0 | match_len += eqbits; |
2053 | 0 | if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */ |
2054 | | /* Bit at offset 'match_len' differed. */ |
2055 | 0 | return match_len + 1; /* Includes the first mismatching bit. */ |
2056 | 0 | } |
2057 | | /* Full match, check if rules exist at this prefix length. */ |
2058 | 0 | if (node->n_rules > 0) { |
2059 | 0 | be_set_bit_at(plens, match_len - 1); |
2060 | 0 | } |
2061 | 0 | if (match_len >= n_bits) { |
2062 | 0 | return n_bits; /* Full prefix. */ |
2063 | 0 | } |
2064 | 0 | } |
2065 | | /* node == NULL. Full match so far, but we tried to follow a
2066 | | * non-existent branch. Need to exclude the other branch if it exists
2067 | | * (it does not if we were called on an empty trie or 'prev' is a leaf |
2068 | | * node). */ |
2069 | 0 | return !prev || trie_is_leaf(prev) ? match_len : match_len + 1; |
2070 | 0 | } |
2071 | | |
2072 | | static unsigned int |
2073 | | trie_lookup(const struct cls_trie *trie, const struct flow *flow, |
2074 | | union trie_prefix *plens) |
2075 | 0 | { |
2076 | 0 | const struct mf_field *mf = trie->field; |
2077 | | |
2078 | | /* Check that current flow matches the prerequisites for the trie |
2079 | | * field. Some match fields are used for multiple purposes, so we |
2080 | | * must check that the trie is relevant for this flow. */ |
2081 | 0 | if (mf_are_prereqs_ok(mf, flow, NULL)) { |
2082 | 0 | return trie_lookup_value(&trie->root, |
2083 | 0 | &((ovs_be32 *)flow)[mf->flow_be32ofs], |
2084 | 0 | &plens->be32, mf->n_bits); |
2085 | 0 | } |
2086 | 0 | memset(plens, 0xff, sizeof *plens); /* All prefixes, no skipping. */ |
2087 | 0 | return 0; /* Value not used in this case. */ |
2088 | 0 | } |
2089 | | |
2090 | | /* Returns the length of a prefix match mask for the field 'mf' in 'minimask'. |
2091 | | * Returns the u32 offset to the miniflow data in '*miniflow_index', if |
2092 | | * 'miniflow_index' is not NULL. */ |
2093 | | static unsigned int |
2094 | | minimask_get_prefix_len(const struct minimask *minimask, |
2095 | | const struct mf_field *mf) |
2096 | 0 | { |
2097 | 0 | unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */ |
2098 | 0 | uint8_t be32_ofs = mf->flow_be32ofs; |
2099 | 0 | uint8_t be32_end = be32_ofs + mf->n_bytes / 4; |
2100 | |
2101 | 0 | for (; be32_ofs < be32_end; ++be32_ofs) { |
2102 | 0 | uint32_t mask = ntohl(minimask_get_be32(minimask, be32_ofs)); |
2103 | | |
2104 | | /* Validate mask, count the mask length. */ |
2105 | 0 | if (mask_tz) { |
2106 | 0 | if (mask) { |
2107 | 0 | return 0; /* No bits allowed after mask ended. */ |
2108 | 0 | } |
2109 | 0 | } else { |
2110 | 0 | if (~mask & (~mask + 1)) { |
2111 | 0 | return 0; /* Mask not contiguous. */ |
2112 | 0 | } |
2113 | 0 | mask_tz = ctz32(mask); |
2114 | 0 | n_bits += 32 - mask_tz; |
2115 | 0 | } |
2116 | 0 | } |
2117 | | |
2118 | 0 | return n_bits; |
2119 | 0 | } |
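/* Example: mask = 0xffffff00 (a /24): ~mask is 0xff, ~mask + 1 is 0x100,
 * so '~mask & (~mask + 1)' is 0 and the mask is contiguous; ctz32() gives
 * 8 and the prefix length is 32 - 8 = 24. A non-CIDR mask such as
 * 0xff00ff00 fails the contiguity test, so the function returns 0. */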
2120 | | |
2121 | | /* |
2122 | | * This is called only when the mask prefix is known to be CIDR and non-zero.
2123 | | * Relies on the fact that the flow and mask have the same map, and since |
2124 | | * the mask is CIDR, the storage for the flow field exists even if it |
2125 | | * happened to be zeros. |
2126 | | */ |
2127 | | static const ovs_be32 * |
2128 | | minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf) |
2129 | 0 | { |
2130 | 0 | size_t u64_ofs = mf->flow_be32ofs / 2; |
2131 | |
2132 | 0 | return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs) |
2133 | 0 | + (mf->flow_be32ofs & 1); |
2134 | 0 | } |
2135 | | |
2136 | | /* Inserts 'rule' into the prefix tree.
2137 | | * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask |
2138 | | * in 'rule'. */ |
2139 | | static void |
2140 | | trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) |
2141 | 0 | { |
2142 | 0 | trie_insert_prefix(&trie->root, |
2143 | 0 | minimatch_get_prefix(&rule->match, trie->field), mlen); |
2144 | 0 | } |
2145 | | |
2146 | | static void |
2147 | | trie_insert_prefix(rcu_trie_ptr *edge, const ovs_be32 *prefix, int mlen) |
2148 | 0 | { |
2149 | 0 | struct trie_node *node; |
2150 | 0 | int ofs = 0; |
2151 | | |
2152 | | /* Walk the tree. */ |
2153 | 0 | for (; (node = ovsrcu_get_protected(struct trie_node *, edge)); |
2154 | 0 | edge = trie_next_edge(node, prefix, ofs)) { |
2155 | 0 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); |
2156 | 0 | ofs += eqbits; |
2157 | 0 | if (eqbits < node->n_bits) { |
2158 | | /* Mismatch, new node needs to be inserted above. */ |
2159 | 0 | int old_branch = get_bit_at(node->prefix, eqbits); |
2160 | 0 | struct trie_node *new_parent; |
2161 | |
2162 | 0 | new_parent = trie_branch_create(prefix, ofs - eqbits, eqbits, |
2163 | 0 | ofs == mlen ? 1 : 0); |
2164 | | /* Copy the node to modify it. */ |
2165 | 0 | node = trie_node_rcu_realloc(node); |
2166 | | /* Adjust the new node for its new position in the tree. */ |
2167 | 0 | node->prefix <<= eqbits; |
2168 | 0 | node->n_bits -= eqbits; |
2169 | 0 | ovsrcu_set_hidden(&new_parent->edges[old_branch], node); |
2170 | | |
2171 | | /* Check if a new branch is needed for the new rule. */
2172 | 0 | if (ofs < mlen) { |
2173 | 0 | ovsrcu_set_hidden(&new_parent->edges[!old_branch], |
2174 | 0 | trie_branch_create(prefix, ofs, mlen - ofs, |
2175 | 0 | 1)); |
2176 | 0 | } |
2177 | 0 | ovsrcu_set(edge, new_parent); /* Publish changes. */ |
2178 | 0 | return; |
2179 | 0 | } |
2180 | | /* Full match so far. */ |
2181 | | |
2182 | 0 | if (ofs == mlen) { |
2183 | | /* Full match at the current node, rule needs to be added here. */ |
2184 | 0 | node->n_rules++; |
2185 | 0 | return; |
2186 | 0 | } |
2187 | 0 | } |
2188 | | /* Must insert a new tree branch for the new rule. */ |
2189 | 0 | ovsrcu_set(edge, trie_branch_create(prefix, ofs, mlen - ofs, 1)); |
2190 | 0 | } |
2191 | | |
2192 | | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask |
2193 | | * in 'rule'. */ |
2194 | | static void |
2195 | | trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) |
2196 | 0 | { |
2197 | 0 | trie_remove_prefix(&trie->root, |
2198 | 0 | minimatch_get_prefix(&rule->match, trie->field), mlen); |
2199 | 0 | } |
2200 | | |
2201 | | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask |
2202 | | * in 'rule'. */ |
2203 | | static void |
2204 | | trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen) |
2205 | 0 | { |
2206 | 0 | struct trie_node *node; |
2207 | 0 | rcu_trie_ptr *edges[sizeof(union trie_prefix) * CHAR_BIT]; |
2208 | 0 | int depth = 0, ofs = 0; |
2209 | | |
2210 | | /* Walk the tree. */ |
2211 | 0 | for (edges[0] = root; |
2212 | 0 | (node = ovsrcu_get_protected(struct trie_node *, edges[depth])); |
2213 | 0 | edges[++depth] = trie_next_edge(node, prefix, ofs)) { |
2214 | 0 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); |
2215 | |
2216 | 0 | if (eqbits < node->n_bits) { |
2217 | | /* Mismatch, nothing to be removed. This should never happen, as |
2218 | | * only rules in the classifier are ever removed. */ |
2219 | 0 | break; /* Log a warning. */ |
2220 | 0 | } |
2221 | | /* Full match so far. */ |
2222 | 0 | ofs += eqbits; |
2223 | |
2224 | 0 | if (ofs == mlen) { |
2225 | | /* Full prefix match at the current node, remove rule here. */ |
2226 | 0 | if (!node->n_rules) { |
2227 | 0 | break; /* Log a warning. */ |
2228 | 0 | } |
2229 | 0 | node->n_rules--; |
2230 | | |
2231 | | /* Check if the tree can be pruned. */
2232 | 0 | while (!node->n_rules) { |
2233 | 0 | struct trie_node *next, |
2234 | 0 | *edge0 = ovsrcu_get_protected(struct trie_node *, |
2235 | 0 | &node->edges[0]), |
2236 | 0 | *edge1 = ovsrcu_get_protected(struct trie_node *, |
2237 | 0 | &node->edges[1]); |
2238 | |
2239 | 0 | if (edge0 && edge1) { |
2240 | 0 | break; /* A branching point, cannot prune. */ |
2241 | 0 | } |
2242 | | |
2243 | | /* Otherwise there is at most one child node; remove this node. */
2244 | 0 | next = edge0 ? edge0 : edge1; |
2245 | |
2246 | 0 | if (next) { |
2247 | 0 | if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) { |
2248 | 0 | break; /* Cannot combine. */ |
2249 | 0 | } |
2250 | 0 | next = trie_node_rcu_realloc(next); /* Modify. */ |
2251 | | |
2252 | | /* Combine node with next. */ |
2253 | 0 | next->prefix = node->prefix | next->prefix >> node->n_bits; |
2254 | 0 | next->n_bits += node->n_bits; |
2255 | 0 | } |
2256 | | /* Update the parent's edge. */ |
2257 | 0 | ovsrcu_set(edges[depth], next); /* Publish changes. */ |
2258 | 0 | trie_node_destroy(node); |
2259 | |
2260 | 0 | if (next || !depth) { |
2261 | | /* Branch not pruned or at root, nothing more to do. */ |
2262 | 0 | break; |
2263 | 0 | } |
2264 | 0 | node = ovsrcu_get_protected(struct trie_node *, |
2265 | 0 | edges[--depth]); |
2266 | 0 | } |
2267 | 0 | return; |
2268 | 0 | } |
2269 | 0 | } |
2270 | | /* Cannot go deeper. This should never happen, since only rules |
2271 | | * that actually exist in the classifier are ever removed. */ |
2272 | 0 | } |
2273 | | |
2274 | | |
2275 | | #define CLS_MATCH_POISON (struct cls_match *)(UINTPTR_MAX / 0xf * 0xb) |
2276 | | |
2277 | | void |
2278 | | cls_match_free_cb(struct cls_match *rule) |
2279 | 0 | { |
2280 | 0 | ovsrcu_set_hidden(&rule->next, CLS_MATCH_POISON); |
2281 | 0 | free(rule); |
2282 | 0 | } |