Total coverage: 376474 (19%)of 2007629
52 9 37 13 2 7 14 95 418 9094 662 8 8 9 25 406 53 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NODEMASK_H #define __LINUX_NODEMASK_H /* * Nodemasks provide a bitmap suitable for representing the * set of Node's in a system, one bit position per Node number. * * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * * For details of nodemask_parse_user(), see bitmap_parse_user() in * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in * lib/bitmap.c. * * The available nodemask operations are: * * void node_set(node, mask) turn on bit 'node' in mask * void node_clear(node, mask) turn off bit 'node' in mask * void nodes_setall(mask) set all bits * void nodes_clear(mask) clear all bits * int node_isset(node, mask) true iff bit 'node' set in mask * int node_test_and_set(node, mask) test and set bit 'node' in mask * * void nodes_and(dst, src1, src2) dst = src1 & src2 [intersection] * void nodes_or(dst, src1, src2) dst = src1 | src2 [union] * void nodes_xor(dst, src1, src2) dst = src1 ^ src2 * void nodes_andnot(dst, src1, src2) dst = src1 & ~src2 * void nodes_complement(dst, src) dst = ~src * * int nodes_equal(mask1, mask2) Does mask1 == mask2? * int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2? * int nodes_empty(mask) Is mask empty (no bits sets)? * int nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set * NODE_MASK_ALL Initializer - all bits set * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_node_mask(node, mask) for-loop node over mask * * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * * int node_random(mask) Random node with set bit in mask * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * * node_set_online(node) set bit 'node' in node_online_map * node_set_offline(node) clear bit 'node' in node_online_map * * for_each_node(node) for-loop node over node_possible_map * for_each_online_node(node) for-loop node over node_online_map * * Subtlety: * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway) * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. * * NODEMASK_SCRATCH * When doing above logical AND, OR, XOR, Remap operations the callers tend to * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper * for such situations. See below and CPUMASK_ALLOC also. */ #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; /** * nodemask_pr_args - printf args to output a nodemask * @maskp: nodemask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions * are called in both __init and non-__init functions, if they are not * inlined we will end up with a section mismatch error (of the type of * freeable items not being freed). So we must use __always_inline here * to fix the problem. If other functions in the future also end up in * this situation they will also need to be annotated as __always_inline */ #define node_set(node, dst) __node_set((node), &(dst)) static __always_inline void __node_set(int node, volatile nodemask_t *dstp) { set_bit(node, dstp->bits); } #define node_clear(node, dst) __node_clear((node), &(dst)) static __always_inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } /* No static inline type checking - see Subtlety (1) above. */ #define node_isset(node, nodemask) test_bit((node), (nodemask).bits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) static __always_inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_copy(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_copy(dstp->bits, srcp->bits, nbits); } #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); } #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); } #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); } #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) static __always_inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* * Find the next present node in src, starting after node n, wrapping around to * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); if (ret == MAX_NUMNODES) ret = __first_node(srcp); return ret; } static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); } #define nodemask_of_node(node) \ ({ \ typeof(_unused_nodemask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL << (node); \ } else { \ init_nodemask_of_node(&m, (node)); \ } \ m; \ }) #define first_unset_node(mask) __first_unset_node(&(mask)) static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } #define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) #if MAX_NUMNODES <= BITS_PER_LONG #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #else #define NODE_MASK_ALL \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #endif #define NODE_MASK_NONE \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \ } }) #define nodes_addr(src) ((src).bits) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static __always_inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) static __always_inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); } #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); } #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); } #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ (node) < MAX_NUMNODES; \ (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ #define for_each_node_mask(node, mask) \ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* * Bitmasks that are kept for all the nodes. */ enum node_states { N_POSSIBLE, /* The node could become online at some point */ N_ONLINE, /* The node is online */ N_NORMAL_MEMORY, /* The node has regular memory */ #ifdef CONFIG_HIGHMEM N_HIGH_MEMORY, /* The node has regular or high memory */ #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ NR_NODE_STATES }; /* * The following particular system nodemasks and operations * on them manage all possible and online nodes. */ extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 static __always_inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } static __always_inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } static __always_inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } static __always_inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } #define for_each_node_state(__node, __state) \ for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) static __always_inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } static __always_inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; static __always_inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } static __always_inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } #else static __always_inline int node_state(int node, enum node_states state) { return node == 0; } static __always_inline void node_set_state(int node, enum node_states state) { } static __always_inline void node_clear_state(int node, enum node_states state) { } static __always_inline int num_node_state(enum node_states state) { return 1; } #define for_each_node_state(node, __state) \ for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) #endif static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int node = find_random_bit(maskp->bits, MAX_NUMNODES); return node < MAX_NUMNODES ? node : NUMA_NO_NODE; #else return 0; #endif } #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) #define num_possible_nodes() num_node_state(N_POSSIBLE) #define node_online(node) node_state((node), N_ONLINE) #define node_possible(node) node_state((node), N_POSSIBLE) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) #define for_each_node_with_cpus(node) for_each_node_state(node, N_CPU) /* * For nodemask scratch area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and * name. */ #if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */ #define NODEMASK_ALLOC(type, name, gfp_flags) \ type *name = kmalloc(sizeof(*name), gfp_flags) #define NODEMASK_FREE(m) kfree(m) #else #define NODEMASK_ALLOC(type, name, gfp_flags) type _##name, *name = &_##name #define NODEMASK_FREE(m) do {} while (0) #endif /* Example structure for using NODEMASK_ALLOC, used in mempolicy. */ struct nodemask_scratch { nodemask_t mask1; nodemask_t mask2; }; #define NODEMASK_SCRATCH(x) \ NODEMASK_ALLOC(struct nodemask_scratch, x, \ GFP_KERNEL | __GFP_NORETRY) #define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) #endif /* __LINUX_NODEMASK_H */
5517 5520 6 1 6 2 4 1 5 1 4 2 2 1 2 2 3 2 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 7 3 1 2 1 1 1 3 2 2 2 788 458 457 788 11 11 1 293 17 783 788 103 293 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 // SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm_device.c - IPsec device offloading code. * * Copyright (c) 2015 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/errno.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> #include <net/gso.h> #include <net/xfrm.h> #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); skb_reset_mac_len(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header -= x->props.header_len; pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); } static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); pskb_pull(skb, skb->mac_len + x->props.header_len - x->props.enc_hdr_len); } static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); int phlen = 0; if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); if (x->sel.family != AF_INET6) { phlen = IPV4_BEET_PHMAXLEN; if (x->outer_mode.family == AF_INET6) phlen += sizeof(struct ipv6hdr) - sizeof(struct iphdr); } pskb_pull(skb, skb->mac_len + hsize + (x->props.header_len - phlen)); } /* Adjust pointers into the packet when IPsec is done at layer2 */ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { case XFRM_MODE_IPTFS: case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_TRANSPORT: if (x->outer_mode.family == AF_INET) return __xfrm_transport_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_transport_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_BEET: if (x->outer_mode.family == AF_INET) return __xfrm_mode_beet_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_beet_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: break; } } static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) { struct xfrm_offload *xo = xfrm_offload(skb); __u32 seq = xo->seq.low; seq += skb_shinfo(skb)->gso_segs; if (unlikely(seq < xo->seq.low)) return true; return false; } struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; unsigned long flags; struct xfrm_state *x; struct softnet_data *sd; struct sk_buff *skb2, *nskb, *pskb = NULL; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); struct net_device *dev = skb->dev; struct sec_path *sp; if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN) return skb; /* The packet was sent to HW IPsec packet offload engine, * but to wrong device. Drop the packet, so it won't skip * XFRM stack. */ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); err = !skb_queue_empty(&sd->xfrm_backlog); local_irq_restore(flags); if (err) { *again = true; return skb; } if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP); segs = skb_gso_segment(skb, esp_features); if (IS_ERR(segs)) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } else { consume_skb(skb); skb = segs; } } if (!skb->next) { esp_features |= skb->dev->gso_partial_features; xfrm_outer_mode_prep(x, skb); xo->flags |= XFRM_DEV_RESUME; err = x->type_offload->xmit(x, skb, esp_features); if (err) { if (err == -EINPROGRESS) return NULL; XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); kfree_skb(skb); return NULL; } skb_push(skb, skb->data - skb_mac_header(skb)); return skb; } skb_list_walk_safe(skb, skb2, nskb) { esp_features |= skb->dev->gso_partial_features; skb_mark_not_on_list(skb2); xo = xfrm_offload(skb2); xo->flags |= XFRM_DEV_RESUME; xfrm_outer_mode_prep(x, skb2); err = x->type_offload->xmit(x, skb2, esp_features); if (!err) { skb2->next = nskb; } else if (err != -EINPROGRESS) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); skb2->next = nskb; kfree_skb_list(skb2); return NULL; } else { if (skb == skb2) skb = nskb; else pskb->next = nskb; continue; } skb_push(skb2, skb2->data - skb_mac_header(skb2)); pskb = skb2; } return skb; } EXPORT_SYMBOL_GPL(validate_xmit_xfrm); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack) { int err; struct dst_entry *dst; struct net_device *dev; struct xfrm_dev_offload *xso = &x->xso; xfrm_address_t *saddr; xfrm_address_t *daddr; bool is_packet_offload; if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) { NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) || (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) { NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction"); return -EINVAL; } if (xuo->flags & XFRM_OFFLOAD_INBOUND && x->if_id) { NL_SET_ERR_MSG(extack, "XFRM if_id is not supported in RX path"); return -EINVAL; } is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; /* We don't yet support TFC padding. */ if (x->tfcpad) { NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { struct xfrm_dst_lookup_params params; if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { saddr = &x->props.saddr; daddr = &x->id.daddr; } else { saddr = &x->id.daddr; daddr = &x->props.saddr; } memset(&params, 0, sizeof(params)); params.net = net; params.saddr = saddr; params.daddr = daddr; params.mark = xfrm_smark_get(0, x); dst = __xfrm_dst_lookup(x->props.family, &params); if (IS_ERR(dst)) return (is_packet_offload) ? -EINVAL : 0; dev = dst->dev; dev_hold(dev); dst_release(dst); } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { xso->dev = NULL; dev_put(dev); return (is_packet_offload) ? -EINVAL : 0; } if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; dev_put(dev); return -EINVAL; } if (!x->type_offload) { NL_SET_ERR_MSG(extack, "Type doesn't support offload"); dev_put(dev); return -EINVAL; } xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); if (xuo->flags & XFRM_OFFLOAD_INBOUND) xso->dir = XFRM_DEV_OFFLOAD_IN; else xso->dir = XFRM_DEV_OFFLOAD_OUT; if (is_packet_offload) xso->type = XFRM_DEV_OFFLOAD_PACKET; else xso->type = XFRM_DEV_OFFLOAD_CRYPTO; err = dev->xfrmdev_ops->xdo_dev_state_add(dev, x, extack); if (err) { xso->dev = NULL; xso->dir = 0; netdev_put(dev, &xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xfrm_unset_type_offload(x); /* User explicitly requested packet offload mode and configured * policy in addition to the XFRM state. So be civil to users, * and return an error instead of taking fallback path. */ if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) { NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state"); return err; } } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_state_add); int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack) { struct xfrm_dev_offload *xdo = &xp->xdo; struct net_device *dev; int err; if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) { /* We support only packet offload mode and it means * that user must set XFRM_OFFLOAD_PACKET bit. */ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) return -EINVAL; if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) { xdo->dev = NULL; dev_put(dev); NL_SET_ERR_MSG(extack, "Policy offload is not supported"); return -EINVAL; } xdo->dev = dev; netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC); xdo->type = XFRM_DEV_OFFLOAD_PACKET; switch (dir) { case XFRM_POLICY_IN: xdo->dir = XFRM_DEV_OFFLOAD_IN; break; case XFRM_POLICY_OUT: xdo->dir = XFRM_DEV_OFFLOAD_OUT; break; case XFRM_POLICY_FWD: xdo->dir = XFRM_DEV_OFFLOAD_FWD; break; default: xdo->dev = NULL; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG(extack, "Unrecognized offload direction"); return -EINVAL; } err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); if (err) { xdo->dev = NULL; xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xdo->dir = 0; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy"); return err; } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_policy_add); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) { int mtu; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; bool check_tunnel_size; if (!x->type_offload || (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) return false; if ((!dev || dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) goto ok; } return false; ok: if (!dev) return true; check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->props.mode == XFRM_MODE_TUNNEL; switch (skb_dst(skb)->ops->family) { case AF_INET: /* Check for IPv4 options */ if (ip_hdr(skb)->ihl != 5) return false; if (check_tunnel_size && xfrm4_tunnel_check_size(skb)) return false; break; case AF_INET6: /* Check for IPv6 extensions */ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) return false; if (check_tunnel_size && xfrm6_tunnel_check_size(skb)) return false; break; default: break; } if (dev->xfrmdev_ops->xdo_dev_offload_ok) return dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); void xfrm_dev_resume(struct sk_buff *skb) { struct net_device *dev = skb->dev; int ret = NETDEV_TX_BUSY; struct netdev_queue *txq; struct softnet_data *sd; unsigned long flags; rcu_read_lock(); txq = netdev_core_pick_tx(dev, skb, NULL); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); if (!dev_xmit_complete(ret)) { local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); skb_queue_tail(&sd->xfrm_backlog, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xfrm_dev_resume); void xfrm_dev_backlog(struct softnet_data *sd) { struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog; struct sk_buff_head list; struct sk_buff *skb; if (skb_queue_empty(xfrm_backlog)) return; __skb_queue_head_init(&list); spin_lock(&xfrm_backlog->lock); skb_queue_splice_init(xfrm_backlog, &list); spin_unlock(&xfrm_backlog->lock); while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); xfrm_dev_resume(skb); } } #endif static int xfrm_api_check(struct net_device *dev) { #ifdef CONFIG_XFRM_OFFLOAD if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && !(dev->features & NETIF_F_HW_ESP)) return NOTIFY_BAD; if ((dev->features & NETIF_F_HW_ESP) && (!(dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_add && dev->xfrmdev_ops->xdo_dev_state_delete))) return NOTIFY_BAD; #else if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM)) return NOTIFY_BAD; #endif return NOTIFY_DONE; } static int xfrm_dev_down(struct net_device *dev) { if (dev->features & NETIF_F_HW_ESP) { xfrm_dev_state_flush(dev_net(dev), dev, true); xfrm_dev_policy_flush(dev_net(dev), dev, true); } return NOTIFY_DONE; } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: return xfrm_api_check(dev); case NETDEV_FEAT_CHANGE: return xfrm_api_check(dev); case NETDEV_DOWN: case NETDEV_UNREGISTER: return xfrm_dev_down(dev); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { .notifier_call = xfrm_dev_event, }; void __init xfrm_dev_init(void) { register_netdevice_notifier(&xfrm_dev_notifier); }
5 2 5 5 2 2 5 1 2 2 2 2 5 1 5 4 4 2 2 1 1 3 4 4 2 2 2 2 2 10 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2007 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/gen_stats.h> #include <linux/jhash.h> #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/slab.h> #include <net/gen_stats.h> #include <net/netlink.h> #include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_RATEEST.h> #include <net/netfilter/xt_rateest.h> #define RATEEST_HSIZE 16 struct xt_rateest_net { struct mutex hash_lock; struct hlist_head hash[RATEEST_HSIZE]; }; static unsigned int xt_rateest_id; static unsigned int jhash_rnd __read_mostly; static unsigned int xt_rateest_hash(const char *name) { return jhash(name, sizeof_field(struct xt_rateest, name), jhash_rnd) & (RATEEST_HSIZE - 1); } static void xt_rateest_hash_insert(struct xt_rateest_net *xn, struct xt_rateest *est) { unsigned int h; h = xt_rateest_hash(est->name); hlist_add_head(&est->list, &xn->hash[h]); } static struct xt_rateest *__xt_rateest_lookup(struct xt_rateest_net *xn, const char *name) { struct xt_rateest *est; unsigned int h; h = xt_rateest_hash(name); hlist_for_each_entry(est, &xn->hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; return est; } } return NULL; } struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); struct xt_rateest *est; mutex_lock(&xn->hash_lock); est = __xt_rateest_lookup(xn, name); mutex_unlock(&xn->hash_lock); return est; } EXPORT_SYMBOL_GPL(xt_rateest_lookup); void xt_rateest_put(struct net *net, struct xt_rateest *est) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); mutex_lock(&xn->hash_lock); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->rate_est); /* * gen_estimator est_timer() might access est->lock or bstats, * wait a RCU grace period before freeing 'est' */ kfree_rcu(est, rcu); } mutex_unlock(&xn->hash_lock); } EXPORT_SYMBOL_GPL(xt_rateest_put); static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_target_info *info = par->targinfo; struct gnet_stats_basic_sync *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); u64_stats_add(&stats->bytes, skb->len); u64_stats_inc(&stats->packets); spin_unlock_bh(&info->est->lock); return XT_CONTINUE; } static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) { struct xt_rateest_net *xn = net_generic(par->net, xt_rateest_id); struct xt_rateest_target_info *info = par->targinfo; struct xt_rateest *est; struct { struct nlattr opt; struct gnet_estimator est; } cfg; int ret; if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name)) return -ENAMETOOLONG; net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); mutex_lock(&xn->hash_lock); est = __xt_rateest_lookup(xn, info->name); if (est) { mutex_unlock(&xn->hash_lock); /* * If estimator parameters are specified, they must match the * existing estimator. */ if ((!info->interval && !info->ewma_log) || (info->interval != est->params.interval || info->ewma_log != est->params.ewma_log)) { xt_rateest_put(par->net, est); return -EINVAL; } info->est = est; return 0; } ret = -ENOMEM; est = kzalloc(sizeof(*est), GFP_KERNEL); if (!est) goto err1; gnet_stats_basic_sync_init(&est->bstats); strscpy(est->name, info->name, sizeof(est->name)); spin_lock_init(&est->lock); est->refcnt = 1; est->params.interval = info->interval; est->params.ewma_log = info->ewma_log; cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est)); cfg.opt.nla_type = TCA_STATS_RATE_EST; cfg.est.interval = info->interval; cfg.est.ewma_log = info->ewma_log; ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, &est->lock, NULL, &cfg.opt); if (ret < 0) goto err2; info->est = est; xt_rateest_hash_insert(xn, est); mutex_unlock(&xn->hash_lock); return 0; err2: kfree(est); err1: mutex_unlock(&xn->hash_lock); return ret; } static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) { struct xt_rateest_target_info *info = par->targinfo; xt_rateest_put(par->net, info->est); } static struct xt_target xt_rateest_tg_reg[] __read_mostly = { { .name = "RATEEST", .revision = 0, .family = NFPROTO_IPV4, .target = xt_rateest_tg, .checkentry = xt_rateest_tg_checkentry, .destroy = xt_rateest_tg_destroy, .targetsize = sizeof(struct xt_rateest_target_info), .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "RATEEST", .revision = 0, .family = NFPROTO_IPV6, .target = xt_rateest_tg, .checkentry = xt_rateest_tg_checkentry, .destroy = xt_rateest_tg_destroy, .targetsize = sizeof(struct xt_rateest_target_info), .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }, #endif }; static __net_init int xt_rateest_net_init(struct net *net) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); int i; mutex_init(&xn->hash_lock); for (i = 0; i < ARRAY_SIZE(xn->hash); i++) INIT_HLIST_HEAD(&xn->hash[i]); return 0; } static struct pernet_operations xt_rateest_net_ops = { .init = xt_rateest_net_init, .id = &xt_rateest_id, .size = sizeof(struct xt_rateest_net), }; static int __init xt_rateest_tg_init(void) { int err = register_pernet_subsys(&xt_rateest_net_ops); if (err) return err; return xt_register_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg)); } static void __exit xt_rateest_tg_fini(void) { xt_unregister_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg)); unregister_pernet_subsys(&xt_rateest_net_ops); } MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: packet rate estimator"); MODULE_ALIAS("ipt_RATEEST"); MODULE_ALIAS("ip6t_RATEEST"); module_init(xt_rateest_tg_init); module_exit(xt_rateest_tg_fini);
39 39 39 29 38 2 2 39 39 39 39 39 39 2 39 1 3 1 39 38 39 39 39 2 2 1 1 2 1 2 2 2 2 2 1 2 1 1 2 39 6 5 1 1 6 6 8 1 1 5 1 1 1 1 2 2 11 11 11 11 9 10 10 9 10 10 10 10 4 9 5 10 6 6 5 4 6 6 6 6 5 5 1 5 3 4 4 4 4 4 4 4 4 4 8 8 8 8 8 1 8 1 11 4 4 4 3 3 1 1 4 2 2 1 2 1 2 1 2 1 1 2 1 1 1 3 3 2 1 1 1 1 2 2 2 2 3 1 1 1 1 1 4 3 11 11 5 5 2 1 1 3 4 3 3 3 3 2 2 1 1 2 3 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 // SPDX-License-Identifier: GPL-2.0-only /* * binfmt_misc.c * * Copyright (C) 1997 Richard Günther * * binfmt_misc detects binaries via a magic or filename extension and invokes * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/sched/mm.h> #include <linux/magic.h> #include <linux/binfmts.h> #include <linux/slab.h> #include <linux/ctype.h> #include <linux/string_helpers.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/syscalls.h> #include <linux/fs.h> #include <linux/uaccess.h> #include "internal.h" #ifdef DEBUG # define USE_DEBUG 1 #else # define USE_DEBUG 0 #endif enum { VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */ }; enum {Enabled, Magic}; #define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) #define MISC_FMT_OPEN_BINARY (1UL << 30) #define MISC_FMT_CREDENTIALS (1UL << 29) #define MISC_FMT_OPEN_FILE (1UL << 28) typedef struct { struct list_head list; unsigned long flags; /* type, status, etc. */ int offset; /* offset of magic */ int size; /* size of magic/mask */ char *magic; /* magic or filename extension */ char *mask; /* mask, NULL for exact match */ const char *interpreter; /* filename of interpreter */ char *name; struct dentry *dentry; struct file *interp_file; refcount_t users; /* sync removal with load_misc_binary() */ } Node; static struct file_system_type bm_fs_type; /* * Max length of the register string. Determined by: * - 7 delimiters * - name: ~50 bytes * - type: 1 byte * - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE) * - magic: 128 bytes (512 in escaped form) * - mask: 128 bytes (512 in escaped form) * - interp: ~50 bytes * - flags: 5 bytes * Round that up a bit, and then back off to hold the internal data * (like struct Node). */ #define MAX_REGISTER_LENGTH 1920 /** * search_binfmt_handler - search for a binary handler for @bprm * @misc: handle to binfmt_misc instance * @bprm: binary for which we are looking for a handler * * Search for a binary type handler for @bprm in the list of registered binary * type handlers. * * Return: binary type list entry on success, NULL on failure */ static Node *search_binfmt_handler(struct binfmt_misc *misc, struct linux_binprm *bprm) { char *p = strrchr(bprm->interp, '.'); Node *e; /* Walk all the registered handlers. */ list_for_each_entry(e, &misc->entries, list) { char *s; int j; /* Make sure this one is currently enabled. */ if (!test_bit(Enabled, &e->flags)) continue; /* Do matching based on extension if applicable. */ if (!test_bit(Magic, &e->flags)) { if (p && !strcmp(e->magic, p + 1)) return e; continue; } /* Do matching based on magic & mask. */ s = bprm->buf + e->offset; if (e->mask) { for (j = 0; j < e->size; j++) if ((*s++ ^ e->magic[j]) & e->mask[j]) break; } else { for (j = 0; j < e->size; j++) if ((*s++ ^ e->magic[j])) break; } if (j == e->size) return e; } return NULL; } /** * get_binfmt_handler - try to find a binary type handler * @misc: handle to binfmt_misc instance * @bprm: binary for which we are looking for a handler * * Try to find a binfmt handler for the binary type. If one is found take a * reference to protect against removal via bm_{entry,status}_write(). * * Return: binary type list entry on success, NULL on failure */ static Node *get_binfmt_handler(struct binfmt_misc *misc, struct linux_binprm *bprm) { Node *e; read_lock(&misc->entries_lock); e = search_binfmt_handler(misc, bprm); if (e) refcount_inc(&e->users); read_unlock(&misc->entries_lock); return e; } /** * put_binfmt_handler - put binary handler node * @e: node to put * * Free node syncing with load_misc_binary() and defer final free to * load_misc_binary() in case it is using the binary type handler we were * requested to remove. */ static void put_binfmt_handler(Node *e) { if (refcount_dec_and_test(&e->users)) { if (e->flags & MISC_FMT_OPEN_FILE) filp_close(e->interp_file, NULL); kfree(e); } } /** * load_binfmt_misc - load the binfmt_misc of the caller's user namespace * * To be called in load_misc_binary() to load the relevant struct binfmt_misc. * If a user namespace doesn't have its own binfmt_misc mount it can make use * of its ancestor's binfmt_misc handlers. This mimicks the behavior of * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where * available to all user and user namespaces on the system. * * Return: the binfmt_misc instance of the caller's user namespace */ static struct binfmt_misc *load_binfmt_misc(void) { const struct user_namespace *user_ns; struct binfmt_misc *misc; user_ns = current_user_ns(); while (user_ns) { /* Pairs with smp_store_release() in bm_fill_super(). */ misc = smp_load_acquire(&user_ns->binfmt_misc); if (misc) return misc; user_ns = user_ns->parent; } return &init_binfmt_misc; } /* * the loader itself */ static int load_misc_binary(struct linux_binprm *bprm) { Node *fmt; struct file *interp_file = NULL; int retval = -ENOEXEC; struct binfmt_misc *misc; misc = load_binfmt_misc(); if (!misc->enabled) return retval; fmt = get_binfmt_handler(misc, bprm); if (!fmt) return retval; /* Need to be able to load the file after exec */ retval = -ENOENT; if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) goto ret; if (fmt->flags & MISC_FMT_PRESERVE_ARGV0) { bprm->interp_flags |= BINPRM_FLAGS_PRESERVE_ARGV0; } else { retval = remove_arg_zero(bprm); if (retval) goto ret; } if (fmt->flags & MISC_FMT_OPEN_BINARY) bprm->have_execfd = 1; /* make argv[1] be the path to the binary */ retval = copy_string_kernel(bprm->interp, bprm); if (retval < 0) goto ret; bprm->argc++; /* add the interp as argv[0] */ retval = copy_string_kernel(fmt->interpreter, bprm); if (retval < 0) goto ret; bprm->argc++; /* Update interp in case binfmt_script needs it. */ retval = bprm_change_interp(fmt->interpreter, bprm); if (retval < 0) goto ret; if (fmt->flags & MISC_FMT_OPEN_FILE) { interp_file = file_clone_open(fmt->interp_file); if (!IS_ERR(interp_file)) deny_write_access(interp_file); } else { interp_file = open_exec(fmt->interpreter); } retval = PTR_ERR(interp_file); if (IS_ERR(interp_file)) goto ret; bprm->interpreter = interp_file; if (fmt->flags & MISC_FMT_CREDENTIALS) bprm->execfd_creds = 1; retval = 0; ret: /* * If we actually put the node here all concurrent calls to * load_misc_binary() will have finished. We also know * that for the refcount to be zero someone must have concurently * removed the binary type handler from the list and it's our job to * free it. */ put_binfmt_handler(fmt); return retval; } /* Command parsers */ /* * parses and copies one argument enclosed in del from *sp to *dp, * recognising the \x special. * returns pointer to the copied argument or NULL in case of an * error (and sets err) or null argument length. */ static char *scanarg(char *s, char del) { char c; while ((c = *s++) != del) { if (c == '\\' && *s == 'x') { s++; if (!isxdigit(*s++)) return NULL; if (!isxdigit(*s++)) return NULL; } } s[-1] ='\0'; return s; } static char *check_special_flags(char *sfs, Node *e) { char *p = sfs; int cont = 1; /* special flags */ while (cont) { switch (*p) { case 'P': pr_debug("register: flag: P (preserve argv0)\n"); p++; e->flags |= MISC_FMT_PRESERVE_ARGV0; break; case 'O': pr_debug("register: flag: O (open binary)\n"); p++; e->flags |= MISC_FMT_OPEN_BINARY; break; case 'C': pr_debug("register: flag: C (preserve creds)\n"); p++; /* this flags also implies the open-binary flag */ e->flags |= (MISC_FMT_CREDENTIALS | MISC_FMT_OPEN_BINARY); break; case 'F': pr_debug("register: flag: F: open interpreter file now\n"); p++; e->flags |= MISC_FMT_OPEN_FILE; break; default: cont = 0; } } return p; } /* * This registers a new binary format, it recognises the syntax * ':name:type:offset:magic:mask:interpreter:flags' * where the ':' is the IFS, that can be chosen with the first char */ static Node *create_entry(const char __user *buffer, size_t count) { Node *e; int memsize, err; char *buf, *p; char del; pr_debug("register: received %zu bytes\n", count); /* some sanity checks */ err = -EINVAL; if ((count < 11) || (count > MAX_REGISTER_LENGTH)) goto out; err = -ENOMEM; memsize = sizeof(Node) + count + 8; e = kmalloc(memsize, GFP_KERNEL_ACCOUNT); if (!e) goto out; p = buf = (char *)e + sizeof(Node); memset(e, 0, sizeof(Node)); if (copy_from_user(buf, buffer, count)) goto efault; del = *p++; /* delimeter */ pr_debug("register: delim: %#x {%c}\n", del, del); /* Pad the buffer with the delim to simplify parsing below. */ memset(buf + count, del, 8); /* Parse the 'name' field. */ e->name = p; p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; if (!e->name[0] || !strcmp(e->name, ".") || !strcmp(e->name, "..") || strchr(e->name, '/')) goto einval; pr_debug("register: name: {%s}\n", e->name); /* Parse the 'type' field. */ switch (*p++) { case 'E': pr_debug("register: type: E (extension)\n"); e->flags = 1 << Enabled; break; case 'M': pr_debug("register: type: M (magic)\n"); e->flags = (1 << Enabled) | (1 << Magic); break; default: goto einval; } if (*p++ != del) goto einval; if (test_bit(Magic, &e->flags)) { /* Handle the 'M' (magic) format. */ char *s; /* Parse the 'offset' field. */ s = strchr(p, del); if (!s) goto einval; *s = '\0'; if (p != s) { int r = kstrtoint(p, 10, &e->offset); if (r != 0 || e->offset < 0) goto einval; } p = s; if (*p++) goto einval; pr_debug("register: offset: %#x\n", e->offset); /* Parse the 'magic' field. */ e->magic = p; p = scanarg(p, del); if (!p) goto einval; if (!e->magic[0]) goto einval; if (USE_DEBUG) print_hex_dump_bytes( KBUILD_MODNAME ": register: magic[raw]: ", DUMP_PREFIX_NONE, e->magic, p - e->magic); /* Parse the 'mask' field. */ e->mask = p; p = scanarg(p, del); if (!p) goto einval; if (!e->mask[0]) { e->mask = NULL; pr_debug("register: mask[raw]: none\n"); } else if (USE_DEBUG) print_hex_dump_bytes( KBUILD_MODNAME ": register: mask[raw]: ", DUMP_PREFIX_NONE, e->mask, p - e->mask); /* * Decode the magic & mask fields. * Note: while we might have accepted embedded NUL bytes from * above, the unescape helpers here will stop at the first one * it encounters. */ e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX); if (e->mask && string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) goto einval; if (e->size > BINPRM_BUF_SIZE || BINPRM_BUF_SIZE - e->size < e->offset) goto einval; pr_debug("register: magic/mask length: %i\n", e->size); if (USE_DEBUG) { print_hex_dump_bytes( KBUILD_MODNAME ": register: magic[decoded]: ", DUMP_PREFIX_NONE, e->magic, e->size); if (e->mask) { int i; char *masked = kmalloc(e->size, GFP_KERNEL_ACCOUNT); print_hex_dump_bytes( KBUILD_MODNAME ": register: mask[decoded]: ", DUMP_PREFIX_NONE, e->mask, e->size); if (masked) { for (i = 0; i < e->size; ++i) masked[i] = e->magic[i] & e->mask[i]; print_hex_dump_bytes( KBUILD_MODNAME ": register: magic[masked]: ", DUMP_PREFIX_NONE, masked, e->size); kfree(masked); } } } } else { /* Handle the 'E' (extension) format. */ /* Skip the 'offset' field. */ p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; /* Parse the 'magic' field. */ e->magic = p; p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; if (!e->magic[0] || strchr(e->magic, '/')) goto einval; pr_debug("register: extension: {%s}\n", e->magic); /* Skip the 'mask' field. */ p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; } /* Parse the 'interpreter' field. */ e->interpreter = p; p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; if (!e->interpreter[0]) goto einval; pr_debug("register: interpreter: {%s}\n", e->interpreter); /* Parse the 'flags' field. */ p = check_special_flags(p, e); if (*p == '\n') p++; if (p != buf + count) goto einval; return e; out: return ERR_PTR(err); efault: kfree(e); return ERR_PTR(-EFAULT); einval: kfree(e); return ERR_PTR(-EINVAL); } /* * Set status of entry/binfmt_misc: * '1' enables, '0' disables and '-1' clears entry/binfmt_misc */ static int parse_command(const char __user *buffer, size_t count) { char s[4]; if (count > 3) return -EINVAL; if (copy_from_user(s, buffer, count)) return -EFAULT; if (!count) return 0; if (s[count - 1] == '\n') count--; if (count == 1 && s[0] == '0') return 1; if (count == 1 && s[0] == '1') return 2; if (count == 2 && s[0] == '-' && s[1] == '1') return 3; return -EINVAL; } /* generic stuff */ static void entry_status(Node *e, char *page) { char *dp = page; const char *status = "disabled"; if (test_bit(Enabled, &e->flags)) status = "enabled"; if (!VERBOSE_STATUS) { sprintf(page, "%s\n", status); return; } dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter); /* print the special flags */ dp += sprintf(dp, "flags: "); if (e->flags & MISC_FMT_PRESERVE_ARGV0) *dp++ = 'P'; if (e->flags & MISC_FMT_OPEN_BINARY) *dp++ = 'O'; if (e->flags & MISC_FMT_CREDENTIALS) *dp++ = 'C'; if (e->flags & MISC_FMT_OPEN_FILE) *dp++ = 'F'; *dp++ = '\n'; if (!test_bit(Magic, &e->flags)) { sprintf(dp, "extension .%s\n", e->magic); } else { dp += sprintf(dp, "offset %i\nmagic ", e->offset); dp = bin2hex(dp, e->magic, e->size); if (e->mask) { dp += sprintf(dp, "\nmask "); dp = bin2hex(dp, e->mask, e->size); } *dp++ = '\n'; *dp = '\0'; } } static struct inode *bm_get_inode(struct super_block *sb, int mode) { struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); inode->i_mode = mode; simple_inode_init_ts(inode); } return inode; } /** * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode * @inode: inode of the relevant binfmt_misc instance * * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can * be done without any memory barriers because we are guaranteed that * user_ns->binfmt_misc is fully initialized. It was fully initialized when the * binfmt_misc mount was first created. * * Return: struct binfmt_misc of the relevant binfmt_misc instance */ static struct binfmt_misc *i_binfmt_misc(struct inode *inode) { return inode->i_sb->s_user_ns->binfmt_misc; } /** * bm_evict_inode - cleanup data associated with @inode * @inode: inode to which the data is attached * * Cleanup the binary type handler data associated with @inode if a binary type * entry is removed or the filesystem is unmounted and the super block is * shutdown. * * If the ->evict call was not caused by a super block shutdown but by a write * to remove the entry or all entries via bm_{entry,status}_write() the entry * will have already been removed from the list. We keep the list_empty() check * to make that explicit. */ static void bm_evict_inode(struct inode *inode) { Node *e = inode->i_private; clear_inode(inode); if (e) { struct binfmt_misc *misc; misc = i_binfmt_misc(inode); write_lock(&misc->entries_lock); if (!list_empty(&e->list)) list_del_init(&e->list); write_unlock(&misc->entries_lock); put_binfmt_handler(e); } } /** * remove_binfmt_handler - remove a binary type handler * @misc: handle to binfmt_misc instance * @e: binary type handler to remove * * Remove a binary type handler from the list of binary type handlers and * remove its associated dentry. This is called from * binfmt_{entry,status}_write(). In the future, we might want to think about * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's * to use writes to files in order to delete binary type handlers. But it has * worked for so long that it's not a pressing issue. */ static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e) { write_lock(&misc->entries_lock); list_del_init(&e->list); write_unlock(&misc->entries_lock); locked_recursive_removal(e->dentry, NULL); } /* /<entry> */ static ssize_t bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { Node *e = file_inode(file)->i_private; ssize_t res; char *page; page = (char *) __get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; entry_status(e, page); res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page)); free_page((unsigned long) page); return res; } static ssize_t bm_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct inode *inode = file_inode(file); Node *e = inode->i_private; int res = parse_command(buffer, count); switch (res) { case 1: /* Disable this handler. */ clear_bit(Enabled, &e->flags); break; case 2: /* Enable this handler. */ set_bit(Enabled, &e->flags); break; case 3: /* Delete this handler. */ inode = d_inode(inode->i_sb->s_root); inode_lock_nested(inode, I_MUTEX_PARENT); /* * In order to add new element or remove elements from the list * via bm_{entry,register,status}_write() inode_lock() on the * root inode must be held. * The lock is exclusive ensuring that the list can't be * modified. Only load_misc_binary() can access but does so * read-only. So we only need to take the write lock when we * actually remove the entry from the list. */ if (!list_empty(&e->list)) remove_binfmt_handler(i_binfmt_misc(inode), e); inode_unlock(inode); break; default: return res; } return count; } static const struct file_operations bm_entry_operations = { .read = bm_entry_read, .write = bm_entry_write, .llseek = default_llseek, }; /* /register */ static ssize_t bm_register_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { Node *e; struct inode *inode; struct super_block *sb = file_inode(file)->i_sb; struct dentry *root = sb->s_root, *dentry; struct binfmt_misc *misc; int err = 0; struct file *f = NULL; e = create_entry(buffer, count); if (IS_ERR(e)) return PTR_ERR(e); if (e->flags & MISC_FMT_OPEN_FILE) { /* * Now that we support unprivileged binfmt_misc mounts make * sure we use the credentials that the register @file was * opened with to also open the interpreter. Before that this * didn't matter much as only a privileged process could open * the register file. */ scoped_with_creds(file->f_cred) f = open_exec(e->interpreter); if (IS_ERR(f)) { pr_notice("register: failed to install interpreter file %s\n", e->interpreter); kfree(e); return PTR_ERR(f); } e->interp_file = f; } inode_lock(d_inode(root)); dentry = lookup_noperm(&QSTR(e->name), root); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out; err = -EEXIST; if (d_really_is_positive(dentry)) goto out2; inode = bm_get_inode(sb, S_IFREG | 0644); err = -ENOMEM; if (!inode) goto out2; refcount_set(&e->users, 1); e->dentry = dget(dentry); inode->i_private = e; inode->i_fop = &bm_entry_operations; d_instantiate(dentry, inode); misc = i_binfmt_misc(inode); write_lock(&misc->entries_lock); list_add(&e->list, &misc->entries); write_unlock(&misc->entries_lock); err = 0; out2: dput(dentry); out: inode_unlock(d_inode(root)); if (err) { if (f) { exe_file_allow_write_access(f); filp_close(f, NULL); } kfree(e); return err; } return count; } static const struct file_operations bm_register_operations = { .write = bm_register_write, .llseek = noop_llseek, }; /* /status */ static ssize_t bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct binfmt_misc *misc; char *s; misc = i_binfmt_misc(file_inode(file)); s = misc->enabled ? "enabled\n" : "disabled\n"; return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); } static ssize_t bm_status_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct binfmt_misc *misc; int res = parse_command(buffer, count); Node *e, *next; struct inode *inode; misc = i_binfmt_misc(file_inode(file)); switch (res) { case 1: /* Disable all handlers. */ misc->enabled = false; break; case 2: /* Enable all handlers. */ misc->enabled = true; break; case 3: /* Delete all handlers. */ inode = d_inode(file_inode(file)->i_sb->s_root); inode_lock_nested(inode, I_MUTEX_PARENT); /* * In order to add new element or remove elements from the list * via bm_{entry,register,status}_write() inode_lock() on the * root inode must be held. * The lock is exclusive ensuring that the list can't be * modified. Only load_misc_binary() can access but does so * read-only. So we only need to take the write lock when we * actually remove the entry from the list. */ list_for_each_entry_safe(e, next, &misc->entries, list) remove_binfmt_handler(misc, e); inode_unlock(inode); break; default: return res; } return count; } static const struct file_operations bm_status_operations = { .read = bm_status_read, .write = bm_status_write, .llseek = default_llseek, }; /* Superblock handling */ static void bm_put_super(struct super_block *sb) { struct user_namespace *user_ns = sb->s_fs_info; sb->s_fs_info = NULL; put_user_ns(user_ns); } static const struct super_operations s_ops = { .statfs = simple_statfs, .evict_inode = bm_evict_inode, .put_super = bm_put_super, }; static int bm_fill_super(struct super_block *sb, struct fs_context *fc) { int err; struct user_namespace *user_ns = sb->s_user_ns; struct binfmt_misc *misc; static const struct tree_descr bm_files[] = { [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, [3] = {"register", &bm_register_operations, S_IWUSR}, /* last one */ {""} }; if (WARN_ON(user_ns != current_user_ns())) return -EINVAL; /* * Lazily allocate a new binfmt_misc instance for this namespace, i.e. * do it here during the first mount of binfmt_misc. We don't need to * waste memory for every user namespace allocation. It's likely much * more common to not mount a separate binfmt_misc instance than it is * to mount one. * * While multiple superblocks can exist they are keyed by userns in * s_fs_info for binfmt_misc. Hence, the vfs guarantees that * bm_fill_super() is called exactly once whenever a binfmt_misc * superblock for a userns is created. This in turn lets us conclude * that when a binfmt_misc superblock is created for the first time for * a userns there's no one racing us. Therefore we don't need any * barriers when we dereference binfmt_misc. */ misc = user_ns->binfmt_misc; if (!misc) { /* * If it turns out that most user namespaces actually want to * register their own binary type handler and therefore all * create their own separate binfmt_misc mounts we should * consider turning this into a kmem cache. */ misc = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL); if (!misc) return -ENOMEM; INIT_LIST_HEAD(&misc->entries); rwlock_init(&misc->entries_lock); /* Pairs with smp_load_acquire() in load_binfmt_misc(). */ smp_store_release(&user_ns->binfmt_misc, misc); } /* * When the binfmt_misc superblock for this userns is shutdown * ->enabled might have been set to false and we don't reinitialize * ->enabled again in put_super() as someone might already be mounting * binfmt_misc again. It also would be pointless since by the time * ->put_super() is called we know that the binary type list for this * bintfmt_misc mount is empty making load_misc_binary() return * -ENOEXEC independent of whether ->enabled is true. Instead, if * someone mounts binfmt_misc for the first time or again we simply * reset ->enabled to true. */ misc->enabled = true; err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); if (!err) sb->s_op = &s_ops; return err; } static void bm_free(struct fs_context *fc) { if (fc->s_fs_info) put_user_ns(fc->s_fs_info); } static int bm_get_tree(struct fs_context *fc) { return get_tree_keyed(fc, bm_fill_super, get_user_ns(fc->user_ns)); } static const struct fs_context_operations bm_context_ops = { .free = bm_free, .get_tree = bm_get_tree, }; static int bm_init_fs_context(struct fs_context *fc) { fc->ops = &bm_context_ops; return 0; } static struct linux_binfmt misc_format = { .module = THIS_MODULE, .load_binary = load_misc_binary, }; static struct file_system_type bm_fs_type = { .owner = THIS_MODULE, .name = "binfmt_misc", .init_fs_context = bm_init_fs_context, .fs_flags = FS_USERNS_MOUNT, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("binfmt_misc"); static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); if (!err) insert_binfmt(&misc_format); return err; } static void __exit exit_misc_binfmt(void) { unregister_binfmt(&misc_format); unregister_filesystem(&bm_fs_type); } core_initcall(init_misc_binfmt); module_exit(exit_misc_binfmt); MODULE_DESCRIPTION("Kernel support for miscellaneous binaries"); MODULE_LICENSE("GPL");
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/sysctl.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/in.h> #include <linux/if_ether.h> #include <linux/slab.h> #include <asm/io.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/ip.h> #include <net/arp.h> #include <net/ax25.h> #include <net/rose.h> static int rose_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2); if (daddr) memcpy(buff + 7, daddr, dev->addr_len); *buff++ = ROSE_GFI | ROSE_Q_BIT; *buff++ = 0x00; *buff++ = ROSE_DATA; *buff++ = 0x7F; *buff++ = AX25_P_IP; if (daddr != NULL) return 37; return -37; } static int rose_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; int err; if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) return 0; if (dev->flags & IFF_UP) { err = rose_add_loopback_node((rose_address *)sa->sa_data); if (err) return err; rose_del_loopback_node((const rose_address *)dev->dev_addr); } dev_addr_set(dev, sa->sa_data); return 0; } static int rose_open(struct net_device *dev) { int err; err = rose_add_loopback_node((const rose_address *)dev->dev_addr); if (err) return err; netif_start_queue(dev); return 0; } static int rose_close(struct net_device *dev) { netif_stop_queue(dev); rose_del_loopback_node((const rose_address *)dev->dev_addr); return 0; } static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; unsigned int len = skb->len; if (!netif_running(dev)) { printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n"); return NETDEV_TX_BUSY; } if (!rose_route_frame(skb, NULL)) { dev_kfree_skb(skb); stats->tx_errors++; return NETDEV_TX_OK; } stats->tx_packets++; stats->tx_bytes += len; return NETDEV_TX_OK; } static const struct header_ops rose_header_ops = { .create = rose_header, }; static const struct net_device_ops rose_netdev_ops = { .ndo_open = rose_open, .ndo_stop = rose_close, .ndo_start_xmit = rose_xmit, .ndo_set_mac_address = rose_set_mac_address, }; void rose_setup(struct net_device *dev) { dev->mtu = ROSE_MAX_PACKET_SIZE - 2; dev->netdev_ops = &rose_netdev_ops; dev->header_ops = &rose_header_ops; dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; dev->addr_len = ROSE_ADDR_LEN; dev->type = ARPHRD_ROSE; /* New-style flags. */ dev->flags = IFF_NOARP; }
13 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2024 Google LLC * * dbitmap - dynamically sized bitmap library. * * Used by the binder driver to optimize the allocation of the smallest * available descriptor ID. Each bit in the bitmap represents the state * of an ID. * * A dbitmap can grow or shrink as needed. This part has been designed * considering that users might need to briefly release their locks in * order to allocate memory for the new bitmap. These operations then, * are verified to determine if the grow or shrink is sill valid. * * This library does not provide protection against concurrent access * by itself. Binder uses the proc->outer_lock for this purpose. */ #ifndef _LINUX_DBITMAP_H #define _LINUX_DBITMAP_H #include <linux/bitmap.h> #define NBITS_MIN BITS_PER_TYPE(unsigned long) struct dbitmap { unsigned int nbits; unsigned long *map; }; static inline int dbitmap_enabled(struct dbitmap *dmap) { return !!dmap->nbits; } static inline void dbitmap_free(struct dbitmap *dmap) { dmap->nbits = 0; kfree(dmap->map); dmap->map = NULL; } /* Returns the nbits that a dbitmap can shrink to, 0 if not possible. */ static inline unsigned int dbitmap_shrink_nbits(struct dbitmap *dmap) { unsigned int bit; if (dmap->nbits <= NBITS_MIN) return 0; /* * Determine if the bitmap can shrink based on the position of * its last set bit. If the bit is within the first quarter of * the bitmap then shrinking is possible. In this case, the * bitmap should shrink to half its current size. */ bit = find_last_bit(dmap->map, dmap->nbits); if (bit < (dmap->nbits >> 2)) return dmap->nbits >> 1; /* find_last_bit() returns dmap->nbits when no bits are set. */ if (bit == dmap->nbits) return NBITS_MIN; return 0; } /* Replace the internal bitmap with a new one of different size */ static inline void dbitmap_replace(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) { bitmap_copy(new, dmap->map, min(dmap->nbits, nbits)); kfree(dmap->map); dmap->map = new; dmap->nbits = nbits; } static inline void dbitmap_shrink(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) { if (!new) return; /* * Verify that shrinking to @nbits is still possible. The @new * bitmap might have been allocated without locks, so this call * could now be outdated. In this case, free @new and move on. */ if (!dbitmap_enabled(dmap) || dbitmap_shrink_nbits(dmap) != nbits) { kfree(new); return; } dbitmap_replace(dmap, new, nbits); } /* Returns the nbits that a dbitmap can grow to. */ static inline unsigned int dbitmap_grow_nbits(struct dbitmap *dmap) { return dmap->nbits << 1; } static inline void dbitmap_grow(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) { /* * Verify that growing to @nbits is still possible. The @new * bitmap might have been allocated without locks, so this call * could now be outdated. In this case, free @new and move on. */ if (!dbitmap_enabled(dmap) || nbits <= dmap->nbits) { kfree(new); return; } /* * Check for ENOMEM after confirming the grow operation is still * required. This ensures we only disable the dbitmap when it's * necessary. Once the dbitmap is disabled, binder will fallback * to slow_desc_lookup_olocked(). */ if (!new) { dbitmap_free(dmap); return; } dbitmap_replace(dmap, new, nbits); } /* * Finds and sets the next zero bit in the bitmap. Upon success @bit * is populated with the index and 0 is returned. Otherwise, -ENOSPC * is returned to indicate that a dbitmap_grow() is needed. */ static inline int dbitmap_acquire_next_zero_bit(struct dbitmap *dmap, unsigned long offset, unsigned long *bit) { unsigned long n; n = find_next_zero_bit(dmap->map, dmap->nbits, offset); if (n == dmap->nbits) return -ENOSPC; *bit = n; set_bit(n, dmap->map); return 0; } static inline void dbitmap_clear_bit(struct dbitmap *dmap, unsigned long bit) { clear_bit(bit, dmap->map); } static inline int dbitmap_init(struct dbitmap *dmap) { dmap->map = bitmap_zalloc(NBITS_MIN, GFP_KERNEL); if (!dmap->map) { dmap->nbits = 0; return -ENOMEM; } dmap->nbits = NBITS_MIN; return 0; } #endif
3 782 779 6 6 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 // SPDX-License-Identifier: GPL-2.0-only /* * * Authors: * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de> */ #include <linux/if_arp.h> #include <linux/module.h> #include <net/6lowpan.h> #include <net/addrconf.h> #include "6lowpan_i.h" int lowpan_register_netdevice(struct net_device *dev, enum lowpan_lltypes lltype) { int i, ret; switch (lltype) { case LOWPAN_LLTYPE_IEEE802154: dev->addr_len = EUI64_ADDR_LEN; break; case LOWPAN_LLTYPE_BTLE: dev->addr_len = ETH_ALEN; break; } dev->type = ARPHRD_6LOWPAN; dev->mtu = IPV6_MIN_MTU; lowpan_dev(dev)->lltype = lltype; spin_lock_init(&lowpan_dev(dev)->ctx.lock); for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) lowpan_dev(dev)->ctx.table[i].id = i; dev->ndisc_ops = &lowpan_ndisc_ops; ret = register_netdevice(dev); if (ret < 0) return ret; lowpan_dev_debugfs_init(dev); return ret; } EXPORT_SYMBOL(lowpan_register_netdevice); int lowpan_register_netdev(struct net_device *dev, enum lowpan_lltypes lltype) { int ret; rtnl_lock(); ret = lowpan_register_netdevice(dev, lltype); rtnl_unlock(); return ret; } EXPORT_SYMBOL(lowpan_register_netdev); void lowpan_unregister_netdevice(struct net_device *dev) { unregister_netdevice(dev); lowpan_dev_debugfs_exit(dev); } EXPORT_SYMBOL(lowpan_unregister_netdevice); void lowpan_unregister_netdev(struct net_device *dev) { rtnl_lock(); lowpan_unregister_netdevice(dev); rtnl_unlock(); } EXPORT_SYMBOL(lowpan_unregister_netdev); int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev) { struct wpan_dev *wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr; /* Set short_addr autoconfiguration if short_addr is present only */ if (!lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) return -1; /* For either address format, all zero addresses MUST NOT be used */ if (wpan_dev->pan_id == cpu_to_le16(0x0000) && wpan_dev->short_addr == cpu_to_le16(0x0000)) return -1; /* Alternatively, if no PAN ID is known, 16 zero bits may be used */ if (wpan_dev->pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST)) memset(eui, 0, 2); else ieee802154_le16_to_be16(eui, &wpan_dev->pan_id); /* The "Universal/Local" (U/L) bit shall be set to zero */ eui[0] &= ~2; eui[2] = 0; eui[3] = 0xFF; eui[4] = 0xFE; eui[5] = 0; ieee802154_le16_to_be16(&eui[6], &wpan_dev->short_addr); return 0; } static int lowpan_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev; struct in6_addr addr; int i; if (dev->type != ARPHRD_6LOWPAN) return NOTIFY_DONE; idev = __in6_dev_get(dev); if (!idev) return NOTIFY_DONE; switch (event) { case NETDEV_UP: case NETDEV_CHANGE: /* (802.15.4 6LoWPAN short address slaac handling */ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) && addrconf_ifid_802154_6lowpan(addr.s6_addr + 8, dev) == 0) { __ipv6_addr_set_half(&addr.s6_addr32[0], htonl(0xFE800000), 0); addrconf_add_linklocal(idev, &addr, 0); } break; case NETDEV_DOWN: for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, &lowpan_dev(dev)->ctx.table[i].flags); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static struct notifier_block lowpan_notifier = { .notifier_call = lowpan_event, }; static int __init lowpan_module_init(void) { int ret; lowpan_debugfs_init(); ret = register_netdevice_notifier(&lowpan_notifier); if (ret < 0) { lowpan_debugfs_exit(); return ret; } request_module_nowait("nhc_dest"); request_module_nowait("nhc_fragment"); request_module_nowait("nhc_hop"); request_module_nowait("nhc_ipv6"); request_module_nowait("nhc_mobility"); request_module_nowait("nhc_routing"); request_module_nowait("nhc_udp"); return 0; } static void __exit lowpan_module_exit(void) { lowpan_debugfs_exit(); unregister_netdevice_notifier(&lowpan_notifier); } module_init(lowpan_module_init); module_exit(lowpan_module_exit); MODULE_DESCRIPTION("IPv6 over Low-Power Wireless Personal Area Network core module"); MODULE_LICENSE("GPL");
1 1 1 1 1 1 1 1 1 1 1 1 1 19 17 1 17 16 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 997 990 95 95 50 95 5 94 50 96 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 /* SPDX-License-Identifier: GPL-2.0-only */ /* Authors: Karl MacMillan <kmacmillan@tresys.com> * Frank Mayer <mayerf@tresys.com> * Copyright (C) 2003 - 2004 Tresys Technology, LLC */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/spinlock.h> #include <linux/slab.h> #include "security.h" #include "conditional.h" #include "services.h" /* * cond_evaluate_expr evaluates a conditional expr * in reverse polish notation. It returns true (1), false (0), * or undefined (-1). Undefined occurs when the expression * exceeds the stack depth of COND_EXPR_MAXDEPTH. */ static int cond_evaluate_expr(struct policydb *p, struct cond_expr *expr) { u32 i; int s[COND_EXPR_MAXDEPTH]; int sp = -1; if (expr->len == 0) return -1; for (i = 0; i < expr->len; i++) { struct cond_expr_node *node = &expr->nodes[i]; switch (node->expr_type) { case COND_BOOL: if (sp == (COND_EXPR_MAXDEPTH - 1)) return -1; sp++; s[sp] = p->bool_val_to_struct[node->boolean - 1]->state; break; case COND_NOT: if (sp < 0) return -1; s[sp] = !s[sp]; break; case COND_OR: if (sp < 1) return -1; sp--; s[sp] |= s[sp + 1]; break; case COND_AND: if (sp < 1) return -1; sp--; s[sp] &= s[sp + 1]; break; case COND_XOR: if (sp < 1) return -1; sp--; s[sp] ^= s[sp + 1]; break; case COND_EQ: if (sp < 1) return -1; sp--; s[sp] = (s[sp] == s[sp + 1]); break; case COND_NEQ: if (sp < 1) return -1; sp--; s[sp] = (s[sp] != s[sp + 1]); break; default: return -1; } } return s[0]; } /* * evaluate_cond_node evaluates the conditional stored in * a struct cond_node and if the result is different than the * current state of the node it sets the rules in the true/false * list appropriately. If the result of the expression is undefined * all of the rules are disabled for safety. */ static void evaluate_cond_node(struct policydb *p, struct cond_node *node) { struct avtab_node *avnode; int new_state; u32 i; new_state = cond_evaluate_expr(p, &node->expr); if (new_state != node->cur_state) { node->cur_state = new_state; if (new_state == -1) pr_err("SELinux: expression result was undefined - disabling all rules.\n"); /* turn the rules on or off */ for (i = 0; i < node->true_list.len; i++) { avnode = node->true_list.nodes[i]; if (new_state <= 0) avnode->key.specified &= ~AVTAB_ENABLED; else avnode->key.specified |= AVTAB_ENABLED; } for (i = 0; i < node->false_list.len; i++) { avnode = node->false_list.nodes[i]; /* -1 or 1 */ if (new_state) avnode->key.specified &= ~AVTAB_ENABLED; else avnode->key.specified |= AVTAB_ENABLED; } } } void evaluate_cond_nodes(struct policydb *p) { u32 i; for (i = 0; i < p->cond_list_len; i++) evaluate_cond_node(p, &p->cond_list[i]); } void cond_policydb_init(struct policydb *p) { p->bool_val_to_struct = NULL; p->cond_list = NULL; p->cond_list_len = 0; avtab_init(&p->te_cond_avtab); } static void cond_node_destroy(struct cond_node *node) { kfree(node->expr.nodes); /* the avtab_ptr_t nodes are destroyed by the avtab */ kfree(node->true_list.nodes); kfree(node->false_list.nodes); } static void cond_list_destroy(struct policydb *p) { u32 i; for (i = 0; i < p->cond_list_len; i++) cond_node_destroy(&p->cond_list[i]); kfree(p->cond_list); p->cond_list = NULL; p->cond_list_len = 0; } void cond_policydb_destroy(struct policydb *p) { kfree(p->bool_val_to_struct); avtab_destroy(&p->te_cond_avtab); cond_list_destroy(p); } int cond_init_bool_indexes(struct policydb *p) { kfree(p->bool_val_to_struct); p->bool_val_to_struct = kmalloc_array( p->p_bools.nprim, sizeof(*p->bool_val_to_struct), GFP_KERNEL); if (!p->bool_val_to_struct) return -ENOMEM; avtab_hash_eval(&p->te_cond_avtab, "conditional_rules"); return 0; } int cond_destroy_bool(void *key, void *datum, void *p) { kfree(key); kfree(datum); return 0; } int cond_index_bool(void *key, void *datum, void *datap) { struct policydb *p; struct cond_bool_datum *booldatum; booldatum = datum; p = datap; if (!booldatum->value || booldatum->value > p->p_bools.nprim) return -EINVAL; p->sym_val_to_name[SYM_BOOLS][booldatum->value - 1] = key; p->bool_val_to_struct[booldatum->value - 1] = booldatum; return 0; } static int bool_isvalid(struct cond_bool_datum *b) { if (!(b->state == 0 || b->state == 1)) return 0; return 1; } int cond_read_bool(struct policydb *p, struct symtab *s, struct policy_file *fp) { char *key = NULL; struct cond_bool_datum *booldatum; __le32 buf[3]; u32 len; int rc; booldatum = kzalloc(sizeof(*booldatum), GFP_KERNEL); if (!booldatum) return -ENOMEM; rc = next_entry(buf, fp, sizeof(buf)); if (rc) goto err; booldatum->value = le32_to_cpu(buf[0]); booldatum->state = le32_to_cpu(buf[1]); rc = -EINVAL; if (!bool_isvalid(booldatum)) goto err; len = le32_to_cpu(buf[2]); rc = str_read(&key, GFP_KERNEL, fp, len); if (rc) goto err; rc = symtab_insert(s, key, booldatum); if (rc) goto err; return 0; err: cond_destroy_bool(key, booldatum, NULL); return rc; } struct cond_insertf_data { struct policydb *p; struct avtab_node **dst; struct cond_av_list *other; }; static int cond_insertf(struct avtab *a, const struct avtab_key *k, const struct avtab_datum *d, void *ptr) { struct cond_insertf_data *data = ptr; struct policydb *p = data->p; struct cond_av_list *other = data->other; struct avtab_node *node_ptr; u32 i; bool found; /* * For type rules we have to make certain there aren't any * conflicting rules by searching the te_avtab and the * cond_te_avtab. */ if (k->specified & AVTAB_TYPE) { if (avtab_search_node(&p->te_avtab, k)) { pr_err("SELinux: type rule already exists outside of a conditional.\n"); return -EINVAL; } /* * If we are reading the false list other will be a pointer to * the true list. We can have duplicate entries if there is only * 1 other entry and it is in our true list. * * If we are reading the true list (other == NULL) there shouldn't * be any other entries. */ if (other) { node_ptr = avtab_search_node(&p->te_cond_avtab, k); if (node_ptr) { if (avtab_search_node_next(node_ptr, k->specified)) { pr_err("SELinux: too many conflicting type rules.\n"); return -EINVAL; } found = false; for (i = 0; i < other->len; i++) { if (other->nodes[i] == node_ptr) { found = true; break; } } if (!found) { pr_err("SELinux: conflicting type rules.\n"); return -EINVAL; } } } else { if (avtab_search_node(&p->te_cond_avtab, k)) { pr_err("SELinux: conflicting type rules when adding type rule for true.\n"); return -EINVAL; } } } node_ptr = avtab_insert_nonunique(&p->te_cond_avtab, k, d); if (!node_ptr) { pr_err("SELinux: could not insert rule.\n"); return -ENOMEM; } *data->dst = node_ptr; return 0; } static int cond_read_av_list(struct policydb *p, struct policy_file *fp, struct cond_av_list *list, struct cond_av_list *other) { int rc; __le32 buf[1]; u32 i, len; struct cond_insertf_data data; rc = next_entry(buf, fp, sizeof(u32)); if (rc) return rc; len = le32_to_cpu(buf[0]); if (len == 0) return 0; list->nodes = kcalloc(len, sizeof(*list->nodes), GFP_KERNEL); if (!list->nodes) return -ENOMEM; data.p = p; data.other = other; for (i = 0; i < len; i++) { data.dst = &list->nodes[i]; rc = avtab_read_item(&p->te_cond_avtab, fp, p, cond_insertf, &data, true); if (rc) { kfree(list->nodes); list->nodes = NULL; return rc; } } list->len = len; return 0; } static int expr_node_isvalid(struct policydb *p, struct cond_expr_node *expr) { if (expr->expr_type <= 0 || expr->expr_type > COND_LAST) { pr_err("SELinux: conditional expressions uses unknown operator.\n"); return 0; } if (expr->boolean > p->p_bools.nprim) { pr_err("SELinux: conditional expressions uses unknown bool.\n"); return 0; } return 1; } static int cond_read_node(struct policydb *p, struct cond_node *node, struct policy_file *fp) { __le32 buf[2]; u32 i, len; int rc; rc = next_entry(buf, fp, sizeof(u32) * 2); if (rc) return rc; node->cur_state = le32_to_cpu(buf[0]); /* expr */ len = le32_to_cpu(buf[1]); node->expr.nodes = kcalloc(len, sizeof(*node->expr.nodes), GFP_KERNEL); if (!node->expr.nodes) return -ENOMEM; node->expr.len = len; for (i = 0; i < len; i++) { struct cond_expr_node *expr = &node->expr.nodes[i]; rc = next_entry(buf, fp, sizeof(u32) * 2); if (rc) return rc; expr->expr_type = le32_to_cpu(buf[0]); expr->boolean = le32_to_cpu(buf[1]); if (!expr_node_isvalid(p, expr)) return -EINVAL; } rc = cond_read_av_list(p, fp, &node->true_list, NULL); if (rc) return rc; return cond_read_av_list(p, fp, &node->false_list, &node->true_list); } int cond_read_list(struct policydb *p, struct policy_file *fp) { __le32 buf[1]; u32 i, len; int rc; rc = next_entry(buf, fp, sizeof(buf)); if (rc) return rc; len = le32_to_cpu(buf[0]); p->cond_list = kcalloc(len, sizeof(*p->cond_list), GFP_KERNEL); if (!p->cond_list) return -ENOMEM; rc = avtab_alloc(&(p->te_cond_avtab), p->te_avtab.nel); if (rc) goto err; p->cond_list_len = len; for (i = 0; i < len; i++) { rc = cond_read_node(p, &p->cond_list[i], fp); if (rc) goto err; } return 0; err: cond_list_destroy(p); return rc; } int cond_write_bool(void *vkey, void *datum, void *ptr) { char *key = vkey; struct cond_bool_datum *booldatum = datum; struct policy_data *pd = ptr; struct policy_file *fp = pd->fp; __le32 buf[3]; u32 len; int rc; len = strlen(key); buf[0] = cpu_to_le32(booldatum->value); buf[1] = cpu_to_le32(booldatum->state); buf[2] = cpu_to_le32(len); rc = put_entry(buf, sizeof(u32), 3, fp); if (rc) return rc; rc = put_entry(key, 1, len, fp); if (rc) return rc; return 0; } /* * cond_write_cond_av_list doesn't write out the av_list nodes. * Instead it writes out the key/value pairs from the avtab. This * is necessary because there is no way to uniquely identifying rules * in the avtab so it is not possible to associate individual rules * in the avtab with a conditional without saving them as part of * the conditional. This means that the avtab with the conditional * rules will not be saved but will be rebuilt on policy load. */ static int cond_write_av_list(struct policydb *p, struct cond_av_list *list, struct policy_file *fp) { __le32 buf[1]; u32 i; int rc; buf[0] = cpu_to_le32(list->len); rc = put_entry(buf, sizeof(u32), 1, fp); if (rc) return rc; for (i = 0; i < list->len; i++) { rc = avtab_write_item(p, list->nodes[i], fp); if (rc) return rc; } return 0; } static int cond_write_node(struct policydb *p, struct cond_node *node, struct policy_file *fp) { __le32 buf[2]; int rc; u32 i; buf[0] = cpu_to_le32(node->cur_state); rc = put_entry(buf, sizeof(u32), 1, fp); if (rc) return rc; buf[0] = cpu_to_le32(node->expr.len); rc = put_entry(buf, sizeof(u32), 1, fp); if (rc) return rc; for (i = 0; i < node->expr.len; i++) { buf[0] = cpu_to_le32(node->expr.nodes[i].expr_type); buf[1] = cpu_to_le32(node->expr.nodes[i].boolean); rc = put_entry(buf, sizeof(u32), 2, fp); if (rc) return rc; } rc = cond_write_av_list(p, &node->true_list, fp); if (rc) return rc; rc = cond_write_av_list(p, &node->false_list, fp); if (rc) return rc; return 0; } int cond_write_list(struct policydb *p, struct policy_file *fp) { u32 i; __le32 buf[1]; int rc; buf[0] = cpu_to_le32(p->cond_list_len); rc = put_entry(buf, sizeof(u32), 1, fp); if (rc) return rc; for (i = 0; i < p->cond_list_len; i++) { rc = cond_write_node(p, &p->cond_list[i], fp); if (rc) return rc; } return 0; } void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key, struct extended_perms_decision *xpermd) { struct avtab_node *node; if (!ctab || !key || !xpermd) return; for (node = avtab_search_node(ctab, key); node; node = avtab_search_node_next(node, key->specified)) { if (node->key.specified & AVTAB_ENABLED) services_compute_xperms_decision(xpermd, node); } } /* Determine whether additional permissions are granted by the conditional * av table, and if so, add them to the result */ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd, struct extended_perms *xperms) { struct avtab_node *node; if (!ctab || !key || !avd) return; for (node = avtab_search_node(ctab, key); node; node = avtab_search_node_next(node, key->specified)) { if ((u16)(AVTAB_ALLOWED | AVTAB_ENABLED) == (node->key.specified & (AVTAB_ALLOWED | AVTAB_ENABLED))) avd->allowed |= node->datum.u.data; if ((u16)(AVTAB_AUDITDENY | AVTAB_ENABLED) == (node->key.specified & (AVTAB_AUDITDENY | AVTAB_ENABLED))) /* Since a '0' in an auditdeny mask represents a * permission we do NOT want to audit (dontaudit), we use * the '&' operand to ensure that all '0's in the mask * are retained (much unlike the allow and auditallow cases). */ avd->auditdeny &= node->datum.u.data; if ((u16)(AVTAB_AUDITALLOW | AVTAB_ENABLED) == (node->key.specified & (AVTAB_AUDITALLOW | AVTAB_ENABLED))) avd->auditallow |= node->datum.u.data; if (xperms && (node->key.specified & AVTAB_ENABLED) && (node->key.specified & AVTAB_XPERMS)) services_compute_xperms_drivers(xperms, node); } } static int cond_dup_av_list(struct cond_av_list *new, const struct cond_av_list *orig, struct avtab *avtab) { u32 i; memset(new, 0, sizeof(*new)); new->nodes = kcalloc(orig->len, sizeof(*new->nodes), GFP_KERNEL); if (!new->nodes) return -ENOMEM; for (i = 0; i < orig->len; i++) { new->nodes[i] = avtab_insert_nonunique( avtab, &orig->nodes[i]->key, &orig->nodes[i]->datum); if (!new->nodes[i]) return -ENOMEM; new->len++; } return 0; } static int duplicate_policydb_cond_list(struct policydb *newp, const struct policydb *origp) { int rc; u32 i; rc = avtab_alloc_dup(&newp->te_cond_avtab, &origp->te_cond_avtab); if (rc) return rc; newp->cond_list_len = 0; newp->cond_list = kcalloc(origp->cond_list_len, sizeof(*newp->cond_list), GFP_KERNEL); if (!newp->cond_list) goto error; for (i = 0; i < origp->cond_list_len; i++) { struct cond_node *newn = &newp->cond_list[i]; const struct cond_node *orign = &origp->cond_list[i]; newp->cond_list_len++; newn->cur_state = orign->cur_state; newn->expr.nodes = kmemdup(orign->expr.nodes, orign->expr.len * sizeof(*orign->expr.nodes), GFP_KERNEL); if (!newn->expr.nodes) goto error; newn->expr.len = orign->expr.len; rc = cond_dup_av_list(&newn->true_list, &orign->true_list, &newp->te_cond_avtab); if (rc) goto error; rc = cond_dup_av_list(&newn->false_list, &orign->false_list, &newp->te_cond_avtab); if (rc) goto error; } return 0; error: avtab_destroy(&newp->te_cond_avtab); cond_list_destroy(newp); return -ENOMEM; } static int cond_bools_destroy(void *key, void *datum, void *args) { /* key was not copied so no need to free here */ kfree(datum); return 0; } static int cond_bools_copy(struct hashtab_node *new, const struct hashtab_node *orig, void *args) { struct cond_bool_datum *datum; datum = kmemdup(orig->datum, sizeof(struct cond_bool_datum), GFP_KERNEL); if (!datum) return -ENOMEM; new->key = orig->key; /* No need to copy, never modified */ new->datum = datum; return 0; } static int cond_bools_index(void *key, void *datum, void *args) { struct cond_bool_datum *booldatum, **cond_bool_array; booldatum = datum; cond_bool_array = args; cond_bool_array[booldatum->value - 1] = booldatum; return 0; } static int duplicate_policydb_bools(struct policydb *newdb, const struct policydb *orig) { struct cond_bool_datum **cond_bool_array; int rc; cond_bool_array = kmalloc_array(orig->p_bools.nprim, sizeof(*orig->bool_val_to_struct), GFP_KERNEL); if (!cond_bool_array) return -ENOMEM; rc = hashtab_duplicate(&newdb->p_bools.table, &orig->p_bools.table, cond_bools_copy, cond_bools_destroy, NULL); if (rc) { kfree(cond_bool_array); return -ENOMEM; } hashtab_map(&newdb->p_bools.table, cond_bools_index, cond_bool_array); newdb->bool_val_to_struct = cond_bool_array; newdb->p_bools.nprim = orig->p_bools.nprim; return 0; } void cond_policydb_destroy_dup(struct policydb *p) { hashtab_map(&p->p_bools.table, cond_bools_destroy, NULL); hashtab_destroy(&p->p_bools.table); cond_policydb_destroy(p); } int cond_policydb_dup(struct policydb *new, const struct policydb *orig) { cond_policydb_init(new); if (duplicate_policydb_bools(new, orig)) return -ENOMEM; if (duplicate_policydb_cond_list(new, orig)) { cond_policydb_destroy_dup(new); return -ENOMEM; } return 0; }
3 1 2 1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 /* SPDX-License-Identifier: GPL-2.0-only */ /* * sm3_base.h - core logic for SM3 implementations * * Copyright (C) 2017 ARM Limited or its affiliates. * Written by Gilad Ben-Yossef <gilad@benyossef.com> */ #ifndef _CRYPTO_SM3_BASE_H #define _CRYPTO_SM3_BASE_H #include <crypto/internal/hash.h> #include <crypto/sm3.h> #include <linux/math.h> #include <linux/module.h> #include <linux/string.h> #include <linux/types.h> #include <linux/unaligned.h> typedef void (sm3_block_fn)(struct sm3_state *sst, u8 const *src, int blocks); static inline int sm3_base_init(struct shash_desc *desc) { sm3_init(shash_desc_ctx(desc)); return 0; } static inline int sm3_base_do_update_blocks(struct shash_desc *desc, const u8 *data, unsigned int len, sm3_block_fn *block_fn) { unsigned int remain = len - round_down(len, SM3_BLOCK_SIZE); struct sm3_state *sctx = shash_desc_ctx(desc); sctx->count += len - remain; block_fn(sctx, data, len / SM3_BLOCK_SIZE); return remain; } static inline int sm3_base_do_finup(struct shash_desc *desc, const u8 *src, unsigned int len, sm3_block_fn *block_fn) { unsigned int bit_offset = SM3_BLOCK_SIZE / 8 - 1; struct sm3_state *sctx = shash_desc_ctx(desc); union { __be64 b64[SM3_BLOCK_SIZE / 4]; u8 u8[SM3_BLOCK_SIZE * 2]; } block = {}; if (len >= SM3_BLOCK_SIZE) { int remain; remain = sm3_base_do_update_blocks(desc, src, len, block_fn); src += len - remain; len = remain; } if (len >= bit_offset * 8) bit_offset += SM3_BLOCK_SIZE / 8; memcpy(&block, src, len); block.u8[len] = 0x80; sctx->count += len; block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SM3_BLOCK_SIZE); memzero_explicit(&block, sizeof(block)); return 0; } static inline int sm3_base_finish(struct shash_desc *desc, u8 *out) { struct sm3_state *sctx = shash_desc_ctx(desc); __be32 *digest = (__be32 *)out; int i; for (i = 0; i < SM3_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); return 0; } #endif /* _CRYPTO_SM3_BASE_H */
3 3 3 3 3 3 3 3 14 15 15 15 15 15 15 15 15 15 15 7 6 16 16 16 1 1 21 21 19 20 1 21 20 20 20 20 20 21 6 21 22 21 21 15 16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 4 4 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 21 21 22 1 1 1 1 1 1 3 3 3 3 3 3 16 16 16 16 15 15 15 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2017 Intel Deutschland GmbH * Copyright (C) 2019, 2022-2025 Intel Corporation */ #include <linux/kernel.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/slab.h> #include "rate.h" #include "ieee80211_i.h" #include "debugfs.h" struct rate_control_alg { struct list_head list; const struct rate_control_ops *ops; }; static LIST_HEAD(rate_ctrl_algs); static DEFINE_MUTEX(rate_ctrl_mutex); static char *ieee80211_default_rc_algo = CONFIG_MAC80211_RC_DEFAULT; module_param(ieee80211_default_rc_algo, charp, 0644); MODULE_PARM_DESC(ieee80211_default_rc_algo, "Default rate control algorithm for mac80211 to use"); void rate_control_rate_init(struct link_sta_info *link_sta) { struct sta_info *sta = link_sta->sta; struct ieee80211_local *local = sta->sdata->local; struct rate_control_ref *ref = sta->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; ieee80211_sta_init_nss(link_sta); if (!ref) return; /* SW rate control isn't supported with MLO right now */ if (WARN_ON(ieee80211_vif_is_mld(&sta->sdata->vif))) return; rcu_read_lock(); chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return; } sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; /* TODO: check for minstrel_s1g ? */ if (sband->band == NL80211_BAND_S1GHZ) { ieee80211_s1g_sta_rate_init(sta); rcu_read_unlock(); return; } spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista, priv_sta); spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } void rate_control_rate_init_all_links(struct sta_info *sta) { int link_id; for (link_id = 0; link_id < ARRAY_SIZE(sta->link); link_id++) { struct link_sta_info *link_sta; link_sta = sdata_dereference(sta->link[link_id], sta->sdata); if (!link_sta) continue; rate_control_rate_init(link_sta); } } void rate_control_tx_status(struct ieee80211_local *local, struct ieee80211_tx_status *st) { struct rate_control_ref *ref = local->rate_ctrl; struct sta_info *sta = container_of(st->sta, struct sta_info, sta); void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_supported_band *sband; if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) return; if (st->info->band >= NUM_NL80211_BANDS) return; sband = local->hw.wiphy->bands[st->info->band]; spin_lock_bh(&sta->rate_ctrl_lock); if (ref->ops->tx_status_ext) ref->ops->tx_status_ext(ref->priv, sband, priv_sta, st); else if (st->skb) ref->ops->tx_status(ref->priv, sband, st->sta, priv_sta, st->skb); else WARN_ON_ONCE(1); spin_unlock_bh(&sta->rate_ctrl_lock); } void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, struct link_sta_info *link_sta, u32 changed) { struct rate_control_ref *ref = local->rate_ctrl; struct sta_info *sta = link_sta->sta; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_chanctx_conf *chanctx_conf; if (ref && ref->ops->rate_update) { rcu_read_lock(); chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return; } spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def, ista, priv_sta, changed); spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); } if (sta->uploaded) drv_link_sta_rc_update(local, sta->sdata, link_sta->pub, changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) { struct rate_control_alg *alg; if (!ops->name) return -EINVAL; mutex_lock(&rate_ctrl_mutex); list_for_each_entry(alg, &rate_ctrl_algs, list) { if (!strcmp(alg->ops->name, ops->name)) { /* don't register an algorithm twice */ WARN_ON(1); mutex_unlock(&rate_ctrl_mutex); return -EALREADY; } } alg = kzalloc(sizeof(*alg), GFP_KERNEL); if (alg == NULL) { mutex_unlock(&rate_ctrl_mutex); return -ENOMEM; } alg->ops = ops; list_add_tail(&alg->list, &rate_ctrl_algs); mutex_unlock(&rate_ctrl_mutex); return 0; } EXPORT_SYMBOL(ieee80211_rate_control_register); void ieee80211_rate_control_unregister(const struct rate_control_ops *ops) { struct rate_control_alg *alg; mutex_lock(&rate_ctrl_mutex); list_for_each_entry(alg, &rate_ctrl_algs, list) { if (alg->ops == ops) { list_del(&alg->list); kfree(alg); break; } } mutex_unlock(&rate_ctrl_mutex); } EXPORT_SYMBOL(ieee80211_rate_control_unregister); static const struct rate_control_ops * ieee80211_try_rate_control_ops_get(const char *name) { struct rate_control_alg *alg; const struct rate_control_ops *ops = NULL; if (!name) return NULL; mutex_lock(&rate_ctrl_mutex); list_for_each_entry(alg, &rate_ctrl_algs, list) { if (!strcmp(alg->ops->name, name)) { ops = alg->ops; break; } } mutex_unlock(&rate_ctrl_mutex); return ops; } /* Get the rate control algorithm. */ static const struct rate_control_ops * ieee80211_rate_control_ops_get(const char *name) { const struct rate_control_ops *ops; const char *alg_name; kernel_param_lock(THIS_MODULE); if (!name) alg_name = ieee80211_default_rc_algo; else alg_name = name; ops = ieee80211_try_rate_control_ops_get(alg_name); if (!ops && name) /* try default if specific alg requested but not found */ ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo); /* Note: check for > 0 is intentional to avoid clang warning */ if (!ops && (strlen(CONFIG_MAC80211_RC_DEFAULT) > 0)) /* try built-in one if specific alg requested but not found */ ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); kernel_param_unlock(THIS_MODULE); return ops; } #ifdef CONFIG_MAC80211_DEBUGFS static ssize_t rcname_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct rate_control_ref *ref = file->private_data; int len = strlen(ref->ops->name); return simple_read_from_buffer(userbuf, count, ppos, ref->ops->name, len); } const struct debugfs_short_fops rcname_ops = { .read = rcname_read, .llseek = default_llseek, }; #endif static struct rate_control_ref * rate_control_alloc(const char *name, struct ieee80211_local *local) { struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); if (!ref) return NULL; ref->ops = ieee80211_rate_control_ops_get(name); if (!ref->ops) goto free; ref->priv = ref->ops->alloc(&local->hw); if (!ref->priv) goto free; return ref; free: kfree(ref); return NULL; } static void rate_control_free(struct ieee80211_local *local, struct rate_control_ref *ctrl_ref) { ctrl_ref->ops->free(ctrl_ref->priv); #ifdef CONFIG_MAC80211_DEBUGFS debugfs_remove_recursive(local->debugfs.rcdir); local->debugfs.rcdir = NULL; #endif kfree(ctrl_ref); } void ieee80211_check_rate_mask(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; u32 user_mask, basic_rates = link->conf->basic_rates; enum nl80211_band band; if (WARN_ON(!link->conf->chanreq.oper.chan)) return; band = link->conf->chanreq.oper.chan->band; if (band == NL80211_BAND_S1GHZ) { /* TODO */ return; } if (WARN_ON_ONCE(!basic_rates)) return; user_mask = sdata->rc_rateidx_mask[band]; sband = local->hw.wiphy->bands[band]; if (user_mask & basic_rates) return; sdata_dbg(sdata, "no overlap between basic rates (0x%x) and user mask (0x%x on band %d) - clearing the latter", basic_rates, user_mask, band); sdata->rc_rateidx_mask[band] = (1 << sband->n_bitrates) - 1; } static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc) { struct sk_buff *skb = txrc->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); return (info->flags & (IEEE80211_TX_CTL_NO_ACK | IEEE80211_TX_CTL_USE_MINRATE)) || !ieee80211_is_tx_data(skb); } static void rc_send_low_basicrate(struct ieee80211_tx_rate *rate, u32 basic_rates, struct ieee80211_supported_band *sband) { u8 i; if (sband->band == NL80211_BAND_S1GHZ) { /* TODO */ rate->flags |= IEEE80211_TX_RC_S1G_MCS; rate->idx = 0; return; } if (basic_rates == 0) return; /* assume basic rates unknown and accept rate */ if (rate->idx < 0) return; if (basic_rates & (1 << rate->idx)) return; /* selected rate is a basic rate */ for (i = rate->idx + 1; i <= sband->n_bitrates; i++) { if (basic_rates & (1 << i)) { rate->idx = i; return; } } /* could not find a basic rate; use original selection */ } static void __rate_control_send_low(struct ieee80211_hw *hw, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, struct ieee80211_tx_info *info, u32 rate_mask) { u32 rate_flags = 0; int i; if (sband->band == NL80211_BAND_S1GHZ) { info->control.rates[0].flags |= IEEE80211_TX_RC_S1G_MCS; info->control.rates[0].idx = 0; return; } if ((sband->band == NL80211_BAND_2GHZ) && (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) rate_flags |= IEEE80211_RATE_ERP_G; info->control.rates[0].idx = 0; for (i = 0; i < sband->n_bitrates; i++) { if (!(rate_mask & BIT(i))) continue; if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; if (!rate_supported(sta, sband->band, i)) continue; info->control.rates[0].idx = i; break; } WARN_ONCE(i == sband->n_bitrates, "no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n", sta ? sta->addr : NULL, sta ? sta->deflink.supp_rates[sband->band] : -1, sband->band, rate_mask, rate_flags); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : hw->max_rate_tries; info->control.skip_table = 1; } static bool rate_control_send_low(struct ieee80211_sta *pubsta, struct ieee80211_tx_rate_control *txrc) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); struct ieee80211_supported_band *sband = txrc->sband; struct sta_info *sta; int mcast_rate; bool use_basicrate = false; if (!sband) return false; if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) { __rate_control_send_low(txrc->hw, sband, pubsta, info, txrc->rate_idx_mask); if (!pubsta && txrc->bss) { mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; if (mcast_rate > 0) { info->control.rates[0].idx = mcast_rate - 1; return true; } use_basicrate = true; } else if (pubsta) { sta = container_of(pubsta, struct sta_info, sta); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) use_basicrate = true; } if (use_basicrate) rc_send_low_basicrate(&info->control.rates[0], txrc->bss_conf->basic_rates, sband); return true; } return false; } static bool rate_idx_match_legacy_mask(s8 *rate_idx, int n_bitrates, u32 mask) { int j; /* See whether the selected rate or anything below it is allowed. */ for (j = *rate_idx; j >= 0; j--) { if (mask & (1 << j)) { /* Okay, found a suitable rate. Use it. */ *rate_idx = j; return true; } } /* Try to find a higher rate that would be allowed */ for (j = *rate_idx + 1; j < n_bitrates; j++) { if (mask & (1 << j)) { /* Okay, found a suitable rate. Use it. */ *rate_idx = j; return true; } } return false; } static bool rate_idx_match_mcs_mask(s8 *rate_idx, u8 *mcs_mask) { int i, j; int ridx, rbit; ridx = *rate_idx / 8; rbit = *rate_idx % 8; /* sanity check */ if (ridx < 0 || ridx >= IEEE80211_HT_MCS_MASK_LEN) return false; /* See whether the selected rate or anything below it is allowed. */ for (i = ridx; i >= 0; i--) { for (j = rbit; j >= 0; j--) if (mcs_mask[i] & BIT(j)) { *rate_idx = i * 8 + j; return true; } rbit = 7; } /* Try to find a higher rate that would be allowed */ ridx = (*rate_idx + 1) / 8; rbit = (*rate_idx + 1) % 8; for (i = ridx; i < IEEE80211_HT_MCS_MASK_LEN; i++) { for (j = rbit; j < 8; j++) if (mcs_mask[i] & BIT(j)) { *rate_idx = i * 8 + j; return true; } rbit = 0; } return false; } static bool rate_idx_match_vht_mcs_mask(s8 *rate_idx, u16 *vht_mask) { int i, j; int ridx, rbit; ridx = *rate_idx >> 4; rbit = *rate_idx & 0xf; if (ridx < 0 || ridx >= NL80211_VHT_NSS_MAX) return false; /* See whether the selected rate or anything below it is allowed. */ for (i = ridx; i >= 0; i--) { for (j = rbit; j >= 0; j--) { if (vht_mask[i] & BIT(j)) { *rate_idx = (i << 4) | j; return true; } } rbit = 15; } /* Try to find a higher rate that would be allowed */ ridx = (*rate_idx + 1) >> 4; rbit = (*rate_idx + 1) & 0xf; for (i = ridx; i < NL80211_VHT_NSS_MAX; i++) { for (j = rbit; j < 16; j++) { if (vht_mask[i] & BIT(j)) { *rate_idx = (i << 4) | j; return true; } } rbit = 0; } return false; } static void rate_idx_match_mask(s8 *rate_idx, u16 *rate_flags, struct ieee80211_supported_band *sband, enum nl80211_chan_width chan_width, u32 mask, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN], u16 vht_mask[NL80211_VHT_NSS_MAX]) { if (*rate_flags & IEEE80211_TX_RC_VHT_MCS) { /* handle VHT rates */ if (rate_idx_match_vht_mcs_mask(rate_idx, vht_mask)) return; *rate_idx = 0; /* keep protection flags */ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS | IEEE80211_TX_RC_USE_CTS_PROTECT | IEEE80211_TX_RC_USE_SHORT_PREAMBLE); *rate_flags |= IEEE80211_TX_RC_MCS; if (chan_width == NL80211_CHAN_WIDTH_40) *rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_idx_match_mcs_mask(rate_idx, mcs_mask)) return; /* also try the legacy rates. */ *rate_flags &= ~(IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_40_MHZ_WIDTH); if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates, mask)) return; } else if (*rate_flags & IEEE80211_TX_RC_MCS) { /* handle HT rates */ if (rate_idx_match_mcs_mask(rate_idx, mcs_mask)) return; /* also try the legacy rates. */ *rate_idx = 0; /* keep protection flags */ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS | IEEE80211_TX_RC_USE_CTS_PROTECT | IEEE80211_TX_RC_USE_SHORT_PREAMBLE); if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates, mask)) return; } else { /* handle legacy rates */ if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates, mask)) return; /* if HT BSS, and we handle a data frame, also try HT rates */ switch (chan_width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: return; default: break; } *rate_idx = 0; /* keep protection flags */ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS | IEEE80211_TX_RC_USE_CTS_PROTECT | IEEE80211_TX_RC_USE_SHORT_PREAMBLE); *rate_flags |= IEEE80211_TX_RC_MCS; if (chan_width == NL80211_CHAN_WIDTH_40) *rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_idx_match_mcs_mask(rate_idx, mcs_mask)) return; } /* * Uh.. No suitable rate exists. This should not really happen with * sane TX rate mask configurations. However, should someone manage to * configure supported rates and TX rate mask in incompatible way, * allow the frame to be transmitted with whatever the rate control * selected. */ } static void rate_fixup_ratelist(struct ieee80211_vif *vif, struct ieee80211_supported_band *sband, struct ieee80211_tx_info *info, struct ieee80211_tx_rate *rates, int max_rates) { struct ieee80211_rate *rate; bool inval = false; int i; /* * Set up the RTS/CTS rate as the fastest basic rate * that is not faster than the data rate unless there * is no basic rate slower than the data rate, in which * case we pick the slowest basic rate * * XXX: Should this check all retry rates? */ if (!(rates[0].flags & (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS))) { u32 basic_rates = vif->bss_conf.basic_rates; s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; rate = &sband->bitrates[rates[0].idx]; for (i = 0; i < sband->n_bitrates; i++) { /* must be a basic rate */ if (!(basic_rates & BIT(i))) continue; /* must not be faster than the data rate */ if (sband->bitrates[i].bitrate > rate->bitrate) continue; /* maximum */ if (sband->bitrates[baserate].bitrate < sband->bitrates[i].bitrate) baserate = i; } info->control.rts_cts_rate_idx = baserate; } for (i = 0; i < max_rates; i++) { /* * make sure there's no valid rate following * an invalid one, just in case drivers don't * take the API seriously to stop at -1. */ if (inval) { rates[i].idx = -1; continue; } if (rates[i].idx < 0) { inval = true; continue; } /* * For now assume MCS is already set up correctly, this * needs to be fixed. */ if (rates[i].flags & IEEE80211_TX_RC_MCS) { WARN_ON(rates[i].idx > 76); if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && info->control.use_cts_prot) rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; continue; } if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); continue; } /* set up RTS protection if desired */ if (info->control.use_rts) { rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS; info->control.use_cts_prot = false; } /* RC is busted */ if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { rates[i].idx = -1; continue; } rate = &sband->bitrates[rates[i].idx]; /* set up short preamble */ if (info->control.short_preamble && rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; /* set up G protection */ if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && info->control.use_cts_prot && rate->flags & IEEE80211_RATE_ERP_G) rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; } } static void rate_control_fill_sta_table(struct ieee80211_sta *sta, struct ieee80211_tx_info *info, struct ieee80211_tx_rate *rates, int max_rates) { struct ieee80211_sta_rates *ratetbl = NULL; int i; if (sta && !info->control.skip_table) ratetbl = rcu_dereference(sta->rates); /* Fill remaining rate slots with data from the sta rate table. */ max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE); for (i = 0; i < max_rates; i++) { if (i < ARRAY_SIZE(info->control.rates) && info->control.rates[i].idx >= 0 && info->control.rates[i].count) { if (rates != info->control.rates) rates[i] = info->control.rates[i]; } else if (ratetbl) { rates[i].idx = ratetbl->rate[i].idx; rates[i].flags = ratetbl->rate[i].flags; if (info->control.use_rts) rates[i].count = ratetbl->rate[i].count_rts; else if (info->control.use_cts_prot) rates[i].count = ratetbl->rate[i].count_cts; else rates[i].count = ratetbl->rate[i].count; } else { rates[i].idx = -1; rates[i].count = 0; } if (rates[i].idx < 0 || !rates[i].count) break; } } static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, u32 *mask, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN], u16 vht_mask[NL80211_VHT_NSS_MAX]) { u32 i; *mask = sdata->rc_rateidx_mask[sband->band]; if (*mask == (1 << sband->n_bitrates) - 1 && !sdata->rc_has_mcs_mask[sband->band] && !sdata->rc_has_vht_mcs_mask[sband->band]) return false; if (sdata->rc_has_mcs_mask[sband->band]) memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[sband->band], IEEE80211_HT_MCS_MASK_LEN); else memset(mcs_mask, 0xff, IEEE80211_HT_MCS_MASK_LEN); if (sdata->rc_has_vht_mcs_mask[sband->band]) memcpy(vht_mask, sdata->rc_rateidx_vht_mcs_mask[sband->band], sizeof(u16) * NL80211_VHT_NSS_MAX); else memset(vht_mask, 0xff, sizeof(u16) * NL80211_VHT_NSS_MAX); if (sta) { __le16 sta_vht_cap; u16 sta_vht_mask[NL80211_VHT_NSS_MAX]; /* Filter out rates that the STA does not support */ *mask &= sta->deflink.supp_rates[sband->band]; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) mcs_mask[i] &= sta->deflink.ht_cap.mcs.rx_mask[i]; sta_vht_cap = sta->deflink.vht_cap.vht_mcs.rx_mcs_map; ieee80211_get_vht_mask_from_cap(sta_vht_cap, sta_vht_mask); for (i = 0; i < NL80211_VHT_NSS_MAX; i++) vht_mask[i] &= sta_vht_mask[i]; } return true; } static void rate_control_apply_mask_ratetbl(struct sta_info *sta, struct ieee80211_supported_band *sband, struct ieee80211_sta_rates *rates) { int i; u32 mask; u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mask[NL80211_VHT_NSS_MAX]; enum nl80211_chan_width chan_width; if (!rate_control_cap_mask(sta->sdata, sband, &sta->sta, &mask, mcs_mask, vht_mask)) return; chan_width = sta->sdata->vif.bss_conf.chanreq.oper.width; for (i = 0; i < IEEE80211_TX_RATE_TABLE_SIZE; i++) { if (rates->rate[i].idx < 0) break; rate_idx_match_mask(&rates->rate[i].idx, &rates->rate[i].flags, sband, chan_width, mask, mcs_mask, vht_mask); } } static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, struct ieee80211_supported_band *sband, struct ieee80211_tx_rate *rates, int max_rates) { enum nl80211_chan_width chan_width; u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; u32 mask; u16 rate_flags, vht_mask[NL80211_VHT_NSS_MAX]; int i; /* * Try to enforce the rateidx mask the user wanted. skip this if the * default mask (allow all rates) is used to save some processing for * the common case. */ if (!rate_control_cap_mask(sdata, sband, sta, &mask, mcs_mask, vht_mask)) return; /* * Make sure the rate index selected for each TX rate is * included in the configured mask and change the rate indexes * if needed. */ chan_width = sdata->vif.bss_conf.chanreq.oper.width; for (i = 0; i < max_rates; i++) { /* Skip invalid rates */ if (rates[i].idx < 0) break; rate_flags = rates[i].flags; rate_idx_match_mask(&rates[i].idx, &rate_flags, sband, chan_width, mask, mcs_mask, vht_mask); rates[i].flags = rate_flags; } } void ieee80211_get_tx_rates(struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct sk_buff *skb, struct ieee80211_tx_rate *dest, int max_rates) { struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_supported_band *sband; u32 mask = ~0; rate_control_fill_sta_table(sta, info, dest, max_rates); if (!vif) return; sdata = vif_to_sdata(vif); if (info->band >= NUM_NL80211_BANDS) return; sband = sdata->local->hw.wiphy->bands[info->band]; if (ieee80211_is_tx_data(skb)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); if (!(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) mask = sdata->rc_rateidx_mask[info->band]; if (dest[0].idx < 0) __rate_control_send_low(&sdata->local->hw, sband, sta, info, mask); if (sta) rate_fixup_ratelist(vif, sband, info, dest, max_rates); } EXPORT_SYMBOL(ieee80211_get_tx_rates); void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) { struct rate_control_ref *ref = sdata->local->rate_ctrl; void *priv_sta = NULL; struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { info->control.rates[i].idx = -1; info->control.rates[i].flags = 0; info->control.rates[i].count = 0; } if (rate_control_send_low(sta ? &sta->sta : NULL, txrc)) return; if (ieee80211_hw_check(&sdata->local->hw, HAS_RATE_CONTROL)) return; if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) { ista = &sta->sta; priv_sta = sta->rate_ctrl_priv; } if (ista) { spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); spin_unlock_bh(&sta->rate_ctrl_lock); } else { rate_control_send_low(NULL, txrc); } if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_RC_TABLE)) return; ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb, info->control.rates, ARRAY_SIZE(info->control.rates)); } int rate_control_set_rates(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, struct ieee80211_sta_rates *rates) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sta_rates *old; struct ieee80211_supported_band *sband; sband = ieee80211_get_sband(sta->sdata); if (!sband) return -EINVAL; rate_control_apply_mask_ratetbl(sta, sband, rates); /* * mac80211 guarantees that this function will not be called * concurrently, so the following RCU access is safe, even without * extra locking. This can not be checked easily, so we just set * the condition to true. */ old = rcu_dereference_protected(pubsta->rates, true); rcu_assign_pointer(pubsta->rates, rates); if (old) kfree_rcu(old, rcu_head); if (sta->uploaded) drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); return 0; } EXPORT_SYMBOL(rate_control_set_rates); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name) { struct rate_control_ref *ref; ASSERT_RTNL(); if (local->open_count) return -EBUSY; if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { if (WARN_ON(!local->ops->set_rts_threshold)) return -EINVAL; return 0; } ref = rate_control_alloc(name, local); if (!ref) { wiphy_warn(local->hw.wiphy, "Failed to select rate control algorithm\n"); return -ENOENT; } WARN_ON(local->rate_ctrl); local->rate_ctrl = ref; wiphy_debug(local->hw.wiphy, "Selected rate control algorithm '%s'\n", ref->ops->name); return 0; } void rate_control_deinitialize(struct ieee80211_local *local) { struct rate_control_ref *ref; ref = local->rate_ctrl; if (!ref) return; local->rate_ctrl = NULL; rate_control_free(local, ref); }
29 29 31 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 // SPDX-License-Identifier: GPL-2.0-only /* * Linux VM pressure * * Copyright 2012 Linaro Ltd. * Anton Vorontsov <anton.vorontsov@linaro.org> * * Based on ideas from Andrew Morton, David Rientjes, KOSAKI Motohiro, * Leonid Moiseichuk, Mel Gorman, Minchan Kim and Pekka Enberg. */ #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/log2.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/vmstat.h> #include <linux/eventfd.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/printk.h> #include <linux/vmpressure.h> /* * The window size (vmpressure_win) is the number of scanned pages before * we try to analyze scanned/reclaimed ratio. So the window is used as a * rate-limit tunable for the "low" level notification, and also for * averaging the ratio for medium/critical levels. Using small window * sizes can cause lot of false positives, but too big window size will * delay the notifications. * * As the vmscan reclaimer logic works with chunks which are multiple of * SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well. * * TODO: Make the window size depend on machine size, as we do for vmstat * thresholds. Currently we set it to 512 pages (2MB for 4KB pages). */ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; /* * These thresholds are used when we account memory pressure through * scanned/reclaimed ratio. The current values were chosen empirically. In * essence, they are percents: the higher the value, the more number * unsuccessful reclaims there were. */ static const unsigned int vmpressure_level_med = 60; static const unsigned int vmpressure_level_critical = 95; /* * When there are too little pages left to scan, vmpressure() may miss the * critical pressure as number of pages will be less than "window size". * However, in that case the vmscan priority will raise fast as the * reclaimer will try to scan LRUs more deeply. * * The vmscan logic considers these special priorities: * * prio == DEF_PRIORITY (12): reclaimer starts with that value * prio <= DEF_PRIORITY - 2 : kswapd becomes somewhat overwhelmed * prio == 0 : close to OOM, kernel scans every page in an lru * * Any value in this range is acceptable for this tunable (i.e. from 12 to * 0). Current value for the vmpressure_level_critical_prio is chosen * empirically, but the number, in essence, means that we consider * critical level when scanning depth is ~10% of the lru size (vmscan * scans 'lru_size >> prio' pages, so it is actually 12.5%, or one * eights). */ static const unsigned int vmpressure_level_critical_prio = ilog2(100 / 10); static struct vmpressure *work_to_vmpressure(struct work_struct *work) { return container_of(work, struct vmpressure, work); } static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) { struct mem_cgroup *memcg = vmpressure_to_memcg(vmpr); memcg = parent_mem_cgroup(memcg); if (!memcg) return NULL; return memcg_to_vmpressure(memcg); } enum vmpressure_levels { VMPRESSURE_LOW = 0, VMPRESSURE_MEDIUM, VMPRESSURE_CRITICAL, VMPRESSURE_NUM_LEVELS, }; enum vmpressure_modes { VMPRESSURE_NO_PASSTHROUGH = 0, VMPRESSURE_HIERARCHY, VMPRESSURE_LOCAL, VMPRESSURE_NUM_MODES, }; static const char * const vmpressure_str_levels[] = { [VMPRESSURE_LOW] = "low", [VMPRESSURE_MEDIUM] = "medium", [VMPRESSURE_CRITICAL] = "critical", }; static const char * const vmpressure_str_modes[] = { [VMPRESSURE_NO_PASSTHROUGH] = "default", [VMPRESSURE_HIERARCHY] = "hierarchy", [VMPRESSURE_LOCAL] = "local", }; static enum vmpressure_levels vmpressure_level(unsigned long pressure) { if (pressure >= vmpressure_level_critical) return VMPRESSURE_CRITICAL; else if (pressure >= vmpressure_level_med) return VMPRESSURE_MEDIUM; return VMPRESSURE_LOW; } static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, unsigned long reclaimed) { unsigned long scale = scanned + reclaimed; unsigned long pressure = 0; /* * reclaimed can be greater than scanned for things such as reclaimed * slab pages. shrink_node() just adds reclaimed pages without a * related increment to scanned pages. */ if (reclaimed >= scanned) goto out; /* * We calculate the ratio (in percents) of how many pages were * scanned vs. reclaimed in a given time frame (window). Note that * time is in VM reclaimer's "ticks", i.e. number of pages * scanned. This makes it possible to set desired reaction time * and serves as a ratelimit. */ pressure = scale - (reclaimed * scale / scanned); pressure = pressure * 100 / scale; out: pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, scanned, reclaimed); return vmpressure_level(pressure); } struct vmpressure_event { struct eventfd_ctx *efd; enum vmpressure_levels level; enum vmpressure_modes mode; struct list_head node; }; static bool vmpressure_event(struct vmpressure *vmpr, const enum vmpressure_levels level, bool ancestor, bool signalled) { struct vmpressure_event *ev; bool ret = false; mutex_lock(&vmpr->events_lock); list_for_each_entry(ev, &vmpr->events, node) { if (ancestor && ev->mode == VMPRESSURE_LOCAL) continue; if (signalled && ev->mode == VMPRESSURE_NO_PASSTHROUGH) continue; if (level < ev->level) continue; eventfd_signal(ev->efd); ret = true; } mutex_unlock(&vmpr->events_lock); return ret; } static void vmpressure_work_fn(struct work_struct *work) { struct vmpressure *vmpr = work_to_vmpressure(work); unsigned long scanned; unsigned long reclaimed; enum vmpressure_levels level; bool ancestor = false; bool signalled = false; spin_lock(&vmpr->sr_lock); /* * Several contexts might be calling vmpressure(), so it is * possible that the work was rescheduled again before the old * work context cleared the counters. In that case we will run * just after the old work returns, but then scanned might be zero * here. No need for any locks here since we don't care if * vmpr->reclaimed is in sync. */ scanned = vmpr->tree_scanned; if (!scanned) { spin_unlock(&vmpr->sr_lock); return; } reclaimed = vmpr->tree_reclaimed; vmpr->tree_scanned = 0; vmpr->tree_reclaimed = 0; spin_unlock(&vmpr->sr_lock); level = vmpressure_calc_level(scanned, reclaimed); do { if (vmpressure_event(vmpr, level, ancestor, signalled)) signalled = true; ancestor = true; } while ((vmpr = vmpressure_parent(vmpr))); } /** * vmpressure() - Account memory pressure through scanned/reclaimed ratio * @gfp: reclaimer's gfp mask * @memcg: cgroup memory controller handle * @tree: legacy subtree mode * @scanned: number of pages scanned * @reclaimed: number of pages reclaimed * * This function should be called from the vmscan reclaim path to account * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw * pressure index is then further refined and averaged over time. * * If @tree is set, vmpressure is in traditional userspace reporting * mode: @memcg is considered the pressure root and userspace is * notified of the entire subtree's reclaim efficiency. * * If @tree is not set, reclaim efficiency is recorded for @memcg, and * only in-kernel users are notified. * * This function does not return any value. */ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, unsigned long scanned, unsigned long reclaimed) { struct vmpressure *vmpr; if (mem_cgroup_disabled()) return; /* * The in-kernel users only care about the reclaim efficiency * for this @memcg rather than the whole subtree, and there * isn't and won't be any in-kernel user in a legacy cgroup. */ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !tree) return; vmpr = memcg_to_vmpressure(memcg); /* * Here we only want to account pressure that userland is able to * help us with. For example, suppose that DMA zone is under * pressure; if we notify userland about that kind of pressure, * then it will be mostly a waste as it will trigger unnecessary * freeing of memory by userland (since userland is more likely to * have HIGHMEM/MOVABLE pages instead of the DMA fallback). That * is why we include only movable, highmem and FS/IO pages. * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so * we account it too. */ if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS))) return; /* * If we got here with no pages scanned, then that is an indicator * that reclaimer was unable to find any shrinkable LRUs at the * current scanning depth. But it does not mean that we should * report the critical pressure, yet. If the scanning priority * (scanning depth) goes too high (deep), we will be notified * through vmpressure_prio(). But so far, keep calm. */ if (!scanned) return; if (tree) { spin_lock(&vmpr->sr_lock); scanned = vmpr->tree_scanned += scanned; vmpr->tree_reclaimed += reclaimed; spin_unlock(&vmpr->sr_lock); if (scanned < vmpressure_win) return; schedule_work(&vmpr->work); } else { enum vmpressure_levels level; /* For now, no users for root-level efficiency */ if (!memcg || mem_cgroup_is_root(memcg)) return; spin_lock(&vmpr->sr_lock); scanned = vmpr->scanned += scanned; reclaimed = vmpr->reclaimed += reclaimed; if (scanned < vmpressure_win) { spin_unlock(&vmpr->sr_lock); return; } vmpr->scanned = vmpr->reclaimed = 0; spin_unlock(&vmpr->sr_lock); level = vmpressure_calc_level(scanned, reclaimed); if (level > VMPRESSURE_LOW) { /* * Let the socket buffer allocator know that * we are having trouble reclaiming LRU pages. * * For hysteresis keep the pressure state * asserted for a second in which subsequent * pressure events can occur. */ mem_cgroup_set_socket_pressure(memcg); } } } /** * vmpressure_prio() - Account memory pressure through reclaimer priority level * @gfp: reclaimer's gfp mask * @memcg: cgroup memory controller handle * @prio: reclaimer's priority * * This function should be called from the reclaim path every time when * the vmscan's reclaiming priority (scanning depth) changes. * * This function does not return any value. */ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) { /* * We only use prio for accounting critical level. For more info * see comment for vmpressure_level_critical_prio variable above. */ if (prio > vmpressure_level_critical_prio) return; /* * OK, the prio is below the threshold, updating vmpressure * information before shrinker dives into long shrinking of long * range vmscan. Passing scanned = vmpressure_win, reclaimed = 0 * to the vmpressure() basically means that we signal 'critical' * level. */ vmpressure(gfp, memcg, true, vmpressure_win, 0); } #define MAX_VMPRESSURE_ARGS_LEN (strlen("critical") + strlen("hierarchy") + 2) /** * vmpressure_register_event() - Bind vmpressure notifications to an eventfd * @memcg: memcg that is interested in vmpressure notifications * @eventfd: eventfd context to link notifications with * @args: event arguments (pressure level threshold, optional mode) * * This function associates eventfd context with the vmpressure * infrastructure, so that the notifications will be delivered to the * @eventfd. The @args parameter is a comma-delimited string that denotes a * pressure level threshold (one of vmpressure_str_levels, i.e. "low", "medium", * or "critical") and an optional mode (one of vmpressure_str_modes, i.e. * "hierarchy" or "local"). * * To be used as memcg event method. * * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could * not be parsed. */ int vmpressure_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args) { struct vmpressure *vmpr = memcg_to_vmpressure(memcg); struct vmpressure_event *ev; enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH; enum vmpressure_levels level; char *spec, *spec_orig; char *token; int ret = 0; spec_orig = spec = kstrndup(args, MAX_VMPRESSURE_ARGS_LEN, GFP_KERNEL); if (!spec) return -ENOMEM; /* Find required level */ token = strsep(&spec, ","); ret = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token); if (ret < 0) goto out; level = ret; /* Find optional mode */ token = strsep(&spec, ","); if (token) { ret = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token); if (ret < 0) goto out; mode = ret; } ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) { ret = -ENOMEM; goto out; } ev->efd = eventfd; ev->level = level; ev->mode = mode; mutex_lock(&vmpr->events_lock); list_add(&ev->node, &vmpr->events); mutex_unlock(&vmpr->events_lock); ret = 0; out: kfree(spec_orig); return ret; } /** * vmpressure_unregister_event() - Unbind eventfd from vmpressure * @memcg: memcg handle * @eventfd: eventfd context that was used to link vmpressure with the @cg * * This function does internal manipulations to detach the @eventfd from * the vmpressure notifications, and then frees internal resources * associated with the @eventfd (but the @eventfd itself is not freed). * * To be used as memcg event method. */ void vmpressure_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd) { struct vmpressure *vmpr = memcg_to_vmpressure(memcg); struct vmpressure_event *ev; mutex_lock(&vmpr->events_lock); list_for_each_entry(ev, &vmpr->events, node) { if (ev->efd != eventfd) continue; list_del(&ev->node); kfree(ev); break; } mutex_unlock(&vmpr->events_lock); } /** * vmpressure_init() - Initialize vmpressure control structure * @vmpr: Structure to be initialized * * This function should be called on every allocated vmpressure structure * before any usage. */ void vmpressure_init(struct vmpressure *vmpr) { spin_lock_init(&vmpr->sr_lock); mutex_init(&vmpr->events_lock); INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); } /** * vmpressure_cleanup() - shuts down vmpressure control structure * @vmpr: Structure to be cleaned up * * This function should be called before the structure in which it is * embedded is cleaned up. */ void vmpressure_cleanup(struct vmpressure *vmpr) { /* * Make sure there is no pending work before eventfd infrastructure * goes away. */ flush_work(&vmpr->work); }
6 3 3 3 3 32 3 3 3 3 32 3 3 3 3 3 3 3 3 1 1 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 26 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 3 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> * Copyright (C) 2019-2022 Intel Corporation */ #include <linux/netdevice.h> #include <linux/types.h> #include <linux/skbuff.h> #include <linux/debugfs.h> #include <linux/random.h> #include <linux/moduleparam.h> #include <linux/ieee80211.h> #include <linux/minmax.h> #include <net/mac80211.h> #include "rate.h" #include "sta_info.h" #include "rc80211_minstrel_ht.h" #define AVG_AMPDU_SIZE 16 #define AVG_PKT_SIZE 1200 /* Number of bits for an average sized packet */ #define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3) /* Number of symbols for a packet with (bps) bits per symbol */ #define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps)) /* Transmission time (nanoseconds) for a packet containing (syms) symbols */ #define MCS_SYMBOL_TIME(sgi, syms) \ (sgi ? \ ((syms) * 18000 + 4000) / 5 : /* syms * 3.6 us */ \ ((syms) * 1000) << 2 /* syms * 4 us */ \ ) /* Transmit duration for the raw data part of an average sized packet */ #define MCS_DURATION(streams, sgi, bps) \ (MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) / AVG_AMPDU_SIZE) #define BW_20 0 #define BW_40 1 #define BW_80 2 /* * Define group sort order: HT40 -> SGI -> #streams */ #define GROUP_IDX(_streams, _sgi, _ht40) \ MINSTREL_HT_GROUP_0 + \ MINSTREL_MAX_STREAMS * 2 * _ht40 + \ MINSTREL_MAX_STREAMS * _sgi + \ _streams - 1 #define _MAX(a, b) (((a)>(b))?(a):(b)) #define GROUP_SHIFT(duration) \ _MAX(0, 16 - __builtin_clz(duration)) /* MCS rate information for an MCS group */ #define __MCS_GROUP(_streams, _sgi, _ht40, _s) \ [GROUP_IDX(_streams, _sgi, _ht40)] = { \ .streams = _streams, \ .shift = _s, \ .bw = _ht40, \ .flags = \ IEEE80211_TX_RC_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ (_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ .duration = { \ MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) >> _s \ } \ } #define MCS_GROUP_SHIFT(_streams, _sgi, _ht40) \ GROUP_SHIFT(MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26)) #define MCS_GROUP(_streams, _sgi, _ht40) \ __MCS_GROUP(_streams, _sgi, _ht40, \ MCS_GROUP_SHIFT(_streams, _sgi, _ht40)) #define VHT_GROUP_IDX(_streams, _sgi, _bw) \ (MINSTREL_VHT_GROUP_0 + \ MINSTREL_MAX_STREAMS * 2 * (_bw) + \ MINSTREL_MAX_STREAMS * (_sgi) + \ (_streams) - 1) #define BW2VBPS(_bw, r3, r2, r1) \ (_bw == BW_80 ? r3 : _bw == BW_40 ? r2 : r1) #define __VHT_GROUP(_streams, _sgi, _bw, _s) \ [VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \ .streams = _streams, \ .shift = _s, \ .bw = _bw, \ .flags = \ IEEE80211_TX_RC_VHT_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ (_bw == BW_80 ? IEEE80211_TX_RC_80_MHZ_WIDTH : \ _bw == BW_40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ .duration = { \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 117, 54, 26)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 234, 108, 52)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 351, 162, 78)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 468, 216, 104)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 702, 324, 156)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 936, 432, 208)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1053, 486, 234)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1170, 540, 260)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1404, 648, 312)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1560, 720, 346)) >> _s \ } \ } #define VHT_GROUP_SHIFT(_streams, _sgi, _bw) \ GROUP_SHIFT(MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 117, 54, 26))) #define VHT_GROUP(_streams, _sgi, _bw) \ __VHT_GROUP(_streams, _sgi, _bw, \ VHT_GROUP_SHIFT(_streams, _sgi, _bw)) #define CCK_DURATION(_bitrate, _short) \ (1000 * (10 /* SIFS */ + \ (_short ? 72 + 24 : 144 + 48) + \ (8 * (AVG_PKT_SIZE + 4) * 10) / (_bitrate))) #define CCK_DURATION_LIST(_short, _s) \ CCK_DURATION(10, _short) >> _s, \ CCK_DURATION(20, _short) >> _s, \ CCK_DURATION(55, _short) >> _s, \ CCK_DURATION(110, _short) >> _s #define __CCK_GROUP(_s) \ [MINSTREL_CCK_GROUP] = { \ .streams = 1, \ .flags = 0, \ .shift = _s, \ .duration = { \ CCK_DURATION_LIST(false, _s), \ CCK_DURATION_LIST(true, _s) \ } \ } #define CCK_GROUP_SHIFT \ GROUP_SHIFT(CCK_DURATION(10, false)) #define CCK_GROUP __CCK_GROUP(CCK_GROUP_SHIFT) #define OFDM_DURATION(_bitrate) \ (1000 * (16 /* SIFS + signal ext */ + \ 16 /* T_PREAMBLE */ + \ 4 /* T_SIGNAL */ + \ 4 * (((16 + 80 * (AVG_PKT_SIZE + 4) + 6) / \ ((_bitrate) * 4))))) #define OFDM_DURATION_LIST(_s) \ OFDM_DURATION(60) >> _s, \ OFDM_DURATION(90) >> _s, \ OFDM_DURATION(120) >> _s, \ OFDM_DURATION(180) >> _s, \ OFDM_DURATION(240) >> _s, \ OFDM_DURATION(360) >> _s, \ OFDM_DURATION(480) >> _s, \ OFDM_DURATION(540) >> _s #define __OFDM_GROUP(_s) \ [MINSTREL_OFDM_GROUP] = { \ .streams = 1, \ .flags = 0, \ .shift = _s, \ .duration = { \ OFDM_DURATION_LIST(_s), \ } \ } #define OFDM_GROUP_SHIFT \ GROUP_SHIFT(OFDM_DURATION(60)) #define OFDM_GROUP __OFDM_GROUP(OFDM_GROUP_SHIFT) static bool minstrel_vht_only = true; module_param(minstrel_vht_only, bool, 0644); MODULE_PARM_DESC(minstrel_vht_only, "Use only VHT rates when VHT is supported by sta."); /* * To enable sufficiently targeted rate sampling, MCS rates are divided into * groups, based on the number of streams and flags (HT40, SGI) that they * use. * * Sortorder has to be fixed for GROUP_IDX macro to be applicable: * BW -> SGI -> #streams */ const struct mcs_group minstrel_mcs_groups[] = { MCS_GROUP(1, 0, BW_20), MCS_GROUP(2, 0, BW_20), MCS_GROUP(3, 0, BW_20), MCS_GROUP(4, 0, BW_20), MCS_GROUP(1, 1, BW_20), MCS_GROUP(2, 1, BW_20), MCS_GROUP(3, 1, BW_20), MCS_GROUP(4, 1, BW_20), MCS_GROUP(1, 0, BW_40), MCS_GROUP(2, 0, BW_40), MCS_GROUP(3, 0, BW_40), MCS_GROUP(4, 0, BW_40), MCS_GROUP(1, 1, BW_40), MCS_GROUP(2, 1, BW_40), MCS_GROUP(3, 1, BW_40), MCS_GROUP(4, 1, BW_40), CCK_GROUP, OFDM_GROUP, VHT_GROUP(1, 0, BW_20), VHT_GROUP(2, 0, BW_20), VHT_GROUP(3, 0, BW_20), VHT_GROUP(4, 0, BW_20), VHT_GROUP(1, 1, BW_20), VHT_GROUP(2, 1, BW_20), VHT_GROUP(3, 1, BW_20), VHT_GROUP(4, 1, BW_20), VHT_GROUP(1, 0, BW_40), VHT_GROUP(2, 0, BW_40), VHT_GROUP(3, 0, BW_40), VHT_GROUP(4, 0, BW_40), VHT_GROUP(1, 1, BW_40), VHT_GROUP(2, 1, BW_40), VHT_GROUP(3, 1, BW_40), VHT_GROUP(4, 1, BW_40), VHT_GROUP(1, 0, BW_80), VHT_GROUP(2, 0, BW_80), VHT_GROUP(3, 0, BW_80), VHT_GROUP(4, 0, BW_80), VHT_GROUP(1, 1, BW_80), VHT_GROUP(2, 1, BW_80), VHT_GROUP(3, 1, BW_80), VHT_GROUP(4, 1, BW_80), }; const s16 minstrel_cck_bitrates[4] = { 10, 20, 55, 110 }; const s16 minstrel_ofdm_bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly; static const u8 minstrel_sample_seq[] = { MINSTREL_SAMPLE_TYPE_INC, MINSTREL_SAMPLE_TYPE_JUMP, MINSTREL_SAMPLE_TYPE_INC, MINSTREL_SAMPLE_TYPE_JUMP, MINSTREL_SAMPLE_TYPE_INC, MINSTREL_SAMPLE_TYPE_SLOW, }; static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi); /* * Some VHT MCSes are invalid (when Ndbps / Nes is not an integer) * e.g for MCS9@20MHzx1Nss: Ndbps=8x52*(5/6) Nes=1 * * Returns the valid mcs map for struct minstrel_mcs_group_data.supported */ static u16 minstrel_get_valid_vht_rates(int bw, int nss, __le16 mcs_map) { u16 mask = 0; if (bw == BW_20) { if (nss != 3 && nss != 6) mask = BIT(9); } else if (bw == BW_80) { if (nss == 3 || nss == 7) mask = BIT(6); else if (nss == 6) mask = BIT(9); } else { WARN_ON(bw != BW_40); } switch ((le16_to_cpu(mcs_map) >> (2 * (nss - 1))) & 3) { case IEEE80211_VHT_MCS_SUPPORT_0_7: mask |= 0x300; break; case IEEE80211_VHT_MCS_SUPPORT_0_8: mask |= 0x200; break; case IEEE80211_VHT_MCS_SUPPORT_0_9: break; default: mask = 0x3ff; } return 0x3ff & ~mask; } static bool minstrel_ht_is_legacy_group(int group) { return group == MINSTREL_CCK_GROUP || group == MINSTREL_OFDM_GROUP; } /* * Look up an MCS group index based on mac80211 rate information */ static int minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) { return GROUP_IDX((rate->idx / 8) + 1, !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } /* * Look up an MCS group index based on new cfg80211 rate_info. */ static int minstrel_ht_ri_get_group_idx(struct rate_info *rate) { return GROUP_IDX((rate->mcs / 8) + 1, !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), !!(rate->bw & RATE_INFO_BW_40)); } static int minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate) { return VHT_GROUP_IDX(ieee80211_rate_get_vht_nss(rate), !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + 2*!!(rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)); } /* * Look up an MCS group index based on new cfg80211 rate_info. */ static int minstrel_vht_ri_get_group_idx(struct rate_info *rate) { return VHT_GROUP_IDX(rate->nss, !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), !!(rate->bw & RATE_INFO_BW_40) + 2*!!(rate->bw & RATE_INFO_BW_80)); } static struct minstrel_rate_stats * minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate) { int group, idx; if (rate->flags & IEEE80211_TX_RC_MCS) { group = minstrel_ht_get_group_idx(rate); idx = rate->idx % 8; goto out; } if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { group = minstrel_vht_get_group_idx(rate); idx = ieee80211_rate_get_vht_mcs(rate); goto out; } group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { if (!(mi->supported[group] & BIT(idx))) continue; if (rate->idx != mp->cck_rates[idx]) continue; /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) idx += 4; goto out; } group = MINSTREL_OFDM_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) if (rate->idx == mp->ofdm_rates[mi->band][idx]) goto out; idx = 0; out: return &mi->groups[group].rates[idx]; } /* * Get the minstrel rate statistics for specified STA and rate info. */ static struct minstrel_rate_stats * minstrel_ht_ri_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_rate_status *rate_status) { int group, idx; struct rate_info *rate = &rate_status->rate_idx; if (rate->flags & RATE_INFO_FLAGS_MCS) { group = minstrel_ht_ri_get_group_idx(rate); idx = rate->mcs % 8; goto out; } if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) { group = minstrel_vht_ri_get_group_idx(rate); idx = rate->mcs; goto out; } group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { if (rate->legacy != minstrel_cck_bitrates[ mp->cck_rates[idx] ]) continue; /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && mi->use_short_preamble) idx += 4; goto out; } group = MINSTREL_OFDM_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) if (rate->legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][idx] ]) goto out; idx = 0; out: return &mi->groups[group].rates[idx]; } static inline struct minstrel_rate_stats * minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) { return &mi->groups[MI_RATE_GROUP(index)].rates[MI_RATE_IDX(index)]; } static inline int minstrel_get_duration(int index) { const struct mcs_group *group = &minstrel_mcs_groups[MI_RATE_GROUP(index)]; unsigned int duration = group->duration[MI_RATE_IDX(index)]; return duration << group->shift; } static unsigned int minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi) { int duration; if (mi->avg_ampdu_len) return MINSTREL_TRUNC(mi->avg_ampdu_len); if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_tp_rate[0]))) return 1; duration = minstrel_get_duration(mi->max_tp_rate[0]); if (duration > 400 * 1000) return 2; if (duration > 250 * 1000) return 4; if (duration > 150 * 1000) return 8; return 16; } /* * Return current throughput based on the average A-MPDU length, taking into * account the expected number of retransmissions and their expected length */ int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, int prob_avg) { unsigned int nsecs = 0, overhead = mi->overhead; unsigned int ampdu_len = 1; /* do not account throughput if success prob is below 10% */ if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; if (minstrel_ht_is_legacy_group(group)) overhead = mi->overhead_legacy; else ampdu_len = minstrel_ht_avg_ampdu_len(mi); nsecs = 1000 * overhead / ampdu_len; nsecs += minstrel_mcs_groups[group].duration[rate] << minstrel_mcs_groups[group].shift; /* * For the throughput calculation, limit the probability value to 90% to * account for collision related packet error rate fluctuation * (prob is scaled - see MINSTREL_FRAC above) */ if (prob_avg > MINSTREL_FRAC(90, 100)) prob_avg = MINSTREL_FRAC(90, 100); return MINSTREL_TRUNC(100 * ((prob_avg * 1000000) / nsecs)); } /* * Find & sort topmost throughput rates * * If multiple rates provide equal throughput the sorting is based on their * current success probability. Higher success probability is preferred among * MCS groups, CCK rates do not provide aggregation and are therefore at last. */ static void minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index, u16 *tp_list) { int cur_group, cur_idx, cur_tp_avg, cur_prob; int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; int j = MAX_THR_RATES; cur_group = MI_RATE_GROUP(index); cur_idx = MI_RATE_IDX(index); cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg; cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob); do { tmp_group = MI_RATE_GROUP(tp_list[j - 1]); tmp_idx = MI_RATE_IDX(tp_list[j - 1]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (cur_tp_avg < tmp_tp_avg || (cur_tp_avg == tmp_tp_avg && cur_prob <= tmp_prob)) break; j--; } while (j > 0); if (j < MAX_THR_RATES - 1) { memmove(&tp_list[j + 1], &tp_list[j], (sizeof(*tp_list) * (MAX_THR_RATES - (j + 1)))); } if (j < MAX_THR_RATES) tp_list[j] = index; } /* * Find and set the topmost probability rate per sta and per group */ static void minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 *dest, u16 index) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; int max_tp_group, max_tp_idx, max_tp_prob; int cur_tp_avg, cur_group, cur_idx; int max_gpr_group, max_gpr_idx; int max_gpr_tp_avg, max_gpr_prob; cur_group = MI_RATE_GROUP(index); cur_idx = MI_RATE_IDX(index); mg = &mi->groups[cur_group]; mrs = &mg->rates[cur_idx]; tmp_group = MI_RATE_GROUP(*dest); tmp_idx = MI_RATE_IDX(*dest); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from * MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */ max_tp_group = MI_RATE_GROUP(mi->max_tp_rate[0]); max_tp_idx = MI_RATE_IDX(mi->max_tp_rate[0]); max_tp_prob = mi->groups[max_tp_group].rates[max_tp_idx].prob_avg; if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(index)) && !minstrel_ht_is_legacy_group(max_tp_group)) return; /* skip rates faster than max tp rate with lower prob */ if (minstrel_get_duration(mi->max_tp_rate[0]) > minstrel_get_duration(index) && mrs->prob_avg < max_tp_prob) return; max_gpr_group = MI_RATE_GROUP(mg->max_group_prob_rate); max_gpr_idx = MI_RATE_IDX(mg->max_group_prob_rate); max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg; if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) { cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, mrs->prob_avg); if (cur_tp_avg > tmp_tp_avg) *dest = index; max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group, max_gpr_idx, max_gpr_prob); if (cur_tp_avg > max_gpr_tp_avg) mg->max_group_prob_rate = index; } else { if (mrs->prob_avg > tmp_prob) *dest = index; if (mrs->prob_avg > max_gpr_prob) mg->max_group_prob_rate = index; } } /* * Assign new rate set per sta and use CCK rates only if the fastest * rate (max_tp_rate[0]) is from CCK group. This prohibits such sorted * rate sets where MCS and CCK rates are mixed, because CCK rates can * not use aggregation. */ static void minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, u16 tmp_mcs_tp_rate[MAX_THR_RATES], u16 tmp_legacy_tp_rate[MAX_THR_RATES]) { unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob; int i; tmp_group = MI_RATE_GROUP(tmp_legacy_tp_rate[0]); tmp_idx = MI_RATE_IDX(tmp_legacy_tp_rate[0]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); tmp_group = MI_RATE_GROUP(tmp_mcs_tp_rate[0]); tmp_idx = MI_RATE_IDX(tmp_mcs_tp_rate[0]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { minstrel_ht_sort_best_tp_rates(mi, tmp_legacy_tp_rate[i], tmp_mcs_tp_rate); } } } /* * Try to increase robustness of max_prob rate by decrease number of * streams if possible. */ static inline void minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; if (!mi->sta->deflink.ht_cap.ht_supported) return; group = MI_RATE_GROUP(mi->max_tp_rate[0]); tmp_max_streams = minstrel_mcs_groups[group].streams; for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { mg = &mi->groups[group]; if (!mi->supported[group] || group == MINSTREL_CCK_GROUP) continue; tmp_idx = MI_RATE_IDX(mg->max_group_prob_rate); tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg; if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) && (minstrel_mcs_groups[group].streams < tmp_max_streams)) { mi->max_prob_rate = mg->max_group_prob_rate; tmp_tp = minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob); } } } static u16 __minstrel_ht_get_sample_rate(struct minstrel_ht_sta *mi, enum minstrel_sample_type type) { u16 *rates = mi->sample[type].sample_rates; u16 cur; int i; for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) { if (!rates[i]) continue; cur = rates[i]; rates[i] = 0; return cur; } return 0; } static inline int minstrel_ewma(int old, int new, int weight) { int diff, incr; diff = new - old; incr = (EWMA_DIV - weight) * diff / EWMA_DIV; return old + incr; } static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in) { s32 out_1 = *prev_1; s32 out_2 = *prev_2; s32 val; if (!in) in += 1; if (!out_1) { val = out_1 = in; goto out; } val = MINSTREL_AVG_COEFF1 * in; val += MINSTREL_AVG_COEFF2 * out_1; val += MINSTREL_AVG_COEFF3 * out_2; val >>= MINSTREL_SCALE; if (val > 1 << MINSTREL_SCALE) val = 1 << MINSTREL_SCALE; if (val < 0) val = 1; out: *prev_2 = out_1; *prev_1 = val; return val; } /* * Recalculate statistics and counters of a given rate */ static void minstrel_ht_calc_rate_stats(struct minstrel_priv *mp, struct minstrel_rate_stats *mrs) { unsigned int cur_prob; if (unlikely(mrs->attempts > 0)) { cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); minstrel_filter_avg_add(&mrs->prob_avg, &mrs->prob_avg_1, cur_prob); mrs->att_hist += mrs->attempts; mrs->succ_hist += mrs->success; } mrs->last_success = mrs->success; mrs->last_attempts = mrs->attempts; mrs->success = 0; mrs->attempts = 0; } static bool minstrel_ht_find_sample_rate(struct minstrel_ht_sta *mi, int type, int idx) { int i; for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) { u16 cur = mi->sample[type].sample_rates[i]; if (cur == idx) return true; if (!cur) break; } return false; } static int minstrel_ht_move_sample_rates(struct minstrel_ht_sta *mi, int type, u32 fast_rate_dur, u32 slow_rate_dur) { u16 *rates = mi->sample[type].sample_rates; int i, j; for (i = 0, j = 0; i < MINSTREL_SAMPLE_RATES; i++) { u32 duration; bool valid = false; u16 cur; cur = rates[i]; if (!cur) continue; duration = minstrel_get_duration(cur); switch (type) { case MINSTREL_SAMPLE_TYPE_SLOW: valid = duration > fast_rate_dur && duration < slow_rate_dur; break; case MINSTREL_SAMPLE_TYPE_INC: case MINSTREL_SAMPLE_TYPE_JUMP: valid = duration < fast_rate_dur; break; default: valid = false; break; } if (!valid) { rates[i] = 0; continue; } if (i == j) continue; rates[j++] = cur; rates[i] = 0; } return j; } static int minstrel_ht_group_min_rate_offset(struct minstrel_ht_sta *mi, int group, u32 max_duration) { u16 supported = mi->supported[group]; int i; for (i = 0; i < MCS_GROUP_RATES && supported; i++, supported >>= 1) { if (!(supported & BIT(0))) continue; if (minstrel_get_duration(MI_RATE(group, i)) >= max_duration) continue; return i; } return -1; } /* * Incremental update rates: * Flip through groups and pick the first group rate that is faster than the * highest currently selected rate */ static u16 minstrel_ht_next_inc_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur) { u8 type = MINSTREL_SAMPLE_TYPE_INC; int i, index = 0; u8 group; group = mi->sample[type].sample_group; for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups); index = minstrel_ht_group_min_rate_offset(mi, group, fast_rate_dur); if (index < 0) continue; index = MI_RATE(group, index & 0xf); if (!minstrel_ht_find_sample_rate(mi, type, index)) goto out; } index = 0; out: mi->sample[type].sample_group = group; return index; } static int minstrel_ht_next_group_sample_rate(struct minstrel_ht_sta *mi, int group, u16 supported, int offset) { struct minstrel_mcs_group_data *mg = &mi->groups[group]; u16 idx; int i; for (i = 0; i < MCS_GROUP_RATES; i++) { idx = sample_table[mg->column][mg->index]; if (++mg->index >= MCS_GROUP_RATES) { mg->index = 0; if (++mg->column >= ARRAY_SIZE(sample_table)) mg->column = 0; } if (idx < offset) continue; if (!(supported & BIT(idx))) continue; return MI_RATE(group, idx); } return -1; } /* * Jump rates: * Sample random rates, use those that are faster than the highest * currently selected rate. Rates between the fastest and the slowest * get sorted into the slow sample bucket, but only if it has room */ static u16 minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur, u32 slow_rate_dur, int *slow_rate_ofs) { struct minstrel_rate_stats *mrs; u32 max_duration = slow_rate_dur; int i, index, offset; u16 *slow_rates; u16 supported; u32 duration; u8 group; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) max_duration = fast_rate_dur; slow_rates = mi->sample[MINSTREL_SAMPLE_TYPE_SLOW].sample_rates; group = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group; for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { u8 type; group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups); supported = mi->supported[group]; if (!supported) continue; offset = minstrel_ht_group_min_rate_offset(mi, group, max_duration); if (offset < 0) continue; index = minstrel_ht_next_group_sample_rate(mi, group, supported, offset); if (index < 0) continue; duration = minstrel_get_duration(index); if (duration < fast_rate_dur) type = MINSTREL_SAMPLE_TYPE_JUMP; else type = MINSTREL_SAMPLE_TYPE_SLOW; if (minstrel_ht_find_sample_rate(mi, type, index)) continue; if (type == MINSTREL_SAMPLE_TYPE_JUMP) goto found; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) continue; if (duration >= slow_rate_dur) continue; /* skip slow rates with high success probability */ mrs = minstrel_get_ratestats(mi, index); if (mrs->prob_avg > MINSTREL_FRAC(95, 100)) continue; slow_rates[(*slow_rate_ofs)++] = index; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) max_duration = fast_rate_dur; } index = 0; found: mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group = group; return index; } static void minstrel_ht_refill_sample_rates(struct minstrel_ht_sta *mi) { u32 prob_dur = minstrel_get_duration(mi->max_prob_rate); u32 tp_dur = minstrel_get_duration(mi->max_tp_rate[0]); u32 tp2_dur = minstrel_get_duration(mi->max_tp_rate[1]); u32 fast_rate_dur = min(min(tp_dur, tp2_dur), prob_dur); u32 slow_rate_dur = max(max(tp_dur, tp2_dur), prob_dur); u16 *rates; int i, j; rates = mi->sample[MINSTREL_SAMPLE_TYPE_INC].sample_rates; i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_INC, fast_rate_dur, slow_rate_dur); while (i < MINSTREL_SAMPLE_RATES) { rates[i] = minstrel_ht_next_inc_rate(mi, tp_dur); if (!rates[i]) break; i++; } rates = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_rates; i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_JUMP, fast_rate_dur, slow_rate_dur); j = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_SLOW, fast_rate_dur, slow_rate_dur); while (i < MINSTREL_SAMPLE_RATES) { rates[i] = minstrel_ht_next_jump_rate(mi, fast_rate_dur, slow_rate_dur, &j); if (!rates[i]) break; i++; } for (i = 0; i < ARRAY_SIZE(mi->sample); i++) memcpy(mi->sample[i].cur_sample_rates, mi->sample[i].sample_rates, sizeof(mi->sample[i].cur_sample_rates)); } /* * Update rate statistics and select new primary rates * * Rules for rate selection: * - max_prob_rate must use only one stream, as a tradeoff between delivery * probability and throughput during strong fluctuations * - as long as the max prob rate has a probability of more than 75%, pick * higher throughput rates, even if the probability is a bit lower */ static void minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; int group, i, j, cur_prob; u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate; u16 index; bool ht_supported = mi->sta->deflink.ht_cap.ht_supported; if (mi->ampdu_packets > 0) { if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); else mi->avg_ampdu_len = 0; mi->ampdu_len = 0; mi->ampdu_packets = 0; } if (mi->supported[MINSTREL_CCK_GROUP]) group = MINSTREL_CCK_GROUP; else if (mi->supported[MINSTREL_OFDM_GROUP]) group = MINSTREL_OFDM_GROUP; else group = 0; index = MI_RATE(group, 0); for (j = 0; j < ARRAY_SIZE(tmp_legacy_tp_rate); j++) tmp_legacy_tp_rate[j] = index; if (mi->supported[MINSTREL_VHT_GROUP_0]) group = MINSTREL_VHT_GROUP_0; else if (ht_supported) group = MINSTREL_HT_GROUP_0; else if (mi->supported[MINSTREL_CCK_GROUP]) group = MINSTREL_CCK_GROUP; else group = MINSTREL_OFDM_GROUP; index = MI_RATE(group, 0); tmp_max_prob_rate = index; for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++) tmp_mcs_tp_rate[j] = index; /* Find best rate sets within all MCS groups*/ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { u16 *tp_rate = tmp_mcs_tp_rate; u16 last_prob = 0; mg = &mi->groups[group]; if (!mi->supported[group]) continue; /* (re)Initialize group rate indexes */ for(j = 0; j < MAX_THR_RATES; j++) tmp_group_tp_rate[j] = MI_RATE(group, 0); if (group == MINSTREL_CCK_GROUP && ht_supported) tp_rate = tmp_legacy_tp_rate; for (i = MCS_GROUP_RATES - 1; i >= 0; i--) { if (!(mi->supported[group] & BIT(i))) continue; index = MI_RATE(group, i); mrs = &mg->rates[i]; mrs->retry_updated = false; minstrel_ht_calc_rate_stats(mp, mrs); if (mrs->att_hist) last_prob = max(last_prob, mrs->prob_avg); else mrs->prob_avg = max(last_prob, mrs->prob_avg); cur_prob = mrs->prob_avg; if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) continue; /* Find max throughput rate set */ minstrel_ht_sort_best_tp_rates(mi, index, tp_rate); /* Find max throughput rate set within a group */ minstrel_ht_sort_best_tp_rates(mi, index, tmp_group_tp_rate); } memcpy(mg->max_group_tp_rate, tmp_group_tp_rate, sizeof(mg->max_group_tp_rate)); } /* Assign new rate set per sta */ minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate, tmp_legacy_tp_rate); memcpy(mi->max_tp_rate, tmp_mcs_tp_rate, sizeof(mi->max_tp_rate)); for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { if (!mi->supported[group]) continue; mg = &mi->groups[group]; mg->max_group_prob_rate = MI_RATE(group, 0); for (i = 0; i < MCS_GROUP_RATES; i++) { if (!(mi->supported[group] & BIT(i))) continue; index = MI_RATE(group, i); /* Find max probability rate per group and global */ minstrel_ht_set_best_prob_rate(mi, &tmp_max_prob_rate, index); } } mi->max_prob_rate = tmp_max_prob_rate; /* Try to increase robustness of max_prob_rate*/ minstrel_ht_prob_rate_reduce_streams(mi); minstrel_ht_refill_sample_rates(mi); #ifdef CONFIG_MAC80211_DEBUGFS /* use fixed index if set */ if (mp->fixed_rate_idx != -1) { for (i = 0; i < 4; i++) mi->max_tp_rate[i] = mp->fixed_rate_idx; mi->max_prob_rate = mp->fixed_rate_idx; } #endif /* Reset update timer */ mi->last_stats_update = jiffies; mi->sample_time = jiffies; } static bool minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate) { int i; if (rate->idx < 0) return false; if (!rate->count) return false; if (rate->flags & IEEE80211_TX_RC_MCS || rate->flags & IEEE80211_TX_RC_VHT_MCS) return true; for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) if (rate->idx == mp->cck_rates[i]) return true; for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) if (rate->idx == mp->ofdm_rates[mi->band][i]) return true; return false; } /* * Check whether rate_status contains valid information. */ static bool minstrel_ht_ri_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_rate_status *rate_status) { int i; if (!rate_status) return false; if (!rate_status->try_count) return false; if (rate_status->rate_idx.flags & RATE_INFO_FLAGS_MCS || rate_status->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS) return true; for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) { if (rate_status->rate_idx.legacy == minstrel_cck_bitrates[ mp->cck_rates[i] ]) return true; } for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates); i++) { if (rate_status->rate_idx.legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][i] ]) return true; } return false; } static void minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary) { int group, orig_group; orig_group = group = MI_RATE_GROUP(*idx); while (group > 0) { group--; if (!mi->supported[group]) continue; if (minstrel_mcs_groups[group].streams > minstrel_mcs_groups[orig_group].streams) continue; if (primary) *idx = mi->groups[group].max_group_tp_rate[0]; else *idx = mi->groups[group].max_group_tp_rate[1]; break; } } static void minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, void *priv_sta, struct ieee80211_tx_status *st) { struct ieee80211_tx_info *info = st->info; struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2; struct minstrel_priv *mp = priv; u32 update_interval = mp->update_interval; bool last, update = false; int i; /* Ignore packet that was sent with noAck flag */ if (info->flags & IEEE80211_TX_CTL_NO_ACK) return; /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; if (!(info->flags & IEEE80211_TX_STAT_AMPDU)) { info->status.ampdu_ack_len = (info->flags & IEEE80211_TX_STAT_ACK ? 1 : 0); info->status.ampdu_len = 1; } /* wraparound */ if (mi->total_packets >= ~0 - info->status.ampdu_len) { mi->total_packets = 0; mi->sample_packets = 0; } mi->total_packets += info->status.ampdu_len; if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; mi->ampdu_packets++; mi->ampdu_len += info->status.ampdu_len; if (st->rates && st->n_rates) { last = !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[0])); for (i = 0; !last; i++) { last = (i == st->n_rates - 1) || !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[i + 1])); rate = minstrel_ht_ri_get_stats(mp, mi, &(st->rates[i])); if (last) rate->success += info->status.ampdu_ack_len; rate->attempts += st->rates[i].try_count * info->status.ampdu_len; } } else { last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]); rate = minstrel_ht_get_stats(mp, mi, &ar[i]); if (last) rate->success += info->status.ampdu_ack_len; rate->attempts += ar[i].count * info->status.ampdu_len; } } if (mp->hw->max_rates > 1) { /* * check for sudden death of spatial multiplexing, * downgrade to a lower number of streams if necessary. */ rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); if (rate->attempts > 30 && rate->success < rate->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); update = true; } rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); if (rate2->attempts > 30 && rate2->success < rate2->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); update = true; } } if (time_after(jiffies, mi->last_stats_update + update_interval)) { update = true; minstrel_ht_update_stats(mp, mi); } if (update) minstrel_ht_update_rates(mp, mi); } static void minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, int index) { struct minstrel_rate_stats *mrs; unsigned int tx_time, tx_time_rtscts, tx_time_data; unsigned int cw = mp->cw_min; unsigned int ctime = 0; unsigned int t_slot = 9; /* FIXME */ unsigned int ampdu_len = minstrel_ht_avg_ampdu_len(mi); unsigned int overhead = 0, overhead_rtscts = 0; mrs = minstrel_get_ratestats(mi, index); if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) { mrs->retry_count = 1; mrs->retry_count_rtscts = 1; return; } mrs->retry_count = 2; mrs->retry_count_rtscts = 2; mrs->retry_updated = true; tx_time_data = minstrel_get_duration(index) * ampdu_len / 1000; /* Contention time for first 2 tries */ ctime = (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(index))) { overhead = mi->overhead_legacy; overhead_rtscts = mi->overhead_legacy_rtscts; } else { overhead = mi->overhead; overhead_rtscts = mi->overhead_rtscts; } /* Total TX time for data and Contention after first 2 tries */ tx_time = ctime + 2 * (overhead + tx_time_data); tx_time_rtscts = ctime + 2 * (overhead_rtscts + tx_time_data); /* See how many more tries we can fit inside segment size */ do { /* Contention time for this try */ ctime = (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); /* Total TX time after this try */ tx_time += ctime + overhead + tx_time_data; tx_time_rtscts += ctime + overhead_rtscts + tx_time_data; if (tx_time_rtscts < mp->segment_size) mrs->retry_count_rtscts++; } while ((tx_time < mp->segment_size) && (++mrs->retry_count < mp->max_retry)); } static void minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_sta_rates *ratetbl, int offset, int index) { int group_idx = MI_RATE_GROUP(index); const struct mcs_group *group = &minstrel_mcs_groups[group_idx]; struct minstrel_rate_stats *mrs; u8 idx; u16 flags = group->flags; mrs = minstrel_get_ratestats(mi, index); if (!mrs->retry_updated) minstrel_calc_retransmit(mp, mi, index); if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { ratetbl->rate[offset].count = 2; ratetbl->rate[offset].count_rts = 2; ratetbl->rate[offset].count_cts = 2; } else { ratetbl->rate[offset].count = mrs->retry_count; ratetbl->rate[offset].count_cts = mrs->retry_count; ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts; } index = MI_RATE_IDX(index); if (group_idx == MINSTREL_CCK_GROUP) idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; else if (group_idx == MINSTREL_OFDM_GROUP) idx = mp->ofdm_rates[mi->band][index % ARRAY_SIZE(mp->ofdm_rates[0])]; else if (flags & IEEE80211_TX_RC_VHT_MCS) idx = ((group->streams - 1) << 4) | (index & 0xF); else idx = index + (group->streams - 1) * 8; /* enable RTS/CTS if needed: * - if station is in dynamic SMPS (and streams > 1) * - for fallback rates, to increase chances of getting through */ if (offset > 0 || (mi->sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC && group->streams > 1)) { ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; flags |= IEEE80211_TX_RC_USE_RTS_CTS; } ratetbl->rate[offset].idx = idx; ratetbl->rate[offset].flags = flags; } static inline int minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate) { int group = MI_RATE_GROUP(rate); rate = MI_RATE_IDX(rate); return mi->groups[group].rates[rate].prob_avg; } static int minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) { int group = MI_RATE_GROUP(mi->max_prob_rate); const struct mcs_group *g = &minstrel_mcs_groups[group]; int rate = MI_RATE_IDX(mi->max_prob_rate); unsigned int duration; /* Disable A-MSDU if max_prob_rate is bad */ if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100)) return 1; duration = g->duration[rate]; duration <<= g->shift; /* If the rate is slower than single-stream MCS1, make A-MSDU limit small */ if (duration > MCS_DURATION(1, 0, 52)) return 500; /* * If the rate is slower than single-stream MCS4, limit A-MSDU to usual * data packet size */ if (duration > MCS_DURATION(1, 0, 104)) return 1600; /* * If the rate is slower than single-stream MCS7, or if the max throughput * rate success probability is less than 75%, limit A-MSDU to twice the usual * data packet size */ if (duration > MCS_DURATION(1, 0, 260) || (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) < MINSTREL_FRAC(75, 100))) return 3200; /* * HT A-MPDU limits maximum MPDU size under BA agreement to 4095 bytes. * Since aggregation sessions are started/stopped without txq flush, use * the limit here to avoid the complexity of having to de-aggregate * packets in the queue. */ if (!mi->sta->deflink.vht_cap.vht_supported) return IEEE80211_MAX_MPDU_LEN_HT_BA; /* unlimited */ return 0; } static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct ieee80211_sta_rates *rates; int i = 0; int max_rates = min_t(int, mp->hw->max_rates, IEEE80211_TX_RATE_TABLE_SIZE); rates = kzalloc(sizeof(*rates), GFP_ATOMIC); if (!rates) return; /* Start with max_tp_rate[0] */ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); /* Fill up remaining, keep one entry for max_probe_rate */ for (; i < (max_rates - 1); i++) minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]); if (i < max_rates) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); if (i < IEEE80211_TX_RATE_TABLE_SIZE) rates->rate[i].idx = -1; mi->sta->deflink.agg.max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); ieee80211_sta_recalc_aggregates(mi->sta); rate_control_set_rates(mp->hw, mi->sta, rates); } static u16 minstrel_ht_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { u8 seq; if (mp->hw->max_rates > 1) { seq = mi->sample_seq; mi->sample_seq = (seq + 1) % ARRAY_SIZE(minstrel_sample_seq); seq = minstrel_sample_seq[seq]; } else { seq = MINSTREL_SAMPLE_TYPE_INC; } return __minstrel_ht_get_sample_rate(mi, seq); } static void minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { const struct mcs_group *sample_group; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); struct ieee80211_tx_rate *rate = &info->status.rates[0]; struct minstrel_ht_sta *mi = priv_sta; struct minstrel_priv *mp = priv; u16 sample_idx; info->flags |= mi->tx_flags; #ifdef CONFIG_MAC80211_DEBUGFS if (mp->fixed_rate_idx != -1) return; #endif /* Don't use EAPOL frames for sampling on non-mrr hw */ if (mp->hw->max_rates == 1 && (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) return; if (time_is_after_jiffies(mi->sample_time)) return; mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL; sample_idx = minstrel_ht_get_sample_rate(mp, mi); if (!sample_idx) return; sample_group = &minstrel_mcs_groups[MI_RATE_GROUP(sample_idx)]; sample_idx = MI_RATE_IDX(sample_idx); if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] && (sample_idx >= 4) != txrc->short_preamble) return; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; rate->count = 1; if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); rate->idx = mp->cck_rates[idx]; } else if (sample_group == &minstrel_mcs_groups[MINSTREL_OFDM_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->ofdm_rates[0]); rate->idx = mp->ofdm_rates[mi->band][idx]; } else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(rate, MI_RATE_IDX(sample_idx), sample_group->streams); } else { rate->idx = sample_idx + (sample_group->streams - 1) * 8; } rate->flags = sample_group->flags; } static void minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) { int i; if (sband->band != NL80211_BAND_2GHZ) return; if (sta->deflink.ht_cap.ht_supported && !ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; for (i = 0; i < 4; i++) { if (mp->cck_rates[i] == 0xff || !rate_supported(sta, sband->band, mp->cck_rates[i])) continue; mi->supported[MINSTREL_CCK_GROUP] |= BIT(i); if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE) mi->supported[MINSTREL_CCK_GROUP] |= BIT(i + 4); } } static void minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) { const u8 *rates; int i; if (sta->deflink.ht_cap.ht_supported) return; rates = mp->ofdm_rates[sband->band]; for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) { if (rates[i] == 0xff || !rate_supported(sta, sband->band, rates[i])) continue; mi->supported[MINSTREL_OFDM_GROUP] |= BIT(i); } } static void minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta) { struct minstrel_priv *mp = priv; struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_mcs_info *mcs = &sta->deflink.ht_cap.mcs; u16 ht_cap = sta->deflink.ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; const struct ieee80211_rate *ctl_rate; struct sta_info *sta_info; bool ldpc, erp; int use_vht; int ack_dur; int stbc; int i; BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_GROUPS_NB); if (vht_cap->vht_supported) use_vht = vht_cap->vht_mcs.tx_mcs_map != cpu_to_le16(~0); else use_vht = 0; memset(mi, 0, sizeof(*mi)); mi->sta = sta; mi->band = sband->band; mi->last_stats_update = jiffies; ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1); mi->overhead += ack_dur; mi->overhead_rtscts = mi->overhead + 2 * ack_dur; ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)]; erp = ctl_rate->flags & IEEE80211_RATE_ERP_G; ack_dur = ieee80211_frame_duration(sband->band, 10, ctl_rate->bitrate, erp, 1); mi->overhead_legacy = ack_dur; mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur; mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); if (!use_vht) { stbc = (ht_cap & IEEE80211_HT_CAP_RX_STBC) >> IEEE80211_HT_CAP_RX_STBC_SHIFT; ldpc = ht_cap & IEEE80211_HT_CAP_LDPC_CODING; } else { stbc = (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK) >> IEEE80211_VHT_CAP_RXSTBC_SHIFT; ldpc = vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC; } mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT; if (ldpc) mi->tx_flags |= IEEE80211_TX_CTL_LDPC; for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { u32 gflags = minstrel_mcs_groups[i].flags; int bw, nss; mi->supported[i] = 0; if (minstrel_ht_is_legacy_group(i)) continue; if (gflags & IEEE80211_TX_RC_SHORT_GI) { if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) { if (!(ht_cap & IEEE80211_HT_CAP_SGI_40)) continue; } else { if (!(ht_cap & IEEE80211_HT_CAP_SGI_20)) continue; } } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH && sta->deflink.bandwidth < IEEE80211_STA_RX_BW_40) continue; nss = minstrel_mcs_groups[i].streams; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC && nss > 1) continue; /* HT rate */ if (gflags & IEEE80211_TX_RC_MCS) { if (use_vht && minstrel_vht_only) continue; mi->supported[i] = mcs->rx_mask[nss - 1]; continue; } /* VHT rate */ if (!vht_cap->vht_supported || WARN_ON(!(gflags & IEEE80211_TX_RC_VHT_MCS)) || WARN_ON(gflags & IEEE80211_TX_RC_160_MHZ_WIDTH)) continue; if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) { if (sta->deflink.bandwidth < IEEE80211_STA_RX_BW_80 || ((gflags & IEEE80211_TX_RC_SHORT_GI) && !(vht_cap->cap & IEEE80211_VHT_CAP_SHORT_GI_80))) { continue; } } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) bw = BW_40; else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) bw = BW_80; else bw = BW_20; mi->supported[i] = minstrel_get_valid_vht_rates(bw, nss, vht_cap->vht_mcs.tx_mcs_map); } sta_info = container_of(sta, struct sta_info, sta); mi->use_short_preamble = test_sta_flag(sta_info, WLAN_STA_SHORT_PREAMBLE) && sta_info->sdata->vif.bss_conf.use_short_preamble; minstrel_ht_update_cck(mp, mi, sband, sta); minstrel_ht_update_ofdm(mp, mi, sband, sta); /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi); minstrel_ht_update_rates(mp, mi); } static void minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta) { minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta); } static void minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta, u32 changed) { minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta); } static void * minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { struct ieee80211_supported_band *sband; struct minstrel_ht_sta *mi; struct minstrel_priv *mp = priv; struct ieee80211_hw *hw = mp->hw; int max_rates = 0; int i; for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = hw->wiphy->bands[i]; if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } return kzalloc(sizeof(*mi), gfp); } static void minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) { kfree(priv_sta); } static void minstrel_ht_fill_rate_array(u8 *dest, struct ieee80211_supported_band *sband, const s16 *bitrates, int n_rates) { int i, j; for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *rate = &sband->bitrates[i]; for (j = 0; j < n_rates; j++) { if (rate->bitrate != bitrates[j]) continue; dest[j] = i; break; } } } static void minstrel_ht_init_cck_rates(struct minstrel_priv *mp) { static const s16 bitrates[4] = { 10, 20, 55, 110 }; struct ieee80211_supported_band *sband; memset(mp->cck_rates, 0xff, sizeof(mp->cck_rates)); sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; if (!sband) return; BUILD_BUG_ON(ARRAY_SIZE(mp->cck_rates) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->cck_rates, sband, minstrel_cck_bitrates, ARRAY_SIZE(minstrel_cck_bitrates)); } static void minstrel_ht_init_ofdm_rates(struct minstrel_priv *mp, enum nl80211_band band) { static const s16 bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; struct ieee80211_supported_band *sband; memset(mp->ofdm_rates[band], 0xff, sizeof(mp->ofdm_rates[band])); sband = mp->hw->wiphy->bands[band]; if (!sband) return; BUILD_BUG_ON(ARRAY_SIZE(mp->ofdm_rates[band]) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->ofdm_rates[band], sband, minstrel_ofdm_bitrates, ARRAY_SIZE(minstrel_ofdm_bitrates)); } static void * minstrel_ht_alloc(struct ieee80211_hw *hw) { struct minstrel_priv *mp; int i; mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC); if (!mp) return NULL; /* contention window settings * Just an approximation. Using the per-queue values would complicate * the calculations and is probably unnecessary */ mp->cw_min = 15; mp->cw_max = 1023; /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; if (hw->max_rate_tries > 0) mp->max_retry = hw->max_rate_tries; else /* safe default, does not necessarily have to match hw properties */ mp->max_retry = 7; mp->hw = hw; mp->update_interval = HZ / 20; minstrel_ht_init_cck_rates(mp); for (i = 0; i < ARRAY_SIZE(mp->hw->wiphy->bands); i++) minstrel_ht_init_ofdm_rates(mp, i); return mp; } #ifdef CONFIG_MAC80211_DEBUGFS static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv, struct dentry *debugfsdir) { struct minstrel_priv *mp = priv; mp->fixed_rate_idx = (u32) -1; debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); } #endif static void minstrel_ht_free(void *priv) { kfree(priv); } static u32 minstrel_ht_get_expected_throughput(void *priv_sta) { struct minstrel_ht_sta *mi = priv_sta; int i, j, prob, tp_avg; i = MI_RATE_GROUP(mi->max_tp_rate[0]); j = MI_RATE_IDX(mi->max_tp_rate[0]); prob = mi->groups[i].rates[j].prob_avg; /* convert tp_avg from pkt per second in kbps */ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10; tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024; return tp_avg; } static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", .capa = RATE_CTRL_CAPA_AMPDU_TRIGGER, .tx_status_ext = minstrel_ht_tx_status, .get_rate = minstrel_ht_get_rate, .rate_init = minstrel_ht_rate_init, .rate_update = minstrel_ht_rate_update, .alloc_sta = minstrel_ht_alloc_sta, .free_sta = minstrel_ht_free_sta, .alloc = minstrel_ht_alloc, .free = minstrel_ht_free, #ifdef CONFIG_MAC80211_DEBUGFS .add_debugfs = minstrel_ht_add_debugfs, .add_sta_debugfs = minstrel_ht_add_sta_debugfs, #endif .get_expected_throughput = minstrel_ht_get_expected_throughput, }; static void __init init_sample_table(void) { int col, i, new_idx; u8 rnd[MCS_GROUP_RATES]; memset(sample_table, 0xff, sizeof(sample_table)); for (col = 0; col < SAMPLE_COLUMNS; col++) { get_random_bytes(rnd, sizeof(rnd)); for (i = 0; i < MCS_GROUP_RATES; i++) { new_idx = (i + rnd[i]) % MCS_GROUP_RATES; while (sample_table[col][new_idx] != 0xff) new_idx = (new_idx + 1) % MCS_GROUP_RATES; sample_table[col][new_idx] = i; } } } int __init rc80211_minstrel_init(void) { init_sample_table(); return ieee80211_rate_control_register(&mac80211_minstrel_ht); } void rc80211_minstrel_exit(void) { ieee80211_rate_control_unregister(&mac80211_minstrel_ht); }
2 1 1 2 2 8 8 8 2 2 2 2 2 8 8 6 6 6 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/module.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_socket.h> #include <net/inet_sock.h> #include <net/tcp.h> struct nft_socket { enum nft_socket_keys key:8; u8 level; /* cgroupv2 level to extract */ u8 level_user; /* cgroupv2 level provided by userspace */ u8 len; union { u8 dreg; }; }; static void nft_socket_wildcard(const struct nft_pktinfo *pkt, struct nft_regs *regs, struct sock *sk, u32 *dest) { switch (nft_pf(pkt)) { case NFPROTO_IPV4: nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr)); break; #endif default: regs->verdict.code = NFT_BREAK; return; } } #ifdef CONFIG_SOCK_CGROUP_DATA static noinline bool nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { struct cgroup *cgrp; u64 cgid; if (!sk_fullsock(sk)) return false; cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level); if (!cgrp) return false; cgid = cgroup_id(cgrp); memcpy(dest, &cgid, sizeof(u64)); return true; } /* process context only, uses current->nsproxy. */ static noinline int nft_socket_cgroup_subtree_level(void) { struct cgroup *cgrp = cgroup_get_from_path("/"); int level; if (IS_ERR(cgrp)) return PTR_ERR(cgrp); level = cgrp->level; cgroup_put(cgrp); if (level > 255) return -ERANGE; if (WARN_ON_ONCE(level < 0)) return -EINVAL; return level; } #endif static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) { const struct net_device *indev = nft_in(pkt); const struct sk_buff *skb = pkt->skb; struct sock *sk = NULL; if (!indev) return NULL; switch (nft_pf(pkt)) { case NFPROTO_IPV4: sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); break; #endif default: WARN_ON_ONCE(1); break; } return sk; } static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_socket *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; struct sock *sk = skb->sk; u32 *dest = &regs->data[priv->dreg]; if (sk && !net_eq(nft_net(pkt), sock_net(sk))) sk = NULL; if (!sk) sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; return; } switch(priv->key) { case NFT_SOCKET_TRANSPARENT: nft_reg_store8(dest, inet_sk_transparent(sk)); break; case NFT_SOCKET_MARK: if (sk_fullsock(sk)) { *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; goto out_put_sk; } break; case NFT_SOCKET_WILDCARD: if (!sk_fullsock(sk)) { regs->verdict.code = NFT_BREAK; goto out_put_sk; } nft_socket_wildcard(pkt, regs, sk, dest); break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; goto out_put_sk; } break; #endif default: WARN_ON(1); regs->verdict.code = NFT_BREAK; } out_put_sk: if (sk != skb->sk) sock_gen_put(sk); } static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_socket_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_socket *priv = nft_expr_priv(expr); unsigned int len; if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY]) return -EINVAL; switch(ctx->family) { case NFPROTO_IPV4: #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: #endif case NFPROTO_INET: break; default: return -EOPNOTSUPP; } priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY])); switch(priv->key) { case NFT_SOCKET_TRANSPARENT: case NFT_SOCKET_WILDCARD: len = sizeof(u8); break; case NFT_SOCKET_MARK: len = sizeof(u32); break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: { unsigned int level; int err; if (!tb[NFTA_SOCKET_LEVEL]) return -EINVAL; level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL])); if (level > 255) return -EOPNOTSUPP; err = nft_socket_cgroup_subtree_level(); if (err < 0) return err; priv->level_user = level; level += err; /* Implies a giant cgroup tree */ if (level > 255) return -EOPNOTSUPP; priv->level = level; len = sizeof(u64); break; } #endif default: return -EOPNOTSUPP; } priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_socket *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key))) return -1; if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) return -1; if (priv->key == NFT_SOCKET_CGROUPV2 && nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) return -1; return 0; } static bool nft_socket_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_socket *priv = nft_expr_priv(expr); const struct nft_socket *socket; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } socket = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->key != socket->key || priv->dreg != socket->dreg || priv->level != socket->level) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && ctx->family != NFPROTO_INET) return -EOPNOTSUPP; return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT)); } static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_socket)), .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, .validate = nft_socket_validate, .reduce = nft_socket_reduce, }; static struct nft_expr_type nft_socket_type __read_mostly = { .name = "socket", .ops = &nft_socket_ops, .policy = nft_socket_policy, .maxattr = NFTA_SOCKET_MAX, .owner = THIS_MODULE, }; static int __init nft_socket_module_init(void) { return nft_register_expr(&nft_socket_type); } static void __exit nft_socket_module_exit(void) { nft_unregister_expr(&nft_socket_type); } module_init(nft_socket_module_init); module_exit(nft_socket_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Máté Eckl"); MODULE_DESCRIPTION("nf_tables socket match module"); MODULE_ALIAS_NFT_EXPR("socket");
254 253 255 253 784 775 254 252 256 782 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 // SPDX-License-Identifier: GPL-2.0-or-later /* * linux/drivers/net/netconsole.c * * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com> * * This file contains the implementation of an IRQ-safe, crash-safe * kernel console implementation that outputs kernel messages to the * network. * * Modification history: * * 2001-09-17 started by Ingo Molnar. * 2003-08-11 2.6 port by Matt Mackall * simplified options * generic card hooks * works non-modular * 2003-09-07 rewritten with netpoll api */ /**************************************************************** * ****************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/console.h> #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/netpoll.h> #include <linux/inet.h> #include <linux/configfs.h> #include <linux/etherdevice.h> #include <linux/u64_stats_sync.h> #include <linux/utsname.h> #include <linux/rtnetlink.h> MODULE_AUTHOR("Matt Mackall <mpm@selenic.com>"); MODULE_DESCRIPTION("Console driver for network interfaces"); MODULE_LICENSE("GPL"); #define MAX_PARAM_LENGTH 256 #define MAX_EXTRADATA_ENTRY_LEN 256 #define MAX_EXTRADATA_VALUE_LEN 200 /* The number 3 comes from userdata entry format characters (' ', '=', '\n') */ #define MAX_EXTRADATA_NAME_LEN (MAX_EXTRADATA_ENTRY_LEN - \ MAX_EXTRADATA_VALUE_LEN - 3) #define MAX_EXTRADATA_ITEMS 16 #define MAX_PRINT_CHUNK 1000 static char config[MAX_PARAM_LENGTH]; module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0); MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]"); static bool oops_only; module_param(oops_only, bool, 0600); MODULE_PARM_DESC(oops_only, "Only log oops messages"); #define NETCONSOLE_PARAM_TARGET_PREFIX "cmdline" #ifndef MODULE static int __init option_setup(char *opt) { strscpy(config, opt, MAX_PARAM_LENGTH); return 1; } __setup("netconsole=", option_setup); #endif /* MODULE */ /* Linked list of all configured targets */ static LIST_HEAD(target_list); /* target_cleanup_list is used to track targets that need to be cleaned outside * of target_list_lock. It should be cleaned in the same function it is * populated. */ static LIST_HEAD(target_cleanup_list); /* This needs to be a spinlock because write_msg() cannot sleep */ static DEFINE_SPINLOCK(target_list_lock); /* This needs to be a mutex because netpoll_cleanup might sleep */ static DEFINE_MUTEX(target_cleanup_list_lock); /* * Console driver for netconsoles. Register only consoles that have * an associated target of the same type. */ static struct console netconsole_ext, netconsole; struct netconsole_target_stats { u64_stats_t xmit_drop_count; u64_stats_t enomem_count; struct u64_stats_sync syncp; }; enum console_type { CONS_BASIC = BIT(0), CONS_EXTENDED = BIT(1), }; /* Features enabled in sysdata. Contrary to userdata, this data is populated by * the kernel. The fields are designed as bitwise flags, allowing multiple * features to be set in sysdata_fields. */ enum sysdata_feature { /* Populate the CPU that sends the message */ SYSDATA_CPU_NR = BIT(0), /* Populate the task name (as in current->comm) in sysdata */ SYSDATA_TASKNAME = BIT(1), /* Kernel release/version as part of sysdata */ SYSDATA_RELEASE = BIT(2), /* Include a per-target message ID as part of sysdata */ SYSDATA_MSGID = BIT(3), }; /** * struct netconsole_target - Represents a configured netconsole target. * @list: Links this target into the target_list. * @group: Links us into the configfs subsystem hierarchy. * @userdata_group: Links to the userdata configfs hierarchy * @extradata_complete: Cached, formatted string of append * @userdata_length: String length of usedata in extradata_complete. * @sysdata_fields: Sysdata features enabled. * @msgcounter: Message sent counter. * @stats: Packet send stats for the target. Used for debugging. * @enabled: On / off knob to enable / disable target. * Visible from userspace (read-write). * We maintain a strict 1:1 correspondence between this and * whether the corresponding netpoll is active or inactive. * Also, other parameters of a target may be modified at * runtime only when it is disabled (enabled == 0). * @extended: Denotes whether console is extended or not. * @release: Denotes whether kernel release version should be prepended * to the message. Depends on extended console. * @np: The netpoll structure for this target. * Contains the other userspace visible parameters: * dev_name (read-write) * local_port (read-write) * remote_port (read-write) * local_ip (read-write) * remote_ip (read-write) * local_mac (read-only) * remote_mac (read-write) * @buf: The buffer used to send the full msg to the network stack */ struct netconsole_target { struct list_head list; #ifdef CONFIG_NETCONSOLE_DYNAMIC struct config_group group; struct config_group userdata_group; char extradata_complete[MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS]; size_t userdata_length; /* bit-wise with sysdata_feature bits */ u32 sysdata_fields; /* protected by target_list_lock */ u32 msgcounter; #endif struct netconsole_target_stats stats; bool enabled; bool extended; bool release; struct netpoll np; /* protected by target_list_lock */ char buf[MAX_PRINT_CHUNK]; }; #ifdef CONFIG_NETCONSOLE_DYNAMIC static struct configfs_subsystem netconsole_subsys; static DEFINE_MUTEX(dynamic_netconsole_mutex); static int __init dynamic_netconsole_init(void) { config_group_init(&netconsole_subsys.su_group); mutex_init(&netconsole_subsys.su_mutex); return configfs_register_subsystem(&netconsole_subsys); } static void __exit dynamic_netconsole_exit(void) { configfs_unregister_subsystem(&netconsole_subsys); } /* * Targets that were created by parsing the boot/module option string * do not exist in the configfs hierarchy (and have NULL names) and will * never go away, so make these a no-op for them. */ static void netconsole_target_get(struct netconsole_target *nt) { if (config_item_name(&nt->group.cg_item)) config_group_get(&nt->group); } static void netconsole_target_put(struct netconsole_target *nt) { if (config_item_name(&nt->group.cg_item)) config_group_put(&nt->group); } #else /* !CONFIG_NETCONSOLE_DYNAMIC */ static int __init dynamic_netconsole_init(void) { return 0; } static void __exit dynamic_netconsole_exit(void) { } /* * No danger of targets going away from under us when dynamic * reconfigurability is off. */ static void netconsole_target_get(struct netconsole_target *nt) { } static void netconsole_target_put(struct netconsole_target *nt) { } static void populate_configfs_item(struct netconsole_target *nt, int cmdline_count) { } #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Allocate and initialize with defaults. * Note that these targets get their config_item fields zeroed-out. */ static struct netconsole_target *alloc_and_init(void) { struct netconsole_target *nt; nt = kzalloc(sizeof(*nt), GFP_KERNEL); if (!nt) return nt; if (IS_ENABLED(CONFIG_NETCONSOLE_EXTENDED_LOG)) nt->extended = true; if (IS_ENABLED(CONFIG_NETCONSOLE_PREPEND_RELEASE)) nt->release = true; nt->np.name = "netconsole"; strscpy(nt->np.dev_name, "eth0", IFNAMSIZ); nt->np.local_port = 6665; nt->np.remote_port = 6666; eth_broadcast_addr(nt->np.remote_mac); return nt; } /* Clean up every target in the cleanup_list and move the clean targets back to * the main target_list. */ static void netconsole_process_cleanups_core(void) { struct netconsole_target *nt, *tmp; unsigned long flags; /* The cleanup needs RTNL locked */ ASSERT_RTNL(); mutex_lock(&target_cleanup_list_lock); list_for_each_entry_safe(nt, tmp, &target_cleanup_list, list) { /* all entries in the cleanup_list needs to be disabled */ WARN_ON_ONCE(nt->enabled); do_netpoll_cleanup(&nt->np); /* moved the cleaned target to target_list. Need to hold both * locks */ spin_lock_irqsave(&target_list_lock, flags); list_move(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); } WARN_ON_ONCE(!list_empty(&target_cleanup_list)); mutex_unlock(&target_cleanup_list_lock); } static void netconsole_print_banner(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); if (np->ipv6) np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); else np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface name '%s'\n", np->dev_name); np_info(np, "local ethernet address '%pM'\n", np->dev_mac); np_info(np, "remote port %d\n", np->remote_port); if (np->ipv6) np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); else np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } /* Parse the string and populate the `inet_addr` union. Return 0 if IPv4 is * populated, 1 if IPv6 is populated, and -1 upon failure. */ static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) { const char *end = NULL; int len; len = strlen(str); if (!len) return -1; if (str[len - 1] == '\n') len -= 1; if (in4_pton(str, len, (void *)addr, -1, &end) > 0 && (!end || *end == 0 || *end == '\n')) return 0; if (IS_ENABLED(CONFIG_IPV6) && in6_pton(str, len, (void *)addr, -1, &end) > 0 && (!end || *end == 0 || *end == '\n')) return 1; return -1; } #ifdef CONFIG_NETCONSOLE_DYNAMIC /* * Our subsystem hierarchy is: * * /sys/kernel/config/netconsole/ * | * <target>/ * | enabled * | release * | dev_name * | local_port * | remote_port * | local_ip * | remote_ip * | local_mac * | remote_mac * | transmit_errors * | userdata/ * | <key>/ * | value * | ... * | * <target>/... */ static struct netconsole_target *to_target(struct config_item *item) { struct config_group *cfg_group; cfg_group = to_config_group(item); if (!cfg_group) return NULL; return container_of(to_config_group(item), struct netconsole_target, group); } /* Do the list cleanup with the rtnl lock hold. rtnl lock is necessary because * netdev might be cleaned-up by calling __netpoll_cleanup(), */ static void netconsole_process_cleanups(void) { /* rtnl lock is called here, because it has precedence over * target_cleanup_list_lock mutex and target_cleanup_list */ rtnl_lock(); netconsole_process_cleanups_core(); rtnl_unlock(); } /* Get rid of possible trailing newline, returning the new length */ static void trim_newline(char *s, size_t maxlen) { size_t len; len = strnlen(s, maxlen); if (s[len - 1] == '\n') s[len - 1] = '\0'; } /* * Attribute operations for netconsole_target. */ static ssize_t enabled_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->enabled); } static ssize_t extended_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->extended); } static ssize_t release_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->release); } static ssize_t dev_name_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%s\n", to_target(item)->np.dev_name); } static ssize_t local_port_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->np.local_port); } static ssize_t remote_port_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->np.remote_port); } static ssize_t local_ip_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); if (nt->np.ipv6) return sysfs_emit(buf, "%pI6c\n", &nt->np.local_ip.in6); else return sysfs_emit(buf, "%pI4\n", &nt->np.local_ip); } static ssize_t remote_ip_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); if (nt->np.ipv6) return sysfs_emit(buf, "%pI6c\n", &nt->np.remote_ip.in6); else return sysfs_emit(buf, "%pI4\n", &nt->np.remote_ip); } static ssize_t local_mac_show(struct config_item *item, char *buf) { struct net_device *dev = to_target(item)->np.dev; static const u8 bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; return sysfs_emit(buf, "%pM\n", dev ? dev->dev_addr : bcast); } static ssize_t remote_mac_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%pM\n", to_target(item)->np.remote_mac); } static ssize_t transmit_errors_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); u64 xmit_drop_count, enomem_count; unsigned int start; do { start = u64_stats_fetch_begin(&nt->stats.syncp); xmit_drop_count = u64_stats_read(&nt->stats.xmit_drop_count); enomem_count = u64_stats_read(&nt->stats.enomem_count); } while (u64_stats_fetch_retry(&nt->stats.syncp, start)); return sysfs_emit(buf, "%llu\n", xmit_drop_count + enomem_count); } /* configfs helper to display if cpu_nr sysdata feature is enabled */ static ssize_t sysdata_cpu_nr_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool cpu_nr_enabled; mutex_lock(&dynamic_netconsole_mutex); cpu_nr_enabled = !!(nt->sysdata_fields & SYSDATA_CPU_NR); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", cpu_nr_enabled); } /* configfs helper to display if taskname sysdata feature is enabled */ static ssize_t sysdata_taskname_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool taskname_enabled; mutex_lock(&dynamic_netconsole_mutex); taskname_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", taskname_enabled); } static ssize_t sysdata_release_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool release_enabled; mutex_lock(&dynamic_netconsole_mutex); release_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", release_enabled); } /* Iterate in the list of target, and make sure we don't have any console * register without targets of the same type */ static void unregister_netcons_consoles(void) { struct netconsole_target *nt; u32 console_type_needed = 0; unsigned long flags; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (nt->extended) console_type_needed |= CONS_EXTENDED; else console_type_needed |= CONS_BASIC; } spin_unlock_irqrestore(&target_list_lock, flags); if (!(console_type_needed & CONS_EXTENDED) && console_is_registered(&netconsole_ext)) unregister_console(&netconsole_ext); if (!(console_type_needed & CONS_BASIC) && console_is_registered(&netconsole)) unregister_console(&netconsole); } static ssize_t sysdata_msgid_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool msgid_enabled; mutex_lock(&dynamic_netconsole_mutex); msgid_enabled = !!(nt->sysdata_fields & SYSDATA_MSGID); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", msgid_enabled); } /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. * The user is expected to set the desired parameters first (which * would enable him to dynamically add new netpoll targets for new * network interfaces as and when they come up). */ static ssize_t enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); unsigned long flags; bool enabled; ssize_t ret; mutex_lock(&dynamic_netconsole_mutex); ret = kstrtobool(buf, &enabled); if (ret) goto out_unlock; ret = -EINVAL; if (enabled == nt->enabled) { pr_info("network logging has already %s\n", nt->enabled ? "started" : "stopped"); goto out_unlock; } if (enabled) { /* true */ if (nt->release && !nt->extended) { pr_err("Not enabling netconsole. Release feature requires extended log message"); goto out_unlock; } if (nt->extended && !console_is_registered(&netconsole_ext)) { netconsole_ext.flags |= CON_ENABLED; register_console(&netconsole_ext); } /* User might be enabling the basic format target for the very * first time, make sure the console is registered. */ if (!nt->extended && !console_is_registered(&netconsole)) { netconsole.flags |= CON_ENABLED; register_console(&netconsole); } /* * Skip netconsole_parser_cmdline() -- all the attributes are * already configured via configfs. Just print them out. */ netconsole_print_banner(&nt->np); ret = netpoll_setup(&nt->np); if (ret) goto out_unlock; nt->enabled = true; pr_info("network logging started\n"); } else { /* false */ /* We need to disable the netconsole before cleaning it up * otherwise we might end up in write_msg() with * nt->np.dev == NULL and nt->enabled == true */ mutex_lock(&target_cleanup_list_lock); spin_lock_irqsave(&target_list_lock, flags); nt->enabled = false; /* Remove the target from the list, while holding * target_list_lock */ list_move(&nt->list, &target_cleanup_list); spin_unlock_irqrestore(&target_list_lock, flags); mutex_unlock(&target_cleanup_list_lock); /* Unregister consoles, whose the last target of that type got * disabled. */ unregister_netcons_consoles(); } ret = strnlen(buf, count); /* Deferred cleanup */ netconsole_process_cleanups(); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t release_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); bool release; ssize_t ret; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); ret = -EINVAL; goto out_unlock; } ret = kstrtobool(buf, &release); if (ret) goto out_unlock; nt->release = release; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t extended_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); bool extended; ssize_t ret; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); ret = -EINVAL; goto out_unlock; } ret = kstrtobool(buf, &extended); if (ret) goto out_unlock; nt->extended = extended; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t dev_name_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); mutex_unlock(&dynamic_netconsole_mutex); return -EINVAL; } strscpy(nt->np.dev_name, buf, IFNAMSIZ); trim_newline(nt->np.dev_name, IFNAMSIZ); mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); } static ssize_t local_port_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ret = kstrtou16(buf, 10, &nt->np.local_port); if (ret < 0) goto out_unlock; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t remote_port_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ret = kstrtou16(buf, 10, &nt->np.remote_port); if (ret < 0) goto out_unlock; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t local_ip_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; int ipv6; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ipv6 = netpoll_parse_ip_addr(buf, &nt->np.local_ip); if (ipv6 == -1) goto out_unlock; nt->np.ipv6 = !!ipv6; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t remote_ip_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; int ipv6; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ipv6 = netpoll_parse_ip_addr(buf, &nt->np.remote_ip); if (ipv6 == -1) goto out_unlock; nt->np.ipv6 = !!ipv6; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } /* Count number of entries we have in extradata. * This is important because the extradata_complete only supports * MAX_EXTRADATA_ITEMS entries. Before enabling any new {user,sys}data * feature, number of entries needs to checked for available space. */ static size_t count_extradata_entries(struct netconsole_target *nt) { size_t entries; /* Userdata entries */ entries = list_count_nodes(&nt->userdata_group.cg_children); /* Plus sysdata entries */ if (nt->sysdata_fields & SYSDATA_CPU_NR) entries += 1; if (nt->sysdata_fields & SYSDATA_TASKNAME) entries += 1; if (nt->sysdata_fields & SYSDATA_RELEASE) entries += 1; if (nt->sysdata_fields & SYSDATA_MSGID) entries += 1; return entries; } static ssize_t remote_mac_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); u8 remote_mac[ETH_ALEN]; ssize_t ret = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } if (!mac_pton(buf, remote_mac)) goto out_unlock; if (buf[MAC_ADDR_STR_LEN] && buf[MAC_ADDR_STR_LEN] != '\n') goto out_unlock; memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } struct userdatum { struct config_item item; char value[MAX_EXTRADATA_VALUE_LEN]; }; static struct userdatum *to_userdatum(struct config_item *item) { return container_of(item, struct userdatum, item); } struct userdata { struct config_group group; }; static struct userdata *to_userdata(struct config_item *item) { return container_of(to_config_group(item), struct userdata, group); } static struct netconsole_target *userdata_to_target(struct userdata *ud) { struct config_group *netconsole_group; netconsole_group = to_config_group(ud->group.cg_item.ci_parent); return to_target(&netconsole_group->cg_item); } static ssize_t userdatum_value_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%s\n", &(to_userdatum(item)->value[0])); } static void update_userdata(struct netconsole_target *nt) { struct list_head *entry; int child_count = 0; unsigned long flags; spin_lock_irqsave(&target_list_lock, flags); /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; nt->extradata_complete[0] = 0; list_for_each(entry, &nt->userdata_group.cg_children) { struct userdatum *udm_item; struct config_item *item; if (child_count >= MAX_EXTRADATA_ITEMS) { spin_unlock_irqrestore(&target_list_lock, flags); WARN_ON_ONCE(1); return; } child_count++; item = container_of(entry, struct config_item, ci_entry); udm_item = to_userdatum(item); /* Skip userdata with no value set */ if (strnlen(udm_item->value, MAX_EXTRADATA_VALUE_LEN) == 0) continue; /* This doesn't overflow extradata_complete since it will write * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ nt->userdata_length += scnprintf(&nt->extradata_complete[nt->userdata_length], MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", item->ci_name, udm_item->value); } spin_unlock_irqrestore(&target_list_lock, flags); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, size_t count) { struct userdatum *udm = to_userdatum(item); struct netconsole_target *nt; struct userdata *ud; ssize_t ret; if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); ret = strscpy(udm->value, buf, sizeof(udm->value)); if (ret < 0) goto out_unlock; trim_newline(udm->value, sizeof(udm->value)); ud = to_userdata(item->ci_parent); nt = userdata_to_target(ud); update_userdata(nt); ret = count; out_unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } /* disable_sysdata_feature - Disable sysdata feature and clean sysdata * @nt: target that is disabling the feature * @feature: feature being disabled */ static void disable_sysdata_feature(struct netconsole_target *nt, enum sysdata_feature feature) { nt->sysdata_fields &= ~feature; nt->extradata_complete[nt->userdata_length] = 0; } static ssize_t sysdata_msgid_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool msgid_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &msgid_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_MSGID); if (msgid_enabled == curr) goto unlock_ok; if (msgid_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { ret = -ENOSPC; goto unlock; } if (msgid_enabled) nt->sysdata_fields |= SYSDATA_MSGID; else disable_sysdata_feature(nt, SYSDATA_MSGID); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } static ssize_t sysdata_release_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool release_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &release_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_RELEASE); if (release_enabled == curr) goto unlock_ok; if (release_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { ret = -ENOSPC; goto unlock; } if (release_enabled) nt->sysdata_fields |= SYSDATA_RELEASE; else disable_sysdata_feature(nt, SYSDATA_RELEASE); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } static ssize_t sysdata_taskname_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool taskname_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &taskname_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_TASKNAME); if (taskname_enabled == curr) goto unlock_ok; if (taskname_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { ret = -ENOSPC; goto unlock; } if (taskname_enabled) nt->sysdata_fields |= SYSDATA_TASKNAME; else disable_sysdata_feature(nt, SYSDATA_TASKNAME); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } /* configfs helper to sysdata cpu_nr feature */ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool cpu_nr_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &cpu_nr_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_CPU_NR); if (cpu_nr_enabled == curr) /* no change requested */ goto unlock_ok; if (cpu_nr_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { /* user wants the new feature, but there is no space in the * buffer. */ ret = -ENOSPC; goto unlock; } if (cpu_nr_enabled) nt->sysdata_fields |= SYSDATA_CPU_NR; else /* This is special because extradata_complete might have * remaining data from previous sysdata, and it needs to be * cleaned. */ disable_sysdata_feature(nt, SYSDATA_CPU_NR); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } CONFIGFS_ATTR(userdatum_, value); CONFIGFS_ATTR(sysdata_, cpu_nr_enabled); CONFIGFS_ATTR(sysdata_, taskname_enabled); CONFIGFS_ATTR(sysdata_, release_enabled); CONFIGFS_ATTR(sysdata_, msgid_enabled); static struct configfs_attribute *userdatum_attrs[] = { &userdatum_attr_value, NULL, }; static void userdatum_release(struct config_item *item) { kfree(to_userdatum(item)); } static struct configfs_item_operations userdatum_ops = { .release = userdatum_release, }; static const struct config_item_type userdatum_type = { .ct_item_ops = &userdatum_ops, .ct_attrs = userdatum_attrs, .ct_owner = THIS_MODULE, }; static struct config_item *userdatum_make_item(struct config_group *group, const char *name) { struct netconsole_target *nt; struct userdatum *udm; struct userdata *ud; if (strlen(name) > MAX_EXTRADATA_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); if (count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) return ERR_PTR(-ENOSPC); udm = kzalloc(sizeof(*udm), GFP_KERNEL); if (!udm) return ERR_PTR(-ENOMEM); config_item_init_type_name(&udm->item, name, &userdatum_type); return &udm->item; } static void userdatum_drop(struct config_group *group, struct config_item *item) { struct netconsole_target *nt; struct userdata *ud; ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); mutex_lock(&dynamic_netconsole_mutex); update_userdata(nt); config_item_put(item); mutex_unlock(&dynamic_netconsole_mutex); } static struct configfs_attribute *userdata_attrs[] = { &sysdata_attr_cpu_nr_enabled, &sysdata_attr_taskname_enabled, &sysdata_attr_release_enabled, &sysdata_attr_msgid_enabled, NULL, }; static struct configfs_group_operations userdata_ops = { .make_item = userdatum_make_item, .drop_item = userdatum_drop, }; static const struct config_item_type userdata_type = { .ct_item_ops = &userdatum_ops, .ct_group_ops = &userdata_ops, .ct_attrs = userdata_attrs, .ct_owner = THIS_MODULE, }; CONFIGFS_ATTR(, enabled); CONFIGFS_ATTR(, extended); CONFIGFS_ATTR(, dev_name); CONFIGFS_ATTR(, local_port); CONFIGFS_ATTR(, remote_port); CONFIGFS_ATTR(, local_ip); CONFIGFS_ATTR(, remote_ip); CONFIGFS_ATTR_RO(, local_mac); CONFIGFS_ATTR(, remote_mac); CONFIGFS_ATTR(, release); CONFIGFS_ATTR_RO(, transmit_errors); static struct configfs_attribute *netconsole_target_attrs[] = { &attr_enabled, &attr_extended, &attr_release, &attr_dev_name, &attr_local_port, &attr_remote_port, &attr_local_ip, &attr_remote_ip, &attr_local_mac, &attr_remote_mac, &attr_transmit_errors, NULL, }; /* * Item operations and type for netconsole_target. */ static void netconsole_target_release(struct config_item *item) { kfree(to_target(item)); } static struct configfs_item_operations netconsole_target_item_ops = { .release = netconsole_target_release, }; static const struct config_item_type netconsole_target_type = { .ct_attrs = netconsole_target_attrs, .ct_item_ops = &netconsole_target_item_ops, .ct_owner = THIS_MODULE, }; static void init_target_config_group(struct netconsole_target *nt, const char *name) { config_group_init_type_name(&nt->group, name, &netconsole_target_type); config_group_init_type_name(&nt->userdata_group, "userdata", &userdata_type); configfs_add_default_group(&nt->userdata_group, &nt->group); } static struct netconsole_target *find_cmdline_target(const char *name) { struct netconsole_target *nt, *ret = NULL; unsigned long flags; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (!strcmp(nt->group.cg_item.ci_name, name)) { ret = nt; break; } } spin_unlock_irqrestore(&target_list_lock, flags); return ret; } /* * Group operations and type for netconsole_subsys. */ static struct config_group *make_netconsole_target(struct config_group *group, const char *name) { struct netconsole_target *nt; unsigned long flags; /* Checking if a target by this name was created at boot time. If so, * attach a configfs entry to that target. This enables dynamic * control. */ if (!strncmp(name, NETCONSOLE_PARAM_TARGET_PREFIX, strlen(NETCONSOLE_PARAM_TARGET_PREFIX))) { nt = find_cmdline_target(name); if (nt) { init_target_config_group(nt, name); return &nt->group; } } nt = alloc_and_init(); if (!nt) return ERR_PTR(-ENOMEM); /* Initialize the config_group member */ init_target_config_group(nt, name); /* Adding, but it is disabled */ spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); return &nt->group; } static void drop_netconsole_target(struct config_group *group, struct config_item *item) { unsigned long flags; struct netconsole_target *nt = to_target(item); spin_lock_irqsave(&target_list_lock, flags); list_del(&nt->list); spin_unlock_irqrestore(&target_list_lock, flags); /* * The target may have never been enabled, or was manually disabled * before being removed so netpoll may have already been cleaned up. */ if (nt->enabled) netpoll_cleanup(&nt->np); config_item_put(&nt->group.cg_item); } static struct configfs_group_operations netconsole_subsys_group_ops = { .make_group = make_netconsole_target, .drop_item = drop_netconsole_target, }; static const struct config_item_type netconsole_subsys_type = { .ct_group_ops = &netconsole_subsys_group_ops, .ct_owner = THIS_MODULE, }; /* The netconsole configfs subsystem */ static struct configfs_subsystem netconsole_subsys = { .su_group = { .cg_item = { .ci_namebuf = "netconsole", .ci_type = &netconsole_subsys_type, }, }, }; static void populate_configfs_item(struct netconsole_target *nt, int cmdline_count) { char target_name[16]; snprintf(target_name, sizeof(target_name), "%s%d", NETCONSOLE_PARAM_TARGET_PREFIX, cmdline_count); init_target_config_group(nt, target_name); } static int sysdata_append_cpu_nr(struct netconsole_target *nt, int offset) { /* Append cpu=%d at extradata_complete after userdata str */ return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n", raw_smp_processor_id()); } static int sysdata_append_taskname(struct netconsole_target *nt, int offset) { return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " taskname=%s\n", current->comm); } static int sysdata_append_release(struct netconsole_target *nt, int offset) { return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " release=%s\n", init_utsname()->release); } static int sysdata_append_msgid(struct netconsole_target *nt, int offset) { wrapping_assign_add(nt->msgcounter, 1); return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " msgid=%u\n", nt->msgcounter); } /* * prepare_extradata - append sysdata at extradata_complete in runtime * @nt: target to send message to */ static int prepare_extradata(struct netconsole_target *nt) { int extradata_len; /* userdata was appended when configfs write helper was called * by update_userdata(). */ extradata_len = nt->userdata_length; if (!nt->sysdata_fields) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) extradata_len += sysdata_append_cpu_nr(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_TASKNAME) extradata_len += sysdata_append_taskname(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_RELEASE) extradata_len += sysdata_append_release(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_MSGID) extradata_len += sysdata_append_msgid(nt, extradata_len); WARN_ON_ONCE(extradata_len > MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS); out: return extradata_len; } #else /* CONFIG_NETCONSOLE_DYNAMIC not set */ static int prepare_extradata(struct netconsole_target *nt) { return 0; } #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Handle network interface device notifications */ static int netconsole_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { unsigned long flags; struct netconsole_target *nt, *tmp; struct net_device *dev = netdev_notifier_info_to_dev(ptr); bool stopped = false; if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || event == NETDEV_RELEASE || event == NETDEV_JOIN)) goto done; mutex_lock(&target_cleanup_list_lock); spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry_safe(nt, tmp, &target_list, list) { netconsole_target_get(nt); if (nt->np.dev == dev) { switch (event) { case NETDEV_CHANGENAME: strscpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; case NETDEV_RELEASE: case NETDEV_JOIN: case NETDEV_UNREGISTER: nt->enabled = false; list_move(&nt->list, &target_cleanup_list); stopped = true; } } netconsole_target_put(nt); } spin_unlock_irqrestore(&target_list_lock, flags); mutex_unlock(&target_cleanup_list_lock); if (stopped) { const char *msg = "had an event"; switch (event) { case NETDEV_UNREGISTER: msg = "unregistered"; break; case NETDEV_RELEASE: msg = "released slaves"; break; case NETDEV_JOIN: msg = "is joining a master device"; break; } pr_info("network logging stopped on interface %s as it %s\n", dev->name, msg); } /* Process target_cleanup_list entries. By the end, target_cleanup_list * should be empty */ netconsole_process_cleanups_core(); done: return NOTIFY_DONE; } static struct notifier_block netconsole_netdev_notifier = { .notifier_call = netconsole_netdev_event, }; /** * send_udp - Wrapper for netpoll_send_udp that counts errors * @nt: target to send message to * @msg: message to send * @len: length of message * * Calls netpoll_send_udp and classifies the return value. If an error * occurred it increments statistics in nt->stats accordingly. * Only calls netpoll_send_udp if CONFIG_NETCONSOLE_DYNAMIC is disabled. */ static void send_udp(struct netconsole_target *nt, const char *msg, int len) { int result = netpoll_send_udp(&nt->np, msg, len); if (IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) { if (result == NET_XMIT_DROP) { u64_stats_update_begin(&nt->stats.syncp); u64_stats_inc(&nt->stats.xmit_drop_count); u64_stats_update_end(&nt->stats.syncp); } else if (result == -ENOMEM) { u64_stats_update_begin(&nt->stats.syncp); u64_stats_inc(&nt->stats.enomem_count); u64_stats_update_end(&nt->stats.syncp); } } } static void send_msg_no_fragmentation(struct netconsole_target *nt, const char *msg, int msg_len, int release_len) { const char *extradata = NULL; const char *release; #ifdef CONFIG_NETCONSOLE_DYNAMIC extradata = nt->extradata_complete; #endif if (release_len) { release = init_utsname()->release; scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); msg_len += release_len; } else { memcpy(nt->buf, msg, msg_len); } if (extradata) msg_len += scnprintf(&nt->buf[msg_len], MAX_PRINT_CHUNK - msg_len, "%s", extradata); send_udp(nt, nt->buf, msg_len); } static void append_release(char *buf) { const char *release; release = init_utsname()->release; scnprintf(buf, MAX_PRINT_CHUNK, "%s,", release); } static void send_fragmented_body(struct netconsole_target *nt, const char *msgbody, int header_len, int msgbody_len, int extradata_len) { int sent_extradata, preceding_bytes; const char *extradata = NULL; int body_len, offset = 0; #ifdef CONFIG_NETCONSOLE_DYNAMIC extradata = nt->extradata_complete; #endif /* body_len represents the number of bytes that will be sent. This is * bigger than MAX_PRINT_CHUNK, thus, it will be split in multiple * packets */ body_len = msgbody_len + extradata_len; /* In each iteration of the while loop below, we send a packet * containing the header and a portion of the body. The body is * composed of two parts: msgbody and extradata. We keep track of how * many bytes have been sent so far using the offset variable, which * ranges from 0 to the total length of the body. */ while (offset < body_len) { int this_header = header_len; bool msgbody_written = false; int this_offset = 0; int this_chunk = 0; this_header += scnprintf(nt->buf + this_header, MAX_PRINT_CHUNK - this_header, ",ncfrag=%d/%d;", offset, body_len); /* Not all msgbody data has been written yet */ if (offset < msgbody_len) { this_chunk = min(msgbody_len - offset, MAX_PRINT_CHUNK - this_header); if (WARN_ON_ONCE(this_chunk <= 0)) return; memcpy(nt->buf + this_header, msgbody + offset, this_chunk); this_offset += this_chunk; } /* msgbody was finally written, either in the previous * messages and/or in the current buf. Time to write * the extradata. */ msgbody_written |= offset + this_offset >= msgbody_len; /* Msg body is fully written and there is pending extradata to * write, append extradata in this chunk */ if (msgbody_written && offset + this_offset < body_len) { /* Track how much user data was already sent. First * time here, sent_userdata is zero */ sent_extradata = (offset + this_offset) - msgbody_len; /* offset of bytes used in current buf */ preceding_bytes = this_chunk + this_header; if (WARN_ON_ONCE(sent_extradata < 0)) return; this_chunk = min(extradata_len - sent_extradata, MAX_PRINT_CHUNK - preceding_bytes); if (WARN_ON_ONCE(this_chunk < 0)) /* this_chunk could be zero if all the previous * message used all the buffer. This is not a * problem, extradata will be sent in the next * iteration */ return; memcpy(nt->buf + this_header + this_offset, extradata + sent_extradata, this_chunk); this_offset += this_chunk; } send_udp(nt, nt->buf, this_header + this_offset); offset += this_offset; } } static void send_msg_fragmented(struct netconsole_target *nt, const char *msg, int msg_len, int release_len, int extradata_len) { int header_len, msgbody_len; const char *msgbody; /* need to insert extra header fields, detect header and msgbody */ msgbody = memchr(msg, ';', msg_len); if (WARN_ON_ONCE(!msgbody)) return; header_len = msgbody - msg; msgbody_len = msg_len - header_len - 1; msgbody++; /* * Transfer multiple chunks with the following extra header. * "ncfrag=<byte-offset>/<total-bytes>" */ if (release_len) append_release(nt->buf); /* Copy the header into the buffer */ memcpy(nt->buf + release_len, msg, header_len); header_len += release_len; /* for now on, the header will be persisted, and the msgbody * will be replaced */ send_fragmented_body(nt, msgbody, header_len, msgbody_len, extradata_len); } /** * send_ext_msg_udp - send extended log message to target * @nt: target to send message to * @msg: extended log message to send * @msg_len: length of message * * Transfer extended log @msg to @nt. If @msg is longer than * MAX_PRINT_CHUNK, it'll be split and transmitted in multiple chunks with * ncfrag header field added to identify them. */ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, int msg_len) { int release_len = 0; int extradata_len; extradata_len = prepare_extradata(nt); if (nt->release) release_len = strlen(init_utsname()->release) + 1; if (msg_len + release_len + extradata_len <= MAX_PRINT_CHUNK) return send_msg_no_fragmentation(nt, msg, msg_len, release_len); return send_msg_fragmented(nt, msg, msg_len, release_len, extradata_len); } static void write_ext_msg(struct console *con, const char *msg, unsigned int len) { struct netconsole_target *nt; unsigned long flags; if ((oops_only && !oops_in_progress) || list_empty(&target_list)) return; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) if (nt->extended && nt->enabled && netif_running(nt->np.dev)) send_ext_msg_udp(nt, msg, len); spin_unlock_irqrestore(&target_list_lock, flags); } static void write_msg(struct console *con, const char *msg, unsigned int len) { int frag, left; unsigned long flags; struct netconsole_target *nt; const char *tmp; if (oops_only && !oops_in_progress) return; /* Avoid taking lock and disabling interrupts unnecessarily */ if (list_empty(&target_list)) return; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (!nt->extended && nt->enabled && netif_running(nt->np.dev)) { /* * We nest this inside the for-each-target loop above * so that we're able to get as much logging out to * at least one target if we die inside here, instead * of unnecessarily keeping all targets in lock-step. */ tmp = msg; for (left = len; left;) { frag = min(left, MAX_PRINT_CHUNK); send_udp(nt, tmp, frag); tmp += frag; left -= frag; } } } spin_unlock_irqrestore(&target_list_lock, flags); } static int netconsole_parser_cmdline(struct netpoll *np, char *opt) { bool ipversion_set = false; char *cur = opt; char *delim; int ipv6; if (*cur != '@') { delim = strchr(cur, '@'); if (!delim) goto parse_failed; *delim = 0; if (kstrtou16(cur, 10, &np->local_port)) goto parse_failed; cur = delim; } cur++; if (*cur != '/') { ipversion_set = true; delim = strchr(cur, '/'); if (!delim) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); if (ipv6 < 0) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim; } cur++; if (*cur != ',') { /* parse out dev_name or dev_mac */ delim = strchr(cur, ','); if (!delim) goto parse_failed; *delim = 0; np->dev_name[0] = '\0'; eth_broadcast_addr(np->dev_mac); if (!strchr(cur, ':')) strscpy(np->dev_name, cur, sizeof(np->dev_name)); else if (!mac_pton(cur, np->dev_mac)) goto parse_failed; cur = delim; } cur++; if (*cur != '@') { /* dst port */ delim = strchr(cur, '@'); if (!delim) goto parse_failed; *delim = 0; if (*cur == ' ' || *cur == '\t') np_info(np, "warning: whitespace is not allowed\n"); if (kstrtou16(cur, 10, &np->remote_port)) goto parse_failed; cur = delim; } cur++; /* dst ip */ delim = strchr(cur, '/'); if (!delim) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); if (ipv6 < 0) goto parse_failed; else if (ipversion_set && np->ipv6 != (bool)ipv6) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { /* MAC address */ if (!mac_pton(cur, np->remote_mac)) goto parse_failed; } netconsole_print_banner(np); return 0; parse_failed: np_info(np, "couldn't parse config at '%s'!\n", cur); return -1; } /* Allocate new target (from boot/module param) and setup netpoll for it */ static struct netconsole_target *alloc_param_target(char *target_config, int cmdline_count) { struct netconsole_target *nt; int err; nt = alloc_and_init(); if (!nt) { err = -ENOMEM; goto fail; } if (*target_config == '+') { nt->extended = true; target_config++; } if (*target_config == 'r') { if (!nt->extended) { pr_err("Netconsole configuration error. Release feature requires extended log message"); err = -EINVAL; goto fail; } nt->release = true; target_config++; } /* Parse parameters and setup netpoll */ err = netconsole_parser_cmdline(&nt->np, target_config); if (err) goto fail; err = netpoll_setup(&nt->np); if (err) { pr_err("Not enabling netconsole for %s%d. Netpoll setup failed\n", NETCONSOLE_PARAM_TARGET_PREFIX, cmdline_count); if (!IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) /* only fail if dynamic reconfiguration is set, * otherwise, keep the target in the list, but disabled. */ goto fail; } else { nt->enabled = true; } populate_configfs_item(nt, cmdline_count); return nt; fail: kfree(nt); return ERR_PTR(err); } /* Cleanup netpoll for given target (from boot/module param) and free it */ static void free_param_target(struct netconsole_target *nt) { netpoll_cleanup(&nt->np); kfree(nt); } static struct console netconsole_ext = { .name = "netcon_ext", .flags = CON_ENABLED | CON_EXTENDED, .write = write_ext_msg, }; static struct console netconsole = { .name = "netcon", .flags = CON_ENABLED, .write = write_msg, }; static int __init init_netconsole(void) { int err; struct netconsole_target *nt, *tmp; u32 console_type_needed = 0; unsigned int count = 0; unsigned long flags; char *target_config; char *input = config; if (strnlen(input, MAX_PARAM_LENGTH)) { while ((target_config = strsep(&input, ";"))) { nt = alloc_param_target(target_config, count); if (IS_ERR(nt)) { if (IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) continue; err = PTR_ERR(nt); goto fail; } /* Dump existing printks when we register */ if (nt->extended) { console_type_needed |= CONS_EXTENDED; netconsole_ext.flags |= CON_PRINTBUFFER; } else { console_type_needed |= CONS_BASIC; netconsole.flags |= CON_PRINTBUFFER; } spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); count++; } } err = register_netdevice_notifier(&netconsole_netdev_notifier); if (err) goto fail; err = dynamic_netconsole_init(); if (err) goto undonotifier; if (console_type_needed & CONS_EXTENDED) register_console(&netconsole_ext); if (console_type_needed & CONS_BASIC) register_console(&netconsole); pr_info("network logging started\n"); return err; undonotifier: unregister_netdevice_notifier(&netconsole_netdev_notifier); fail: pr_err("cleaning up\n"); /* * Remove all targets and destroy them (only targets created * from the boot/module option exist here). Skipping the list * lock is safe here, and netpoll_cleanup() will sleep. */ list_for_each_entry_safe(nt, tmp, &target_list, list) { list_del(&nt->list); free_param_target(nt); } return err; } static void __exit cleanup_netconsole(void) { struct netconsole_target *nt, *tmp; if (console_is_registered(&netconsole_ext)) unregister_console(&netconsole_ext); if (console_is_registered(&netconsole)) unregister_console(&netconsole); dynamic_netconsole_exit(); unregister_netdevice_notifier(&netconsole_netdev_notifier); /* * Targets created via configfs pin references on our module * and would first be rmdir(2)'ed from userspace. We reach * here only when they are already destroyed, and only those * created from the boot/module option are left, so remove and * destroy them. Skipping the list lock is safe here, and * netpoll_cleanup() will sleep. */ list_for_each_entry_safe(nt, tmp, &target_list, list) { list_del(&nt->list); free_param_target(nt); } } /* * Use late_initcall to ensure netconsole is * initialized after network device driver if built-in. * * late_initcall() and module_init() are identical if built as module. */ late_initcall(init_netconsole); module_exit(cleanup_netconsole);
2 6 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KSTRTOX_H #define _LINUX_KSTRTOX_H #include <linux/compiler.h> #include <linux/types.h> /* Internal, do not use. */ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); int __must_check _kstrtol(const char *s, unsigned int base, long *res); int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); int __must_check kstrtoll(const char *s, unsigned int base, long long *res); /** * kstrtoul - convert a string to an unsigned long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign, but not a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { /* * We want to shortcut function call, but * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. */ if (sizeof(unsigned long) == sizeof(unsigned long long) && __alignof__(unsigned long) == __alignof__(unsigned long long)) return kstrtoull(s, base, (unsigned long long *)res); else return _kstrtoul(s, base, res); } /** * kstrtol - convert a string to a long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign or a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtol(). Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { /* * We want to shortcut function call, but * __builtin_types_compatible_p(long, long long) = 0. */ if (sizeof(long) == sizeof(long long) && __alignof__(long) == __alignof__(long long)) return kstrtoll(s, base, (long long *)res); else return _kstrtol(s, base, res); } int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); int __must_check kstrtoint(const char *s, unsigned int base, int *res); static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res) { return kstrtoull(s, base, res); } static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res) { return kstrtoll(s, base, res); } static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res) { return kstrtouint(s, base, res); } static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res) { return kstrtoint(s, base, res); } int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res); int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res); int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res); int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res); int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res); int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { return kstrtoull_from_user(s, count, base, res); } static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res) { return kstrtoll_from_user(s, count, base, res); } static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res) { return kstrtouint_from_user(s, count, base, res); } static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res) { return kstrtoint_from_user(s, count, base, res); } /* * Use kstrto<foo> instead. * * NOTE: simple_strto<foo> does not check for the range overflow and, * depending on the input, may give interesting results. * * Use these functions if and only if you cannot use kstrto<foo>, because * the conversion ends on the first non-digit character, which may be far * beyond the supported range. It might be useful to parse the strings like * 10x50 or 12:21 without altering original string or temporary buffer in use. * Keep in mind above caveat. */ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern unsigned long simple_strntoul(const char *,char **,unsigned int,size_t); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); #endif /* _LINUX_KSTRTOX_H */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 // SPDX-License-Identifier: GPL-2.0-only /* * VFIO core * * Copyright (C) 2012 Red Hat, Inc. All rights reserved. * Author: Alex Williamson <alex.williamson@redhat.com> * * Derived from original vfio: * Copyright 2010 Cisco Systems, Inc. All rights reserved. * Author: Tom Lyon, pugs@cisco.com */ #include <linux/cdev.h> #include <linux/compat.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/idr.h> #include <linux/iommu.h> #if IS_ENABLED(CONFIG_KVM) #include <linux/kvm_host.h> #endif #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/mutex.h> #include <linux/pci.h> #include <linux/pseudo_fs.h> #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/uaccess.h> #include <linux/vfio.h> #include <linux/wait.h> #include <linux/sched/signal.h> #include <linux/pm_runtime.h> #include <linux/interval_tree.h> #include <linux/iova_bitmap.h> #include <linux/iommufd.h> #include "vfio.h" #define DRIVER_VERSION "0.3" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_DESC "VFIO - User Level meta-driver" #define VFIO_MAGIC 0x5646494f /* "VFIO" */ static struct vfio { struct class *device_class; struct ida device_ida; struct vfsmount *vfs_mount; int fs_count; } vfio; #ifdef CONFIG_VFIO_NOIOMMU bool vfio_noiommu __read_mostly; module_param_named(enable_unsafe_noiommu_mode, vfio_noiommu, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); #endif static DEFINE_XARRAY(vfio_device_set_xa); int vfio_assign_device_set(struct vfio_device *device, void *set_id) { unsigned long idx = (unsigned long)set_id; struct vfio_device_set *new_dev_set; struct vfio_device_set *dev_set; if (WARN_ON(!set_id)) return -EINVAL; /* * Atomically acquire a singleton object in the xarray for this set_id */ xa_lock(&vfio_device_set_xa); dev_set = xa_load(&vfio_device_set_xa, idx); if (dev_set) goto found_get_ref; xa_unlock(&vfio_device_set_xa); new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); if (!new_dev_set) return -ENOMEM; mutex_init(&new_dev_set->lock); INIT_LIST_HEAD(&new_dev_set->device_list); new_dev_set->set_id = set_id; xa_lock(&vfio_device_set_xa); dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, GFP_KERNEL); if (!dev_set) { dev_set = new_dev_set; goto found_get_ref; } kfree(new_dev_set); if (xa_is_err(dev_set)) { xa_unlock(&vfio_device_set_xa); return xa_err(dev_set); } found_get_ref: dev_set->device_count++; xa_unlock(&vfio_device_set_xa); mutex_lock(&dev_set->lock); device->dev_set = dev_set; list_add_tail(&device->dev_set_list, &dev_set->device_list); mutex_unlock(&dev_set->lock); return 0; } EXPORT_SYMBOL_GPL(vfio_assign_device_set); static void vfio_release_device_set(struct vfio_device *device) { struct vfio_device_set *dev_set = device->dev_set; if (!dev_set) return; mutex_lock(&dev_set->lock); list_del(&device->dev_set_list); mutex_unlock(&dev_set->lock); xa_lock(&vfio_device_set_xa); if (!--dev_set->device_count) { __xa_erase(&vfio_device_set_xa, (unsigned long)dev_set->set_id); mutex_destroy(&dev_set->lock); kfree(dev_set); } xa_unlock(&vfio_device_set_xa); } unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) { struct vfio_device *cur; unsigned int open_count = 0; lockdep_assert_held(&dev_set->lock); list_for_each_entry(cur, &dev_set->device_list, dev_set_list) open_count += cur->open_count; return open_count; } EXPORT_SYMBOL_GPL(vfio_device_set_open_count); struct vfio_device * vfio_find_device_in_devset(struct vfio_device_set *dev_set, struct device *dev) { struct vfio_device *cur; lockdep_assert_held(&dev_set->lock); list_for_each_entry(cur, &dev_set->device_list, dev_set_list) if (cur->dev == dev) return cur; return NULL; } EXPORT_SYMBOL_GPL(vfio_find_device_in_devset); /* * Device objects - create, release, get, put, search */ /* Device reference always implies a group reference */ void vfio_device_put_registration(struct vfio_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->comp); } bool vfio_device_try_get_registration(struct vfio_device *device) { return refcount_inc_not_zero(&device->refcount); } /* * VFIO driver API */ /* Release helper called by vfio_put_device() */ static void vfio_device_release(struct device *dev) { struct vfio_device *device = container_of(dev, struct vfio_device, device); vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); if (device->ops->release) device->ops->release(device); iput(device->inode); simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); kvfree(device); } static int vfio_init_device(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); /* * Allocate and initialize vfio_device so it can be registered to vfio * core. * * Drivers should use the wrapper vfio_alloc_device() for allocation. * @size is the size of the structure to be allocated, including any * private data used by the driver. * * Driver may provide an @init callback to cover device private data. * * Use vfio_put_device() to release the structure after success return. */ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, const struct vfio_device_ops *ops) { struct vfio_device *device; int ret; if (WARN_ON(size < sizeof(struct vfio_device))) return ERR_PTR(-EINVAL); device = kvzalloc(size, GFP_KERNEL); if (!device) return ERR_PTR(-ENOMEM); ret = vfio_init_device(device, dev, ops); if (ret) goto out_free; return device; out_free: kvfree(device); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(_vfio_alloc_device); static int vfio_fs_init_fs_context(struct fs_context *fc) { return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM; } static struct file_system_type vfio_fs_type = { .name = "vfio", .owner = THIS_MODULE, .init_fs_context = vfio_fs_init_fs_context, .kill_sb = kill_anon_super, }; static struct inode *vfio_fs_inode_new(void) { struct inode *inode; int ret; ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count); if (ret) return ERR_PTR(ret); inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb); if (IS_ERR(inode)) simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); return inode; } /* * Initialize a vfio_device so it can be registered to vfio core. */ static int vfio_init_device(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops) { int ret; ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL); if (ret < 0) { dev_dbg(dev, "Error to alloc index\n"); return ret; } device->index = ret; init_completion(&device->comp); device->dev = dev; device->ops = ops; device->inode = vfio_fs_inode_new(); if (IS_ERR(device->inode)) { ret = PTR_ERR(device->inode); goto out_inode; } if (ops->init) { ret = ops->init(device); if (ret) goto out_uninit; } device_initialize(&device->device); device->device.release = vfio_device_release; device->device.class = vfio.device_class; device->device.parent = device->dev; return 0; out_uninit: iput(device->inode); simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); out_inode: vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); return ret; } static int __vfio_register_dev(struct vfio_device *device, enum vfio_group_type type) { int ret; if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) && (!device->ops->bind_iommufd || !device->ops->unbind_iommufd || !device->ops->attach_ioas || !device->ops->detach_ioas))) return -EINVAL; /* * If the driver doesn't specify a set then the device is added to a * singleton set just for itself. */ if (!device->dev_set) vfio_assign_device_set(device, device); ret = dev_set_name(&device->device, "vfio%d", device->index); if (ret) return ret; ret = vfio_device_set_group(device, type); if (ret) return ret; /* * VFIO always sets IOMMU_CACHE because we offer no way for userspace to * restore cache coherency. It has to be checked here because it is only * valid for cases where we are using iommu groups. */ if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) && !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) { ret = -EINVAL; goto err_out; } ret = vfio_device_add(device); if (ret) goto err_out; /* Refcounting can't start until the driver calls register */ refcount_set(&device->refcount, 1); vfio_device_group_register(device); vfio_device_debugfs_init(device); return 0; err_out: vfio_device_remove_group(device); return ret; } int vfio_register_group_dev(struct vfio_device *device) { return __vfio_register_dev(device, VFIO_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_group_dev); /* * Register a virtual device without IOMMU backing. The user of this * device must not be able to directly trigger unmediated DMA. */ int vfio_register_emulated_iommu_dev(struct vfio_device *device) { return __vfio_register_dev(device, VFIO_EMULATED_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); /* * Decrement the device reference count and wait for the device to be * removed. Open file descriptors for the device... */ void vfio_unregister_group_dev(struct vfio_device *device) { unsigned int i = 0; bool interrupted = false; long rc; /* * Prevent new device opened by userspace via the * VFIO_GROUP_GET_DEVICE_FD in the group path. */ vfio_device_group_unregister(device); /* * Balances vfio_device_add() in register path, also prevents * new device opened by userspace in the cdev path. */ vfio_device_del(device); vfio_device_put_registration(device); rc = try_wait_for_completion(&device->comp); while (rc <= 0) { if (device->ops->request) device->ops->request(device, i++); if (interrupted) { rc = wait_for_completion_timeout(&device->comp, HZ * 10); } else { rc = wait_for_completion_interruptible_timeout( &device->comp, HZ * 10); if (rc < 0) { interrupted = true; dev_warn(device->dev, "Device is currently in use, task" " \"%s\" (%d) " "blocked until device is released", current->comm, task_pid_nr(current)); } } } vfio_device_debugfs_exit(device); /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); #if IS_ENABLED(CONFIG_KVM) void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) { void (*pfn)(struct kvm *kvm); bool (*fn)(struct kvm *kvm); bool ret; lockdep_assert_held(&device->dev_set->lock); if (!kvm) return; pfn = symbol_get(kvm_put_kvm); if (WARN_ON(!pfn)) return; fn = symbol_get(kvm_get_kvm_safe); if (WARN_ON(!fn)) { symbol_put(kvm_put_kvm); return; } ret = fn(kvm); symbol_put(kvm_get_kvm_safe); if (!ret) { symbol_put(kvm_put_kvm); return; } device->put_kvm = pfn; device->kvm = kvm; } void vfio_device_put_kvm(struct vfio_device *device) { lockdep_assert_held(&device->dev_set->lock); if (!device->kvm) return; if (WARN_ON(!device->put_kvm)) goto clear; device->put_kvm(device->kvm); device->put_kvm = NULL; symbol_put(kvm_put_kvm); clear: device->kvm = NULL; } #endif /* true if the vfio_device has open_device() called but not close_device() */ static bool vfio_assert_device_open(struct vfio_device *device) { return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } struct vfio_device_file * vfio_allocate_device_file(struct vfio_device *device) { struct vfio_device_file *df; df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT); if (!df) return ERR_PTR(-ENOMEM); df->device = device; spin_lock_init(&df->kvm_ref_lock); return df; } static int vfio_df_device_first_open(struct vfio_device_file *df) { struct vfio_device *device = df->device; struct iommufd_ctx *iommufd = df->iommufd; int ret; lockdep_assert_held(&device->dev_set->lock); if (!try_module_get(device->dev->driver->owner)) return -ENODEV; if (iommufd) ret = vfio_df_iommufd_bind(df); else ret = vfio_device_group_use_iommu(device); if (ret) goto err_module_put; if (device->ops->open_device) { ret = device->ops->open_device(device); if (ret) goto err_unuse_iommu; } return 0; err_unuse_iommu: if (iommufd) vfio_df_iommufd_unbind(df); else vfio_device_group_unuse_iommu(device); err_module_put: module_put(device->dev->driver->owner); return ret; } static void vfio_df_device_last_close(struct vfio_device_file *df) { struct vfio_device *device = df->device; struct iommufd_ctx *iommufd = df->iommufd; lockdep_assert_held(&device->dev_set->lock); if (device->ops->close_device) device->ops->close_device(device); if (iommufd) vfio_df_iommufd_unbind(df); else vfio_device_group_unuse_iommu(device); module_put(device->dev->driver->owner); } int vfio_df_open(struct vfio_device_file *df) { struct vfio_device *device = df->device; int ret = 0; lockdep_assert_held(&device->dev_set->lock); /* * Only the group path allows the device to be opened multiple * times. The device cdev path doesn't have a secure way for it. */ if (device->open_count != 0 && !df->group) return -EINVAL; device->open_count++; if (device->open_count == 1) { ret = vfio_df_device_first_open(df); if (ret) device->open_count--; } return ret; } void vfio_df_close(struct vfio_device_file *df) { struct vfio_device *device = df->device; lockdep_assert_held(&device->dev_set->lock); if (!vfio_assert_device_open(device)) return; if (device->open_count == 1) vfio_df_device_last_close(df); device->open_count--; } /* * Wrapper around pm_runtime_resume_and_get(). * Return error code on failure or 0 on success. */ static inline int vfio_device_pm_runtime_get(struct vfio_device *device) { struct device *dev = device->dev; if (dev->driver && dev->driver->pm) { int ret; ret = pm_runtime_resume_and_get(dev); if (ret) { dev_info_ratelimited(dev, "vfio: runtime resume failed %d\n", ret); return -EIO; } } return 0; } /* * Wrapper around pm_runtime_put(). */ static inline void vfio_device_pm_runtime_put(struct vfio_device *device) { struct device *dev = device->dev; if (dev->driver && dev->driver->pm) pm_runtime_put(dev); } /* * VFIO Device fd */ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; if (df->group) vfio_df_group_close(df); else vfio_df_unbind_iommufd(df); vfio_device_put_registration(device); kfree(df); return 0; } /* * vfio_mig_get_next_state - Compute the next step in the FSM * @cur_fsm - The current state the device is in * @new_fsm - The target state to reach * @next_fsm - Pointer to the next step to get to new_fsm * * Return 0 upon success, otherwise -errno * Upon success the next step in the state progression between cur_fsm and * new_fsm will be set in next_fsm. * * This breaks down requests for combination transitions into smaller steps and * returns the next step to get to new_fsm. The function may need to be called * multiple times before reaching new_fsm. * */ int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, enum vfio_device_mig_state new_fsm, enum vfio_device_mig_state *next_fsm) { enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 }; /* * The coding in this table requires the driver to implement the * following FSM arcs: * RESUMING -> STOP * STOP -> RESUMING * STOP -> STOP_COPY * STOP_COPY -> STOP * * If P2P is supported then the driver must also implement these FSM * arcs: * RUNNING -> RUNNING_P2P * RUNNING_P2P -> RUNNING * RUNNING_P2P -> STOP * STOP -> RUNNING_P2P * * If precopy is supported then the driver must support these additional * FSM arcs: * RUNNING -> PRE_COPY * PRE_COPY -> RUNNING * PRE_COPY -> STOP_COPY * However, if precopy and P2P are supported together then the driver * must support these additional arcs beyond the P2P arcs above: * PRE_COPY -> RUNNING * PRE_COPY -> PRE_COPY_P2P * PRE_COPY_P2P -> PRE_COPY * PRE_COPY_P2P -> RUNNING_P2P * PRE_COPY_P2P -> STOP_COPY * RUNNING -> PRE_COPY * RUNNING_P2P -> PRE_COPY_P2P * * Without P2P and precopy the driver must implement: * RUNNING -> STOP * STOP -> RUNNING * * The coding will step through multiple states for some combination * transitions; if all optional features are supported, this means the * following ones: * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY * PRE_COPY -> RUNNING -> RUNNING_P2P * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING * PRE_COPY_P2P -> RUNNING_P2P -> STOP * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING * RESUMING -> STOP -> RUNNING_P2P * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P * RESUMING -> STOP -> RUNNING_P2P -> RUNNING * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * RESUMING -> STOP -> STOP_COPY * RUNNING -> RUNNING_P2P -> PRE_COPY_P2P * RUNNING -> RUNNING_P2P -> STOP * RUNNING -> RUNNING_P2P -> STOP -> RESUMING * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY * RUNNING_P2P -> RUNNING -> PRE_COPY * RUNNING_P2P -> STOP -> RESUMING * RUNNING_P2P -> STOP -> STOP_COPY * STOP -> RUNNING_P2P -> PRE_COPY_P2P * STOP -> RUNNING_P2P -> RUNNING * STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * STOP_COPY -> STOP -> RESUMING * STOP_COPY -> STOP -> RUNNING_P2P * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING * * The following transitions are blocked: * STOP_COPY -> PRE_COPY * STOP_COPY -> PRE_COPY_P2P */ static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_RUNNING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_PRE_COPY] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_STOP_COPY] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_RESUMING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_RUNNING_P2P] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_ERROR] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, }; static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, [VFIO_DEVICE_STATE_ERROR] = ~0U, }; if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || (state_flags_table[cur_fsm] & device->migration_flags) != state_flags_table[cur_fsm])) return -EINVAL; if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || (state_flags_table[new_fsm] & device->migration_flags) != state_flags_table[new_fsm]) return -EINVAL; /* * Arcs touching optional and unsupported states are skipped over. The * driver will instead see an arc from the original state to the next * logical state, as per the above comment. */ *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; while ((state_flags_table[*next_fsm] & device->migration_flags) != state_flags_table[*next_fsm]) *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL; } EXPORT_SYMBOL_GPL(vfio_mig_get_next_state); /* * Convert the drivers's struct file into a FD number and return it to userspace */ static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, struct vfio_device_feature_mig_state *mig) { int ret; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { ret = fd; goto out_fput; } mig->data_fd = fd; if (copy_to_user(arg, mig, sizeof(*mig))) { ret = -EFAULT; goto out_put_unused; } fd_install(fd, filp); return 0; out_put_unused: put_unused_fd(fd); out_fput: fput(filp); return ret; } static int vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { size_t minsz = offsetofend(struct vfio_device_feature_mig_state, data_fd); struct vfio_device_feature_mig_state mig; struct file *filp = NULL; int ret; if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_GET, sizeof(mig)); if (ret != 1) return ret; if (copy_from_user(&mig, arg, minsz)) return -EFAULT; if (flags & VFIO_DEVICE_FEATURE_GET) { enum vfio_device_mig_state curr_state; ret = device->mig_ops->migration_get_state(device, &curr_state); if (ret) return ret; mig.device_state = curr_state; goto out_copy; } /* Handle the VFIO_DEVICE_FEATURE_SET */ filp = device->mig_ops->migration_set_state(device, mig.device_state); if (IS_ERR(filp) || !filp) goto out_copy; return vfio_ioct_mig_return_fd(filp, arg, &mig); out_copy: mig.data_fd = -1; if (copy_to_user(arg, &mig, sizeof(mig))) return -EFAULT; if (IS_ERR(filp)) return PTR_ERR(filp); return 0; } static int vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { struct vfio_device_feature_mig_data_size data_size = {}; unsigned long stop_copy_length; int ret; if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, sizeof(data_size)); if (ret != 1) return ret; ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length); if (ret) return ret; data_size.stop_copy_length = stop_copy_length; if (copy_to_user(arg, &data_size, sizeof(data_size))) return -EFAULT; return 0; } static int vfio_ioctl_device_feature_migration(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { struct vfio_device_feature_migration mig = { .flags = device->migration_flags, }; int ret; if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, sizeof(mig)); if (ret != 1) return ret; if (copy_to_user(arg, &mig, sizeof(mig))) return -EFAULT; return 0; } void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, u32 req_nodes) { struct interval_tree_node *prev, *curr, *comb_start, *comb_end; unsigned long min_gap, curr_gap; /* Special shortcut when a single range is required */ if (req_nodes == 1) { unsigned long last; comb_start = interval_tree_iter_first(root, 0, ULONG_MAX); /* Empty list */ if (WARN_ON_ONCE(!comb_start)) return; curr = comb_start; while (curr) { last = curr->last; prev = curr; curr = interval_tree_iter_next(curr, 0, ULONG_MAX); if (prev != comb_start) interval_tree_remove(prev, root); } comb_start->last = last; return; } /* Combine ranges which have the smallest gap */ while (cur_nodes > req_nodes) { prev = NULL; min_gap = ULONG_MAX; curr = interval_tree_iter_first(root, 0, ULONG_MAX); while (curr) { if (prev) { curr_gap = curr->start - prev->last; if (curr_gap < min_gap) { min_gap = curr_gap; comb_start = prev; comb_end = curr; } } prev = curr; curr = interval_tree_iter_next(curr, 0, ULONG_MAX); } /* Empty list or no nodes to combine */ if (WARN_ON_ONCE(min_gap == ULONG_MAX)) break; comb_start->last = comb_end->last; interval_tree_remove(comb_end, root); cur_nodes--; } } EXPORT_SYMBOL_GPL(vfio_combine_iova_ranges); /* Ranges should fit into a single kernel page */ #define LOG_MAX_RANGES \ (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range)) static int vfio_ioctl_device_feature_logging_start(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { size_t minsz = offsetofend(struct vfio_device_feature_dma_logging_control, ranges); struct vfio_device_feature_dma_logging_range __user *ranges; struct vfio_device_feature_dma_logging_control control; struct vfio_device_feature_dma_logging_range range; struct rb_root_cached root = RB_ROOT_CACHED; struct interval_tree_node *nodes; u64 iova_end; u32 nnodes; int i, ret; if (!device->log_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, sizeof(control)); if (ret != 1) return ret; if (copy_from_user(&control, arg, minsz)) return -EFAULT; nnodes = control.num_ranges; if (!nnodes) return -EINVAL; if (nnodes > LOG_MAX_RANGES) return -E2BIG; ranges = u64_to_user_ptr(control.ranges); nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), GFP_KERNEL); if (!nodes) return -ENOMEM; for (i = 0; i < nnodes; i++) { if (copy_from_user(&range, &ranges[i], sizeof(range))) { ret = -EFAULT; goto end; } if (!IS_ALIGNED(range.iova, control.page_size) || !IS_ALIGNED(range.length, control.page_size)) { ret = -EINVAL; goto end; } if (check_add_overflow(range.iova, range.length, &iova_end) || iova_end > ULONG_MAX) { ret = -EOVERFLOW; goto end; } nodes[i].start = range.iova; nodes[i].last = range.iova + range.length - 1; if (interval_tree_iter_first(&root, nodes[i].start, nodes[i].last)) { /* Range overlapping */ ret = -EINVAL; goto end; } interval_tree_insert(nodes + i, &root); } ret = device->log_ops->log_start(device, &root, nnodes, &control.page_size); if (ret) goto end; if (copy_to_user(arg, &control, sizeof(control))) { ret = -EFAULT; device->log_ops->log_stop(device); } end: kfree(nodes); return ret; } static int vfio_ioctl_device_feature_logging_stop(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { int ret; if (!device->log_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0); if (ret != 1) return ret; return device->log_ops->log_stop(device); } static int vfio_device_log_read_and_clear(struct iova_bitmap *iter, unsigned long iova, size_t length, void *opaque) { struct vfio_device *device = opaque; return device->log_ops->log_read_and_clear(device, iova, length, iter); } static int vfio_ioctl_device_feature_logging_report(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { size_t minsz = offsetofend(struct vfio_device_feature_dma_logging_report, bitmap); struct vfio_device_feature_dma_logging_report report; struct iova_bitmap *iter; u64 iova_end; int ret; if (!device->log_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, sizeof(report)); if (ret != 1) return ret; if (copy_from_user(&report, arg, minsz)) return -EFAULT; if (report.page_size < SZ_4K || !is_power_of_2(report.page_size)) return -EINVAL; if (check_add_overflow(report.iova, report.length, &iova_end) || iova_end > ULONG_MAX) return -EOVERFLOW; iter = iova_bitmap_alloc(report.iova, report.length, report.page_size, u64_to_user_ptr(report.bitmap)); if (IS_ERR(iter)) return PTR_ERR(iter); ret = iova_bitmap_for_each(iter, device, vfio_device_log_read_and_clear); iova_bitmap_free(iter); return ret; } static int vfio_ioctl_device_feature(struct vfio_device *device, struct vfio_device_feature __user *arg) { size_t minsz = offsetofend(struct vfio_device_feature, flags); struct vfio_device_feature feature; if (copy_from_user(&feature, arg, minsz)) return -EFAULT; if (feature.argsz < minsz) return -EINVAL; /* Check unknown flags */ if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE)) return -EINVAL; /* GET & SET are mutually exclusive except with PROBE */ if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && (feature.flags & VFIO_DEVICE_FEATURE_SET) && (feature.flags & VFIO_DEVICE_FEATURE_GET)) return -EINVAL; switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { case VFIO_DEVICE_FEATURE_MIGRATION: return vfio_ioctl_device_feature_migration( device, feature.flags, arg->data, feature.argsz - minsz); case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: return vfio_ioctl_device_feature_mig_device_state( device, feature.flags, arg->data, feature.argsz - minsz); case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: return vfio_ioctl_device_feature_logging_start( device, feature.flags, arg->data, feature.argsz - minsz); case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: return vfio_ioctl_device_feature_logging_stop( device, feature.flags, arg->data, feature.argsz - minsz); case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: return vfio_ioctl_device_feature_logging_report( device, feature.flags, arg->data, feature.argsz - minsz); case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE: return vfio_ioctl_device_feature_migration_data_size( device, feature.flags, arg->data, feature.argsz - minsz); default: if (unlikely(!device->ops->device_feature)) return -ENOTTY; return device->ops->device_feature(device, feature.flags, arg->data, feature.argsz - minsz); } } static long vfio_device_fops_unl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; void __user *uptr = (void __user *)arg; int ret; if (cmd == VFIO_DEVICE_BIND_IOMMUFD) return vfio_df_ioctl_bind_iommufd(df, uptr); /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; ret = vfio_device_pm_runtime_get(device); if (ret) return ret; /* cdev only ioctls */ if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) { switch (cmd) { case VFIO_DEVICE_ATTACH_IOMMUFD_PT: ret = vfio_df_ioctl_attach_pt(df, uptr); goto out; case VFIO_DEVICE_DETACH_IOMMUFD_PT: ret = vfio_df_ioctl_detach_pt(df, uptr); goto out; } } switch (cmd) { case VFIO_DEVICE_FEATURE: ret = vfio_ioctl_device_feature(device, uptr); break; default: if (unlikely(!device->ops->ioctl)) ret = -EINVAL; else ret = device->ops->ioctl(device, cmd, arg); break; } out: vfio_device_pm_runtime_put(device); return ret; } static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; if (unlikely(!device->ops->read)) return -EINVAL; return device->ops->read(device, buf, count, ppos); } static ssize_t vfio_device_fops_write(struct file *filep, const char __user *buf, size_t count, loff_t *ppos) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; if (unlikely(!device->ops->write)) return -EINVAL; return device->ops->write(device, buf, count, ppos); } static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; if (unlikely(!device->ops->mmap)) return -EINVAL; return device->ops->mmap(device, vma); } #ifdef CONFIG_PROC_FS static void vfio_device_show_fdinfo(struct seq_file *m, struct file *filep) { char *path; struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; path = kobject_get_path(&device->dev->kobj, GFP_KERNEL); if (!path) return; seq_printf(m, "vfio-device-syspath: /sys%s\n", path); kfree(path); } #endif const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, .open = vfio_device_fops_cdev_open, .release = vfio_device_fops_release, .read = vfio_device_fops_read, .write = vfio_device_fops_write, .unlocked_ioctl = vfio_device_fops_unl_ioctl, .compat_ioctl = compat_ptr_ioctl, .mmap = vfio_device_fops_mmap, #ifdef CONFIG_PROC_FS .show_fdinfo = vfio_device_show_fdinfo, #endif }; static struct vfio_device *vfio_device_from_file(struct file *file) { struct vfio_device_file *df = file->private_data; if (file->f_op != &vfio_device_fops) return NULL; return df->device; } /** * vfio_file_is_valid - True if the file is valid vfio file * @file: VFIO group file or VFIO device file */ bool vfio_file_is_valid(struct file *file) { return vfio_group_from_file(file) || vfio_device_from_file(file); } EXPORT_SYMBOL_GPL(vfio_file_is_valid); /** * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file * is always CPU cache coherent * @file: VFIO group file or VFIO device file * * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop * bit in DMA transactions. A return of false indicates that the user has * rights to access additional instructions such as wbinvd on x86. */ bool vfio_file_enforced_coherent(struct file *file) { struct vfio_device *device; struct vfio_group *group; group = vfio_group_from_file(file); if (group) return vfio_group_enforced_coherent(group); device = vfio_device_from_file(file); if (device) return device_iommu_capable(device->dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); return true; } EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm) { struct vfio_device_file *df = file->private_data; /* * The kvm is first recorded in the vfio_device_file, and will * be propagated to vfio_device::kvm when the file is bound to * iommufd successfully in the vfio device cdev path. */ spin_lock(&df->kvm_ref_lock); df->kvm = kvm; spin_unlock(&df->kvm_ref_lock); } /** * vfio_file_set_kvm - Link a kvm with VFIO drivers * @file: VFIO group file or VFIO device file * @kvm: KVM to link * * When a VFIO device is first opened the KVM will be available in * device->kvm if one was associated with the file. */ void vfio_file_set_kvm(struct file *file, struct kvm *kvm) { struct vfio_group *group; group = vfio_group_from_file(file); if (group) vfio_group_set_kvm(group, kvm); if (vfio_device_from_file(file)) vfio_device_file_set_kvm(file, kvm); } EXPORT_SYMBOL_GPL(vfio_file_set_kvm); /* * Sub-module support */ /* * Helper for managing a buffer of info chain capabilities, allocate or * reallocate a buffer with additional @size, filling in @id and @version * of the capability. A pointer to the new capability is returned. * * NB. The chain is based at the head of the buffer, so new entries are * added to the tail, vfio_info_cap_shift() should be called to fixup the * next offsets prior to copying to the user buffer. */ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, size_t size, u16 id, u16 version) { void *buf; struct vfio_info_cap_header *header, *tmp; /* Ensure that the next capability struct will be aligned */ size = ALIGN(size, sizeof(u64)); buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); if (!buf) { kfree(caps->buf); caps->buf = NULL; caps->size = 0; return ERR_PTR(-ENOMEM); } caps->buf = buf; header = buf + caps->size; /* Eventually copied to user buffer, zero */ memset(header, 0, size); header->id = id; header->version = version; /* Add to the end of the capability chain */ for (tmp = buf; tmp->next; tmp = buf + tmp->next) ; /* nothing */ tmp->next = caps->size; caps->size += size; return header; } EXPORT_SYMBOL_GPL(vfio_info_cap_add); void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) { struct vfio_info_cap_header *tmp; void *buf = (void *)caps->buf; /* Capability structs should start with proper alignment */ WARN_ON(!IS_ALIGNED(offset, sizeof(u64))); for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) tmp->next += offset; } EXPORT_SYMBOL(vfio_info_cap_shift); int vfio_info_add_capability(struct vfio_info_cap *caps, struct vfio_info_cap_header *cap, size_t size) { struct vfio_info_cap_header *header; header = vfio_info_cap_add(caps, size, cap->id, cap->version); if (IS_ERR(header)) return PTR_ERR(header); memcpy(header + 1, cap + 1, size - sizeof(*header)); return 0; } EXPORT_SYMBOL(vfio_info_add_capability); int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, int max_irq_type, size_t *data_size) { unsigned long minsz; size_t size; minsz = offsetofend(struct vfio_irq_set, count); if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || (hdr->count >= (U32_MAX - hdr->start)) || (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | VFIO_IRQ_SET_ACTION_TYPE_MASK))) return -EINVAL; if (data_size) *data_size = 0; if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) return -EINVAL; switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { case VFIO_IRQ_SET_DATA_NONE: size = 0; break; case VFIO_IRQ_SET_DATA_BOOL: size = sizeof(uint8_t); break; case VFIO_IRQ_SET_DATA_EVENTFD: size = sizeof(int32_t); break; default: return -EINVAL; } if (size) { if (hdr->argsz - minsz < hdr->count * size) return -EINVAL; if (!data_size) return -EINVAL; *data_size = hdr->count * size; } return 0; } EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); /* * Pin contiguous user pages and return their associated host pages for local * domain only. * @device [in] : device * @iova [in] : starting IOVA of user pages to be pinned. * @npage [in] : count of pages to be pinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * @prot [in] : protection flags * @pages[out] : array of host pages * Return error or number of pages pinned. * * A driver may only call this function if the vfio_device was created * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages(). */ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, struct page **pages) { /* group->container cannot change while a vfio device is open */ if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) return -EINVAL; if (!device->ops->dma_unmap) return -EINVAL; if (vfio_device_has_container(device)) return vfio_device_container_pin_pages(device, iova, npage, prot, pages); if (device->iommufd_access) { int ret; if (iova > ULONG_MAX) return -EINVAL; /* * VFIO ignores the sub page offset, npages is from the start of * a PAGE_SIZE chunk of IOVA. The caller is expected to recover * the sub page offset by doing: * pages[0] + (iova % PAGE_SIZE) */ ret = iommufd_access_pin_pages( device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), npage * PAGE_SIZE, pages, (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0); if (ret) return ret; return npage; } return -EINVAL; } EXPORT_SYMBOL(vfio_pin_pages); /* * Unpin contiguous host pages for local domain only. * @device [in] : device * @iova [in] : starting address of user pages to be unpinned. * @npage [in] : count of pages to be unpinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. */ void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) { if (WARN_ON(!vfio_assert_device_open(device))) return; if (WARN_ON(!device->ops->dma_unmap)) return; if (vfio_device_has_container(device)) { vfio_device_container_unpin_pages(device, iova, npage); return; } if (device->iommufd_access) { if (WARN_ON(iova > ULONG_MAX)) return; iommufd_access_unpin_pages(device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), npage * PAGE_SIZE); return; } } EXPORT_SYMBOL(vfio_unpin_pages); /* * This interface allows the CPUs to perform some sort of virtual DMA on * behalf of the device. * * CPUs read/write from/into a range of IOVAs pointing to user space memory * into/from a kernel buffer. * * As the read/write of user space memory is conducted via the CPUs and is * not a real device DMA, it is not necessary to pin the user space memory. * * @device [in] : VFIO device * @iova [in] : base IOVA of a user space buffer * @data [in] : pointer to kernel buffer * @len [in] : kernel buffer length * @write : indicate read or write * Return error code on failure or 0 on success. */ int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write) { if (!data || len <= 0 || !vfio_assert_device_open(device)) return -EINVAL; if (vfio_device_has_container(device)) return vfio_device_container_dma_rw(device, iova, data, len, write); if (device->iommufd_access) { unsigned int flags = 0; if (iova > ULONG_MAX) return -EINVAL; /* VFIO historically tries to auto-detect a kthread */ if (!current->mm) flags |= IOMMUFD_ACCESS_RW_KTHREAD; if (write) flags |= IOMMUFD_ACCESS_RW_WRITE; return iommufd_access_rw(device->iommufd_access, iova, data, len, flags); } return -EINVAL; } EXPORT_SYMBOL(vfio_dma_rw); /* * Module/class support */ static int __init vfio_init(void) { int ret; ida_init(&vfio.device_ida); ret = vfio_group_init(); if (ret) return ret; ret = vfio_virqfd_init(); if (ret) goto err_virqfd; /* /sys/class/vfio-dev/vfioX */ vfio.device_class = class_create("vfio-dev"); if (IS_ERR(vfio.device_class)) { ret = PTR_ERR(vfio.device_class); goto err_dev_class; } ret = vfio_cdev_init(vfio.device_class); if (ret) goto err_alloc_dev_chrdev; vfio_debugfs_create_root(); pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; err_alloc_dev_chrdev: class_destroy(vfio.device_class); vfio.device_class = NULL; err_dev_class: vfio_virqfd_exit(); err_virqfd: vfio_group_cleanup(); return ret; } static void __exit vfio_cleanup(void) { vfio_debugfs_remove_root(); ida_destroy(&vfio.device_ida); vfio_cdev_cleanup(); class_destroy(vfio.device_class); vfio.device_class = NULL; vfio_virqfd_exit(); vfio_group_cleanup(); xa_destroy(&vfio_device_set_xa); } module_init(vfio_init); module_exit(vfio_cleanup); MODULE_IMPORT_NS("IOMMUFD"); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
278 180 272 274 273 829 826 829 357 356 358 330 332 335 331 353 530 1 1 1 532 528 849 839 842 558 532 531 525 361 533 45 45 45 20 20 880 334 879 881 880 882 58 873 857 468 840 839 552 855 854 853 855 850 852 850 60 60 857 841 4 856 589 45 853 860 856 271 274 274 271 852 881 830 854 855 853 857 2 1 1 1 2 3 3 1 2 3 3 10 10 10 9 9 9 10 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 // SPDX-License-Identifier: GPL-2.0 /* * kernel userspace event delivery * * Copyright (C) 2004 Red Hat, Inc. All rights reserved. * Copyright (C) 2004 Novell, Inc. All rights reserved. * Copyright (C) 2004 IBM, Inc. All rights reserved. * * Authors: * Robert Love <rml@novell.com> * Kay Sievers <kay.sievers@vrfy.org> * Arjan van de Ven <arjanv@redhat.com> * Greg Kroah-Hartman <greg@kroah.com> */ #include <linux/spinlock.h> #include <linux/string.h> #include <linux/kobject.h> #include <linux/export.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/uidgid.h> #include <linux/uuid.h> #include <linux/ctype.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> atomic64_t uevent_seqnum; #ifdef CONFIG_UEVENT_HELPER char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; #endif struct uevent_sock { struct list_head list; struct sock *sk; }; #ifdef CONFIG_NET static LIST_HEAD(uevent_sock_list); /* This lock protects uevent_sock_list */ static DEFINE_MUTEX(uevent_sock_mutex); #endif /* the strings here must match the enum in include/linux/kobject.h */ static const char *kobject_actions[] = { [KOBJ_ADD] = "add", [KOBJ_REMOVE] = "remove", [KOBJ_CHANGE] = "change", [KOBJ_MOVE] = "move", [KOBJ_ONLINE] = "online", [KOBJ_OFFLINE] = "offline", [KOBJ_BIND] = "bind", [KOBJ_UNBIND] = "unbind", }; static int kobject_action_type(const char *buf, size_t count, enum kobject_action *type, const char **args) { enum kobject_action action; size_t count_first; const char *args_start; int ret = -EINVAL; if (count && (buf[count-1] == '\n' || buf[count-1] == '\0')) count--; if (!count) goto out; args_start = strnchr(buf, count, ' '); if (args_start) { count_first = args_start - buf; args_start = args_start + 1; } else count_first = count; for (action = 0; action < ARRAY_SIZE(kobject_actions); action++) { if (strncmp(kobject_actions[action], buf, count_first) != 0) continue; if (kobject_actions[action][count_first] != '\0') continue; if (args) *args = args_start; *type = action; ret = 0; break; } out: return ret; } static const char *action_arg_word_end(const char *buf, const char *buf_end, char delim) { const char *next = buf; while (next <= buf_end && *next != delim) if (!isalnum(*next++)) return NULL; if (next == buf) return NULL; return next; } static int kobject_action_args(const char *buf, size_t count, struct kobj_uevent_env **ret_env) { struct kobj_uevent_env *env = NULL; const char *next, *buf_end, *key; int key_len; int r = -EINVAL; if (count && (buf[count - 1] == '\n' || buf[count - 1] == '\0')) count--; if (!count) return -EINVAL; env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) return -ENOMEM; /* first arg is UUID */ if (count < UUID_STRING_LEN || !uuid_is_valid(buf) || add_uevent_var(env, "SYNTH_UUID=%.*s", UUID_STRING_LEN, buf)) goto out; /* * the rest are custom environment variables in KEY=VALUE * format with ' ' delimiter between each KEY=VALUE pair */ next = buf + UUID_STRING_LEN; buf_end = buf + count - 1; while (next <= buf_end) { if (*next != ' ') goto out; /* skip the ' ', key must follow */ key = ++next; if (key > buf_end) goto out; buf = next; next = action_arg_word_end(buf, buf_end, '='); if (!next || next > buf_end || *next != '=') goto out; key_len = next - buf; /* skip the '=', value must follow */ if (++next > buf_end) goto out; buf = next; next = action_arg_word_end(buf, buf_end, ' '); if (!next) goto out; if (add_uevent_var(env, "SYNTH_ARG_%.*s=%.*s", key_len, key, (int) (next - buf), buf)) goto out; } r = 0; out: if (r) kfree(env); else *ret_env = env; return r; } /** * kobject_synth_uevent - send synthetic uevent with arguments * * @kobj: struct kobject for which synthetic uevent is to be generated * @buf: buffer containing action type and action args, newline is ignored * @count: length of buffer * * Returns 0 if kobject_synthetic_uevent() is completed with success or the * corresponding error when it fails. */ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) { char *no_uuid_envp[] = { "SYNTH_UUID=0", NULL }; enum kobject_action action; const char *action_args; struct kobj_uevent_env *env; const char *msg = NULL, *devpath; int r; r = kobject_action_type(buf, count, &action, &action_args); if (r) { msg = "unknown uevent action string"; goto out; } if (!action_args) { r = kobject_uevent_env(kobj, action, no_uuid_envp); goto out; } r = kobject_action_args(action_args, count - (action_args - buf), &env); if (r == -EINVAL) { msg = "incorrect uevent action arguments"; goto out; } if (r) goto out; r = kobject_uevent_env(kobj, action, env->envp); kfree(env); out: if (r) { devpath = kobject_get_path(kobj, GFP_KERNEL); pr_warn("synth uevent: %s: %s\n", devpath ?: "unknown device", msg ?: "failed to send uevent"); kfree(devpath); } return r; } #ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); if (ops) { const void *init_ns, *ns; ns = kobj->ktype->namespace(kobj); init_ns = ops->initial_ns(); return ns != init_ns; } return 0; } static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem) { int buffer_size = sizeof(env->buf) - env->buflen; int len; len = strscpy(&env->buf[env->buflen], subsystem, buffer_size); if (len < 0) { pr_warn("%s: insufficient buffer space (%u left) for %s\n", __func__, buffer_size, subsystem); return -ENOMEM; } env->argv[0] = uevent_helper; env->argv[1] = &env->buf[env->buflen]; env->argv[2] = NULL; env->buflen += len + 1; return 0; } static void cleanup_uevent_env(struct subprocess_info *info) { kfree(info->data); } #endif #ifdef CONFIG_NET static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { struct netlink_skb_parms *parms; struct sk_buff *skb = NULL; char *scratch; size_t len; /* allocate message with maximum possible size */ len = strlen(action_string) + strlen(devpath) + 2; skb = alloc_skb(len + env->buflen, GFP_KERNEL); if (!skb) return NULL; /* add header */ scratch = skb_put(skb, len); sprintf(scratch, "%s@%s", action_string, devpath); skb_put_data(skb, env->buf, env->buflen); parms = &NETLINK_CB(skb); parms->creds.uid = GLOBAL_ROOT_UID; parms->creds.gid = GLOBAL_ROOT_GID; parms->dst_group = 1; parms->portid = 0; return skb; } static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { struct sk_buff *skb = NULL; struct uevent_sock *ue_sk; int retval = 0; /* send netlink message */ mutex_lock(&uevent_sock_mutex); list_for_each_entry(ue_sk, &uevent_sock_list, list) { struct sock *uevent_sock = ue_sk->sk; if (!netlink_has_listeners(uevent_sock, 1)) continue; if (!skb) { retval = -ENOMEM; skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) continue; } retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS || retval == -ESRCH) retval = 0; } mutex_unlock(&uevent_sock_mutex); consume_skb(skb); return retval; } static int uevent_net_broadcast_tagged(struct sock *usk, struct kobj_uevent_env *env, const char *action_string, const char *devpath) { struct user_namespace *owning_user_ns = sock_net(usk)->user_ns; struct sk_buff *skb = NULL; int ret = 0; skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) return -ENOMEM; /* fix credentials */ if (owning_user_ns != &init_user_ns) { struct netlink_skb_parms *parms = &NETLINK_CB(skb); kuid_t root_uid; kgid_t root_gid; /* fix uid */ root_uid = make_kuid(owning_user_ns, 0); if (uid_valid(root_uid)) parms->creds.uid = root_uid; /* fix gid */ root_gid = make_kgid(owning_user_ns, 0); if (gid_valid(root_gid)) parms->creds.gid = root_gid; } ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (ret == -ENOBUFS || ret == -ESRCH) ret = 0; return ret; } #endif static int kobject_uevent_net_broadcast(struct kobject *kobj, struct kobj_uevent_env *env, const char *action_string, const char *devpath) { int ret = 0; #ifdef CONFIG_NET const struct kobj_ns_type_operations *ops; const struct net *net = NULL; ops = kobj_ns_ops(kobj); if (!ops && kobj->kset) { struct kobject *ksobj = &kobj->kset->kobj; if (ksobj->parent != NULL) ops = kobj_ns_ops(ksobj->parent); } /* kobjects currently only carry network namespace tags and they * are the only tag relevant here since we want to decide which * network namespaces to broadcast the uevent into. */ if (ops && ops->netlink_ns && kobj->ktype->namespace) if (ops->type == KOBJ_NS_TYPE_NET) net = kobj->ktype->namespace(kobj); if (!net) ret = uevent_net_broadcast_untagged(env, action_string, devpath); else ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env, action_string, devpath); #endif return ret; } static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; size_t len; int i, j; for (i = 0; i < env->envp_idx;) { if (strncmp(env->envp[i], modalias_prefix, sizeof(modalias_prefix) - 1)) { i++; continue; } len = strlen(env->envp[i]) + 1; if (i != env->envp_idx - 1) { /* @env->envp[] contains pointers to @env->buf[] * with @env->buflen chars, and we are removing * variable MODALIAS here pointed by @env->envp[i] * with length @len as shown below: * * 0 @env->buf[] @env->buflen * --------------------------------------------- * ^ ^ ^ ^ * | |-> @len <-| target block | * @env->envp[0] @env->envp[i] @env->envp[i + 1] * * so the "target block" indicated above is moved * backward by @len, and its right size is * @env->buflen - (@env->envp[i + 1] - @env->envp[0]). */ memmove(env->envp[i], env->envp[i + 1], env->buflen - (env->envp[i + 1] - env->envp[0])); for (j = i; j < env->envp_idx - 1; j++) env->envp[j] = env->envp[j + 1] - len; } env->envp_idx--; env->buflen -= len; } } /** * kobject_uevent_env - send an uevent with environmental data * * @kobj: struct kobject that the action is happening to * @action: action that is happening * @envp_ext: pointer to environmental data * * Returns 0 if kobject_uevent_env() is completed with success or the * corresponding error when it fails. */ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp_ext[]) { struct kobj_uevent_env *env; const char *action_string = kobject_actions[action]; const char *devpath = NULL; const char *subsystem; struct kobject *top_kobj; struct kset *kset; const struct kset_uevent_ops *uevent_ops; int i = 0; int retval = 0; /* * Mark "remove" event done regardless of result, for some subsystems * do not want to re-trigger "remove" event via automatic cleanup. */ if (action == KOBJ_REMOVE) kobj->state_remove_uevent_sent = 1; pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); /* search the kset we belong to */ top_kobj = kobj; while (!top_kobj->kset && top_kobj->parent) top_kobj = top_kobj->parent; if (!top_kobj->kset) { pr_debug("kobject: '%s' (%p): %s: attempted to send uevent " "without kset!\n", kobject_name(kobj), kobj, __func__); return -EINVAL; } kset = top_kobj->kset; uevent_ops = kset->uevent_ops; /* skip the event, if uevent_suppress is set*/ if (kobj->uevent_suppress) { pr_debug("kobject: '%s' (%p): %s: uevent_suppress " "caused the event to drop!\n", kobject_name(kobj), kobj, __func__); return 0; } /* skip the event, if the filter returns zero. */ if (uevent_ops && uevent_ops->filter) if (!uevent_ops->filter(kobj)) { pr_debug("kobject: '%s' (%p): %s: filter function " "caused the event to drop!\n", kobject_name(kobj), kobj, __func__); return 0; } /* originating subsystem */ if (uevent_ops && uevent_ops->name) subsystem = uevent_ops->name(kobj); else subsystem = kobject_name(&kset->kobj); if (!subsystem) { pr_debug("kobject: '%s' (%p): %s: unset subsystem caused the " "event to drop!\n", kobject_name(kobj), kobj, __func__); return 0; } /* environment buffer */ env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); if (!env) return -ENOMEM; /* complete object path */ devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { retval = -ENOENT; goto exit; } /* default keys */ retval = add_uevent_var(env, "ACTION=%s", action_string); if (retval) goto exit; retval = add_uevent_var(env, "DEVPATH=%s", devpath); if (retval) goto exit; retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem); if (retval) goto exit; /* keys passed in from the caller */ if (envp_ext) { for (i = 0; envp_ext[i]; i++) { retval = add_uevent_var(env, "%s", envp_ext[i]); if (retval) goto exit; } } /* let the kset specific function add its stuff */ if (uevent_ops && uevent_ops->uevent) { retval = uevent_ops->uevent(kobj, env); if (retval) { pr_debug("kobject: '%s' (%p): %s: uevent() returned " "%d\n", kobject_name(kobj), kobj, __func__, retval); goto exit; } } switch (action) { case KOBJ_ADD: /* * Mark "add" event so we can make sure we deliver "remove" * event to userspace during automatic cleanup. If * the object did send an "add" event, "remove" will * automatically generated by the core, if not already done * by the caller. */ kobj->state_add_uevent_sent = 1; break; case KOBJ_UNBIND: zap_modalias_env(env); break; default: break; } /* we will send an event, so request a new sequence number */ retval = add_uevent_var(env, "SEQNUM=%llu", atomic64_inc_return(&uevent_seqnum)); if (retval) goto exit; retval = kobject_uevent_net_broadcast(kobj, env, action_string, devpath); #ifdef CONFIG_UEVENT_HELPER /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { struct subprocess_info *info; retval = add_uevent_var(env, "HOME=/"); if (retval) goto exit; retval = add_uevent_var(env, "PATH=/sbin:/bin:/usr/sbin:/usr/bin"); if (retval) goto exit; retval = init_uevent_argv(env, subsystem); if (retval) goto exit; retval = -ENOMEM; info = call_usermodehelper_setup(env->argv[0], env->argv, env->envp, GFP_KERNEL, NULL, cleanup_uevent_env, env); if (info) { retval = call_usermodehelper_exec(info, UMH_NO_WAIT); env = NULL; /* freed by cleanup_uevent_env */ } } #endif exit: kfree(devpath); kfree(env); return retval; } EXPORT_SYMBOL_GPL(kobject_uevent_env); /** * kobject_uevent - notify userspace by sending an uevent * * @kobj: struct kobject that the action is happening to * @action: action that is happening * * Returns 0 if kobject_uevent() is completed with success or the * corresponding error when it fails. */ int kobject_uevent(struct kobject *kobj, enum kobject_action action) { return kobject_uevent_env(kobj, action, NULL); } EXPORT_SYMBOL_GPL(kobject_uevent); /** * add_uevent_var - add key value string to the environment buffer * @env: environment buffer structure * @format: printf format for the key=value pair * * Returns 0 if environment variable was added successfully or -ENOMEM * if no space was available. */ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) { va_list args; int len; if (env->envp_idx >= ARRAY_SIZE(env->envp)) { WARN(1, KERN_ERR "add_uevent_var: too many keys\n"); return -ENOMEM; } va_start(args, format); len = vsnprintf(&env->buf[env->buflen], sizeof(env->buf) - env->buflen, format, args); va_end(args); if (len >= (sizeof(env->buf) - env->buflen)) { WARN(1, KERN_ERR "add_uevent_var: buffer size too small\n"); return -ENOMEM; } env->envp[env->envp_idx++] = &env->buf[env->buflen]; env->buflen += len + 1; return 0; } EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb, struct netlink_ext_ack *extack) { /* u64 to chars: 2^64 - 1 = 21 chars */ char buf[sizeof("SEQNUM=") + 21]; struct sk_buff *skbc; int ret; /* bump and prepare sequence number */ ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", atomic64_inc_return(&uevent_seqnum)); if (ret < 0 || (size_t)ret >= sizeof(buf)) return -ENOMEM; ret++; /* verify message does not overflow */ if ((skb->len + ret) > UEVENT_BUFFER_SIZE) { NL_SET_ERR_MSG(extack, "uevent message too big"); return -EINVAL; } /* copy skb and extend to accommodate sequence number */ skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL); if (!skbc) return -ENOMEM; /* append sequence number */ skb_put_data(skbc, buf, ret); /* remove msg header */ skb_pull(skbc, NLMSG_HDRLEN); /* set portid 0 to inform userspace message comes from kernel */ NETLINK_CB(skbc).portid = 0; NETLINK_CB(skbc).dst_group = 1; ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (ret == -ENOBUFS || ret == -ESRCH) ret = 0; return ret; } static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net; int ret; if (!nlmsg_data(nlh)) return -EINVAL; /* * Verify that we are allowed to send messages to the target * network namespace. The caller must have CAP_SYS_ADMIN in the * owning user namespace of the target network namespace. */ net = sock_net(NETLINK_CB(skb).sk); if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) { NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability"); return -EPERM; } ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack); return ret; } static void uevent_net_rcv(struct sk_buff *skb) { netlink_rcv_skb(skb, &uevent_net_rcv_skb); } static int uevent_net_init(struct net *net) { struct uevent_sock *ue_sk; struct netlink_kernel_cfg cfg = { .groups = 1, .input = uevent_net_rcv, .flags = NL_CFG_F_NONROOT_RECV }; ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); if (!ue_sk) return -ENOMEM; ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg); if (!ue_sk->sk) { pr_err("kobject_uevent: unable to create netlink socket!\n"); kfree(ue_sk); return -ENODEV; } net->uevent_sock = ue_sk; /* Restrict uevents to initial user namespace. */ if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { mutex_lock(&uevent_sock_mutex); list_add_tail(&ue_sk->list, &uevent_sock_list); mutex_unlock(&uevent_sock_mutex); } return 0; } static void uevent_net_exit(struct net *net) { struct uevent_sock *ue_sk = net->uevent_sock; if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { mutex_lock(&uevent_sock_mutex); list_del(&ue_sk->list); mutex_unlock(&uevent_sock_mutex); } netlink_kernel_release(ue_sk->sk); kfree(ue_sk); } static struct pernet_operations uevent_net_ops = { .init = uevent_net_init, .exit = uevent_net_exit, }; static int __init kobject_uevent_init(void) { return register_pernet_subsys(&uevent_net_ops); } postcore_initcall(kobject_uevent_init); #endif #ifdef CONFIG_UEVENT_HELPER static const struct ctl_table uevent_helper_sysctl_table[] = { { .procname = "hotplug", .data = &uevent_helper, .maxlen = UEVENT_HELPER_PATH_LEN, .mode = 0644, .proc_handler = proc_dostring, }, }; static int __init init_uevent_helper_sysctl(void) { register_sysctl_init("kernel", uevent_helper_sysctl_table); return 0; } postcore_initcall(init_uevent_helper_sysctl); #endif
18 16 16 9 9 7 10 16 1 15 17 14 2 2 14 13 13 13 12 1 12 9 9 9 9 3 2 2 1 12 13 19 4 4 2 2 17 1 1 13 19 19 19 18 18 15 13 13 13 13 7 7 7 7 6 6 4 2 12 12 12 9 6 6 6 3 6 3 6 3 1 1 3 3 5 2 1 1 2 2 12 12 9 9 9 9 9 9 6 3 3 3 3 3 3 6 11 11 4 4 12 12 12 12 1 5 5 5 5 5 5 5 5 1 1 1 1 1 1 1 1 1 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 // SPDX-License-Identifier: GPL-2.0-only /* * Connection tracking protocol helper module for SCTP. * * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com> * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net> * * SCTP is defined in RFC 2960. References to various sections in this code * are to this RFC. */ #include <linux/types.h> #include <linux/timer.h> #include <linux/netfilter.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/sctp.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <net/sctp/checksum.h> #include <net/netfilter/nf_log.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_timeout.h> static const char *const sctp_conntrack_names[] = { [SCTP_CONNTRACK_NONE] = "NONE", [SCTP_CONNTRACK_CLOSED] = "CLOSED", [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT", [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED", [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED", [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT", [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD", [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT", [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT", }; static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_CLOSED] = secs_to_jiffies(10), [SCTP_CONNTRACK_COOKIE_WAIT] = secs_to_jiffies(3), [SCTP_CONNTRACK_COOKIE_ECHOED] = secs_to_jiffies(3), [SCTP_CONNTRACK_ESTABLISHED] = secs_to_jiffies(210), [SCTP_CONNTRACK_SHUTDOWN_SENT] = secs_to_jiffies(3), [SCTP_CONNTRACK_SHUTDOWN_RECD] = secs_to_jiffies(3), [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = secs_to_jiffies(3), [SCTP_CONNTRACK_HEARTBEAT_SENT] = secs_to_jiffies(30), }; #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 #define sNO SCTP_CONNTRACK_NONE #define sCL SCTP_CONNTRACK_CLOSED #define sCW SCTP_CONNTRACK_COOKIE_WAIT #define sCE SCTP_CONNTRACK_COOKIE_ECHOED #define sES SCTP_CONNTRACK_ESTABLISHED #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT #define sIV SCTP_CONNTRACK_MAX /* These are the descriptions of the states: NOTE: These state names are tantalizingly similar to the states of an SCTP endpoint. But the interpretation of the states is a little different, considering that these are the states of the connection and not of an end point. Please note the subtleties. -Kiran NONE - Nothing so far. COOKIE WAIT - We have seen an INIT chunk in the original direction, or also an INIT_ACK chunk in the reply direction. COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction. SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite to that of the SHUTDOWN chunk. CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of the SHUTDOWN chunk. Connection is closed. HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. */ /* TODO - I have assumed that the first INIT is in the original direction. This messes things when an INIT comes in the reply direction in CLOSED state. - Check the error type in the reply dir before transitioning from cookie echoed to closed. - Sec 5.2.4 of RFC 2960 - Full Multi Homing support. */ /* SCTP conntrack state transitions */ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ /* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW}, /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL}, /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ /* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, }, { /* REPLY */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */ /* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV}, /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV}, /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, /* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES}, } }; #ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); } #endif /* do_basic_checks ensures sch->length > 0, do not use before */ #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ (offset) < (skb)->len && \ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \ (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++) /* Some validity checks to make sure the chunks are fine */ static int do_basic_checks(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned long *map, const struct nf_hook_state *state) { u_int32_t offset, count; struct sctp_chunkhdr _sch, *sch; int flag; flag = 0; for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { if (sch->type == SCTP_CID_INIT || sch->type == SCTP_CID_INIT_ACK || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) flag = 1; /* * Cookie Ack/Echo chunks not the first OR * Init / Init Ack / Shutdown compl chunks not the only chunks * OR zero-length. */ if (((sch->type == SCTP_CID_COOKIE_ACK || sch->type == SCTP_CID_COOKIE_ECHO || flag) && count != 0) || !sch->length) { nf_ct_l4proto_log_invalid(skb, ct, state, "%s failed. chunk num %d, type %d, len %d flag %d\n", __func__, count, sch->type, sch->length, flag); return 1; } if (map) set_bit(sch->type, map); } return count == 0; } static int sctp_new_state(enum ip_conntrack_dir dir, enum sctp_conntrack cur_state, int chunk_type) { int i; switch (chunk_type) { case SCTP_CID_INIT: i = 0; break; case SCTP_CID_INIT_ACK: i = 1; break; case SCTP_CID_ABORT: i = 2; break; case SCTP_CID_SHUTDOWN: i = 3; break; case SCTP_CID_SHUTDOWN_ACK: i = 4; break; case SCTP_CID_ERROR: i = 5; break; case SCTP_CID_COOKIE_ECHO: i = 6; break; case SCTP_CID_COOKIE_ACK: i = 7; break; case SCTP_CID_SHUTDOWN_COMPLETE: i = 8; break; case SCTP_CID_HEARTBEAT: i = 9; break; case SCTP_CID_HEARTBEAT_ACK: i = 10; break; default: /* Other chunks like DATA or SACK do not change the state */ pr_debug("Unknown chunk type %d, Will stay in %s\n", chunk_type, sctp_conntrack_names[cur_state]); return cur_state; } return sctp_conntracks[dir][i][cur_state]; } /* Don't need lock here: this conntrack not in circulation yet */ static noinline bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, const struct sctphdr *sh, unsigned int dataoff) { enum sctp_conntrack new_state; const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; u32 offset, count; memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp)); new_state = SCTP_CONNTRACK_MAX; for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) { new_state = sctp_new_state(IP_CT_DIR_ORIGINAL, SCTP_CONNTRACK_NONE, sch->type); /* Invalid: delete conntrack */ if (new_state == SCTP_CONNTRACK_NONE || new_state == SCTP_CONNTRACK_MAX) { pr_debug("nf_conntrack_sctp: invalid new deleting.\n"); return false; } /* Copy the vtag into the state info */ if (sch->type == SCTP_CID_INIT) { struct sctp_inithdr _inithdr, *ih; /* Sec 8.5.1 (A) */ if (sh->vtag) return false; ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(_inithdr), &_inithdr); if (!ih) return false; pr_debug("Setting vtag %x for new conn\n", ih->init_tag); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; } else if (sch->type == SCTP_CID_HEARTBEAT) { pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; } ct->proto.sctp.state = SCTP_CONNTRACK_NONE; } return true; } static bool sctp_error(struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { const struct sctphdr *sh; const char *logmsg; if (skb->len < dataoff + sizeof(struct sctphdr)) { logmsg = "nf_ct_sctp: short packet "; goto out_invalid; } if (state->hook == NF_INET_PRE_ROUTING && state->net->ct.sysctl_checksum && skb->ip_summed == CHECKSUM_NONE) { if (skb_ensure_writable(skb, dataoff + sizeof(*sh))) { logmsg = "nf_ct_sctp: failed to read header "; goto out_invalid; } sh = (const struct sctphdr *)(skb->data + dataoff); if (sh->checksum != sctp_compute_cksum(skb, dataoff)) { logmsg = "nf_ct_sctp: bad CRC "; goto out_invalid; } skb->ip_summed = CHECKSUM_UNNECESSARY; } return false; out_invalid: nf_l4proto_log_invalid(skb, state, IPPROTO_SCTP, "%s", logmsg); return true; } /* Returns verdict for packet, or -NF_ACCEPT for invalid. */ int nf_conntrack_sctp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { enum sctp_conntrack new_state, old_state; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); const struct sctphdr *sh; struct sctphdr _sctph; const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; u_int32_t offset, count; unsigned int *timeouts; unsigned long map[256 / sizeof(unsigned long)] = { 0 }; bool ignore = false; if (sctp_error(skb, dataoff, state)) return -NF_ACCEPT; sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); if (sh == NULL) goto out; if (do_basic_checks(ct, skb, dataoff, map, state) != 0) goto out; if (!nf_ct_is_confirmed(ct)) { /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ if (test_bit(SCTP_CID_ABORT, map) || test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) || test_bit(SCTP_CID_COOKIE_ACK, map)) return -NF_ACCEPT; if (!sctp_new(ct, skb, sh, dataoff)) return -NF_ACCEPT; } /* Check the verification tag (Sec 8.5) */ if (!test_bit(SCTP_CID_INIT, map) && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) && !test_bit(SCTP_CID_COOKIE_ECHO, map) && !test_bit(SCTP_CID_ABORT, map) && !test_bit(SCTP_CID_SHUTDOWN_ACK, map) && !test_bit(SCTP_CID_HEARTBEAT, map) && !test_bit(SCTP_CID_HEARTBEAT_ACK, map) && sh->vtag != ct->proto.sctp.vtag[dir]) { nf_ct_l4proto_log_invalid(skb, ct, state, "verification tag check failed %x vs %x for dir %d", sh->vtag, ct->proto.sctp.vtag[dir], dir); goto out; } old_state = new_state = SCTP_CONNTRACK_NONE; spin_lock_bh(&ct->lock); for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { /* (A) vtag MUST be zero */ if (sh->vtag != 0) goto out_unlock; } else if (sch->type == SCTP_CID_ABORT) { /* (B) vtag MUST match own vtag if T flag is unset OR * MUST match peer's vtag if T flag is set */ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && sh->vtag != ct->proto.sctp.vtag[dir]) || ((sch->flags & SCTP_CHUNK_FLAG_T) && sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { /* (C) vtag MUST match own vtag if T flag is unset OR * MUST match peer's vtag if T flag is set */ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && sh->vtag != ct->proto.sctp.vtag[dir]) || ((sch->flags & SCTP_CHUNK_FLAG_T) && sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_COOKIE_ECHO) { /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; } else if (sch->type == SCTP_CID_COOKIE_ACK) { ct->proto.sctp.init[dir] = 0; ct->proto.sctp.init[!dir] = 0; } else if (sch->type == SCTP_CID_HEARTBEAT) { if (ct->proto.sctp.vtag[dir] == 0) { pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir); ct->proto.sctp.vtag[dir] = sh->vtag; } else if (sh->vtag != ct->proto.sctp.vtag[dir]) { if (test_bit(SCTP_CID_DATA, map) || ignore) goto out_unlock; ct->proto.sctp.flags |= SCTP_FLAG_HEARTBEAT_VTAG_FAILED; ct->proto.sctp.last_dir = dir; ignore = true; continue; } else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) { ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED; } } else if (sch->type == SCTP_CID_HEARTBEAT_ACK) { if (ct->proto.sctp.vtag[dir] == 0) { pr_debug("Setting vtag %x for dir %d\n", sh->vtag, dir); ct->proto.sctp.vtag[dir] = sh->vtag; } else if (sh->vtag != ct->proto.sctp.vtag[dir]) { if (test_bit(SCTP_CID_DATA, map) || ignore) goto out_unlock; if ((ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) == 0 || ct->proto.sctp.last_dir == dir) goto out_unlock; ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED; ct->proto.sctp.vtag[dir] = sh->vtag; ct->proto.sctp.vtag[!dir] = 0; } else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) { ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED; } } old_state = ct->proto.sctp.state; new_state = sctp_new_state(dir, old_state, sch->type); /* Invalid */ if (new_state == SCTP_CONNTRACK_MAX) { nf_ct_l4proto_log_invalid(skb, ct, state, "Invalid, old_state %d, dir %d, type %d", old_state, dir, sch->type); goto out_unlock; } /* If it is an INIT or an INIT ACK note down the vtag */ if (sch->type == SCTP_CID_INIT) { struct sctp_inithdr _ih, *ih; ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); if (!ih) goto out_unlock; if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) ct->proto.sctp.init[!dir] = 0; ct->proto.sctp.init[dir] = 1; pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; /* don't renew timeout on init retransmit so * port reuse by client or NAT middlebox cannot * keep entry alive indefinitely (incl. nat info). */ if (new_state == SCTP_CONNTRACK_CLOSED && old_state == SCTP_CONNTRACK_CLOSED && nf_ct_is_confirmed(ct)) ignore = true; } else if (sch->type == SCTP_CID_INIT_ACK) { struct sctp_inithdr _ih, *ih; __be32 vtag; ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); if (!ih) goto out_unlock; vtag = ct->proto.sctp.vtag[!dir]; if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag) goto out_unlock; /* collision */ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && vtag != ih->init_tag) goto out_unlock; pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; } ct->proto.sctp.state = new_state; if (old_state != new_state) { nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (new_state == SCTP_CONNTRACK_ESTABLISHED && !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } } spin_unlock_bh(&ct->lock); /* allow but do not refresh timeout */ if (ignore) return NF_ACCEPT; timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts; nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; out_unlock: spin_unlock_bh(&ct->lock); out: return -NF_ACCEPT; } static bool sctp_can_early_drop(const struct nf_conn *ct) { switch (ct->proto.sctp.state) { case SCTP_CONNTRACK_SHUTDOWN_SENT: case SCTP_CONNTRACK_SHUTDOWN_RECD: case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT: return true; default: break; } return false; } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct, bool destroy) { struct nlattr *nest_parms; spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state)) goto nla_put_failure; if (destroy) goto skip_state; if (nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]) || nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY, ct->proto.sctp.vtag[IP_CT_DIR_REPLY])) goto nla_put_failure; skip_state: spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: spin_unlock_bh(&ct->lock); return -1; } static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 }, [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, }; #define SCTP_NLATTR_SIZE ( \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + 4) + \ NLA_ALIGN(NLA_HDRLEN + 4)) static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) { struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1]; int err; /* updates may not contain the internal protocol info, skip parsing */ if (!attr) return 0; err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_SCTP_MAX, attr, sctp_nla_policy, NULL); if (err < 0) return err; if (!tb[CTA_PROTOINFO_SCTP_STATE] || !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] || !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) return -EINVAL; spin_lock_bh(&ct->lock); ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); spin_unlock_bh(&ct->lock); return 0; } #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; struct nf_sctp_net *sn = nf_sctp_pernet(net); int i; if (!timeouts) timeouts = sn->timeouts; /* set default SCTP timeouts. */ for (i=0; i<SCTP_CONNTRACK_MAX; i++) timeouts[i] = sn->timeouts[i]; /* there's a 1:1 mapping between attributes and protocol states. */ for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { if (tb[i]) { timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; } } timeouts[CTA_TIMEOUT_SCTP_UNSPEC] = timeouts[CTA_TIMEOUT_SCTP_CLOSED]; return 0; } static int sctp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; int i; for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) goto nla_put_failure; } return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { [CTA_TIMEOUT_SCTP_CLOSED] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_COOKIE_WAIT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_COOKIE_ECHOED] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_ESTABLISHED] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ void nf_conntrack_sctp_init_net(struct net *net) { struct nf_sctp_net *sn = nf_sctp_pernet(net); int i; for (i = 0; i < SCTP_CONNTRACK_MAX; i++) sn->timeouts[i] = sctp_timeouts[i]; /* timeouts[0] is unused, init it so ->timeouts[0] contains * 'new' timeout, like udp or icmp. */ sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = { .l4proto = IPPROTO_SCTP, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = sctp_print_conntrack, #endif .can_early_drop = sctp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_size = SCTP_NLATTR_SIZE, .to_nlattr = sctp_to_nlattr, .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = sctp_timeout_nlattr_to_obj, .obj_to_nlattr = sctp_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_SCTP_MAX, .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ };
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 // SPDX-License-Identifier: GPL-2.0-or-later /* * CBC: Cipher Block Chaining mode * * Copyright (c) 2006-2016 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/log2.h> #include <linux/module.h> static int crypto_cbc_encrypt_segment(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned nbytes, u8 *iv) { unsigned int bsize = crypto_lskcipher_blocksize(tfm); for (; nbytes >= bsize; src += bsize, dst += bsize, nbytes -= bsize) { crypto_xor(iv, src, bsize); crypto_lskcipher_encrypt(tfm, iv, dst, bsize, NULL); memcpy(iv, dst, bsize); } return nbytes; } static int crypto_cbc_encrypt_inplace(struct crypto_lskcipher *tfm, u8 *src, unsigned nbytes, u8 *oiv) { unsigned int bsize = crypto_lskcipher_blocksize(tfm); u8 *iv = oiv; if (nbytes < bsize) goto out; do { crypto_xor(src, iv, bsize); crypto_lskcipher_encrypt(tfm, src, src, bsize, NULL); iv = src; src += bsize; } while ((nbytes -= bsize) >= bsize); memcpy(oiv, iv, bsize); out: return nbytes; } static int crypto_cbc_encrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, u32 flags) { struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); bool final = flags & CRYPTO_LSKCIPHER_FLAG_FINAL; struct crypto_lskcipher *cipher = *ctx; int rem; if (src == dst) rem = crypto_cbc_encrypt_inplace(cipher, dst, len, iv); else rem = crypto_cbc_encrypt_segment(cipher, src, dst, len, iv); return rem && final ? -EINVAL : rem; } static int crypto_cbc_decrypt_segment(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned nbytes, u8 *oiv) { unsigned int bsize = crypto_lskcipher_blocksize(tfm); const u8 *iv = oiv; if (nbytes < bsize) goto out; do { crypto_lskcipher_decrypt(tfm, src, dst, bsize, NULL); crypto_xor(dst, iv, bsize); iv = src; src += bsize; dst += bsize; } while ((nbytes -= bsize) >= bsize); memcpy(oiv, iv, bsize); out: return nbytes; } static int crypto_cbc_decrypt_inplace(struct crypto_lskcipher *tfm, u8 *src, unsigned nbytes, u8 *iv) { unsigned int bsize = crypto_lskcipher_blocksize(tfm); u8 last_iv[MAX_CIPHER_BLOCKSIZE]; if (nbytes < bsize) goto out; /* Start of the last block. */ src += nbytes - (nbytes & (bsize - 1)) - bsize; memcpy(last_iv, src, bsize); for (;;) { crypto_lskcipher_decrypt(tfm, src, src, bsize, NULL); if ((nbytes -= bsize) < bsize) break; crypto_xor(src, src - bsize, bsize); src -= bsize; } crypto_xor(src, iv, bsize); memcpy(iv, last_iv, bsize); out: return nbytes; } static int crypto_cbc_decrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, u32 flags) { struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); bool final = flags & CRYPTO_LSKCIPHER_FLAG_FINAL; struct crypto_lskcipher *cipher = *ctx; int rem; if (src == dst) rem = crypto_cbc_decrypt_inplace(cipher, dst, len, iv); else rem = crypto_cbc_decrypt_segment(cipher, src, dst, len, iv); return rem && final ? -EINVAL : rem; } static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb) { struct lskcipher_instance *inst; int err; inst = lskcipher_alloc_instance_simple(tmpl, tb); if (IS_ERR(inst)) return PTR_ERR(inst); err = -EINVAL; if (!is_power_of_2(inst->alg.co.base.cra_blocksize)) goto out_free_inst; if (inst->alg.co.statesize) goto out_free_inst; inst->alg.encrypt = crypto_cbc_encrypt; inst->alg.decrypt = crypto_cbc_decrypt; err = lskcipher_register_instance(tmpl, inst); if (err) { out_free_inst: inst->free(inst); } return err; } static struct crypto_template crypto_cbc_tmpl = { .name = "cbc", .create = crypto_cbc_create, .module = THIS_MODULE, }; static int __init crypto_cbc_module_init(void) { return crypto_register_template(&crypto_cbc_tmpl); } static void __exit crypto_cbc_module_exit(void) { crypto_unregister_template(&crypto_cbc_tmpl); } module_init(crypto_cbc_module_init); module_exit(crypto_cbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CBC block cipher mode of operation"); MODULE_ALIAS_CRYPTO("cbc");
5871 5895 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H #include <linux/types.h> #include <linux/in6.h> #include <linux/siphash.h> #include <linux/string.h> #include <uapi/linux/if_ether.h> #include <uapi/linux/pkt_cls.h> struct bpf_prog; struct net; struct sk_buff; /** * struct flow_dissector_key_control: * @thoff: Transport header offset * @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_* * @flags: Key flags. * Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION|F_*) */ struct flow_dissector_key_control { u16 thoff; u16 addr_type; u32 flags; }; /* The control flags are kept in sync with TCA_FLOWER_KEY_FLAGS_*, as those * flags are exposed to userspace in some error paths, ie. unsupported flags. */ enum flow_dissector_ctrl_flags { FLOW_DIS_IS_FRAGMENT = TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_FIRST_FRAG = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, FLOW_DIS_F_TUNNEL_CSUM = TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM, FLOW_DIS_F_TUNNEL_DONT_FRAGMENT = TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT, FLOW_DIS_F_TUNNEL_OAM = TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, FLOW_DIS_F_TUNNEL_CRIT_OPT = TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT, /* These flags are internal to the kernel */ FLOW_DIS_ENCAPSULATION = (TCA_FLOWER_KEY_FLAGS_MAX << 1), }; enum flow_dissect_ret { FLOW_DISSECT_RET_OUT_GOOD, FLOW_DISSECT_RET_OUT_BAD, FLOW_DISSECT_RET_PROTO_AGAIN, FLOW_DISSECT_RET_IPPROTO_AGAIN, FLOW_DISSECT_RET_CONTINUE, }; /** * struct flow_dissector_key_basic: * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg. TCP/UDP) * @padding: Unused */ struct flow_dissector_key_basic { __be16 n_proto; u8 ip_proto; u8 padding; }; struct flow_dissector_key_tags { u32 flow_label; }; struct flow_dissector_key_vlan { union { struct { u16 vlan_id:12, vlan_dei:1, vlan_priority:3; }; __be16 vlan_tci; }; __be16 vlan_tpid; __be16 vlan_eth_type; u16 padding; }; struct flow_dissector_mpls_lse { u32 mpls_ttl:8, mpls_bos:1, mpls_tc:3, mpls_label:20; }; #define FLOW_DIS_MPLS_MAX 7 struct flow_dissector_key_mpls { struct flow_dissector_mpls_lse ls[FLOW_DIS_MPLS_MAX]; /* Label Stack */ u8 used_lses; /* One bit set for each Label Stack Entry in use */ }; static inline void dissector_set_mpls_lse(struct flow_dissector_key_mpls *mpls, int lse_index) { mpls->used_lses |= 1 << lse_index; } #define FLOW_DIS_TUN_OPTS_MAX 255 /** * struct flow_dissector_key_enc_opts: * @data: tunnel option data * @len: length of tunnel option data * @dst_opt_type: tunnel option type */ struct flow_dissector_key_enc_opts { u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired * here but seems difficult to #include */ u8 len; u32 dst_opt_type; }; struct flow_dissector_key_keyid { __be32 keyid; }; /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv4_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; __be32 dst; }; /** * struct flow_dissector_key_ipv6_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv6_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ struct in6_addr src; struct in6_addr dst; }; /** * struct flow_dissector_key_tipc: * @key: source node address combined with selector */ struct flow_dissector_key_tipc { __be32 key; }; /** * struct flow_dissector_key_addrs: * @v4addrs: IPv4 addresses * @v6addrs: IPv6 addresses * @tipckey: TIPC key */ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; struct flow_dissector_key_tipc tipckey; }; }; /** * struct flow_dissector_key_arp: * @sip: Sender IP address * @tip: Target IP address * @op: Operation * @sha: Sender hardware address * @tha: Target hardware address */ struct flow_dissector_key_arp { __u32 sip; __u32 tip; __u8 op; unsigned char sha[ETH_ALEN]; unsigned char tha[ETH_ALEN]; }; /** * struct flow_dissector_key_ports: * @ports: port numbers of Transport header * @src: source port number * @dst: destination port number */ struct flow_dissector_key_ports { union { __be32 ports; struct { __be16 src; __be16 dst; }; }; }; /** * struct flow_dissector_key_ports_range * @tp: port number from packet * @tp_min: min port number in range * @tp_max: max port number in range */ struct flow_dissector_key_ports_range { union { struct flow_dissector_key_ports tp; struct { struct flow_dissector_key_ports tp_min; struct flow_dissector_key_ports tp_max; }; }; }; /** * struct flow_dissector_key_icmp: * @type: ICMP type * @code: ICMP code * @id: Session identifier */ struct flow_dissector_key_icmp { struct { u8 type; u8 code; }; u16 id; }; /** * struct flow_dissector_key_eth_addrs: * @src: source Ethernet address * @dst: destination Ethernet address */ struct flow_dissector_key_eth_addrs { /* (dst,src) must be grouped, in the same way than in ETH header */ unsigned char dst[ETH_ALEN]; unsigned char src[ETH_ALEN]; }; /** * struct flow_dissector_key_tcp: * @flags: flags */ struct flow_dissector_key_tcp { __be16 flags; }; /** * struct flow_dissector_key_ip: * @tos: tos * @ttl: ttl */ struct flow_dissector_key_ip { __u8 tos; __u8 ttl; }; /** * struct flow_dissector_key_meta: * @ingress_ifindex: ingress ifindex * @ingress_iftype: ingress interface type * @l2_miss: packet did not match an L2 entry during forwarding */ struct flow_dissector_key_meta { int ingress_ifindex; u16 ingress_iftype; u8 l2_miss; }; /** * struct flow_dissector_key_ct: * @ct_state: conntrack state after converting with map * @ct_mark: conttrack mark * @ct_zone: conntrack zone * @ct_labels: conntrack labels */ struct flow_dissector_key_ct { u16 ct_state; u16 ct_zone; u32 ct_mark; u32 ct_labels[4]; }; /** * struct flow_dissector_key_hash: * @hash: hash value */ struct flow_dissector_key_hash { u32 hash; }; /** * struct flow_dissector_key_num_of_vlans: * @num_of_vlans: num_of_vlans value */ struct flow_dissector_key_num_of_vlans { u8 num_of_vlans; }; /** * struct flow_dissector_key_pppoe: * @session_id: pppoe session id * @ppp_proto: ppp protocol * @type: pppoe eth type */ struct flow_dissector_key_pppoe { __be16 session_id; __be16 ppp_proto; __be16 type; }; /** * struct flow_dissector_key_l2tpv3: * @session_id: identifier for a l2tp session */ struct flow_dissector_key_l2tpv3 { __be32 session_id; }; /** * struct flow_dissector_key_ipsec: * @spi: identifier for a ipsec connection */ struct flow_dissector_key_ipsec { __be32 spi; }; /** * struct flow_dissector_key_cfm * @mdl_ver: maintenance domain level (mdl) and cfm protocol version * @opcode: code specifying a type of cfm protocol packet * * See 802.1ag, ITU-T G.8013/Y.1731 * 1 2 * |7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0| * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | mdl | version | opcode | * +-----+---------+-+-+-+-+-+-+-+-+ */ struct flow_dissector_key_cfm { u8 mdl_ver; u8 opcode; }; #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ FLOW_DISSECTOR_KEY_MAX, }; #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) #define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) #define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) #define FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP BIT(3) struct flow_dissector_key { enum flow_dissector_key_id key_id; size_t offset; /* offset of struct flow_dissector_key_* in target the struct */ }; struct flow_dissector { unsigned long long used_keys; /* each bit represents presence of one key id */ unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; struct flow_keys_basic { struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; }; struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_vlan cvlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_icmp icmp; /* 'addrs' must be the last member */ struct flow_dissector_key_addrs addrs; }; #define FLOW_KEYS_HASH_OFFSET \ offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) __be32 flow_get_u32_src(const struct flow_keys *flow); __be32 flow_get_u32_dst(const struct flow_keys *flow); extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_basic_dissector; /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. This is a * larger "hash" of a flow to allow definitively matching specific flows where * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so * that it can be used in CB of skb (see sch_choke for an example). */ #define FLOW_KEYS_DIGEST_LEN 16 struct flow_keys_digest { u8 data[FLOW_KEYS_DIGEST_LEN]; }; void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); static inline bool flow_keys_have_l4(const struct flow_keys *keys) { return (keys->ports.ports || keys->tags.flow_label); } u32 flow_hash_from_keys(struct flow_keys *keys); u32 flow_hash_from_keys_seed(struct flow_keys *keys, const siphash_key_t *keyval); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen); static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) { return flow_dissector->used_keys & (1ULL << key_id); } static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id, void *target_container) { return ((char *)target_container) + flow_dissector->offset[key_id]; } struct bpf_flow_dissector { struct bpf_flow_keys *flow_keys; const struct sk_buff *skb; const void *data; const void *data_end; }; static inline void flow_dissector_init_keys(struct flow_dissector_key_control *key_control, struct flow_dissector_key_basic *key_basic) { memset(key_control, 0, sizeof(*key_control)); memset(key_basic, 0, sizeof(*key_basic)); } #ifdef CONFIG_BPF_SYSCALL int flow_dissector_bpf_prog_attach_check(struct net *net, struct bpf_prog *prog); #endif /* CONFIG_BPF_SYSCALL */ #endif
57 52 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_ENCODER_H__ #define __DRM_ENCODER_H__ #include <linux/list.h> #include <linux/ctype.h> #include <drm/drm_crtc.h> #include <drm/drm_mode.h> #include <drm/drm_mode_object.h> #include <drm/drm_util.h> struct drm_encoder; /** * struct drm_encoder_funcs - encoder controls * * Encoders sit between CRTCs and connectors. */ struct drm_encoder_funcs { /** * @reset: * * Reset encoder hardware and software state to off. This function isn't * called by the core directly, only through drm_mode_config_reset(). * It's not a helper hook only for historical reasons. */ void (*reset)(struct drm_encoder *encoder); /** * @destroy: * * Clean up encoder resources. This is only called at driver unload time * through drm_mode_config_cleanup() since an encoder cannot be * hotplugged in DRM. */ void (*destroy)(struct drm_encoder *encoder); /** * @late_register: * * This optional hook can be used to register additional userspace * interfaces attached to the encoder. * It is called late in the driver load sequence from drm_dev_register(). * Everything added from this callback should be unregistered in * the early_unregister callback. * * Returns: * * 0 on success, or a negative error code on failure. */ int (*late_register)(struct drm_encoder *encoder); /** * @early_unregister: * * This optional hook should be used to unregister the additional * userspace interfaces attached to the encoder from * @late_register. It is called from drm_dev_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torndown. */ void (*early_unregister)(struct drm_encoder *encoder); /** * @debugfs_init: * * Allows encoders to create encoder-specific debugfs files. */ void (*debugfs_init)(struct drm_encoder *encoder, struct dentry *root); }; /** * struct drm_encoder - central DRM encoder structure * @dev: parent DRM device * @head: list management * @base: base KMS object * @name: human readable name, can be overwritten by the driver * @funcs: control functions, can be NULL for simple managed encoders * @helper_private: mid-layer private data * * CRTCs drive pixels to encoders, which convert them into signals * appropriate for a given connector or set of connectors. */ struct drm_encoder { struct drm_device *dev; struct list_head head; struct drm_mode_object base; char *name; /** * @encoder_type: * * One of the DRM_MODE_ENCODER_<foo> types in drm_mode.h. The following * encoder types are defined thus far: * * - DRM_MODE_ENCODER_DAC for VGA and analog on DVI-I/DVI-A. * * - DRM_MODE_ENCODER_TMDS for DVI, HDMI and (embedded) DisplayPort. * * - DRM_MODE_ENCODER_LVDS for display panels, or in general any panel * with a proprietary parallel connector. * * - DRM_MODE_ENCODER_TVDAC for TV output (Composite, S-Video, * Component, SCART). * * - DRM_MODE_ENCODER_VIRTUAL for virtual machine displays * * - DRM_MODE_ENCODER_DSI for panels connected using the DSI serial bus. * * - DRM_MODE_ENCODER_DPI for panels connected using the DPI parallel * bus. * * - DRM_MODE_ENCODER_DPMST for special fake encoders used to allow * mutliple DP MST streams to share one physical encoder. */ int encoder_type; /** * @index: Position inside the mode_config.list, can be used as an array * index. It is invariant over the lifetime of the encoder. */ unsigned index; /** * @possible_crtcs: Bitmask of potential CRTC bindings, using * drm_crtc_index() as the index into the bitfield. The driver must set * the bits for all &drm_crtc objects this encoder can be connected to * before calling drm_dev_register(). * * You will get a WARN if you get this wrong in the driver. * * Note that since CRTC objects can't be hotplugged the assigned indices * are stable and hence known before registering all objects. */ uint32_t possible_crtcs; /** * @possible_clones: Bitmask of potential sibling encoders for cloning, * using drm_encoder_index() as the index into the bitfield. The driver * must set the bits for all &drm_encoder objects which can clone a * &drm_crtc together with this encoder before calling * drm_dev_register(). Drivers should set the bit representing the * encoder itself, too. Cloning bits should be set such that when two * encoders can be used in a cloned configuration, they both should have * each another bits set. * * As an exception to the above rule if the driver doesn't implement * any cloning it can leave @possible_clones set to 0. The core will * automagically fix this up by setting the bit for the encoder itself. * * You will get a WARN if you get this wrong in the driver. * * Note that since encoder objects can't be hotplugged the assigned indices * are stable and hence known before registering all objects. */ uint32_t possible_clones; /** * @crtc: Currently bound CRTC, only really meaningful for non-atomic * drivers. Atomic drivers should instead check * &drm_connector_state.crtc. */ struct drm_crtc *crtc; /** * @bridge_chain: Bridges attached to this encoder. Drivers shall not * access this field directly. */ struct list_head bridge_chain; const struct drm_encoder_funcs *funcs; const struct drm_encoder_helper_funcs *helper_private; /** * @debugfs_entry: * * Debugfs directory for this CRTC. */ struct dentry *debugfs_entry; }; #define obj_to_encoder(x) container_of(x, struct drm_encoder, base) __printf(5, 6) int drm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); __printf(5, 6) int drmm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); __printf(6, 7) void *__drmm_encoder_alloc(struct drm_device *dev, size_t size, size_t offset, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); /** * drmm_encoder_alloc - Allocate and initialize an encoder * @dev: drm device * @type: the type of the struct which contains struct &drm_encoder * @member: the name of the &drm_encoder within @type * @funcs: callbacks for this encoder (optional) * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * Allocates and initializes an encoder. Encoder should be subclassed as part of * driver encoder objects. Cleanup is automatically handled through registering * drm_encoder_cleanup() with drmm_add_action(). * * The @drm_encoder_funcs.destroy hook must be NULL. * * Returns: * Pointer to new encoder, or ERR_PTR on failure. */ #define drmm_encoder_alloc(dev, type, member, funcs, encoder_type, name, ...) \ ((type *)__drmm_encoder_alloc(dev, sizeof(type), \ offsetof(type, member), funcs, \ encoder_type, name, ##__VA_ARGS__)) /** * drmm_plain_encoder_alloc - Allocate and initialize an encoder * @dev: drm device * @funcs: callbacks for this encoder (optional) * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * This is a simplified version of drmm_encoder_alloc(), which only allocates * and returns a struct drm_encoder instance, with no subclassing. * * Returns: * Pointer to the new drm_encoder struct, or ERR_PTR on failure. */ #define drmm_plain_encoder_alloc(dev, funcs, encoder_type, name, ...) \ ((struct drm_encoder *) \ __drmm_encoder_alloc(dev, sizeof(struct drm_encoder), \ 0, funcs, encoder_type, name, ##__VA_ARGS__)) /** * drm_encoder_index - find the index of a registered encoder * @encoder: encoder to find index for * * Given a registered encoder, return the index of that encoder within a DRM * device's list of encoders. */ static inline unsigned int drm_encoder_index(const struct drm_encoder *encoder) { return encoder->index; } /** * drm_encoder_mask - find the mask of a registered encoder * @encoder: encoder to find mask for * * Given a registered encoder, return the mask bit of that encoder for an * encoder's possible_clones field. */ static inline u32 drm_encoder_mask(const struct drm_encoder *encoder) { return 1 << drm_encoder_index(encoder); } /** * drm_encoder_crtc_ok - can a given crtc drive a given encoder? * @encoder: encoder to test * @crtc: crtc to test * * Returns false if @encoder can't be driven by @crtc, true otherwise. */ static inline bool drm_encoder_crtc_ok(struct drm_encoder *encoder, struct drm_crtc *crtc) { return !!(encoder->possible_crtcs & drm_crtc_mask(crtc)); } /** * drm_encoder_find - find a &drm_encoder * @dev: DRM device * @file_priv: drm file to check for lease against. * @id: encoder id * * Returns the encoder with @id, NULL if it doesn't exist. Simple wrapper around * drm_mode_object_find(). */ static inline struct drm_encoder *drm_encoder_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id) { struct drm_mode_object *mo; mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_ENCODER); return mo ? obj_to_encoder(mo) : NULL; } void drm_encoder_cleanup(struct drm_encoder *encoder); /** * drm_for_each_encoder_mask - iterate over encoders specified by bitmask * @encoder: the loop cursor * @dev: the DRM device * @encoder_mask: bitmask of encoder indices * * Iterate over all encoders specified by bitmask. */ #define drm_for_each_encoder_mask(encoder, dev, encoder_mask) \ list_for_each_entry((encoder), &(dev)->mode_config.encoder_list, head) \ for_each_if ((encoder_mask) & drm_encoder_mask(encoder)) /** * drm_for_each_encoder - iterate over all encoders * @encoder: the loop cursor * @dev: the DRM device * * Iterate over all encoders of @dev. */ #define drm_for_each_encoder(encoder, dev) \ list_for_each_entry(encoder, &(dev)->mode_config.encoder_list, head) #endif
109 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * ALSA sequencer Ports * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl> */ #ifndef __SND_SEQ_PORTS_H #define __SND_SEQ_PORTS_H #include <sound/seq_kernel.h> #include <sound/ump_convert.h> #include "seq_lock.h" /* list of 'exported' ports */ /* Client ports that are not exported are still accessible, but are anonymous ports. If a port supports SUBSCRIPTION, that port can send events to all subscribersto a special address, with address (queue==SNDRV_SEQ_ADDRESS_SUBSCRIBERS). The message is then send to all recipients that are registered in the subscription list. A typical application for these SUBSCRIPTION events is handling of incoming MIDI data. The port doesn't 'know' what other clients are interested in this message. If for instance a MIDI recording application would like to receive the events from that port, it will first have to subscribe with that port. */ struct snd_seq_subscribers { struct snd_seq_port_subscribe info; /* additional info */ struct list_head src_list; /* link of sources */ struct list_head dest_list; /* link of destinations */ atomic_t ref_count; }; struct snd_seq_port_subs_info { struct list_head list_head; /* list of subscribed ports */ unsigned int count; /* count of subscribers */ unsigned int exclusive: 1; /* exclusive mode */ struct rw_semaphore list_mutex; rwlock_t list_lock; int (*open)(void *private_data, struct snd_seq_port_subscribe *info); int (*close)(void *private_data, struct snd_seq_port_subscribe *info); }; struct snd_seq_client_port { struct snd_seq_addr addr; /* client/port number */ struct module *owner; /* owner of this port */ char name[64]; /* port name */ struct list_head list; /* port list */ snd_use_lock_t use_lock; /* subscribers */ struct snd_seq_port_subs_info c_src; /* read (sender) list */ struct snd_seq_port_subs_info c_dest; /* write (dest) list */ int (*event_input)(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop); void (*private_free)(void *private_data); void *private_data; unsigned int closing : 1; unsigned int timestamping: 1; unsigned int time_real: 1; int time_queue; /* capability, inport, output, sync */ unsigned int capability; /* port capability bits */ unsigned int type; /* port type bits */ /* supported channels */ int midi_channels; int midi_voices; int synth_voices; /* UMP direction and group */ unsigned char direction; unsigned char ump_group; bool is_midi1; /* keep MIDI 1.0 protocol */ #if IS_ENABLED(CONFIG_SND_SEQ_UMP) struct ump_cvt_to_ump_bank midi2_bank[16]; /* per channel */ #endif }; struct snd_seq_client; /* return pointer to port structure and lock port */ struct snd_seq_client_port *snd_seq_port_use_ptr(struct snd_seq_client *client, int num); /* search for next port - port is locked if found */ struct snd_seq_client_port *snd_seq_port_query_nearest(struct snd_seq_client *client, struct snd_seq_port_info *pinfo); /* unlock the port */ #define snd_seq_port_unlock(port) snd_use_lock_free(&(port)->use_lock) DEFINE_FREE(snd_seq_port, struct snd_seq_client_port *, if (!IS_ERR_OR_NULL(_T)) snd_seq_port_unlock(_T)) /* create a port, port number or a negative error code is returned */ int snd_seq_create_port(struct snd_seq_client *client, int port_index, struct snd_seq_client_port **port_ret); /* delete a port */ int snd_seq_delete_port(struct snd_seq_client *client, int port); /* delete all ports */ int snd_seq_delete_all_ports(struct snd_seq_client *client); /* set port info fields */ int snd_seq_set_port_info(struct snd_seq_client_port *port, struct snd_seq_port_info *info); /* get port info fields */ int snd_seq_get_port_info(struct snd_seq_client_port *port, struct snd_seq_port_info *info); /* add subscriber to subscription list */ int snd_seq_port_connect(struct snd_seq_client *caller, struct snd_seq_client *s, struct snd_seq_client_port *sp, struct snd_seq_client *d, struct snd_seq_client_port *dp, struct snd_seq_port_subscribe *info); /* remove subscriber from subscription list */ int snd_seq_port_disconnect(struct snd_seq_client *caller, struct snd_seq_client *s, struct snd_seq_client_port *sp, struct snd_seq_client *d, struct snd_seq_client_port *dp, struct snd_seq_port_subscribe *info); /* subscribe port */ int snd_seq_port_subscribe(struct snd_seq_client_port *port, struct snd_seq_port_subscribe *info); /* get matched subscriber */ int snd_seq_port_get_subscription(struct snd_seq_port_subs_info *src_grp, struct snd_seq_addr *dest_addr, struct snd_seq_port_subscribe *subs); #endif
3 4 1 1 1 4 3 4 3 4 4 1 1 1 1 4 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 // SPDX-License-Identifier: GPL-2.0-only /* * Cryptographic API * * Michael MIC (IEEE 802.11i/TKIP) keyed digest * * Copyright (c) 2004 Jouni Malinen <j@w1.fi> */ #include <crypto/internal/hash.h> #include <linux/unaligned.h> #include <linux/init.h> #include <linux/module.h> #include <linux/string.h> #include <linux/types.h> struct michael_mic_ctx { u32 l, r; }; struct michael_mic_desc_ctx { __le32 pending; size_t pending_len; u32 l, r; }; static inline u32 xswap(u32 val) { return ((val & 0x00ff00ff) << 8) | ((val & 0xff00ff00) >> 8); } #define michael_block(l, r) \ do { \ r ^= rol32(l, 17); \ l += r; \ r ^= xswap(l); \ l += r; \ r ^= rol32(l, 3); \ l += r; \ r ^= ror32(l, 2); \ l += r; \ } while (0) static int michael_init(struct shash_desc *desc) { struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); struct michael_mic_ctx *ctx = crypto_shash_ctx(desc->tfm); mctx->pending_len = 0; mctx->l = ctx->l; mctx->r = ctx->r; return 0; } static int michael_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); if (mctx->pending_len) { int flen = 4 - mctx->pending_len; if (flen > len) flen = len; memcpy((u8 *)&mctx->pending + mctx->pending_len, data, flen); mctx->pending_len += flen; data += flen; len -= flen; if (mctx->pending_len < 4) return 0; mctx->l ^= le32_to_cpu(mctx->pending); michael_block(mctx->l, mctx->r); mctx->pending_len = 0; } while (len >= 4) { mctx->l ^= get_unaligned_le32(data); michael_block(mctx->l, mctx->r); data += 4; len -= 4; } if (len > 0) { mctx->pending_len = len; memcpy(&mctx->pending, data, len); } return 0; } static int michael_final(struct shash_desc *desc, u8 *out) { struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); u8 *data = (u8 *)&mctx->pending; /* Last block and padding (0x5a, 4..7 x 0) */ switch (mctx->pending_len) { case 0: mctx->l ^= 0x5a; break; case 1: mctx->l ^= data[0] | 0x5a00; break; case 2: mctx->l ^= data[0] | (data[1] << 8) | 0x5a0000; break; case 3: mctx->l ^= data[0] | (data[1] << 8) | (data[2] << 16) | 0x5a000000; break; } michael_block(mctx->l, mctx->r); /* l ^= 0; */ michael_block(mctx->l, mctx->r); put_unaligned_le32(mctx->l, out); put_unaligned_le32(mctx->r, out + 4); return 0; } static int michael_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct michael_mic_ctx *mctx = crypto_shash_ctx(tfm); if (keylen != 8) return -EINVAL; mctx->l = get_unaligned_le32(key); mctx->r = get_unaligned_le32(key + 4); return 0; } static struct shash_alg alg = { .digestsize = 8, .setkey = michael_setkey, .init = michael_init, .update = michael_update, .final = michael_final, .descsize = sizeof(struct michael_mic_desc_ctx), .base = { .cra_name = "michael_mic", .cra_driver_name = "michael_mic-generic", .cra_blocksize = 8, .cra_ctxsize = sizeof(struct michael_mic_ctx), .cra_module = THIS_MODULE, } }; static int __init michael_mic_init(void) { return crypto_register_shash(&alg); } static void __exit michael_mic_exit(void) { crypto_unregister_shash(&alg); } module_init(michael_mic_init); module_exit(michael_mic_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Michael MIC"); MODULE_AUTHOR("Jouni Malinen <j@w1.fi>"); MODULE_ALIAS_CRYPTO("michael_mic");
13 13 6 16 47 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/posix_acl_xattr.h Extended attribute system call representation of Access Control Lists. Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> */ #ifndef _POSIX_ACL_XATTR_H #define _POSIX_ACL_XATTR_H #include <uapi/linux/xattr.h> #include <uapi/linux/posix_acl_xattr.h> #include <linux/posix_acl.h> static inline size_t posix_acl_xattr_size(int count) { return (sizeof(struct posix_acl_xattr_header) + (count * sizeof(struct posix_acl_xattr_entry))); } static inline int posix_acl_xattr_count(size_t size) { if (size < sizeof(struct posix_acl_xattr_header)) return -1; size -= sizeof(struct posix_acl_xattr_header); if (size % sizeof(struct posix_acl_xattr_entry)) return -1; return size / sizeof(struct posix_acl_xattr_entry); } #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); #else static inline struct posix_acl * posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size) { return ERR_PTR(-EOPNOTSUPP); } #endif int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); static inline const char *posix_acl_xattr_name(int type) { switch (type) { case ACL_TYPE_ACCESS: return XATTR_NAME_POSIX_ACL_ACCESS; case ACL_TYPE_DEFAULT: return XATTR_NAME_POSIX_ACL_DEFAULT; } return ""; } static inline int posix_acl_type(const char *name) { if (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) return ACL_TYPE_ACCESS; else if (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0) return ACL_TYPE_DEFAULT; return -1; } /* These are legacy handlers. Don't use them for new code. */ extern const struct xattr_handler nop_posix_acl_access; extern const struct xattr_handler nop_posix_acl_default; #endif /* _POSIX_ACL_XATTR_H */
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Public Key Signature Algorithm * * Copyright (c) 2023 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_SIG_H #define _CRYPTO_SIG_H #include <linux/crypto.h> /** * struct crypto_sig - user-instantiated objects which encapsulate * algorithms and core processing logic * * @base: Common crypto API algorithm data structure */ struct crypto_sig { struct crypto_tfm base; }; /** * struct sig_alg - generic public key signature algorithm * * @sign: Function performs a sign operation as defined by public key * algorithm. On success, the signature size is returned. * Optional. * @verify: Function performs a complete verify operation as defined by * public key algorithm, returning verification status. Optional. * @set_pub_key: Function invokes the algorithm specific set public key * function, which knows how to decode and interpret * the BER encoded public key and parameters. Mandatory. * @set_priv_key: Function invokes the algorithm specific set private key * function, which knows how to decode and interpret * the BER encoded private key and parameters. Optional. * @key_size: Function returns key size. Mandatory. * @digest_size: Function returns maximum digest size. Optional. * @max_size: Function returns maximum signature size. Optional. * @init: Initialize the cryptographic transformation object. * This function is used to initialize the cryptographic * transformation object. This function is called only once at * the instantiation time, right after the transformation context * was allocated. In case the cryptographic hardware has some * special requirements which need to be handled by software, this * function shall check for the precise requirement of the * transformation and put any software fallbacks in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * * @base: Common crypto API algorithm data structure */ struct sig_alg { int (*sign)(struct crypto_sig *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen); int (*verify)(struct crypto_sig *tfm, const void *src, unsigned int slen, const void *digest, unsigned int dlen); int (*set_pub_key)(struct crypto_sig *tfm, const void *key, unsigned int keylen); int (*set_priv_key)(struct crypto_sig *tfm, const void *key, unsigned int keylen); unsigned int (*key_size)(struct crypto_sig *tfm); unsigned int (*digest_size)(struct crypto_sig *tfm); unsigned int (*max_size)(struct crypto_sig *tfm); int (*init)(struct crypto_sig *tfm); void (*exit)(struct crypto_sig *tfm); struct crypto_alg base; }; /** * DOC: Generic Public Key Signature API * * The Public Key Signature API is used with the algorithms of type * CRYPTO_ALG_TYPE_SIG (listed as type "sig" in /proc/crypto) */ /** * crypto_alloc_sig() - allocate signature tfm handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * signing algorithm e.g. "ecdsa" * @type: specifies the type of the algorithm * @mask: specifies the mask for the algorithm * * Allocate a handle for public key signature algorithm. The returned struct * crypto_sig is the handle that is required for any subsequent * API invocation for signature operations. * * Return: allocated handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_sig *crypto_alloc_sig(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_sig_tfm(struct crypto_sig *tfm) { return &tfm->base; } static inline struct crypto_sig *__crypto_sig_tfm(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_sig, base); } static inline struct sig_alg *__crypto_sig_alg(struct crypto_alg *alg) { return container_of(alg, struct sig_alg, base); } static inline struct sig_alg *crypto_sig_alg(struct crypto_sig *tfm) { return __crypto_sig_alg(crypto_sig_tfm(tfm)->__crt_alg); } /** * crypto_free_sig() - free signature tfm handle * * @tfm: signature tfm handle allocated with crypto_alloc_sig() * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_sig(struct crypto_sig *tfm) { crypto_destroy_tfm(tfm, crypto_sig_tfm(tfm)); } /** * crypto_sig_keysize() - Get key size * * Function returns the key size in bits. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. * * @tfm: signature tfm handle allocated with crypto_alloc_sig() */ static inline unsigned int crypto_sig_keysize(struct crypto_sig *tfm) { struct sig_alg *alg = crypto_sig_alg(tfm); return alg->key_size(tfm); } /** * crypto_sig_digestsize() - Get maximum digest size * * Function returns the maximum digest size in bytes. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. * * @tfm: signature tfm handle allocated with crypto_alloc_sig() */ static inline unsigned int crypto_sig_digestsize(struct crypto_sig *tfm) { struct sig_alg *alg = crypto_sig_alg(tfm); return alg->digest_size(tfm); } /** * crypto_sig_maxsize() - Get maximum signature size * * Function returns the maximum signature size in bytes. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. * * @tfm: signature tfm handle allocated with crypto_alloc_sig() */ static inline unsigned int crypto_sig_maxsize(struct crypto_sig *tfm) { struct sig_alg *alg = crypto_sig_alg(tfm); return alg->max_size(tfm); } /** * crypto_sig_sign() - Invoke signing operation * * Function invokes the specific signing operation for a given algorithm * * @tfm: signature tfm handle allocated with crypto_alloc_sig() * @src: source buffer * @slen: source length * @dst: destination obuffer * @dlen: destination length * * Return: signature size on success; error code in case of error */ static inline int crypto_sig_sign(struct crypto_sig *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen) { struct sig_alg *alg = crypto_sig_alg(tfm); return alg->sign(tfm, src, slen, dst, dlen); } /** * crypto_sig_verify() - Invoke signature verification * * Function invokes the specific signature verification operation * for a given algorithm. * * @tfm: signature tfm handle allocated with crypto_alloc_sig() * @src: source buffer * @slen: source length * @digest: digest * @dlen: digest length * * Return: zero on verification success; error code in case of error. */ static inline int crypto_sig_verify(struct crypto_sig *tfm, const void *src, unsigned int slen, const void *digest, unsigned int dlen) { struct sig_alg *alg = crypto_sig_alg(tfm); return alg->verify(tfm, src, slen, digest, dlen); } /** * crypto_sig_set_pubkey() - Invoke set public key operation * * Function invokes the algorithm specific set key function, which knows * how to decode and interpret the encoded key and parameters * * @tfm: tfm handle * @key: BER encoded public key, algo OID, paramlen, BER encoded * parameters * @keylen: length of the key (not including other data) * * Return: zero on success; error code in case of error */ static inline int crypto_sig_set_pubkey(struct crypto_sig *tfm, const void *key, unsigned int keylen) { struct sig_alg *alg = crypto_sig_alg(tfm); return alg->set_pub_key(tfm, key, keylen); } /** * crypto_sig_set_privkey() - Invoke set private key operation * * Function invokes the algorithm specific set key function, which knows * how to decode and interpret the encoded key and parameters * * @tfm: tfm handle * @key: BER encoded private key, algo OID, paramlen, BER encoded * parameters * @keylen: length of the key (not including other data) * * Return: zero on success; error code in case of error */ static inline int crypto_sig_set_privkey(struct crypto_sig *tfm, const void *key, unsigned int keylen) { struct sig_alg *alg = crypto_sig_alg(tfm); return alg->set_priv_key(tfm, key, keylen); } #endif
2 2 2 2 2 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 2 10 10 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 // SPDX-License-Identifier: GPL-2.0+ /* * IPv6 IOAM implementation * * Author: * Justin Iurman <justin.iurman@uliege.be> */ #include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/net.h> #include <linux/ioam6.h> #include <linux/ioam6_genl.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <net/genetlink.h> #include <net/ioam6.h> #include <net/sch_generic.h> static void ioam6_ns_release(struct ioam6_namespace *ns) { kfree_rcu(ns, rcu); } static void ioam6_sc_release(struct ioam6_schema *sc) { kfree_rcu(sc, rcu); } static void ioam6_free_ns(void *ptr, void *arg) { struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr; if (ns) ioam6_ns_release(ns); } static void ioam6_free_sc(void *ptr, void *arg) { struct ioam6_schema *sc = (struct ioam6_schema *)ptr; if (sc) ioam6_sc_release(sc); } static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { const struct ioam6_namespace *ns = obj; return (ns->id != *(__be16 *)arg->key); } static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { const struct ioam6_schema *sc = obj; return (sc->id != *(u32 *)arg->key); } static const struct rhashtable_params rht_ns_params = { .key_len = sizeof(__be16), .key_offset = offsetof(struct ioam6_namespace, id), .head_offset = offsetof(struct ioam6_namespace, head), .automatic_shrinking = true, .obj_cmpfn = ioam6_ns_cmpfn, }; static const struct rhashtable_params rht_sc_params = { .key_len = sizeof(u32), .key_offset = offsetof(struct ioam6_schema, id), .head_offset = offsetof(struct ioam6_schema, head), .automatic_shrinking = true, .obj_cmpfn = ioam6_sc_cmpfn, }; static struct genl_family ioam6_genl_family; static const struct nla_policy ioam6_genl_policy_addns[] = { [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 }, [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 }, }; static const struct nla_policy ioam6_genl_policy_delns[] = { [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, }; static const struct nla_policy ioam6_genl_policy_addsc[] = { [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY, .len = IOAM6_MAX_SCHEMA_DATA_LEN }, }; static const struct nla_policy ioam6_genl_policy_delsc[] = { [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, }; static const struct nla_policy ioam6_genl_policy_ns_sc[] = { [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG }, }; static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; struct ioam6_namespace *ns; u64 data64; u32 data32; __be16 id; int err; if (!info->attrs[IOAM6_ATTR_NS_ID]) return -EINVAL; id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); if (ns) { err = -EEXIST; goto out_unlock; } ns = kzalloc(sizeof(*ns), GFP_KERNEL); if (!ns) { err = -ENOMEM; goto out_unlock; } ns->id = id; data32 = nla_get_u32_default(info->attrs[IOAM6_ATTR_NS_DATA], IOAM6_U32_UNAVAILABLE); data64 = nla_get_u64_default(info->attrs[IOAM6_ATTR_NS_DATA_WIDE], IOAM6_U64_UNAVAILABLE); ns->data = cpu_to_be32(data32); ns->data_wide = cpu_to_be64(data64); err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head, rht_ns_params); if (err) kfree(ns); out_unlock: mutex_unlock(&nsdata->lock); return err; } static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; struct ioam6_namespace *ns; struct ioam6_schema *sc; __be16 id; int err; if (!info->attrs[IOAM6_ATTR_NS_ID]) return -EINVAL; id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); if (!ns) { err = -ENOENT; goto out_unlock; } sc = rcu_dereference_protected(ns->schema, lockdep_is_held(&nsdata->lock)); err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head, rht_ns_params); if (err) goto out_unlock; if (sc) rcu_assign_pointer(sc->ns, NULL); ioam6_ns_release(ns); out_unlock: mutex_unlock(&nsdata->lock); return err; } static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { struct ioam6_schema *sc; u64 data64; u32 data32; void *hdr; hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); if (!hdr) return -ENOMEM; data32 = be32_to_cpu(ns->data); data64 = be64_to_cpu(ns->data_wide); if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) || (data32 != IOAM6_U32_UNAVAILABLE && nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) || (data64 != IOAM6_U64_UNAVAILABLE && nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE, data64, IOAM6_ATTR_PAD))) goto nla_put_failure; rcu_read_lock(); sc = rcu_dereference(ns->schema); if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) { rcu_read_unlock(); goto nla_put_failure; } rcu_read_unlock(); genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } static int ioam6_genl_dumpns_start(struct netlink_callback *cb) { struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; if (!iter) { iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long)iter; } rhashtable_walk_enter(&nsdata->namespaces, iter); return 0; } static int ioam6_genl_dumpns_done(struct netlink_callback *cb) { struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_exit(iter); kfree(iter); return 0; } static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb) { struct rhashtable_iter *iter; struct ioam6_namespace *ns; int err; iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_start(iter); for (;;) { ns = rhashtable_walk_next(iter); if (IS_ERR(ns)) { if (PTR_ERR(ns) == -EAGAIN) continue; err = PTR_ERR(ns); goto done; } else if (!ns) { break; } err = __ioam6_genl_dumpns_element(ns, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, IOAM6_CMD_DUMP_NAMESPACES); if (err) goto done; } err = skb->len; done: rhashtable_walk_stop(iter); return err; } static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; int len, len_aligned, err; struct ioam6_schema *sc; u32 id; if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA]) return -EINVAL; id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); if (sc) { err = -EEXIST; goto out_unlock; } len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]); len_aligned = ALIGN(len, 4); sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL); if (!sc) { err = -ENOMEM; goto out_unlock; } sc->id = id; sc->len = len_aligned; sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24)); nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len); err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head, rht_sc_params); if (err) goto free_sc; out_unlock: mutex_unlock(&nsdata->lock); return err; free_sc: kfree(sc); goto out_unlock; } static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; struct ioam6_namespace *ns; struct ioam6_schema *sc; int err; u32 id; if (!info->attrs[IOAM6_ATTR_SC_ID]) return -EINVAL; id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); if (!sc) { err = -ENOENT; goto out_unlock; } ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); err = rhashtable_remove_fast(&nsdata->schemas, &sc->head, rht_sc_params); if (err) goto out_unlock; if (ns) rcu_assign_pointer(ns->schema, NULL); ioam6_sc_release(sc); out_unlock: mutex_unlock(&nsdata->lock); return err; } static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { struct ioam6_namespace *ns; void *hdr; hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); if (!hdr) return -ENOMEM; if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) || nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data)) goto nla_put_failure; rcu_read_lock(); ns = rcu_dereference(sc->ns); if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) { rcu_read_unlock(); goto nla_put_failure; } rcu_read_unlock(); genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } static int ioam6_genl_dumpsc_start(struct netlink_callback *cb) { struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; if (!iter) { iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long)iter; } rhashtable_walk_enter(&nsdata->schemas, iter); return 0; } static int ioam6_genl_dumpsc_done(struct netlink_callback *cb) { struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_exit(iter); kfree(iter); return 0; } static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb) { struct rhashtable_iter *iter; struct ioam6_schema *sc; int err; iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_start(iter); for (;;) { sc = rhashtable_walk_next(iter); if (IS_ERR(sc)) { if (PTR_ERR(sc) == -EAGAIN) continue; err = PTR_ERR(sc); goto done; } else if (!sc) { break; } err = __ioam6_genl_dumpsc_element(sc, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, IOAM6_CMD_DUMP_SCHEMAS); if (err) goto done; } err = skb->len; done: rhashtable_walk_stop(iter); return err; } static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info) { struct ioam6_namespace *ns, *ns_ref; struct ioam6_schema *sc, *sc_ref; struct ioam6_pernet_data *nsdata; __be16 ns_id; u32 sc_id; int err; if (!info->attrs[IOAM6_ATTR_NS_ID] || (!info->attrs[IOAM6_ATTR_SC_ID] && !info->attrs[IOAM6_ATTR_SC_NONE])) return -EINVAL; ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params); if (!ns) { err = -ENOENT; goto out_unlock; } if (info->attrs[IOAM6_ATTR_SC_NONE]) { sc = NULL; } else { sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id, rht_sc_params); if (!sc) { err = -ENOENT; goto out_unlock; } } sc_ref = rcu_dereference_protected(ns->schema, lockdep_is_held(&nsdata->lock)); if (sc_ref) rcu_assign_pointer(sc_ref->ns, NULL); rcu_assign_pointer(ns->schema, sc); if (sc) { ns_ref = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); if (ns_ref) rcu_assign_pointer(ns_ref->schema, NULL); rcu_assign_pointer(sc->ns, ns); } err = 0; out_unlock: mutex_unlock(&nsdata->lock); return err; } static const struct genl_ops ioam6_genl_ops[] = { { .cmd = IOAM6_CMD_ADD_NAMESPACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_addns, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_addns, .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1, }, { .cmd = IOAM6_CMD_DEL_NAMESPACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_delns, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_delns, .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1, }, { .cmd = IOAM6_CMD_DUMP_NAMESPACES, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = ioam6_genl_dumpns_start, .dumpit = ioam6_genl_dumpns, .done = ioam6_genl_dumpns_done, .flags = GENL_ADMIN_PERM, }, { .cmd = IOAM6_CMD_ADD_SCHEMA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_addsc, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_addsc, .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1, }, { .cmd = IOAM6_CMD_DEL_SCHEMA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_delsc, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_delsc, .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1, }, { .cmd = IOAM6_CMD_DUMP_SCHEMAS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = ioam6_genl_dumpsc_start, .dumpit = ioam6_genl_dumpsc, .done = ioam6_genl_dumpsc_done, .flags = GENL_ADMIN_PERM, }, { .cmd = IOAM6_CMD_NS_SET_SCHEMA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_ns_set_schema, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_ns_sc, .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1, }, }; #define IOAM6_GENL_EV_GRP_OFFSET 0 static const struct genl_multicast_group ioam6_mcgrps[] = { [IOAM6_GENL_EV_GRP_OFFSET] = { .name = IOAM6_GENL_EV_GRP_NAME, .flags = GENL_MCAST_CAP_NET_ADMIN }, }; static int ioam6_event_put_trace(struct sk_buff *skb, struct ioam6_trace_hdr *trace, unsigned int len) { if (nla_put_u16(skb, IOAM6_EVENT_ATTR_TRACE_NAMESPACE, be16_to_cpu(trace->namespace_id)) || nla_put_u8(skb, IOAM6_EVENT_ATTR_TRACE_NODELEN, trace->nodelen) || nla_put_u32(skb, IOAM6_EVENT_ATTR_TRACE_TYPE, be32_to_cpu(trace->type_be32)) || nla_put(skb, IOAM6_EVENT_ATTR_TRACE_DATA, len - sizeof(struct ioam6_trace_hdr) - trace->remlen * 4, trace->data + trace->remlen * 4)) return 1; return 0; } void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp, void *opt, unsigned int opt_len) { struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&ioam6_genl_family, net, IOAM6_GENL_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &ioam6_genl_family, 0, type); if (!nlh) goto nla_put_failure; switch (type) { case IOAM6_EVENT_UNSPEC: WARN_ON_ONCE(1); break; case IOAM6_EVENT_TRACE: if (ioam6_event_put_trace(skb, (struct ioam6_trace_hdr *)opt, opt_len)) goto nla_put_failure; break; } genlmsg_end(skb, nlh); genlmsg_multicast_netns(&ioam6_genl_family, net, skb, 0, IOAM6_GENL_EV_GRP_OFFSET, gfp); return; nla_put_failure: nlmsg_free(skb); } static struct genl_family ioam6_genl_family __ro_after_init = { .name = IOAM6_GENL_NAME, .version = IOAM6_GENL_VERSION, .netnsok = true, .parallel_ops = true, .ops = ioam6_genl_ops, .n_ops = ARRAY_SIZE(ioam6_genl_ops), .resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1, .mcgrps = ioam6_mcgrps, .n_mcgrps = ARRAY_SIZE(ioam6_mcgrps), .module = THIS_MODULE, }; struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) { struct ioam6_pernet_data *nsdata = ioam6_pernet(net); return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); } static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, struct ioam6_schema *sc, u8 sclen, bool is_input) { struct net_device *dev = skb_dst_dev(skb); struct timespec64 ts; ktime_t tstamp; u64 raw64; u32 raw32; u16 raw16; u8 *data; u8 byte; data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; /* hop_lim and node_id */ if (trace->type.bit0) { byte = ipv6_hdr(skb)->hop_limit; if (is_input) byte--; raw32 = dev_net(dev)->ipv6.sysctl.ioam6_id; *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); data += sizeof(__be32); } /* ingress_if_id and egress_if_id */ if (trace->type.bit1) { if (!skb->dev) raw16 = IOAM6_U16_UNAVAILABLE; else raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); if (dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else raw16 = (__force u16)READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); } /* timestamp seconds */ if (trace->type.bit2) { if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { tstamp = skb_tstamp_cond(skb, true); ts = ktime_to_timespec64(tstamp); *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); } data += sizeof(__be32); } /* timestamp subseconds */ if (trace->type.bit3) { if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { if (!trace->type.bit2) { tstamp = skb_tstamp_cond(skb, true); ts = ktime_to_timespec64(tstamp); } *(__be32 *)data = cpu_to_be32((u32)(ts.tv_nsec / NSEC_PER_USEC)); } data += sizeof(__be32); } /* transit delay */ if (trace->type.bit4) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* namespace data */ if (trace->type.bit5) { *(__be32 *)data = ns->data; data += sizeof(__be32); } /* queue depth */ if (trace->type.bit6) { struct netdev_queue *queue; struct Qdisc *qdisc; __u32 qlen, backlog; if (dev->flags & IFF_LOOPBACK) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { queue = skb_get_tx_queue(dev, skb); qdisc = rcu_dereference(queue->qdisc); qdisc_qstats_qlen_backlog(qdisc, &qlen, &backlog); *(__be32 *)data = cpu_to_be32(backlog); } data += sizeof(__be32); } /* checksum complement */ if (trace->type.bit7) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* hop_lim and node_id (wide) */ if (trace->type.bit8) { byte = ipv6_hdr(skb)->hop_limit; if (is_input) byte--; raw64 = dev_net(dev)->ipv6.sysctl.ioam6_id_wide; *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); data += sizeof(__be64); } /* ingress_if_id and egress_if_id (wide) */ if (trace->type.bit9) { if (!skb->dev) raw32 = IOAM6_U32_UNAVAILABLE; else raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); if (dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else raw32 = READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); } /* namespace data (wide) */ if (trace->type.bit10) { *(__be64 *)data = ns->data_wide; data += sizeof(__be64); } /* buffer occupancy */ if (trace->type.bit11) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit12 undefined: filled with empty value */ if (trace->type.bit12) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit13 undefined: filled with empty value */ if (trace->type.bit13) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit14 undefined: filled with empty value */ if (trace->type.bit14) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit15 undefined: filled with empty value */ if (trace->type.bit15) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit16 undefined: filled with empty value */ if (trace->type.bit16) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit17 undefined: filled with empty value */ if (trace->type.bit17) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit18 undefined: filled with empty value */ if (trace->type.bit18) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit19 undefined: filled with empty value */ if (trace->type.bit19) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit20 undefined: filled with empty value */ if (trace->type.bit20) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit21 undefined: filled with empty value */ if (trace->type.bit21) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* opaque state snapshot */ if (trace->type.bit22) { if (!sc) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); } else { *(__be32 *)data = sc->hdr; data += sizeof(__be32); memcpy(data, sc->data, sc->len); } } } /* called with rcu_read_lock() */ void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, bool is_input) { struct ioam6_schema *sc; u8 sclen = 0; /* Skip if Overflow flag is set */ if (trace->overflow) return; /* NodeLen does not include Opaque State Snapshot length. We need to * take it into account if the corresponding bit is set (bit 22) and * if the current IOAM namespace has an active schema attached to it */ sc = rcu_dereference(ns->schema); if (trace->type.bit22) { sclen = sizeof_field(struct ioam6_schema, hdr) / 4; if (sc) sclen += sc->len / 4; } /* If there is no space remaining, we set the Overflow flag and we * skip without filling the trace */ if (!trace->remlen || trace->remlen < trace->nodelen + sclen) { trace->overflow = 1; return; } __ioam6_fill_trace_data(skb, ns, trace, sc, sclen, is_input); trace->remlen -= trace->nodelen + sclen; } static int __net_init ioam6_net_init(struct net *net) { struct ioam6_pernet_data *nsdata; int err = -ENOMEM; nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL); if (!nsdata) goto out; mutex_init(&nsdata->lock); net->ipv6.ioam6_data = nsdata; err = rhashtable_init(&nsdata->namespaces, &rht_ns_params); if (err) goto free_nsdata; err = rhashtable_init(&nsdata->schemas, &rht_sc_params); if (err) goto free_rht_ns; out: return err; free_rht_ns: rhashtable_destroy(&nsdata->namespaces); free_nsdata: kfree(nsdata); net->ipv6.ioam6_data = NULL; goto out; } static void __net_exit ioam6_net_exit(struct net *net) { struct ioam6_pernet_data *nsdata = ioam6_pernet(net); rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); kfree(nsdata); } static struct pernet_operations ioam6_net_ops = { .init = ioam6_net_init, .exit = ioam6_net_exit, }; int __init ioam6_init(void) { int err = register_pernet_subsys(&ioam6_net_ops); if (err) goto out; err = genl_register_family(&ioam6_genl_family); if (err) goto out_unregister_pernet_subsys; #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL err = ioam6_iptunnel_init(); if (err) goto out_unregister_genl; #endif pr_info("In-situ OAM (IOAM) with IPv6\n"); out: return err; #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL out_unregister_genl: genl_unregister_family(&ioam6_genl_family); #endif out_unregister_pernet_subsys: unregister_pernet_subsys(&ioam6_net_ops); goto out; } void ioam6_exit(void) { #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL ioam6_iptunnel_exit(); #endif genl_unregister_family(&ioam6_genl_family); unregister_pernet_subsys(&ioam6_net_ops); }
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Weighted Least-Connection Scheduling module * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * * Changes: * Wensong Zhang : changed the ip_vs_wlc_schedule to return dest * Wensong Zhang : changed to use the inactconns in scheduling * Wensong Zhang : changed some comestics things for debugging * Wensong Zhang : changed for the d-linked destination list * Wensong Zhang : added the ip_vs_wlc_update_svc * Wensong Zhang : added any dest with weight=0 is quiesced */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <net/ip_vs.h> /* * Weighted Least Connection scheduling */ static struct ip_vs_dest * ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); /* * We calculate the load of each dest server as follows: * (dest overhead) / dest->weight * * Remember -- no floats in kernel mode!!! * The comparison of h1*w2 > h2*w1 is equivalent to that of * h1/w1 > h2/w2 * if every weight is larger than zero. * * The server with weight=0 is quiesced and will not receive any * new connections. */ list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; loh = ip_vs_dest_conn_overhead(least); goto nextstage; } } ip_vs_scheduler_err(svc, "no destination available"); return NULL; /* * Find the destination with the least load. */ nextstage: list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); if ((__s64)loh * atomic_read(&dest->weight) > (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } } IP_VS_DBG_BUF(6, "WLC: server %s:%u " "activeconns %d refcnt %d weight %d overhead %d\n", IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; } static struct ip_vs_scheduler ip_vs_wlc_scheduler = { .name = "wlc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), .schedule = ip_vs_wlc_schedule, }; static int __init ip_vs_wlc_init(void) { return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); } static void __exit ip_vs_wlc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); synchronize_rcu(); } module_init(ip_vs_wlc_init); module_exit(ip_vs_wlc_cleanup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ipvs weighted least connection scheduler");
1337 4 22766 9849 11149 11184 9563 9662 9568 11133 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_FPU_CONTEXT_H #define __X86_KERNEL_FPU_CONTEXT_H #include <asm/fpu/xstate.h> #include <asm/trace/fpu.h> /* Functions related to FPU context tracking */ /* * The in-register FPU state for an FPU context on a CPU is assumed to be * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx * matches the FPU. * * If the FPU register state is valid, the kernel can skip restoring the * FPU state from memory. * * Any code that clobbers the FPU registers or updates the in-memory * FPU state for a task MUST let the rest of the kernel know that the * FPU registers are no longer valid for this task. * * Invalidate a resource you control: CPU if using the CPU for something else * (with preemption disabled), FPU for the current task, or a task that * is prevented from running by the current task. */ static inline void __cpu_invalidate_fpregs_state(void) { __this_cpu_write(fpu_fpregs_owner_ctx, NULL); } static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) { fpu->last_cpu = -1; } static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) { return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } static inline void fpregs_deactivate(struct fpu *fpu) { __this_cpu_write(fpu_fpregs_owner_ctx, NULL); trace_x86_fpu_regs_deactivated(fpu); } static inline void fpregs_activate(struct fpu *fpu) { __this_cpu_write(fpu_fpregs_owner_ctx, fpu); trace_x86_fpu_regs_activated(fpu); } /* Internal helper for switch_fpu_return() and signal frame setup */ static inline void fpregs_restore_userregs(void) { struct fpu *fpu = x86_task_fpu(current); int cpu = smp_processor_id(); if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) return; if (!fpregs_state_valid(fpu, cpu)) { /* * This restores _all_ xstate which has not been * established yet. * * If PKRU is enabled, then the PKRU value is already * correct because it was either set in switch_to() or in * flush_thread(). So it is excluded because it might be * not up to date in current->thread.fpu->xsave state. * * XFD state is handled in restore_fpregs_from_fpstate(). */ restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE); fpregs_activate(fpu); fpu->last_cpu = cpu; } clear_thread_flag(TIF_NEED_FPU_LOAD); } #endif
9 9 9 9 7 9 9 3 2 2 2 2 3 10 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * seq_oss_writeq.c - write queue and sync * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_writeq.h" #include "seq_oss_event.h" #include "seq_oss_timer.h" #include <sound/seq_oss_legacy.h> #include "../seq_lock.h" #include "../seq_clientmgr.h" #include <linux/wait.h> #include <linux/slab.h> #include <linux/sched/signal.h> /* * create a write queue record */ struct seq_oss_writeq * snd_seq_oss_writeq_new(struct seq_oss_devinfo *dp, int maxlen) { struct seq_oss_writeq *q; struct snd_seq_client_pool pool; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; q->dp = dp; q->maxlen = maxlen; spin_lock_init(&q->sync_lock); q->sync_event_put = 0; q->sync_time = 0; init_waitqueue_head(&q->sync_sleep); memset(&pool, 0, sizeof(pool)); pool.client = dp->cseq; pool.output_pool = maxlen; pool.output_room = maxlen / 2; snd_seq_oss_control(dp, SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, &pool); return q; } /* * delete the write queue */ void snd_seq_oss_writeq_delete(struct seq_oss_writeq *q) { if (q) { snd_seq_oss_writeq_clear(q); /* to be sure */ kfree(q); } } /* * reset the write queue */ void snd_seq_oss_writeq_clear(struct seq_oss_writeq *q) { struct snd_seq_remove_events reset; memset(&reset, 0, sizeof(reset)); reset.remove_mode = SNDRV_SEQ_REMOVE_OUTPUT; /* remove all */ snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_REMOVE_EVENTS, &reset); /* wake up sleepers if any */ snd_seq_oss_writeq_wakeup(q, 0); } /* * wait until the write buffer has enough room */ int snd_seq_oss_writeq_sync(struct seq_oss_writeq *q) { struct seq_oss_devinfo *dp = q->dp; abstime_t time; time = snd_seq_oss_timer_cur_tick(dp->timer); if (q->sync_time >= time) return 0; /* already finished */ if (! q->sync_event_put) { struct snd_seq_event ev; union evrec *rec; /* put echoback event */ memset(&ev, 0, sizeof(ev)); ev.flags = 0; ev.type = SNDRV_SEQ_EVENT_ECHO; ev.time.tick = time; /* echo back to itself */ snd_seq_oss_fill_addr(dp, &ev, dp->addr.client, dp->addr.port); rec = (union evrec *)&ev.data; rec->t.code = SEQ_SYNCTIMER; rec->t.time = time; q->sync_event_put = 1; snd_seq_kernel_client_enqueue(dp->cseq, &ev, NULL, true); } wait_event_interruptible_timeout(q->sync_sleep, ! q->sync_event_put, HZ); if (signal_pending(current)) /* interrupted - return 0 to finish sync */ q->sync_event_put = 0; if (! q->sync_event_put || q->sync_time >= time) return 0; return 1; } /* * wake up sync - echo event was catched */ void snd_seq_oss_writeq_wakeup(struct seq_oss_writeq *q, abstime_t time) { guard(spinlock_irqsave)(&q->sync_lock); q->sync_time = time; q->sync_event_put = 0; wake_up(&q->sync_sleep); } /* * return the unused pool size */ int snd_seq_oss_writeq_get_free_size(struct seq_oss_writeq *q) { struct snd_seq_client_pool pool; pool.client = q->dp->cseq; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, &pool); return pool.output_free; } /* * set output threshold size from ioctl */ void snd_seq_oss_writeq_set_output(struct seq_oss_writeq *q, int val) { struct snd_seq_client_pool pool; pool.client = q->dp->cseq; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, &pool); pool.output_room = val; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, &pool); }
79 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_STAT_H #define BLK_STAT_H #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/ktime.h> #include <linux/rcupdate.h> #include <linux/timer.h> /** * struct blk_stat_callback - Block statistics callback. * * A &struct blk_stat_callback is associated with a &struct request_queue. While * @timer is active, that queue's request completion latencies are sorted into * buckets by @bucket_fn and added to a per-cpu buffer, @cpu_stat. When the * timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked. */ struct blk_stat_callback { /* * @list: RCU list of callbacks for a &struct request_queue. */ struct list_head list; /** * @timer: Timer for the next callback invocation. */ struct timer_list timer; /** * @cpu_stat: Per-cpu statistics buckets. */ struct blk_rq_stat __percpu *cpu_stat; /** * @bucket_fn: Given a request, returns which statistics bucket it * should be accounted under. Return -1 for no bucket for this * request. */ int (*bucket_fn)(const struct request *); /** * @buckets: Number of statistics buckets. */ unsigned int buckets; /** * @stat: Array of statistics buckets. */ struct blk_rq_stat *stat; /** * @fn: Callback function. */ void (*timer_fn)(struct blk_stat_callback *); /** * @data: Private pointer for the user. */ void *data; struct rcu_head rcu; }; struct blk_queue_stats *blk_alloc_queue_stats(void); void blk_free_queue_stats(struct blk_queue_stats *); void blk_stat_add(struct request *rq, u64 now); /* record time/size info in request but not add a callback */ void blk_stat_enable_accounting(struct request_queue *q); void blk_stat_disable_accounting(struct request_queue *q); /** * blk_stat_alloc_callback() - Allocate a block statistics callback. * @timer_fn: Timer callback function. * @bucket_fn: Bucket callback function. * @buckets: Number of statistics buckets. * @data: Value for the @data field of the &struct blk_stat_callback. * * See &struct blk_stat_callback for details on the callback functions. * * Return: &struct blk_stat_callback on success or NULL on ENOMEM. */ struct blk_stat_callback * blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *), int (*bucket_fn)(const struct request *), unsigned int buckets, void *data); /** * blk_stat_add_callback() - Add a block statistics callback to be run on a * request queue. * @q: The request queue. * @cb: The callback. * * Note that a single &struct blk_stat_callback can only be added to a single * &struct request_queue. */ void blk_stat_add_callback(struct request_queue *q, struct blk_stat_callback *cb); /** * blk_stat_remove_callback() - Remove a block statistics callback from a * request queue. * @q: The request queue. * @cb: The callback. * * When this returns, the callback is not running on any CPUs and will not be * called again unless readded. */ void blk_stat_remove_callback(struct request_queue *q, struct blk_stat_callback *cb); /** * blk_stat_free_callback() - Free a block statistics callback. * @cb: The callback. * * @cb may be NULL, in which case this does nothing. If it is not NULL, @cb must * not be associated with a request queue. I.e., if it was previously added with * blk_stat_add_callback(), it must also have been removed since then with * blk_stat_remove_callback(). */ void blk_stat_free_callback(struct blk_stat_callback *cb); /** * blk_stat_is_active() - Check if a block statistics callback is currently * gathering statistics. * @cb: The callback. */ static inline bool blk_stat_is_active(struct blk_stat_callback *cb) { return timer_pending(&cb->timer); } /** * blk_stat_activate_nsecs() - Gather block statistics during a time window in * nanoseconds. * @cb: The callback. * @nsecs: Number of nanoseconds to gather statistics for. * * The timer callback will be called when the window expires. */ static inline void blk_stat_activate_nsecs(struct blk_stat_callback *cb, u64 nsecs) { mod_timer(&cb->timer, jiffies + nsecs_to_jiffies(nsecs)); } static inline void blk_stat_deactivate(struct blk_stat_callback *cb) { timer_delete_sync(&cb->timer); } /** * blk_stat_activate_msecs() - Gather block statistics during a time window in * milliseconds. * @cb: The callback. * @msecs: Number of milliseconds to gather statistics for. * * The timer callback will be called when the window expires. */ static inline void blk_stat_activate_msecs(struct blk_stat_callback *cb, unsigned int msecs) { mod_timer(&cb->timer, jiffies + msecs_to_jiffies(msecs)); } void blk_rq_stat_add(struct blk_rq_stat *, u64); void blk_rq_stat_sum(struct blk_rq_stat *, struct blk_rq_stat *); void blk_rq_stat_init(struct blk_rq_stat *); #endif
1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> */ #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_SYNPROXY.h> #include <net/netfilter/nf_synproxy.h> static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); if (th == NULL) return NF_DROP; if (!synproxy_parse_options(skb, par->thoff, th, &opts)) return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ this_cpu_inc(snet->stats->syn_received); if (th->ece && th->cwr) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; opts.mss_encode = opts.mss_option; opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else opts.options &= ~(XT_SYNPROXY_OPT_WSCALE | XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); synproxy_send_client_synack_ipv6(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts, ntohl(th->seq))) { consume_skb(skb); return NF_STOLEN; } else { return NF_DROP; } } return XT_CONTINUE; } static int synproxy_tg6_check(const struct xt_tgchk_param *par) { struct synproxy_net *snet = synproxy_pernet(par->net); const struct ip6t_entry *e = par->entryinfo; int err; if (!(e->ipv6.flags & IP6T_F_PROTO) || e->ipv6.proto != IPPROTO_TCP || e->ipv6.invflags & XT_INV_PROTO) return -EINVAL; err = nf_ct_netns_get(par->net, par->family); if (err) return err; err = nf_synproxy_ipv6_init(snet, par->net); if (err) { nf_ct_netns_put(par->net, par->family); return err; } return err; } static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) { struct synproxy_net *snet = synproxy_pernet(par->net); nf_synproxy_ipv6_fini(snet, par->net); nf_ct_netns_put(par->net, par->family); } static struct xt_target synproxy_tg6_reg __read_mostly = { .name = "SYNPROXY", .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .target = synproxy_tg6, .targetsize = sizeof(struct xt_synproxy_info), .checkentry = synproxy_tg6_check, .destroy = synproxy_tg6_destroy, .me = THIS_MODULE, }; static int __init synproxy_tg6_init(void) { return xt_register_target(&synproxy_tg6_reg); } static void __exit synproxy_tg6_exit(void) { xt_unregister_target(&synproxy_tg6_reg); } module_init(synproxy_tg6_init); module_exit(synproxy_tg6_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Intercept IPv6 TCP connections and establish them using syncookies");
26 26 26 25 26 26 21 5 26 23 18 25 3 3 2 2 2 26 23 23 8 18 4 4 8 5 1 21 18 26 26 3 25 21 20 26 26 26 28 26 28 28 28 28 28 26 28 26 3 28 1 1 1 1 28 24 29 29 29 29 29 5 5 5 1 1 1 1 1 1 24 22 23 22 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 11 12 13 13 13 4 13 13 13 4 2 2 2 2 2 2 2 2 2 2 2 2 2 13 13 13 13 13 13 13 13 13 13 1 12 12 13 3 2 2 2 1 10 11 13 13 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 // SPDX-License-Identifier: GPL-2.0-only /* * Scanning implementation * * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright 2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <net/sch_generic.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/random.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "mesh.h" #define IEEE80211_PROBE_DELAY (HZ / 33) #define IEEE80211_CHANNEL_TIME (HZ / 33) #define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 9) void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss) { if (!bss) return; cfg80211_put_bss(local->hw.wiphy, container_of((void *)bss, struct cfg80211_bss, priv)); } static bool is_uapsd_supported(struct ieee802_11_elems *elems) { u8 qos_info; if (elems->wmm_info && elems->wmm_info_len == 7 && elems->wmm_info[5] == 1) qos_info = elems->wmm_info[6]; else if (elems->wmm_param && elems->wmm_param_len == 24 && elems->wmm_param[5] == 1) qos_info = elems->wmm_param[6]; else /* no valid wmm information or parameter element found */ return false; return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD; } struct inform_bss_update_data { struct ieee80211_rx_status *rx_status; bool beacon; }; void ieee80211_inform_bss(struct wiphy *wiphy, struct cfg80211_bss *cbss, const struct cfg80211_bss_ies *ies, void *data) { struct ieee80211_local *local = wiphy_priv(wiphy); struct inform_bss_update_data *update_data = data; struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_rx_status *rx_status; struct ieee802_11_elems *elems; int clen, srlen; /* This happens while joining an IBSS */ if (!update_data) return; elems = ieee802_11_parse_elems(ies->data, ies->len, false, NULL); if (!elems) return; rx_status = update_data->rx_status; if (update_data->beacon) bss->device_ts_beacon = rx_status->device_timestamp; else bss->device_ts_presp = rx_status->device_timestamp; if (elems->parse_error) { if (update_data->beacon) bss->corrupt_data |= IEEE80211_BSS_CORRUPT_BEACON; else bss->corrupt_data |= IEEE80211_BSS_CORRUPT_PROBE_RESP; } else { if (update_data->beacon) bss->corrupt_data &= ~IEEE80211_BSS_CORRUPT_BEACON; else bss->corrupt_data &= ~IEEE80211_BSS_CORRUPT_PROBE_RESP; } /* save the ERP value so that it is available at association time */ if (elems->erp_info && (!elems->parse_error || !(bss->valid_data & IEEE80211_BSS_VALID_ERP))) { bss->erp_value = elems->erp_info[0]; bss->has_erp_value = true; if (!elems->parse_error) bss->valid_data |= IEEE80211_BSS_VALID_ERP; } /* replace old supported rates if we get new values */ if (!elems->parse_error || !(bss->valid_data & IEEE80211_BSS_VALID_RATES)) { srlen = 0; if (elems->supp_rates) { clen = IEEE80211_MAX_SUPP_RATES; if (clen > elems->supp_rates_len) clen = elems->supp_rates_len; memcpy(bss->supp_rates, elems->supp_rates, clen); srlen += clen; } if (elems->ext_supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - srlen; if (clen > elems->ext_supp_rates_len) clen = elems->ext_supp_rates_len; memcpy(bss->supp_rates + srlen, elems->ext_supp_rates, clen); srlen += clen; } if (srlen) { bss->supp_rates_len = srlen; if (!elems->parse_error) bss->valid_data |= IEEE80211_BSS_VALID_RATES; } } if (!elems->parse_error || !(bss->valid_data & IEEE80211_BSS_VALID_WMM)) { bss->wmm_used = elems->wmm_param || elems->wmm_info; bss->uapsd_supported = is_uapsd_supported(elems); if (!elems->parse_error) bss->valid_data |= IEEE80211_BSS_VALID_WMM; } if (update_data->beacon) { struct ieee80211_supported_band *sband = local->hw.wiphy->bands[rx_status->band]; if (!(rx_status->encoding == RX_ENC_HT) && !(rx_status->encoding == RX_ENC_VHT)) bss->beacon_rate = &sband->bitrates[rx_status->rate_idx]; } if (elems->vht_cap_elem) bss->vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); else bss->vht_cap_info = 0; kfree(elems); } struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_channel *channel) { bool beacon = ieee80211_is_beacon(mgmt->frame_control) || ieee80211_is_s1g_beacon(mgmt->frame_control); struct cfg80211_bss *cbss; struct inform_bss_update_data update_data = { .rx_status = rx_status, .beacon = beacon, }; struct cfg80211_inform_bss bss_meta = { .boottime_ns = rx_status->boottime_ns, .drv_data = (void *)&update_data, }; bool signal_valid; struct ieee80211_sub_if_data *scan_sdata; if (rx_status->flag & RX_FLAG_NO_SIGNAL_VAL) bss_meta.signal = 0; /* invalid signal indication */ else if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) bss_meta.signal = rx_status->signal * 100; else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal; bss_meta.chan = channel; rcu_read_lock(); scan_sdata = rcu_dereference(local->scan_sdata); if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION && scan_sdata->vif.cfg.assoc && ieee80211_have_rx_timestamp(rx_status)) { struct ieee80211_bss_conf *link_conf = NULL; /* for an MLO connection, set the TSF data only in case we have * an indication on which of the links the frame was received */ if (ieee80211_vif_is_mld(&scan_sdata->vif)) { if (rx_status->link_valid) { s8 link_id = rx_status->link_id; link_conf = rcu_dereference(scan_sdata->vif.link_conf[link_id]); } } else { link_conf = &scan_sdata->vif.bss_conf; } if (link_conf) { bss_meta.parent_tsf = ieee80211_calculate_rx_timestamp(local, rx_status, len + FCS_LEN, 24); ether_addr_copy(bss_meta.parent_bssid, link_conf->bssid); } } rcu_read_unlock(); cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt, len, GFP_ATOMIC); if (!cbss) return NULL; /* In case the signal is invalid update the status */ signal_valid = channel == cbss->channel; if (!signal_valid) rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; return (void *)cbss->priv; } static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, u32 scan_flags, const u8 *da) { struct ieee80211_link_data *link_sdata; u8 link_id; if (!sdata) return false; /* accept broadcast on 6 GHz and for OCE */ if (is_broadcast_ether_addr(da) && (channel->band == NL80211_BAND_6GHZ || scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP)) return true; if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR) return true; if (ether_addr_equal(da, sdata->vif.addr)) return true; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { link_sdata = rcu_dereference(sdata->link[link_id]); if (!link_sdata) continue; if (ether_addr_equal(da, link_sdata->conf->addr)) return true; } return false; } void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_bss *bss; struct ieee80211_channel *channel; struct ieee80211_ext *ext; size_t min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); if (!ieee80211_is_probe_resp(mgmt->frame_control) && !ieee80211_is_beacon(mgmt->frame_control) && !ieee80211_is_s1g_beacon(mgmt->frame_control)) return; if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (struct ieee80211_ext *)mgmt; min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + ieee80211_s1g_optional_len(ext->frame_control); } if (skb->len < min_hdr_len) return; if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) { /* * we were passive scanning because of radar/no-IR, but * the beacon/proberesp rx gives us an opportunity to upgrade * to active scan */ set_bit(SCAN_BEACON_DONE, &local->scanning); wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); } channel = ieee80211_get_channel_khz(local->hw.wiphy, ieee80211_rx_status_to_khz(rx_status)); if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) return; if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct ieee80211_sub_if_data *sdata1, *sdata2; struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; u32 scan_req_flags = 0, sched_scan_req_flags = 0; sdata1 = rcu_dereference(local->scan_sdata); sdata2 = rcu_dereference(local->sched_scan_sdata); if (likely(!sdata1 && !sdata2)) return; scan_req = rcu_dereference(local->scan_req); sched_scan_req = rcu_dereference(local->sched_scan_req); if (scan_req) scan_req_flags = scan_req->flags; if (sched_scan_req) sched_scan_req_flags = sched_scan_req->flags; /* ignore ProbeResp to foreign address or non-bcast (OCE) * unless scanning with randomised address */ if (!ieee80211_scan_accept_presp(sdata1, channel, scan_req_flags, mgmt->da) && !ieee80211_scan_accept_presp(sdata2, channel, sched_scan_req_flags, mgmt->da)) return; } else { /* Beacons are expected only with broadcast address */ if (!is_broadcast_ether_addr(mgmt->da)) return; } /* Do not update the BSS table in case of only monitor interfaces */ if (local->open_count == local->monitors) return; bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, channel); if (bss) ieee80211_rx_bss_put(local, bss); } static void ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef) { memset(chandef, 0, sizeof(*chandef)); chandef->width = NL80211_CHAN_WIDTH_20_NOHT; } /* return false if no more work */ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct cfg80211_scan_request *req; struct cfg80211_chan_def chandef; u8 bands_used = 0; int i, ielen; u32 *n_chans; u32 flags = 0; req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->hw.wiphy->mtx)); if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) return false; if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) { local->hw_scan_req->req.n_channels = req->n_channels; for (i = 0; i < req->n_channels; i++) { local->hw_scan_req->req.channels[i] = req->channels[i]; bands_used |= BIT(req->channels[i]->band); } } else { do { if (local->hw_scan_band == NUM_NL80211_BANDS) return false; n_chans = &local->hw_scan_req->req.n_channels; *n_chans = 0; for (i = 0; i < req->n_channels; i++) { if (req->channels[i]->band != local->hw_scan_band) continue; local->hw_scan_req->req.channels[(*n_chans)++] = req->channels[i]; bands_used |= BIT(req->channels[i]->band); } local->hw_scan_band++; } while (!*n_chans); } ieee80211_prepare_scan_chandef(&chandef); if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; ielen = ieee80211_build_preq_ies(sdata, (u8 *)local->hw_scan_req->req.ie, local->hw_scan_ies_bufsize, &local->hw_scan_req->ies, req->ie, req->ie_len, bands_used, req->rates, &chandef, flags); if (ielen < 0) return false; local->hw_scan_req->req.ie_len = ielen; local->hw_scan_req->req.no_cck = req->no_cck; ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr); ether_addr_copy(local->hw_scan_req->req.mac_addr_mask, req->mac_addr_mask); ether_addr_copy(local->hw_scan_req->req.bssid, req->bssid); return true; } static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); bool hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); bool was_scanning = local->scanning; struct cfg80211_scan_request *scan_req; struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_sub_if_data *sdata; lockdep_assert_wiphy(local->hw.wiphy); /* * It's ok to abort a not-yet-running scan (that * we have one at all will be verified by checking * local->scan_req next), but not to complete it * successfully. */ if (WARN_ON(!local->scanning && !aborted)) aborted = true; if (WARN_ON(!local->scan_req)) return; scan_sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx)); if (hw_scan && !aborted && !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) && ieee80211_prep_hw_scan(scan_sdata)) { int rc; rc = drv_hw_scan(local, rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx)), local->hw_scan_req); if (rc == 0) return; /* HW scan failed and is going to be reported as aborted, * so clear old scan info. */ memset(&local->scan_info, 0, sizeof(local->scan_info)); aborted = true; } kfree(local->hw_scan_req); local->hw_scan_req = NULL; scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->hw.wiphy->mtx)); RCU_INIT_POINTER(local->scan_req, NULL); RCU_INIT_POINTER(local->scan_sdata, NULL); local->scanning = 0; local->scan_chandef.chan = NULL; synchronize_rcu(); if (scan_req != local->int_scan_req) { local->scan_info.aborted = aborted; cfg80211_scan_done(scan_req, &local->scan_info); } /* Set power back to normal operating levels. */ ieee80211_hw_conf_chan(local); if (!hw_scan && was_scanning) { ieee80211_configure_filter(local); drv_sw_scan_complete(local, scan_sdata); ieee80211_offchannel_return(local); } ieee80211_recalc_idle(local); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); /* Requeue all the work that might have been ignored while * the scan was in progress; if there was none this will * just be a no-op for the particular interface. */ list_for_each_entry(sdata, &local->interfaces, list) { if (ieee80211_sdata_running(sdata)) wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } if (was_scanning) ieee80211_start_next_roc(local); } void ieee80211_scan_completed(struct ieee80211_hw *hw, struct cfg80211_scan_info *info) { struct ieee80211_local *local = hw_to_local(hw); trace_api_scan_completed(local, info->aborted); set_bit(SCAN_COMPLETED, &local->scanning); if (info->aborted) set_bit(SCAN_ABORTED, &local->scanning); memcpy(&local->scan_info, info, sizeof(*info)); wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); } EXPORT_SYMBOL(ieee80211_scan_completed); static int ieee80211_start_sw_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { /* Software scan is not supported in multi-channel cases */ if (!local->emulate_chanctx) return -EOPNOTSUPP; /* * Hardware/driver doesn't support hw_scan, so use software * scanning instead. First send a nullfunc frame with power save * bit on so that AP will buffer the frames for us while we are not * listening, then send probe requests to each channel and wait for * the responses. After all channels are scanned, tune back to the * original channel and send a nullfunc frame with power save bit * off to trigger the AP to send us all the buffered frames. * * Note that while local->sw_scanning is true everything else but * nullfunc frames and probe requests will be dropped in * ieee80211_tx_h_check_assoc(). */ drv_sw_scan_start(local, sdata, local->scan_addr); local->leave_oper_channel_time = jiffies; local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; ieee80211_offchannel_stop_vifs(local); /* ensure nullfunc is transmitted before leaving operating channel */ ieee80211_flush_queues(local, NULL, false); ieee80211_configure_filter(local); /* We need to set power level at maximum rate for scanning. */ ieee80211_hw_conf_chan(local); wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); return 0; } static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *sdata_iter; unsigned int link_id; lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_is_radar_required(local, req)) return true; if (!regulatory_pre_cac_allowed(local->hw.wiphy)) return false; list_for_each_entry(sdata_iter, &local->interfaces, list) { for_each_valid_link(&sdata_iter->wdev, link_id) if (sdata_iter->wdev.links[link_id].cac_started) return false; } return true; } static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { if (!__ieee80211_can_leave_ch(sdata, req)) return false; if (!list_empty(&local->roc_list)) return false; if (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.flags & IEEE80211_STA_CONNECTION_POLL) return false; return true; } void ieee80211_run_deferred_scan(struct ieee80211_local *local) { struct cfg80211_scan_request *req; lockdep_assert_wiphy(local->hw.wiphy); if (!local->scan_req || local->scanning) return; req = wiphy_dereference(local->hw.wiphy, local->scan_req); if (!ieee80211_can_scan(local, rcu_dereference_protected( local->scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx)), req)) return; wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, round_jiffies_relative(0)); } static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, const u8 *src, const u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, u32 ratemask, u32 flags, u32 tx_flags, struct ieee80211_channel *channel) { struct sk_buff *skb; skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel, ssid, ssid_len, ie, ie_len, flags); if (skb) { if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 sn = get_random_u16(); info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO; hdr->seq_ctrl = cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK; ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, unsigned long *next_delay) { int i; struct ieee80211_sub_if_data *sdata; struct cfg80211_scan_request *scan_req; enum nl80211_band band = local->hw.conf.chandef.chan->band; u32 flags = 0, tx_flags; scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->hw.wiphy->mtx)); tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (scan_req->no_cck) tx_flags |= IEEE80211_TX_CTL_NO_CCK_RATE; if (scan_req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; if (scan_req->flags & NL80211_SCAN_FLAG_RANDOM_SN) flags |= IEEE80211_PROBE_FLAG_RANDOM_SN; sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx)); for (i = 0; i < scan_req->n_ssids; i++) ieee80211_send_scan_probe_req( sdata, local->scan_addr, scan_req->bssid, scan_req->ssids[i].ssid, scan_req->ssids[i].ssid_len, scan_req->ie, scan_req->ie_len, scan_req->rates[band], flags, tx_flags, local->hw.conf.chandef.chan); /* * After sending probe requests, wait for probe responses * on the channel. */ *next_delay = msecs_to_jiffies(scan_req->duration) > IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME ? msecs_to_jiffies(scan_req->duration) - IEEE80211_PROBE_DELAY : IEEE80211_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; } static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { struct ieee80211_local *local = sdata->local; bool hw_scan = local->ops->hw_scan; int rc; lockdep_assert_wiphy(local->hw.wiphy); if (local->scan_req) return -EBUSY; /* For an MLO connection, if a link ID was specified, validate that it * is indeed active. */ if (ieee80211_vif_is_mld(&sdata->vif) && req->tsf_report_link_id >= 0 && !(sdata->vif.active_links & BIT(req->tsf_report_link_id))) return -EINVAL; if (!__ieee80211_can_leave_ch(sdata, req)) return -EBUSY; if (!ieee80211_can_scan(local, sdata, req)) { /* wait for the work to finish/time out */ rcu_assign_pointer(local->scan_req, req); rcu_assign_pointer(local->scan_sdata, sdata); return 0; } again: if (hw_scan) { u8 *ies; local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len; if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) { int i, n_bands = 0; u8 bands_counted = 0; for (i = 0; i < req->n_channels; i++) { if (bands_counted & BIT(req->channels[i]->band)) continue; bands_counted |= BIT(req->channels[i]->band); n_bands++; } local->hw_scan_ies_bufsize *= n_bands; } local->hw_scan_req = kmalloc(struct_size(local->hw_scan_req, req.channels, req->n_channels) + local->hw_scan_ies_bufsize, GFP_KERNEL); if (!local->hw_scan_req) return -ENOMEM; local->hw_scan_req->req.ssids = req->ssids; local->hw_scan_req->req.n_ssids = req->n_ssids; /* None of the channels are actually set * up but let UBSAN know the boundaries. */ local->hw_scan_req->req.n_channels = req->n_channels; ies = (u8 *)local->hw_scan_req + sizeof(*local->hw_scan_req) + req->n_channels * sizeof(req->channels[0]); local->hw_scan_req->req.ie = ies; local->hw_scan_req->req.flags = req->flags; eth_broadcast_addr(local->hw_scan_req->req.bssid); local->hw_scan_req->req.duration = req->duration; local->hw_scan_req->req.duration_mandatory = req->duration_mandatory; local->hw_scan_req->req.tsf_report_link_id = req->tsf_report_link_id; local->hw_scan_band = 0; local->hw_scan_req->req.n_6ghz_params = req->n_6ghz_params; local->hw_scan_req->req.scan_6ghz_params = req->scan_6ghz_params; local->hw_scan_req->req.scan_6ghz = req->scan_6ghz; local->hw_scan_req->req.first_part = req->first_part; /* * After allocating local->hw_scan_req, we must * go through until ieee80211_prep_hw_scan(), so * anything that might be changed here and leave * this function early must not go after this * allocation. */ } rcu_assign_pointer(local->scan_req, req); rcu_assign_pointer(local->scan_sdata, sdata); if (req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) get_random_mask_addr(local->scan_addr, req->mac_addr, req->mac_addr_mask); else memcpy(local->scan_addr, sdata->vif.addr, ETH_ALEN); if (hw_scan) { __set_bit(SCAN_HW_SCANNING, &local->scanning); } else if ((req->n_channels == 1) && (req->channels[0] == local->hw.conf.chandef.chan)) { /* * If we are scanning only on the operating channel * then we do not need to stop normal activities */ unsigned long next_delay; __set_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning); ieee80211_recalc_idle(local); /* Notify driver scan is starting, keep order of operations * same as normal software scan, in case that matters. */ drv_sw_scan_start(local, sdata, local->scan_addr); ieee80211_configure_filter(local); /* accept probe-responses */ /* We need to ensure power level is at max for scanning. */ ieee80211_hw_conf_chan(local); if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) || !req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; if (req->n_ssids) set_bit(SCAN_BEACON_WAIT, &local->scanning); } else { ieee80211_scan_state_send_probe(local, &next_delay); next_delay = IEEE80211_CHANNEL_TIME; } /* Now, just wait a bit and we are all done! */ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, next_delay); return 0; } else { /* Do normal software scan */ __set_bit(SCAN_SW_SCANNING, &local->scanning); } ieee80211_recalc_idle(local); if (hw_scan) { WARN_ON(!ieee80211_prep_hw_scan(sdata)); rc = drv_hw_scan(local, sdata, local->hw_scan_req); } else { rc = ieee80211_start_sw_scan(local, sdata); } if (rc) { kfree(local->hw_scan_req); local->hw_scan_req = NULL; local->scanning = 0; ieee80211_recalc_idle(local); local->scan_req = NULL; RCU_INIT_POINTER(local->scan_sdata, NULL); } if (hw_scan && rc == 1) { /* * we can't fall back to software for P2P-GO * as it must update NoA etc. */ if (ieee80211_vif_type_p2p(&sdata->vif) == NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; hw_scan = false; goto again; } return rc; } static unsigned long ieee80211_scan_get_channel_time(struct ieee80211_channel *chan) { /* * TODO: channel switching also consumes quite some time, * add that delay as well to get a better estimation */ if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) return IEEE80211_PASSIVE_CHANNEL_TIME; return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; } static void ieee80211_scan_state_decision(struct ieee80211_local *local, unsigned long *next_delay) { bool associated = false; bool tx_empty = true; bool bad_latency; struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *next_chan; enum mac80211_scan_state next_scan_state; struct cfg80211_scan_request *scan_req; lockdep_assert_wiphy(local->hw.wiphy); /* * check if at least one STA interface is associated, * check if at least one STA interface has pending tx frames * and grab the lowest used beacon interval */ list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) { associated = true; if (!qdisc_all_tx_empty(sdata->dev)) { tx_empty = false; break; } } } } scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->hw.wiphy->mtx)); next_chan = scan_req->channels[local->scan_channel_idx]; /* * we're currently scanning a different channel, let's * see if we can scan another channel without interfering * with the current traffic situation. * * Keep good latency, do not stay off-channel more than 125 ms. */ bad_latency = time_after(jiffies + ieee80211_scan_get_channel_time(next_chan), local->leave_oper_channel_time + HZ / 8); if (associated && !tx_empty) { if (scan_req->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) next_scan_state = SCAN_ABORT; else next_scan_state = SCAN_SUSPEND; } else if (associated && bad_latency) { next_scan_state = SCAN_SUSPEND; } else { next_scan_state = SCAN_SET_CHANNEL; } local->next_scan_state = next_scan_state; *next_delay = 0; } static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, unsigned long *next_delay) { int skip; struct ieee80211_channel *chan; struct cfg80211_scan_request *scan_req; scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->hw.wiphy->mtx)); skip = 0; chan = scan_req->channels[local->scan_channel_idx]; local->scan_chandef.chan = chan; local->scan_chandef.center_freq1 = chan->center_freq; local->scan_chandef.freq1_offset = chan->freq_offset; local->scan_chandef.center_freq2 = 0; /* For S1G, only scan the 1MHz primaries. */ if (chan->band == NL80211_BAND_S1GHZ) { local->scan_chandef.width = NL80211_CHAN_WIDTH_1; local->scan_chandef.s1g_primary_2mhz = false; goto set_channel; } /* * If scanning on oper channel, use whatever channel-type * is currently in use. */ if (chan == local->hw.conf.chandef.chan) local->scan_chandef = local->hw.conf.chandef; else local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; set_channel: if (ieee80211_hw_conf_chan(local)) skip = 1; /* advance state machine to next channel/band */ local->scan_channel_idx++; if (skip) { /* if we skip this channel return to the decision state */ local->next_scan_state = SCAN_DECISION; return; } /* * Probe delay is used to update the NAV, cf. 11.1.3.2.2 * (which unfortunately doesn't say _why_ step a) is done, * but it waits for the probe delay or until a frame is * received - and the received frame would update the NAV). * For now, we do not support waiting until a frame is * received. * * In any case, it is not necessary for a passive scan. */ if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) || !scan_req->n_ssids) { *next_delay = max(msecs_to_jiffies(scan_req->duration), IEEE80211_PASSIVE_CHANNEL_TIME); local->next_scan_state = SCAN_DECISION; if (scan_req->n_ssids) set_bit(SCAN_BEACON_WAIT, &local->scanning); return; } /* active scan, send probes */ *next_delay = IEEE80211_PROBE_DELAY; local->next_scan_state = SCAN_SEND_PROBE; } static void ieee80211_scan_state_suspend(struct ieee80211_local *local, unsigned long *next_delay) { /* switch back to the operating channel */ local->scan_chandef.chan = NULL; ieee80211_hw_conf_chan(local); /* disable PS */ ieee80211_offchannel_return(local); *next_delay = HZ / 5; /* afterwards, resume scan & go to next channel */ local->next_scan_state = SCAN_RESUME; } static void ieee80211_scan_state_resume(struct ieee80211_local *local, unsigned long *next_delay) { ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { ieee80211_flush_queues(local, NULL, false); *next_delay = 0; } else *next_delay = HZ / 10; /* remember when we left the operating channel */ local->leave_oper_channel_time = jiffies; /* advance to the next channel to be scanned */ local->next_scan_state = SCAN_SET_CHANNEL; } void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, scan_work.work); struct ieee80211_sub_if_data *sdata; struct cfg80211_scan_request *scan_req; unsigned long next_delay = 0; bool aborted; lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_can_run_worker(local)) { aborted = true; goto out_complete; } sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx)); scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->hw.wiphy->mtx)); /* When scanning on-channel, the first-callback means completed. */ if (test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning)) { aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); goto out_complete; } if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); goto out_complete; } if (!sdata || !scan_req) return; if (!local->scanning) { int rc; RCU_INIT_POINTER(local->scan_req, NULL); RCU_INIT_POINTER(local->scan_sdata, NULL); rc = __ieee80211_start_scan(sdata, scan_req); if (!rc) return; /* need to complete scan in cfg80211 */ rcu_assign_pointer(local->scan_req, scan_req); aborted = true; goto out_complete; } clear_bit(SCAN_BEACON_WAIT, &local->scanning); /* * as long as no delay is required advance immediately * without scheduling a new work */ do { if (!ieee80211_sdata_running(sdata)) { aborted = true; goto out_complete; } if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) && local->next_scan_state == SCAN_DECISION) local->next_scan_state = SCAN_SEND_PROBE; switch (local->next_scan_state) { case SCAN_DECISION: /* if no more bands/channels left, complete scan */ if (local->scan_channel_idx >= scan_req->n_channels) { aborted = false; goto out_complete; } ieee80211_scan_state_decision(local, &next_delay); break; case SCAN_SET_CHANNEL: ieee80211_scan_state_set_channel(local, &next_delay); break; case SCAN_SEND_PROBE: ieee80211_scan_state_send_probe(local, &next_delay); break; case SCAN_SUSPEND: ieee80211_scan_state_suspend(local, &next_delay); break; case SCAN_RESUME: ieee80211_scan_state_resume(local, &next_delay); break; case SCAN_ABORT: aborted = true; goto out_complete; } } while (next_delay == 0); wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, next_delay); return; out_complete: __ieee80211_scan_completed(&local->hw, aborted); } int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { lockdep_assert_wiphy(sdata->local->hw.wiphy); return __ieee80211_start_scan(sdata, req); } int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, struct ieee80211_channel **channels, unsigned int n_channels) { struct ieee80211_local *local = sdata->local; int i, n_ch = 0; enum nl80211_band band; lockdep_assert_wiphy(local->hw.wiphy); /* busy scanning */ if (local->scan_req) return -EBUSY; /* fill internal scan request */ if (!channels) { int max_n; for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!local->hw.wiphy->bands[band] || band == NL80211_BAND_6GHZ || band == NL80211_BAND_S1GHZ) continue; max_n = local->hw.wiphy->bands[band]->n_channels; for (i = 0; i < max_n; i++) { struct ieee80211_channel *tmp_ch = &local->hw.wiphy->bands[band]->channels[i]; if (tmp_ch->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED) || !cfg80211_wdev_channel_allowed(&sdata->wdev, tmp_ch)) continue; local->int_scan_req->channels[n_ch] = tmp_ch; n_ch++; } } if (WARN_ON_ONCE(n_ch == 0)) return -EINVAL; local->int_scan_req->n_channels = n_ch; } else { for (i = 0; i < n_channels; i++) { if (channels[i]->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED) || !cfg80211_wdev_channel_allowed(&sdata->wdev, channels[i])) continue; local->int_scan_req->channels[n_ch] = channels[i]; n_ch++; } if (n_ch == 0) return -EINVAL; local->int_scan_req->n_channels = n_ch; } local->int_scan_req->ssids = &local->scan_ssid; local->int_scan_req->n_ssids = 1; memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); local->int_scan_req->ssids[0].ssid_len = ssid_len; return __ieee80211_start_scan(sdata, sdata->local->int_scan_req); } void ieee80211_scan_cancel(struct ieee80211_local *local) { /* ensure a new scan cannot be queued */ lockdep_assert_wiphy(local->hw.wiphy); /* * We are canceling software scan, or deferred scan that was not * yet really started (see __ieee80211_start_scan ). * * Regarding hardware scan: * - we can not call __ieee80211_scan_completed() as when * SCAN_HW_SCANNING bit is set this function change * local->hw_scan_req to operate on 5G band, what race with * driver which can use local->hw_scan_req * * - we can not cancel scan_work since driver can schedule it * by ieee80211_scan_completed(..., true) to finish scan * * Hence we only call the cancel_hw_scan() callback, but the low-level * driver is still responsible for calling ieee80211_scan_completed() * after the scan was completed/aborted. */ if (!local->scan_req) return; /* * We have a scan running and the driver already reported completion, * but the worker hasn't run yet or is stuck on the mutex - mark it as * cancelled. */ if (test_bit(SCAN_HW_SCANNING, &local->scanning) && test_bit(SCAN_COMPLETED, &local->scanning)) { set_bit(SCAN_HW_CANCELLED, &local->scanning); return; } if (test_bit(SCAN_HW_SCANNING, &local->scanning)) { /* * Make sure that __ieee80211_scan_completed doesn't trigger a * scan on another band. */ set_bit(SCAN_HW_CANCELLED, &local->scanning); if (local->ops->cancel_hw_scan) drv_cancel_hw_scan(local, rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx))); return; } wiphy_delayed_work_cancel(local->hw.wiphy, &local->scan_work); /* and clean up */ memset(&local->scan_info, 0, sizeof(local->scan_info)); __ieee80211_scan_completed(&local->hw, true); } int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_scan_ies sched_scan_ies = {}; struct cfg80211_chan_def chandef; int ret, i, iebufsz, num_bands = 0; u32 rate_masks[NUM_NL80211_BANDS] = {}; u8 bands_used = 0; u8 *ie; u32 flags = 0; lockdep_assert_wiphy(local->hw.wiphy); iebufsz = local->scan_ies_len + req->ie_len; if (!local->ops->sched_scan_start) return -EOPNOTSUPP; for (i = 0; i < NUM_NL80211_BANDS; i++) { if (local->hw.wiphy->bands[i]) { bands_used |= BIT(i); rate_masks[i] = (u32) -1; num_bands++; } } if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; ie = kcalloc(iebufsz, num_bands, GFP_KERNEL); if (!ie) { ret = -ENOMEM; goto out; } ieee80211_prepare_scan_chandef(&chandef); ret = ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz, &sched_scan_ies, req->ie, req->ie_len, bands_used, rate_masks, &chandef, flags); if (ret < 0) goto error; ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); if (ret == 0) { rcu_assign_pointer(local->sched_scan_sdata, sdata); rcu_assign_pointer(local->sched_scan_req, req); } error: kfree(ie); out: if (ret) { /* Clean in case of failure after HW restart or upon resume. */ RCU_INIT_POINTER(local->sched_scan_sdata, NULL); RCU_INIT_POINTER(local->sched_scan_req, NULL); } return ret; } int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); if (rcu_access_pointer(local->sched_scan_sdata)) return -EBUSY; return __ieee80211_request_sched_scan_start(sdata, req); } int ieee80211_request_sched_scan_stop(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sched_scan_sdata; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); if (!local->ops->sched_scan_stop) return -EOPNOTSUPP; /* We don't want to restart sched scan anymore. */ RCU_INIT_POINTER(local->sched_scan_req, NULL); sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx)); if (sched_scan_sdata) { ret = drv_sched_scan_stop(local, sched_scan_sdata); if (!ret) RCU_INIT_POINTER(local->sched_scan_sdata, NULL); } return ret; } void ieee80211_sched_scan_results(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); trace_api_sched_scan_results(local); cfg80211_sched_scan_results(hw->wiphy, 0); } EXPORT_SYMBOL(ieee80211_sched_scan_results); void ieee80211_sched_scan_end(struct ieee80211_local *local) { lockdep_assert_wiphy(local->hw.wiphy); if (!rcu_access_pointer(local->sched_scan_sdata)) return; RCU_INIT_POINTER(local->sched_scan_sdata, NULL); /* If sched scan was aborted by the driver. */ RCU_INIT_POINTER(local->sched_scan_req, NULL); cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); } void ieee80211_sched_scan_stopped_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, sched_scan_stopped_work); ieee80211_sched_scan_end(local); } void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); trace_api_sched_scan_stopped(local); /* * this shouldn't really happen, so for simplicity * simply ignore it, and let mac80211 reconfigure * the sched scan later on. */ if (local->in_reconfig) return; wiphy_work_queue(hw->wiphy, &local->sched_scan_stopped_work); } EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
70 32 32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 /* SPDX-License-Identifier: GPL-2.0 */ /* * This header file contains public constants and structures used by * the SCSI initiator code. */ #ifndef _SCSI_SCSI_H #define _SCSI_SCSI_H #include <linux/types.h> #include <asm/param.h> #include <scsi/scsi_common.h> #include <scsi/scsi_proto.h> #include <scsi/scsi_status.h> struct scsi_cmnd; enum scsi_timeouts { SCSI_DEFAULT_EH_TIMEOUT = 10 * HZ, }; /* * DIX-capable adapters effectively support infinite chaining for the * protection information scatterlist */ #define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF /* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. */ #define SCAN_WILD_CARD ~0 /* * standard mode-select header prepended to all mode-select commands */ struct ccs_modesel_head { __u8 _r1; /* reserved */ __u8 medium; /* device-specific medium type */ __u8 _r2; /* reserved */ __u8 block_desc_length; /* block descriptor length */ __u8 density; /* device-specific density code */ __u8 number_blocks_hi; /* number of blocks in this block desc */ __u8 number_blocks_med; __u8 number_blocks_lo; __u8 _r3; __u8 block_length_hi; /* block length for blocks in this desc */ __u8 block_length_med; __u8 block_length_lo; }; /* * The Well Known LUNS (SAM-3) in our int representation of a LUN */ #define SCSI_W_LUN_BASE 0xc100 #define SCSI_W_LUN_REPORT_LUNS (SCSI_W_LUN_BASE + 1) #define SCSI_W_LUN_ACCESS_CONTROL (SCSI_W_LUN_BASE + 2) #define SCSI_W_LUN_TARGET_LOG_PAGE (SCSI_W_LUN_BASE + 3) static inline int scsi_is_wlun(u64 lun) { return (lun & 0xff00) == SCSI_W_LUN_BASE; } /** * scsi_status_is_check_condition - check the status return. * * @status: the status passed up from the driver (including host and * driver components) * * Returns: %true if the status code is SAM_STAT_CHECK_CONDITION. */ static inline int scsi_status_is_check_condition(int status) { if (status < 0) return false; status &= 0xfe; return status == SAM_STAT_CHECK_CONDITION; } /* * Extended message codes. */ #define EXTENDED_MODIFY_DATA_POINTER 0x00 #define EXTENDED_SDTR 0x01 #define EXTENDED_EXTENDED_IDENTIFY 0x02 /* SCSI-I only */ #define EXTENDED_WDTR 0x03 #define EXTENDED_PPR 0x04 #define EXTENDED_MODIFY_BIDI_DATA_PTR 0x05 /* * Internal return values. */ enum scsi_disposition { NEEDS_RETRY = 0x2001, SUCCESS = 0x2002, FAILED = 0x2003, QUEUED = 0x2004, SOFT_ERROR = 0x2005, ADD_TO_MLQUEUE = 0x2006, TIMEOUT_ERROR = 0x2007, SCSI_RETURN_NOT_HANDLED = 0x2008, FAST_IO_FAIL = 0x2009, }; /* * Midlevel queue return values. */ #define SCSI_MLQUEUE_HOST_BUSY 0x1055 #define SCSI_MLQUEUE_DEVICE_BUSY 0x1056 #define SCSI_MLQUEUE_EH_RETRY 0x1057 #define SCSI_MLQUEUE_TARGET_BUSY 0x1058 /* * Use these to separate status msg and our bytes * * These are set by: * * status byte = set from target device * msg_byte (unused) * host_byte = set by low-level driver to indicate status. */ #define status_byte(result) (result & 0xff) #define host_byte(result) (((result) >> 16) & 0xff) #define sense_class(sense) (((sense) >> 4) & 0x7) #define sense_error(sense) ((sense) & 0xf) #define sense_valid(sense) ((sense) & 0x80) /* * default timeouts */ #define FORMAT_UNIT_TIMEOUT (2 * 60 * 60 * HZ) #define START_STOP_TIMEOUT (60 * HZ) #define MOVE_MEDIUM_TIMEOUT (5 * 60 * HZ) #define READ_ELEMENT_STATUS_TIMEOUT (5 * 60 * HZ) #define READ_DEFECT_DATA_TIMEOUT (60 * HZ ) #define IDENTIFY_BASE 0x80 #define IDENTIFY(can_disconnect, lun) (IDENTIFY_BASE |\ ((can_disconnect) ? 0x40 : 0) |\ ((lun) & 0x07)) /* * struct scsi_device::scsi_level values. For SCSI devices other than those * prior to SCSI-2 (i.e. over 12 years old) this value is (resp[2] + 1) * where "resp" is a byte array of the response to an INQUIRY. The scsi_level * variable is visible to the user via sysfs. */ #define SCSI_UNKNOWN 0 #define SCSI_1 1 #define SCSI_1_CCS 2 #define SCSI_2 3 #define SCSI_3 4 /* SPC */ #define SCSI_SPC_2 5 #define SCSI_SPC_3 6 #define SCSI_SPC_4 7 #define SCSI_SPC_5 8 #define SCSI_SPC_6 14 /* * INQ PERIPHERAL QUALIFIERS */ #define SCSI_INQ_PQ_CON 0x00 #define SCSI_INQ_PQ_NOT_CON 0x01 #define SCSI_INQ_PQ_NOT_CAP 0x03 /* * Here are some scsi specific ioctl commands which are sometimes useful. * * Note that include/linux/cdrom.h also defines IOCTL 0x5300 - 0x5395 */ /* Used to obtain PUN and LUN info. Conflicts with CDROMAUDIOBUFSIZ */ #define SCSI_IOCTL_GET_IDLUN 0x5382 /* 0x5383 and 0x5384 were used for SCSI_IOCTL_TAGGED_{ENABLE,DISABLE} */ /* Used to obtain the host number of a device. */ #define SCSI_IOCTL_PROBE_HOST 0x5385 /* Used to obtain the bus number for a device */ #define SCSI_IOCTL_GET_BUS_NUMBER 0x5386 /* Used to obtain the PCI location of a device */ #define SCSI_IOCTL_GET_PCI 0x5387 /** * scsi_status_is_good - check the status return. * * @status: the status passed up from the driver (including host and * driver components) * * Returns: %true for known good conditions that may be treated as * command completed normally */ static inline bool scsi_status_is_good(int status) { if (status < 0) return false; if (host_byte(status) == DID_NO_CONNECT) return false; /* * FIXME: bit0 is listed as reserved in SCSI-2, but is * significant in SCSI-3. For now, we follow the SCSI-2 * behaviour and ignore reserved bits. */ status &= 0xfe; return ((status == SAM_STAT_GOOD) || (status == SAM_STAT_CONDITION_MET) || /* Next two "intermediate" statuses are obsolete in SAM-4 */ (status == SAM_STAT_INTERMEDIATE) || (status == SAM_STAT_INTERMEDIATE_CONDITION_MET) || /* FIXME: this is obsolete in SAM-3 */ (status == SAM_STAT_COMMAND_TERMINATED)); } #endif /* _SCSI_SCSI_H */
50 50 50 49 40 40 40 40 40 40 3 2 1 1 1 1 6 7 6 3 3 3 5 7 3 10 10 1 1 1 1 1 4 4 4 1 1 1 1 4 1 1 1 1 1 2 2 2 7 7 2 3 7 7 1 6 2 2 2 2 6 7 27 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 // SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/mount.c - kernfs mount implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/seq_file.h> #include <linux/exportfs.h> #include <linux/uuid.h> #include <linux/statfs.h> #include "kernfs-internal.h" struct kmem_cache *kernfs_node_cache __ro_after_init; struct kmem_cache *kernfs_iattrs_cache __ro_after_init; struct kernfs_global_locks *kernfs_locks __ro_after_init; static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_options) return scops->show_options(sf, root); return 0; } static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry) { struct kernfs_node *node = kernfs_dentry_node(dentry); struct kernfs_root *root = kernfs_root(node); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_path) return scops->show_path(sf, node, root); seq_dentry(sf, dentry, " \t\n\\"); return 0; } static int kernfs_statfs(struct dentry *dentry, struct kstatfs *buf) { simple_statfs(dentry, buf); buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); return 0; } const struct super_operations kernfs_sops = { .statfs = kernfs_statfs, .drop_inode = inode_just_drop, .evict_inode = kernfs_evict_inode, .show_options = kernfs_sop_show_options, .show_path = kernfs_sop_show_path, /* * sysfs is built on top of kernfs and sysfs provides the power * management infrastructure to support suspend/hibernate by * writing to various files in /sys/power/. As filesystems may * be automatically frozen during suspend/hibernate implementing * freeze/thaw support for kernfs generically will cause * deadlocks as the suspending/hibernation initiating task will * hold a VFS lock that it will then wait upon to be released. * If freeze/thaw for kernfs is needed talk to the VFS. */ .freeze_fs = NULL, .unfreeze_fs = NULL, .freeze_super = NULL, .thaw_super = NULL, }; static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct kernfs_node *kn = inode->i_private; if (*max_len < 2) { *max_len = 2; return FILEID_INVALID; } *max_len = 2; *(u64 *)fh = kn->id; return FILEID_KERNFS; } static struct dentry *__kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, bool get_parent) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_node *kn; struct inode *inode; u64 id; if (fh_len < 2) return NULL; switch (fh_type) { case FILEID_KERNFS: id = *(u64 *)fid; break; case FILEID_INO32_GEN: case FILEID_INO32_GEN_PARENT: /* * blk_log_action() exposes "LOW32,HIGH32" pair without * type and userland can call us with generic fid * constructed from them. Combine it back to ID. See * blk_log_action(). */ id = ((u64)fid->i32.gen << 32) | fid->i32.ino; break; default: return NULL; } kn = kernfs_find_and_get_node_by_id(info->root, id); if (!kn) return ERR_PTR(-ESTALE); if (get_parent) { struct kernfs_node *parent; parent = kernfs_get_parent(kn); kernfs_put(kn); kn = parent; if (!kn) return ERR_PTR(-ESTALE); } inode = kernfs_get_inode(sb, kn); kernfs_put(kn); return d_obtain_alias(inode); } static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, false); } static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, true); } static struct dentry *kernfs_get_parent_dentry(struct dentry *child) { struct kernfs_node *kn = kernfs_dentry_node(child); struct kernfs_root *root = kernfs_root(kn); guard(rwsem_read)(&root->kernfs_rwsem); return d_obtain_alias(kernfs_get_inode(child->d_sb, kernfs_parent(kn))); } static const struct export_operations kernfs_export_ops = { .encode_fh = kernfs_encode_fh, .fh_to_dentry = kernfs_fh_to_dentry, .fh_to_parent = kernfs_fh_to_parent, .get_parent = kernfs_get_parent_dentry, }; /** * kernfs_root_from_sb - determine kernfs_root associated with a super_block * @sb: the super_block in question * * Return: the kernfs_root associated with @sb. If @sb is not a kernfs one, * %NULL is returned. */ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { if (sb->s_op == &kernfs_sops) return kernfs_info(sb)->root; return NULL; } /* * find the next ancestor in the path down to @child, where @parent was the * ancestor whose descendant we want to find. * * Say the path is /a/b/c/d. @child is d, @parent is %NULL. We return the root * node. If @parent is b, then we return the node for c. * Passing in d as @parent is not ok. */ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, struct kernfs_node *parent) { if (child == parent) { pr_crit_once("BUG in find_next_ancestor: called with parent == child"); return NULL; } while (kernfs_parent(child) != parent) { child = kernfs_parent(child); if (!child) return NULL; } return child; } /** * kernfs_node_dentry - get a dentry for the given kernfs_node * @kn: kernfs_node for which a dentry is needed * @sb: the kernfs super_block * * Return: the dentry pointer */ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb) { struct dentry *dentry; struct kernfs_node *knparent; struct kernfs_root *root; BUG_ON(sb->s_op != &kernfs_sops); dentry = dget(sb->s_root); /* Check if this is the root kernfs_node */ if (!rcu_access_pointer(kn->__parent)) return dentry; root = kernfs_root(kn); /* * As long as kn is valid, its parent can not vanish. This is cgroup's * kn so it can't have its parent replaced. Therefore it is safe to use * the ancestor node outside of the RCU or locked section. */ if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_INVARIANT_PARENT))) return ERR_PTR(-EINVAL); scoped_guard(rcu) { knparent = find_next_ancestor(kn, NULL); } if (WARN_ON(!knparent)) { dput(dentry); return ERR_PTR(-EINVAL); } do { struct dentry *dtmp; struct kernfs_node *kntmp; const char *name; if (kn == knparent) return dentry; scoped_guard(rwsem_read, &root->kernfs_rwsem) { kntmp = find_next_ancestor(kn, knparent); if (WARN_ON(!kntmp)) { dput(dentry); return ERR_PTR(-EINVAL); } name = kstrdup(kernfs_rcu_name(kntmp), GFP_KERNEL); } if (!name) { dput(dentry); return ERR_PTR(-ENOMEM); } dtmp = lookup_noperm_positive_unlocked(&QSTR(name), dentry); dput(dentry); kfree(name); if (IS_ERR(dtmp)) return dtmp; knparent = kntmp; dentry = dtmp; } while (true); } static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *kf_root = kfc->root; struct inode *inode; struct dentry *root; info->sb = sb; /* Userspace would break if executables or devices appear on sysfs */ sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = kfc->magic; sb->s_op = &kernfs_sops; sb->s_xattr = kernfs_xattr_handlers; if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) sb->s_export_op = &kernfs_export_ops; sb->s_time_gran = 1; /* sysfs dentries and inodes don't require IO to create */ sb->s_shrink->seeks = 0; /* get root inode, initialize and unlock it */ down_read(&kf_root->kernfs_rwsem); inode = kernfs_get_inode(sb, info->root->kn); up_read(&kf_root->kernfs_rwsem); if (!inode) { pr_debug("kernfs: could not get root inode\n"); return -ENOMEM; } /* instantiate and link root dentry */ root = d_make_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n", __func__); return -ENOMEM; } sb->s_root = root; set_default_d_op(sb, &kernfs_dops); return 0; } static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_super_info *sb_info = kernfs_info(sb); struct kernfs_super_info *info = fc->s_fs_info; return sb_info->root == info->root && sb_info->ns == info->ns; } static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; kfc->ns_tag = NULL; return set_anon_super_fc(sb, fc); } /** * kernfs_super_ns - determine the namespace tag of a kernfs super_block * @sb: super_block of interest * * Return: the namespace tag associated with kernfs super_block @sb. */ const void *kernfs_super_ns(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); return info->ns; } /** * kernfs_get_tree - kernfs filesystem access/retrieval helper * @fc: The filesystem context. * * This is to be called from each kernfs user's fs_context->ops->get_tree() * implementation, which should set the specified ->@fs_type and ->@flags, and * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, * respectively. * * Return: %0 on success, -errno on failure. */ int kernfs_get_tree(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; struct super_block *sb; struct kernfs_super_info *info; int error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->root = kfc->root; info->ns = kfc->ns_tag; INIT_LIST_HEAD(&info->node); fc->s_fs_info = info; sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = kfc->root; kfc->new_sb_created = true; error = kernfs_fill_super(sb, kfc); if (error) { deactivate_locked_super(sb); return error; } sb->s_flags |= SB_ACTIVE; uuid_t uuid; uuid_gen(&uuid); super_set_uuid(sb, uuid.b, sizeof(uuid)); down_write(&root->kernfs_supers_rwsem); list_add(&info->node, &info->root->supers); up_write(&root->kernfs_supers_rwsem); } fc->root = dget(sb->s_root); return 0; } void kernfs_free_fs_context(struct fs_context *fc) { /* Note that we don't deal with kfc->ns_tag here. */ kfree(fc->s_fs_info); fc->s_fs_info = NULL; } /** * kernfs_kill_sb - kill_sb for kernfs * @sb: super_block being killed * * This can be used directly for file_system_type->kill_sb(). If a kernfs * user needs extra cleanup, it can implement its own kill_sb() and call * this function at the end. */ void kernfs_kill_sb(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = info->root; down_write(&root->kernfs_supers_rwsem); list_del(&info->node); up_write(&root->kernfs_supers_rwsem); /* * Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing kernfs_super_info. */ kill_anon_super(sb); kfree(info); } static void __init kernfs_mutex_init(void) { int count; for (count = 0; count < NR_KERNFS_LOCKS; count++) mutex_init(&kernfs_locks->open_file_mutex[count]); } static void __init kernfs_lock_init(void) { kernfs_locks = kmalloc(sizeof(struct kernfs_global_locks), GFP_KERNEL); WARN_ON(!kernfs_locks); kernfs_mutex_init(); } void __init kernfs_init(void) { kernfs_node_cache = kmem_cache_create("kernfs_node_cache", sizeof(struct kernfs_node), 0, SLAB_PANIC, NULL); /* Creates slab cache for kernfs inode attributes */ kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache", sizeof(struct kernfs_iattrs), 0, SLAB_PANIC, NULL); kernfs_lock_init(); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 /* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/include/linux/clk.h * * Copyright (C) 2004 ARM Limited. * Written by Deep Blue Solutions Limited. * Copyright (C) 2011-2012 Linaro Ltd <mturquette@linaro.org> */ #ifndef __LINUX_CLK_H #define __LINUX_CLK_H #include <linux/err.h> #include <linux/kernel.h> #include <linux/notifier.h> struct device; struct clk; struct device_node; struct of_phandle_args; /** * DOC: clk notifier callback types * * PRE_RATE_CHANGE - called immediately before the clk rate is changed, * to indicate that the rate change will proceed. Drivers must * immediately terminate any operations that will be affected by the * rate change. Callbacks may either return NOTIFY_DONE, NOTIFY_OK, * NOTIFY_STOP or NOTIFY_BAD. * * ABORT_RATE_CHANGE: called if the rate change failed for some reason * after PRE_RATE_CHANGE. In this case, all registered notifiers on * the clk will be called with ABORT_RATE_CHANGE. Callbacks must * always return NOTIFY_DONE or NOTIFY_OK. * * POST_RATE_CHANGE - called after the clk rate change has successfully * completed. Callbacks must always return NOTIFY_DONE or NOTIFY_OK. * */ #define PRE_RATE_CHANGE BIT(0) #define POST_RATE_CHANGE BIT(1) #define ABORT_RATE_CHANGE BIT(2) /** * struct clk_notifier - associate a clk with a notifier * @clk: struct clk * to associate the notifier with * @notifier_head: a blocking_notifier_head for this clk * @node: linked list pointers * * A list of struct clk_notifier is maintained by the notifier code. * An entry is created whenever code registers the first notifier on a * particular @clk. Future notifiers on that @clk are added to the * @notifier_head. */ struct clk_notifier { struct clk *clk; struct srcu_notifier_head notifier_head; struct list_head node; }; /** * struct clk_notifier_data - rate data to pass to the notifier callback * @clk: struct clk * being changed * @old_rate: previous rate of this clk * @new_rate: new rate of this clk * * For a pre-notifier, old_rate is the clk's rate before this rate * change, and new_rate is what the rate will be in the future. For a * post-notifier, old_rate and new_rate are both set to the clk's * current rate (this was done to optimize the implementation). */ struct clk_notifier_data { struct clk *clk; unsigned long old_rate; unsigned long new_rate; }; /** * struct clk_bulk_data - Data used for bulk clk operations. * * @id: clock consumer ID * @clk: struct clk * to store the associated clock * * The CLK APIs provide a series of clk_bulk_() API calls as * a convenience to consumers which require multiple clks. This * structure is used to manage data for these calls. */ struct clk_bulk_data { const char *id; struct clk *clk; }; #ifdef CONFIG_COMMON_CLK /** * clk_notifier_register - register a clock rate-change notifier callback * @clk: clock whose rate we are interested in * @nb: notifier block with callback function pointer * * ProTip: debugging across notifier chains can be frustrating. Make sure that * your notifier callback function prints a nice big warning in case of * failure. */ int clk_notifier_register(struct clk *clk, struct notifier_block *nb); /** * clk_notifier_unregister - unregister a clock rate-change notifier callback * @clk: clock whose rate we are no longer interested in * @nb: notifier block which will be unregistered */ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); /** * devm_clk_notifier_register - register a managed rate-change notifier callback * @dev: device for clock "consumer" * @clk: clock whose rate we are interested in * @nb: notifier block with callback function pointer * * Returns 0 on success, -EERROR otherwise */ int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb); /** * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion) * for a clock source. * @clk: clock source * * This gets the clock source accuracy expressed in ppb. * A perfect clock returns 0. */ long clk_get_accuracy(struct clk *clk); /** * clk_set_phase - adjust the phase shift of a clock signal * @clk: clock signal source * @degrees: number of degrees the signal is shifted * * Shifts the phase of a clock signal by the specified degrees. Returns 0 on * success, -EERROR otherwise. */ int clk_set_phase(struct clk *clk, int degrees); /** * clk_get_phase - return the phase shift of a clock signal * @clk: clock signal source * * Returns the phase shift of a clock node in degrees, otherwise returns * -EERROR. */ int clk_get_phase(struct clk *clk); /** * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal * @clk: clock signal source * @num: numerator of the duty cycle ratio to be applied * @den: denominator of the duty cycle ratio to be applied * * Adjust the duty cycle of a clock signal by the specified ratio. Returns 0 on * success, -EERROR otherwise. */ int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den); /** * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal * @clk: clock signal source * @scale: scaling factor to be applied to represent the ratio as an integer * * Returns the duty cycle ratio multiplied by the scale provided, otherwise * returns -EERROR. */ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale); /** * clk_is_match - check if two clk's point to the same hardware clock * @p: clk compared against q * @q: clk compared against p * * Returns true if the two struct clk pointers both point to the same hardware * clock node. Put differently, returns true if @p and @q * share the same &struct clk_core object. * * Returns false otherwise. Note that two NULL clks are treated as matching. */ bool clk_is_match(const struct clk *p, const struct clk *q); /** * clk_rate_exclusive_get - get exclusivity over the rate control of a * producer * @clk: clock source * * This function allows drivers to get exclusive control over the rate of a * provider. It prevents any other consumer to execute, even indirectly, * opereation which could alter the rate of the provider or cause glitches * * If exlusivity is claimed more than once on clock, even by the same driver, * the rate effectively gets locked as exclusivity can't be preempted. * * Must not be called from within atomic context. * * Returns success (0) or negative errno. */ int clk_rate_exclusive_get(struct clk *clk); /** * devm_clk_rate_exclusive_get - devm variant of clk_rate_exclusive_get * @dev: device the exclusivity is bound to * @clk: clock source * * Calls clk_rate_exclusive_get() on @clk and registers a devm cleanup handler * on @dev to call clk_rate_exclusive_put(). * * Must not be called from within atomic context. */ int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk); /** * clk_rate_exclusive_put - release exclusivity over the rate control of a * producer * @clk: clock source * * This function allows drivers to release the exclusivity it previously got * from clk_rate_exclusive_get() * * The caller must balance the number of clk_rate_exclusive_get() and * clk_rate_exclusive_put() calls. * * Must not be called from within atomic context. */ void clk_rate_exclusive_put(struct clk *clk); #else static inline int clk_notifier_register(struct clk *clk, struct notifier_block *nb) { return -ENOTSUPP; } static inline int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb) { return -ENOTSUPP; } static inline int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb) { return -ENOTSUPP; } static inline long clk_get_accuracy(struct clk *clk) { return -ENOTSUPP; } static inline long clk_set_phase(struct clk *clk, int phase) { return -ENOTSUPP; } static inline long clk_get_phase(struct clk *clk) { return -ENOTSUPP; } static inline int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den) { return -ENOTSUPP; } static inline unsigned int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale) { return 0; } static inline bool clk_is_match(const struct clk *p, const struct clk *q) { return p == q; } static inline int clk_rate_exclusive_get(struct clk *clk) { return 0; } static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk) { return 0; } static inline void clk_rate_exclusive_put(struct clk *clk) {} #endif #ifdef CONFIG_HAVE_CLK_PREPARE /** * clk_prepare - prepare a clock source * @clk: clock source * * This prepares the clock source for use. * * Must not be called from within atomic context. */ int clk_prepare(struct clk *clk); int __must_check clk_bulk_prepare(int num_clks, const struct clk_bulk_data *clks); /** * clk_is_enabled_when_prepared - indicate if preparing a clock also enables it. * @clk: clock source * * Returns true if clk_prepare() implicitly enables the clock, effectively * making clk_enable()/clk_disable() no-ops, false otherwise. * * This is of interest mainly to the power management code where actually * disabling the clock also requires unpreparing it to have any material * effect. * * Regardless of the value returned here, the caller must always invoke * clk_enable() or clk_prepare_enable() and counterparts for usage counts * to be right. */ bool clk_is_enabled_when_prepared(struct clk *clk); #else static inline int clk_prepare(struct clk *clk) { might_sleep(); return 0; } static inline int __must_check clk_bulk_prepare(int num_clks, const struct clk_bulk_data *clks) { might_sleep(); return 0; } static inline bool clk_is_enabled_when_prepared(struct clk *clk) { return false; } #endif /** * clk_unprepare - undo preparation of a clock source * @clk: clock source * * This undoes a previously prepared clock. The caller must balance * the number of prepare and unprepare calls. * * Must not be called from within atomic context. */ #ifdef CONFIG_HAVE_CLK_PREPARE void clk_unprepare(struct clk *clk); void clk_bulk_unprepare(int num_clks, const struct clk_bulk_data *clks); #else static inline void clk_unprepare(struct clk *clk) { might_sleep(); } static inline void clk_bulk_unprepare(int num_clks, const struct clk_bulk_data *clks) { might_sleep(); } #endif #ifdef CONFIG_HAVE_CLK /** * clk_get - lookup and obtain a reference to a clock producer. * @dev: device for clock "consumer" * @id: clock consumer ID * * Returns a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. (IOW, @id may be identical strings, but * clk_get may return different clock producers depending on @dev.) * * Drivers must assume that the clock source is not enabled. * * clk_get should not be called from within interrupt context. */ struct clk *clk_get(struct device *dev, const char *id); /** * clk_bulk_get - lookup and obtain a number of references to clock producer. * @dev: device for clock "consumer" * @num_clks: the number of clk_bulk_data * @clks: the clk_bulk_data table of consumer * * This helper function allows drivers to get several clk consumers in one * operation. If any of the clk cannot be acquired then any clks * that were obtained will be freed before returning to the caller. * * Returns 0 if all clocks specified in clk_bulk_data table are obtained * successfully, or valid IS_ERR() condition containing errno. * The implementation uses @dev and @clk_bulk_data.id to determine the * clock consumer, and thereby the clock producer. * The clock returned is stored in each @clk_bulk_data.clk field. * * Drivers must assume that the clock source is not enabled. * * clk_bulk_get should not be called from within interrupt context. */ int __must_check clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks); /** * clk_bulk_get_all - lookup and obtain all available references to clock * producer. * @dev: device for clock "consumer" * @clks: pointer to the clk_bulk_data table of consumer * * This helper function allows drivers to get all clk consumers in one * operation. If any of the clk cannot be acquired then any clks * that were obtained will be freed before returning to the caller. * * Returns a positive value for the number of clocks obtained while the * clock references are stored in the clk_bulk_data table in @clks field. * Returns 0 if there're none and a negative value if something failed. * * Drivers must assume that the clock source is not enabled. * * clk_bulk_get should not be called from within interrupt context. */ int __must_check clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); /** * clk_bulk_get_optional - lookup and obtain a number of references to clock producer * @dev: device for clock "consumer" * @num_clks: the number of clk_bulk_data * @clks: the clk_bulk_data table of consumer * * Behaves the same as clk_bulk_get() except where there is no clock producer. * In this case, instead of returning -ENOENT, the function returns 0 and * NULL for a clk for which a clock producer could not be determined. */ int __must_check clk_bulk_get_optional(struct device *dev, int num_clks, struct clk_bulk_data *clks); /** * devm_clk_bulk_get - managed get multiple clk consumers * @dev: device for clock "consumer" * @num_clks: the number of clk_bulk_data * @clks: the clk_bulk_data table of consumer * * Return 0 on success, an errno on failure. * * This helper function allows drivers to get several clk * consumers in one operation with management, the clks will * automatically be freed when the device is unbound. */ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks); /** * devm_clk_bulk_get_optional - managed get multiple optional consumer clocks * @dev: device for clock "consumer" * @num_clks: the number of clk_bulk_data * @clks: pointer to the clk_bulk_data table of consumer * * Behaves the same as devm_clk_bulk_get() except where there is no clock * producer. In this case, instead of returning -ENOENT, the function returns * NULL for given clk. It is assumed all clocks in clk_bulk_data are optional. * * Returns 0 if all clocks specified in clk_bulk_data table are obtained * successfully or for any clk there was no clk provider available, otherwise * returns valid IS_ERR() condition containing errno. * The implementation uses @dev and @clk_bulk_data.id to determine the * clock consumer, and thereby the clock producer. * The clock returned is stored in each @clk_bulk_data.clk field. * * Drivers must assume that the clock source is not enabled. * * clk_bulk_get should not be called from within interrupt context. */ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, struct clk_bulk_data *clks); /** * devm_clk_bulk_get_all - managed get multiple clk consumers * @dev: device for clock "consumer" * @clks: pointer to the clk_bulk_data table of consumer * * Returns a positive value for the number of clocks obtained while the * clock references are stored in the clk_bulk_data table in @clks field. * Returns 0 if there're none and a negative value if something failed. * * This helper function allows drivers to get several clk * consumers in one operation with management, the clks will * automatically be freed when the device is unbound. */ int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); /** * devm_clk_bulk_get_all_enabled - Get and enable all clocks of the consumer (managed) * @dev: device for clock "consumer" * @clks: pointer to the clk_bulk_data table of consumer * * Returns a positive value for the number of clocks obtained while the * clock references are stored in the clk_bulk_data table in @clks field. * Returns 0 if there're none and a negative value if something failed. * * This helper function allows drivers to get all clocks of the * consumer and enables them in one operation with management. * The clks will automatically be disabled and freed when the device * is unbound. */ int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, struct clk_bulk_data **clks); /** * devm_clk_get - lookup and obtain a managed reference to a clock producer. * @dev: device for clock "consumer" * @id: clock consumer ID * * Context: May sleep. * * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. (IOW, @id may be identical strings, but * clk_get may return different clock producers depending on @dev.) * * Drivers must assume that the clock source is neither prepared nor * enabled. * * The clock will automatically be freed when the device is unbound * from the bus. */ struct clk *devm_clk_get(struct device *dev, const char *id); /** * devm_clk_get_prepared - devm_clk_get() + clk_prepare() * @dev: device for clock "consumer" * @id: clock consumer ID * * Context: May sleep. * * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. (IOW, @id may be identical strings, but * clk_get may return different clock producers depending on @dev.) * * The returned clk (if valid) is prepared. Drivers must however assume * that the clock is not enabled. * * The clock will automatically be unprepared and freed when the device * is unbound from the bus. */ struct clk *devm_clk_get_prepared(struct device *dev, const char *id); /** * devm_clk_get_enabled - devm_clk_get() + clk_prepare_enable() * @dev: device for clock "consumer" * @id: clock consumer ID * * Context: May sleep. * * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. (IOW, @id may be identical strings, but * clk_get may return different clock producers depending on @dev.) * * The returned clk (if valid) is prepared and enabled. * * The clock will automatically be disabled, unprepared and freed * when the device is unbound from the bus. */ struct clk *devm_clk_get_enabled(struct device *dev, const char *id); /** * devm_clk_get_optional - lookup and obtain a managed reference to an optional * clock producer. * @dev: device for clock "consumer" * @id: clock consumer ID * * Context: May sleep. * * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. If no such clk is found, it returns NULL * which serves as a dummy clk. That's the only difference compared * to devm_clk_get(). * * Drivers must assume that the clock source is neither prepared nor * enabled. * * The clock will automatically be freed when the device is unbound * from the bus. */ struct clk *devm_clk_get_optional(struct device *dev, const char *id); /** * devm_clk_get_optional_prepared - devm_clk_get_optional() + clk_prepare() * @dev: device for clock "consumer" * @id: clock consumer ID * * Context: May sleep. * * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. If no such clk is found, it returns NULL * which serves as a dummy clk. That's the only difference compared * to devm_clk_get_prepared(). * * The returned clk (if valid) is prepared. Drivers must however * assume that the clock is not enabled. * * The clock will automatically be unprepared and freed when the * device is unbound from the bus. */ struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id); /** * devm_clk_get_optional_enabled - devm_clk_get_optional() + * clk_prepare_enable() * @dev: device for clock "consumer" * @id: clock consumer ID * * Context: May sleep. * * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. If no such clk is found, it returns NULL * which serves as a dummy clk. That's the only difference compared * to devm_clk_get_enabled(). * * The returned clk (if valid) is prepared and enabled. * * The clock will automatically be disabled, unprepared and freed * when the device is unbound from the bus. */ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); /** * devm_clk_get_optional_enabled_with_rate - devm_clk_get_optional() + * clk_set_rate() + * clk_prepare_enable() * @dev: device for clock "consumer" * @id: clock consumer ID * @rate: new clock rate * * Context: May sleep. * * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. If no such clk is found, it returns NULL * which serves as a dummy clk. That's the only difference compared * to devm_clk_get_enabled(). * * The returned clk (if valid) is prepared and enabled and rate was set. * * The clock will automatically be disabled, unprepared and freed * when the device is unbound from the bus. */ struct clk *devm_clk_get_optional_enabled_with_rate(struct device *dev, const char *id, unsigned long rate); /** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. * @dev: device for clock "consumer" * @np: pointer to clock consumer node * @con_id: clock consumer ID * * This function parses the clocks, and uses them to look up the * struct clk from the registered list of clock providers by using * @np and @con_id * * The clock will automatically be freed when the device is unbound * from the bus. */ struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id); /** * clk_enable - inform the system when the clock source should be running. * @clk: clock source * * If the clock can not be enabled/disabled, this should return success. * * May be called from atomic contexts. * * Returns success (0) or negative errno. */ int clk_enable(struct clk *clk); /** * clk_bulk_enable - inform the system when the set of clks should be running. * @num_clks: the number of clk_bulk_data * @clks: the clk_bulk_data table of consumer * * May be called from atomic contexts. * * Returns success (0) or negative errno. */ int __must_check clk_bulk_enable(int num_clks, const struct clk_bulk_data *clks); /** * clk_disable - inform the system when the clock source is no longer required. * @clk: clock source * * Inform the system that a clock source is no longer required by * a driver and may be shut down. * * May be called from atomic contexts. * * Implementation detail: if the clock source is shared between * multiple drivers, clk_enable() calls must be balanced by the * same number of clk_disable() calls for the clock source to be * disabled. */ void clk_disable(struct clk *clk); /** * clk_bulk_disable - inform the system when the set of clks is no * longer required. * @num_clks: the number of clk_bulk_data * @clks: the clk_bulk_data table of consumer * * Inform the system that a set of clks is no longer required by * a driver and may be shut down. * * May be called from atomic contexts. * * Implementation detail: if the set of clks is shared between * multiple drivers, clk_bulk_enable() calls must be balanced by the * same number of clk_bulk_disable() calls for the clock source to be * disabled. */ void clk_bulk_disable(int num_clks, const struct clk_bulk_data *clks); /** * clk_get_rate - obtain the current clock rate (in Hz) for a clock source. * This is only valid once the clock source has been enabled. * @clk: clock source */ unsigned long clk_get_rate(struct clk *clk); /** * clk_put - "free" the clock source * @clk: clock source * * Note: drivers must ensure that all clk_enable calls made on this * clock source are balanced by clk_disable calls prior to calling * this function. * * clk_put should not be called from within interrupt context. */ void clk_put(struct clk *clk); /** * clk_bulk_put - "free" the clock source * @num_clks: the number of clk_bulk_data * @clks: the clk_bulk_data table of consumer * * Note: drivers must ensure that all clk_bulk_enable calls made on this * clock source are balanced by clk_bulk_disable calls prior to calling * this function. * * clk_bulk_put should not be called from within interrupt context. */ void clk_bulk_put(int num_clks, struct clk_bulk_data *clks); /** * clk_bulk_put_all - "free" all the clock source * @num_clks: the number of clk_bulk_data * @clks: the clk_bulk_data table of consumer * * Note: drivers must ensure that all clk_bulk_enable calls made on this * clock source are balanced by clk_bulk_disable calls prior to calling * this function. * * clk_bulk_put_all should not be called from within interrupt context. */ void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks); /** * devm_clk_put - "free" a managed clock source * @dev: device used to acquire the clock * @clk: clock source acquired with devm_clk_get() * * Note: drivers must ensure that all clk_enable calls made on this * clock source are balanced by clk_disable calls prior to calling * this function. * * clk_put should not be called from within interrupt context. */ void devm_clk_put(struct device *dev, struct clk *clk); /* * The remaining APIs are optional for machine class support. */ /** * clk_round_rate - adjust a rate to the exact rate a clock can provide * @clk: clock source * @rate: desired clock rate in Hz * * This answers the question "if I were to pass @rate to clk_set_rate(), * what clock rate would I end up with?" without changing the hardware * in any way. In other words: * * rate = clk_round_rate(clk, r); * * and: * * clk_set_rate(clk, r); * rate = clk_get_rate(clk); * * are equivalent except the former does not modify the clock hardware * in any way. * * Returns rounded clock rate in Hz, or negative errno. */ long clk_round_rate(struct clk *clk, unsigned long rate); /** * clk_set_rate - set the clock rate for a clock source * @clk: clock source * @rate: desired clock rate in Hz * * Updating the rate starts at the top-most affected clock and then * walks the tree down to the bottom-most clock that needs updating. * * Returns success (0) or negative errno. */ int clk_set_rate(struct clk *clk, unsigned long rate); /** * clk_set_rate_exclusive- set the clock rate and claim exclusivity over * clock source * @clk: clock source * @rate: desired clock rate in Hz * * This helper function allows drivers to atomically set the rate of a producer * and claim exclusivity over the rate control of the producer. * * It is essentially a combination of clk_set_rate() and * clk_rate_exclusite_get(). Caller must balance this call with a call to * clk_rate_exclusive_put() * * Returns success (0) or negative errno. */ int clk_set_rate_exclusive(struct clk *clk, unsigned long rate); /** * clk_has_parent - check if a clock is a possible parent for another * @clk: clock source * @parent: parent clock source * * This function can be used in drivers that need to check that a clock can be * the parent of another without actually changing the parent. * * Returns true if @parent is a possible parent for @clk, false otherwise. */ bool clk_has_parent(const struct clk *clk, const struct clk *parent); /** * clk_set_rate_range - set a rate range for a clock source * @clk: clock source * @min: desired minimum clock rate in Hz, inclusive * @max: desired maximum clock rate in Hz, inclusive * * Returns success (0) or negative errno. */ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max); /** * clk_set_min_rate - set a minimum clock rate for a clock source * @clk: clock source * @rate: desired minimum clock rate in Hz, inclusive * * Returns success (0) or negative errno. */ int clk_set_min_rate(struct clk *clk, unsigned long rate); /** * clk_set_max_rate - set a maximum clock rate for a clock source * @clk: clock source * @rate: desired maximum clock rate in Hz, inclusive * * Returns success (0) or negative errno. */ int clk_set_max_rate(struct clk *clk, unsigned long rate); /** * clk_set_parent - set the parent clock source for this clock * @clk: clock source * @parent: parent clock source * * Returns success (0) or negative errno. */ int clk_set_parent(struct clk *clk, struct clk *parent); /** * clk_get_parent - get the parent clock source for this clock * @clk: clock source * * Returns struct clk corresponding to parent clock source, or * valid IS_ERR() condition containing errno. */ struct clk *clk_get_parent(struct clk *clk); /** * clk_get_sys - get a clock based upon the device name * @dev_id: device name * @con_id: connection ID * * Returns a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev_id and @con_id to determine the clock consumer, and * thereby the clock producer. In contrast to clk_get() this function * takes the device name instead of the device itself for identification. * * Drivers must assume that the clock source is not enabled. * * clk_get_sys should not be called from within interrupt context. */ struct clk *clk_get_sys(const char *dev_id, const char *con_id); /** * clk_save_context - save clock context for poweroff * * Saves the context of the clock register for powerstates in which the * contents of the registers will be lost. Occurs deep within the suspend * code so locking is not necessary. */ int clk_save_context(void); /** * clk_restore_context - restore clock context after poweroff * * This occurs with all clocks enabled. Occurs deep within the resume code * so locking is not necessary. */ void clk_restore_context(void); #else /* !CONFIG_HAVE_CLK */ static inline struct clk *clk_get(struct device *dev, const char *id) { return NULL; } static inline int __must_check clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { return 0; } static inline int __must_check clk_bulk_get_optional(struct device *dev, int num_clks, struct clk_bulk_data *clks) { return 0; } static inline int __must_check clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { return 0; } static inline struct clk *devm_clk_get(struct device *dev, const char *id) { return NULL; } static inline struct clk *devm_clk_get_prepared(struct device *dev, const char *id) { return NULL; } static inline struct clk *devm_clk_get_enabled(struct device *dev, const char *id) { return NULL; } static inline struct clk *devm_clk_get_optional(struct device *dev, const char *id) { return NULL; } static inline struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id) { return NULL; } static inline struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id) { return NULL; } static inline struct clk * devm_clk_get_optional_enabled_with_rate(struct device *dev, const char *id, unsigned long rate) { return NULL; } static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { return 0; } static inline int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, struct clk_bulk_data *clks) { return 0; } static inline int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { return 0; } static inline int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, struct clk_bulk_data **clks) { return 0; } static inline struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id) { return NULL; } static inline void clk_put(struct clk *clk) {} static inline void clk_bulk_put(int num_clks, struct clk_bulk_data *clks) {} static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {} static inline void devm_clk_put(struct device *dev, struct clk *clk) {} static inline int clk_enable(struct clk *clk) { return 0; } static inline int __must_check clk_bulk_enable(int num_clks, const struct clk_bulk_data *clks) { return 0; } static inline void clk_disable(struct clk *clk) {} static inline void clk_bulk_disable(int num_clks, const struct clk_bulk_data *clks) {} static inline unsigned long clk_get_rate(struct clk *clk) { return 0; } static inline int clk_set_rate(struct clk *clk, unsigned long rate) { return 0; } static inline int clk_set_rate_exclusive(struct clk *clk, unsigned long rate) { return 0; } static inline long clk_round_rate(struct clk *clk, unsigned long rate) { return 0; } static inline bool clk_has_parent(struct clk *clk, struct clk *parent) { return true; } static inline int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) { return 0; } static inline int clk_set_min_rate(struct clk *clk, unsigned long rate) { return 0; } static inline int clk_set_max_rate(struct clk *clk, unsigned long rate) { return 0; } static inline int clk_set_parent(struct clk *clk, struct clk *parent) { return 0; } static inline struct clk *clk_get_parent(struct clk *clk) { return NULL; } static inline struct clk *clk_get_sys(const char *dev_id, const char *con_id) { return NULL; } static inline int clk_save_context(void) { return 0; } static inline void clk_restore_context(void) {} #endif /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ static inline int clk_prepare_enable(struct clk *clk) { int ret; ret = clk_prepare(clk); if (ret) return ret; ret = clk_enable(clk); if (ret) clk_unprepare(clk); return ret; } /* clk_disable_unprepare helps cases using clk_disable in non-atomic context. */ static inline void clk_disable_unprepare(struct clk *clk) { clk_disable(clk); clk_unprepare(clk); } static inline int __must_check clk_bulk_prepare_enable(int num_clks, const struct clk_bulk_data *clks) { int ret; ret = clk_bulk_prepare(num_clks, clks); if (ret) return ret; ret = clk_bulk_enable(num_clks, clks); if (ret) clk_bulk_unprepare(num_clks, clks); return ret; } static inline void clk_bulk_disable_unprepare(int num_clks, const struct clk_bulk_data *clks) { clk_bulk_disable(num_clks, clks); clk_bulk_unprepare(num_clks, clks); } /** * clk_drop_range - Reset any range set on that clock * @clk: clock source * * Returns success (0) or negative errno. */ static inline int clk_drop_range(struct clk *clk) { return clk_set_rate_range(clk, 0, ULONG_MAX); } /** * clk_get_optional - lookup and obtain a reference to an optional clock * producer. * @dev: device for clock "consumer" * @id: clock consumer ID * * Behaves the same as clk_get() except where there is no clock producer. In * this case, instead of returning -ENOENT, the function returns NULL. */ static inline struct clk *clk_get_optional(struct device *dev, const char *id) { struct clk *clk = clk_get(dev, id); if (clk == ERR_PTR(-ENOENT)) return NULL; return clk; } #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK) struct clk *of_clk_get(struct device_node *np, int index); struct clk *of_clk_get_by_name(struct device_node *np, const char *name); struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec); #else static inline struct clk *of_clk_get(struct device_node *np, int index) { return ERR_PTR(-ENOENT); } static inline struct clk *of_clk_get_by_name(struct device_node *np, const char *name) { return ERR_PTR(-ENOENT); } static inline struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec) { return ERR_PTR(-ENOENT); } #endif #endif
3 3 3 1 3 1 3 3 3 2 3 3 3 1 3 3 3 3 4 2 2 2 1 1 4 1 3 3 3 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 // SPDX-License-Identifier: GPL-2.0-or-later /* General persistent per-UID keyrings register * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/user_namespace.h> #include <linux/cred.h> #include "internal.h" unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */ /* * Create the persistent keyring register for the current user namespace. * * Called with the namespace's sem locked for writing. */ static int key_create_persistent_register(struct user_namespace *ns) { struct key *reg = keyring_alloc(".persistent_register", KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ), KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(reg)) return PTR_ERR(reg); ns->persistent_keyring_register = reg; return 0; } /* * Create the persistent keyring for the specified user. * * Called with the namespace's sem locked for writing. */ static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid, struct keyring_index_key *index_key) { struct key *persistent; key_ref_t reg_ref, persistent_ref; if (!ns->persistent_keyring_register) { long err = key_create_persistent_register(ns); if (err < 0) return ERR_PTR(err); } else { reg_ref = make_key_ref(ns->persistent_keyring_register, true); persistent_ref = find_key_to_update(reg_ref, index_key); if (persistent_ref) return persistent_ref; } persistent = keyring_alloc(index_key->description, uid, INVALID_GID, current_cred(), ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ), KEY_ALLOC_NOT_IN_QUOTA, NULL, ns->persistent_keyring_register); if (IS_ERR(persistent)) return ERR_CAST(persistent); return make_key_ref(persistent, true); } /* * Get the persistent keyring for a specific UID and link it to the nominated * keyring. */ static long key_get_persistent(struct user_namespace *ns, kuid_t uid, key_ref_t dest_ref) { struct keyring_index_key index_key; struct key *persistent; key_ref_t reg_ref, persistent_ref; char buf[32]; long ret; /* Look in the register if it exists */ memset(&index_key, 0, sizeof(index_key)); index_key.type = &key_type_keyring; index_key.description = buf; index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid)); key_set_index_key(&index_key); if (ns->persistent_keyring_register) { reg_ref = make_key_ref(ns->persistent_keyring_register, true); down_read(&ns->keyring_sem); persistent_ref = find_key_to_update(reg_ref, &index_key); up_read(&ns->keyring_sem); if (persistent_ref) goto found; } /* It wasn't in the register, so we'll need to create it. We might * also need to create the register. */ down_write(&ns->keyring_sem); persistent_ref = key_create_persistent(ns, uid, &index_key); up_write(&ns->keyring_sem); if (!IS_ERR(persistent_ref)) goto found; return PTR_ERR(persistent_ref); found: ret = key_task_permission(persistent_ref, current_cred(), KEY_NEED_LINK); if (ret == 0) { persistent = key_ref_to_ptr(persistent_ref); ret = key_link(key_ref_to_ptr(dest_ref), persistent); if (ret == 0) { key_set_timeout(persistent, persistent_keyring_expiry); ret = persistent->serial; } } key_ref_put(persistent_ref); return ret; } /* * Get the persistent keyring for a specific UID and link it to the nominated * keyring. */ long keyctl_get_persistent(uid_t _uid, key_serial_t destid) { struct user_namespace *ns = current_user_ns(); key_ref_t dest_ref; kuid_t uid; long ret; /* -1 indicates the current user */ if (_uid == (uid_t)-1) { uid = current_uid(); } else { uid = make_kuid(ns, _uid); if (!uid_valid(uid)) return -EINVAL; /* You can only see your own persistent cache if you're not * sufficiently privileged. */ if (!uid_eq(uid, current_uid()) && !uid_eq(uid, current_euid()) && !ns_capable(ns, CAP_SETUID)) return -EPERM; } /* There must be a destination keyring */ dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dest_ref)) return PTR_ERR(dest_ref); if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) { ret = -ENOTDIR; goto out_put_dest; } ret = key_get_persistent(ns, uid, dest_ref); out_put_dest: key_ref_put(dest_ref); return ret; }
3 36 7 4 46 2 2 8200 22 10 3 319 47 41 7 487 267 338 41 134 50 74 497 21 4 152 14 15 16 33 22 118 5 19 18 53 16 25 2 33 1759 169 47 120 32 45 20 62 24 28 86 9 318 1740 143 86 82 79 95 39 28 14 16 23 11 32 153 2 8 2 18 3 686 3026 1780 5261 272 212 36 47 101 21 11 729 521 40 570 3 379 232 13 1 7 6 24 20 1 2 1 5 8 5 2 7 3 2 10 25 126 6 45 16 2 1 11 7 8 9 1 16 4 1 1 22 12 5211 6 1 1 14 5 4 27 337 317 18 266 250 58 92 6589 649 34 22 660 1337 49 4981 1 5223 493 32 8 8 2 17 7 26 97 2 67 1 4 12 3 85 221 74 192 1521 19 341 17 619 61 4 1 12 6 54 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 /* SPDX-License-Identifier: GPL-2.0 */ /* * Linux Security Module Hook declarations. * * Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com> * Copyright (C) 2001 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com> * Copyright (C) 2001 James Morris <jmorris@intercode.com.au> * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) * Copyright (C) 2015 Intel Corporation. * Copyright (C) 2015 Casey Schaufler <casey@schaufler-ca.com> * Copyright (C) 2016 Mellanox Techonologies * Copyright (C) 2020 Google LLC. */ /* * The macro LSM_HOOK is used to define the data structures required by * the LSM framework using the pattern: * * LSM_HOOK(<return_type>, <default_value>, <hook_name>, args...) * * struct security_hook_heads { * #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; * #include <linux/lsm_hook_defs.h> * #undef LSM_HOOK * }; */ LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr) LSM_HOOK(int, 0, binder_transaction, const struct cred *from, const struct cred *to) LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, const struct cred *to) LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, const struct cred *to, const struct file *file) LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, unsigned int mode) LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) LSM_HOOK(int, 0, capget, const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, const struct super_block *sb) LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, const struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, struct fs_parameter *param) LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_delete, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_kern_mount, const struct super_block *sb) LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) LSM_HOOK(int, 0, sb_umount, struct vfsmount *mnt, int flags) LSM_HOOK(int, 0, sb_pivotroot, const struct path *old_path, const struct path *new_path) LSM_HOOK(int, 0, sb_set_mnt_opts, struct super_block *sb, void *mnt_opts, unsigned long kern_flags, unsigned long *set_kern_flags) LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, struct super_block *newsb, unsigned long kern_flags, unsigned long *set_kern_flags) LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, const struct qstr *name, const struct cred *old, struct cred *new) #ifdef CONFIG_SECURITY_PATH LSM_HOOK(int, 0, path_unlink, const struct path *dir, struct dentry *dentry) LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry, umode_t mode) LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry) LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) LSM_HOOK(void, LSM_RET_VOID, path_post_mknod, struct mnt_idmap *idmap, struct dentry *dentry) LSM_HOOK(int, 0, path_truncate, const struct path *path) LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry, const char *old_name) LSM_HOOK(int, 0, path_link, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) LSM_HOOK(int, 0, path_rename, const struct path *old_dir, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry, unsigned int flags) LSM_HOOK(int, 0, path_chmod, const struct path *path, umode_t mode) LSM_HOOK(int, 0, path_chown, const struct path *path, kuid_t uid, kgid_t gid) LSM_HOOK(int, 0, path_chroot, const struct path *path) #endif /* CONFIG_SECURITY_PATH */ /* Needed for inode based security check */ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security) LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode, const struct qstr *name, const struct inode *context_inode) LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, umode_t mode) LSM_HOOK(void, LSM_RET_VOID, inode_post_create_tmpfile, struct mnt_idmap *idmap, struct inode *inode) LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry) LSM_HOOK(int, 0, inode_symlink, struct inode *dir, struct dentry *dentry, const char *old_name) LSM_HOOK(int, 0, inode_mkdir, struct inode *dir, struct dentry *dentry, umode_t mode) LSM_HOOK(int, 0, inode_rmdir, struct inode *dir, struct dentry *dentry) LSM_HOOK(int, 0, inode_mknod, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) LSM_HOOK(int, 0, inode_rename, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry) LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, bool rcu) LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap, struct dentry *dentry, int ia_valid) LSM_HOOK(int, 0, inode_getattr, const struct path *path) LSM_HOOK(int, 0, inode_xattr_skipcap, const char *name) LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) LSM_HOOK(void, LSM_RET_VOID, inode_post_setxattr, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_file_setattr, struct dentry *dentry, struct file_kattr *fa) LSM_HOOK(int, 0, inode_file_getattr, struct dentry *dentry, struct file_kattr *fa) LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(void, LSM_RET_VOID, inode_post_remove_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap, struct dentry *dentry) LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc) LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode, const char *name, const void *value, size_t size, int flags) LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode, struct lsm_prop *prop) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode, enum lsm_integrity_type type, const void *value, size_t size) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) LSM_HOOK(int, 0, file_alloc_security, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, unsigned long arg) LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd) LSM_HOOK(int, 0, file_fcntl, struct file *file, unsigned int cmd, unsigned long arg) LSM_HOOK(void, LSM_RET_VOID, file_set_fowner, struct file *file) LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) LSM_HOOK(int, 0, file_post_open, struct file *file, int mask) LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, u64 clone_flags) LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) LSM_HOOK(int, 0, cred_alloc_blank, struct cred *cred, gfp_t gfp) LSM_HOOK(void, LSM_RET_VOID, cred_free, struct cred *cred) LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old, gfp_t gfp) LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new, const struct cred *old) LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, cred_getlsmprop, const struct cred *c, struct lsm_prop *prop) LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid) LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id id, bool contents) LSM_HOOK(int, 0, kernel_post_load_data, char *buf, loff_t size, enum kernel_load_data_id id, char *description) LSM_HOOK(int, 0, kernel_read_file, struct file *file, enum kernel_read_file_id id, bool contents) LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old, int flags) LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old, int flags) LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old) LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) LSM_HOOK(void, LSM_RET_VOID, current_getlsmprop_subj, struct lsm_prop *prop) LSM_HOOK(void, LSM_RET_VOID, task_getlsmprop_obj, struct task_struct *p, struct lsm_prop *prop) LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio) LSM_HOOK(int, 0, task_getioprio, struct task_struct *p) LSM_HOOK(int, 0, task_prlimit, const struct cred *cred, const struct cred *tcred, unsigned int flags) LSM_HOOK(int, 0, task_setrlimit, struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) LSM_HOOK(int, 0, task_setscheduler, struct task_struct *p) LSM_HOOK(int, 0, task_getscheduler, struct task_struct *p) LSM_HOOK(int, 0, task_movememory, struct task_struct *p) LSM_HOOK(int, 0, task_kill, struct task_struct *p, struct kernel_siginfo *info, int sig, const struct cred *cred) LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) LSM_HOOK(void, LSM_RET_VOID, ipc_getlsmprop, struct kern_ipc_perm *ipcp, struct lsm_prop *prop) LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg) LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg) LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm) LSM_HOOK(void, LSM_RET_VOID, msg_queue_free_security, struct kern_ipc_perm *perm) LSM_HOOK(int, 0, msg_queue_associate, struct kern_ipc_perm *perm, int msqflg) LSM_HOOK(int, 0, msg_queue_msgctl, struct kern_ipc_perm *perm, int cmd) LSM_HOOK(int, 0, msg_queue_msgsnd, struct kern_ipc_perm *perm, struct msg_msg *msg, int msqflg) LSM_HOOK(int, 0, msg_queue_msgrcv, struct kern_ipc_perm *perm, struct msg_msg *msg, struct task_struct *target, long type, int mode) LSM_HOOK(int, 0, shm_alloc_security, struct kern_ipc_perm *perm) LSM_HOOK(void, LSM_RET_VOID, shm_free_security, struct kern_ipc_perm *perm) LSM_HOOK(int, 0, shm_associate, struct kern_ipc_perm *perm, int shmflg) LSM_HOOK(int, 0, shm_shmctl, struct kern_ipc_perm *perm, int cmd) LSM_HOOK(int, 0, shm_shmat, struct kern_ipc_perm *perm, char __user *shmaddr, int shmflg) LSM_HOOK(int, 0, sem_alloc_security, struct kern_ipc_perm *perm) LSM_HOOK(void, LSM_RET_VOID, sem_free_security, struct kern_ipc_perm *perm) LSM_HOOK(int, 0, sem_associate, struct kern_ipc_perm *perm, int semflg) LSM_HOOK(int, 0, sem_semctl, struct kern_ipc_perm *perm, int cmd) LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops, unsigned nsops, int alter) LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, struct inode *inode) LSM_HOOK(int, -EOPNOTSUPP, getselfattr, unsigned int attr, struct lsm_ctx __user *ctx, u32 *size, u32 flags) LSM_HOOK(int, -EOPNOTSUPP, setselfattr, unsigned int attr, struct lsm_ctx *ctx, u32 size, u32 flags) LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, struct lsm_context *cp) LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, struct lsm_context *cp) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, struct lsm_context *cp) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, const struct cred *cred, struct watch_notification *n) #endif /* CONFIG_SECURITY && CONFIG_WATCH_QUEUE */ #if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS) LSM_HOOK(int, 0, watch_key, struct key *key) #endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */ #ifdef CONFIG_SECURITY_NETWORK LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other, struct sock *newsk) LSM_HOOK(int, 0, unix_may_send, struct socket *sock, struct socket *other) LSM_HOOK(int, 0, socket_create, int family, int type, int protocol, int kern) LSM_HOOK(int, 0, socket_post_create, struct socket *sock, int family, int type, int protocol, int kern) LSM_HOOK(int, 0, socket_socketpair, struct socket *socka, struct socket *sockb) LSM_HOOK(int, 0, socket_bind, struct socket *sock, struct sockaddr *address, int addrlen) LSM_HOOK(int, 0, socket_connect, struct socket *sock, struct sockaddr *address, int addrlen) LSM_HOOK(int, 0, socket_listen, struct socket *sock, int backlog) LSM_HOOK(int, 0, socket_accept, struct socket *sock, struct socket *newsock) LSM_HOOK(int, 0, socket_sendmsg, struct socket *sock, struct msghdr *msg, int size) LSM_HOOK(int, 0, socket_recvmsg, struct socket *sock, struct msghdr *msg, int size, int flags) LSM_HOOK(int, 0, socket_getsockname, struct socket *sock) LSM_HOOK(int, 0, socket_getpeername, struct socket *sock) LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk, struct sock *newsk) LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, const struct sock *sk, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent) LSM_HOOK(int, 0, inet_conn_request, const struct sock *sk, struct sk_buff *skb, struct request_sock *req) LSM_HOOK(void, LSM_RET_VOID, inet_csk_clone, struct sock *newsk, const struct request_sock *req) LSM_HOOK(void, LSM_RET_VOID, inet_conn_established, struct sock *sk, struct sk_buff *skb) LSM_HOOK(int, 0, secmark_relabel_packet, u32 secid) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, struct flowi_common *flic) LSM_HOOK(int, 0, tun_dev_alloc_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) LSM_HOOK(int, 0, tun_dev_open, void *security) LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_association *asoc, struct sk_buff *skb) LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc, struct sk_buff *skb) LSM_HOOK(int, 0, mptcp_add_subflow, struct sock *sk, struct sock *ssk) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, u8 port_num) LSM_HOOK(int, 0, ib_alloc_security, void *sec) #endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM LSM_HOOK(int, 0, xfrm_policy_alloc_security, struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp) LSM_HOOK(int, 0, xfrm_policy_clone_security, struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx) LSM_HOOK(void, LSM_RET_VOID, xfrm_policy_free_security, struct xfrm_sec_ctx *ctx) LSM_HOOK(int, 0, xfrm_policy_delete_security, struct xfrm_sec_ctx *ctx) LSM_HOOK(int, 0, xfrm_state_alloc, struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) LSM_HOOK(int, 0, xfrm_state_alloc_acquire, struct xfrm_state *x, struct xfrm_sec_ctx *polsec, u32 secid) LSM_HOOK(void, LSM_RET_VOID, xfrm_state_free_security, struct xfrm_state *x) LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x) LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid) LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x, struct xfrm_policy *xp, const struct flowi_common *flic) LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, int ckall) #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ #ifdef CONFIG_KEYS LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, struct key *key, const void *payload, size_t payload_len, unsigned long flags, bool create) #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) LSM_HOOK(int, 0, audit_rule_match, struct lsm_prop *prop, u32 field, u32 op, void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) #endif /* CONFIG_AUDIT */ #ifdef CONFIG_BPF_SYSCALL LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, struct bpf_token *token, bool kernel) LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token, bool kernel) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, const struct path *path) LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, perf_event_open, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ #ifdef CONFIG_IO_URING LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) LSM_HOOK(int, 0, uring_allowed, void) #endif /* CONFIG_IO_URING */ LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev) LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev) LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev, enum lsm_integrity_type type, const void *value, size_t size)
3 3 3 3 3 1 1 2 2 2 3 3 3 3 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hpfs/buffer.c * * Mikulas Patocka (mikulas@artax.karlin.mff.cuni.cz), 1998-1999 * * general buffer i/o */ #include <linux/sched.h> #include <linux/slab.h> #include <linux/blkdev.h> #include "hpfs_fn.h" secno hpfs_search_hotfix_map(struct super_block *s, secno sec) { unsigned i; struct hpfs_sb_info *sbi = hpfs_sb(s); for (i = 0; unlikely(i < sbi->n_hotfixes); i++) { if (sbi->hotfix_from[i] == sec) { return sbi->hotfix_to[i]; } } return sec; } unsigned hpfs_search_hotfix_map_for_range(struct super_block *s, secno sec, unsigned n) { unsigned i; struct hpfs_sb_info *sbi = hpfs_sb(s); for (i = 0; unlikely(i < sbi->n_hotfixes); i++) { if (sbi->hotfix_from[i] >= sec && sbi->hotfix_from[i] < sec + n) { n = sbi->hotfix_from[i] - sec; } } return n; } void hpfs_prefetch_sectors(struct super_block *s, unsigned secno, int n) { struct buffer_head *bh; struct blk_plug plug; if (n <= 0 || unlikely(secno >= hpfs_sb(s)->sb_fs_size)) return; if (unlikely(hpfs_search_hotfix_map_for_range(s, secno, n) != n)) return; bh = sb_find_get_block(s, secno); if (bh) { if (buffer_uptodate(bh)) { brelse(bh); return; } brelse(bh); } blk_start_plug(&plug); while (n > 0) { if (unlikely(secno >= hpfs_sb(s)->sb_fs_size)) break; sb_breadahead(s, secno); secno++; n--; } blk_finish_plug(&plug); } /* Map a sector into a buffer and return pointers to it and to the buffer. */ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, int ahead) { struct buffer_head *bh; hpfs_lock_assert(s); hpfs_prefetch_sectors(s, secno, ahead); cond_resched(); *bhp = bh = sb_bread(s, hpfs_search_hotfix_map(s, secno)); if (bh != NULL) return bh->b_data; else { pr_err("%s(): read error\n", __func__); return NULL; } } /* Like hpfs_map_sector but don't read anything */ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp) { struct buffer_head *bh; /*return hpfs_map_sector(s, secno, bhp, 0);*/ hpfs_lock_assert(s); cond_resched(); if ((*bhp = bh = sb_getblk(s, hpfs_search_hotfix_map(s, secno))) != NULL) { if (!buffer_uptodate(bh)) wait_on_buffer(bh); set_buffer_uptodate(bh); return bh->b_data; } else { pr_err("%s(): getblk failed\n", __func__); return NULL; } } /* Map 4 sectors into a 4buffer and return pointers to it and to the buffer. */ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffer_head *qbh, int ahead) { char *data; hpfs_lock_assert(s); cond_resched(); if (secno & 3) { pr_err("%s(): unaligned read\n", __func__); return NULL; } hpfs_prefetch_sectors(s, secno, 4 + ahead); if (!hpfs_map_sector(s, secno + 0, &qbh->bh[0], 0)) goto bail0; if (!hpfs_map_sector(s, secno + 1, &qbh->bh[1], 0)) goto bail1; if (!hpfs_map_sector(s, secno + 2, &qbh->bh[2], 0)) goto bail2; if (!hpfs_map_sector(s, secno + 3, &qbh->bh[3], 0)) goto bail3; if (likely(qbh->bh[1]->b_data == qbh->bh[0]->b_data + 1 * 512) && likely(qbh->bh[2]->b_data == qbh->bh[0]->b_data + 2 * 512) && likely(qbh->bh[3]->b_data == qbh->bh[0]->b_data + 3 * 512)) { return qbh->data = qbh->bh[0]->b_data; } qbh->data = data = kmalloc(2048, GFP_NOFS); if (!data) { pr_err("%s(): out of memory\n", __func__); goto bail4; } memcpy(data + 0 * 512, qbh->bh[0]->b_data, 512); memcpy(data + 1 * 512, qbh->bh[1]->b_data, 512); memcpy(data + 2 * 512, qbh->bh[2]->b_data, 512); memcpy(data + 3 * 512, qbh->bh[3]->b_data, 512); return data; bail4: brelse(qbh->bh[3]); bail3: brelse(qbh->bh[2]); bail2: brelse(qbh->bh[1]); bail1: brelse(qbh->bh[0]); bail0: return NULL; } /* Don't read sectors */ void *hpfs_get_4sectors(struct super_block *s, unsigned secno, struct quad_buffer_head *qbh) { cond_resched(); hpfs_lock_assert(s); if (secno & 3) { pr_err("%s(): unaligned read\n", __func__); return NULL; } if (!hpfs_get_sector(s, secno + 0, &qbh->bh[0])) goto bail0; if (!hpfs_get_sector(s, secno + 1, &qbh->bh[1])) goto bail1; if (!hpfs_get_sector(s, secno + 2, &qbh->bh[2])) goto bail2; if (!hpfs_get_sector(s, secno + 3, &qbh->bh[3])) goto bail3; if (likely(qbh->bh[1]->b_data == qbh->bh[0]->b_data + 1 * 512) && likely(qbh->bh[2]->b_data == qbh->bh[0]->b_data + 2 * 512) && likely(qbh->bh[3]->b_data == qbh->bh[0]->b_data + 3 * 512)) { return qbh->data = qbh->bh[0]->b_data; } if (!(qbh->data = kmalloc(2048, GFP_NOFS))) { pr_err("%s(): out of memory\n", __func__); goto bail4; } return qbh->data; bail4: brelse(qbh->bh[3]); bail3: brelse(qbh->bh[2]); bail2: brelse(qbh->bh[1]); bail1: brelse(qbh->bh[0]); bail0: return NULL; } void hpfs_brelse4(struct quad_buffer_head *qbh) { if (unlikely(qbh->data != qbh->bh[0]->b_data)) kfree(qbh->data); brelse(qbh->bh[0]); brelse(qbh->bh[1]); brelse(qbh->bh[2]); brelse(qbh->bh[3]); } void hpfs_mark_4buffers_dirty(struct quad_buffer_head *qbh) { if (unlikely(qbh->data != qbh->bh[0]->b_data)) { memcpy(qbh->bh[0]->b_data, qbh->data + 0 * 512, 512); memcpy(qbh->bh[1]->b_data, qbh->data + 1 * 512, 512); memcpy(qbh->bh[2]->b_data, qbh->data + 2 * 512, 512); memcpy(qbh->bh[3]->b_data, qbh->data + 3 * 512, 512); } mark_buffer_dirty(qbh->bh[0]); mark_buffer_dirty(qbh->bh[1]); mark_buffer_dirty(qbh->bh[2]); mark_buffer_dirty(qbh->bh[3]); }
1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/fs.h> #include <linux/sort.h> #include <linux/slab.h> #include <linux/iversion.h> #include "super.h" #include "mds_client.h" #include <linux/ceph/decode.h> /* unused map expires after 5 minutes */ #define CEPH_SNAPID_MAP_TIMEOUT (5 * 60 * HZ) /* * Snapshots in ceph are driven in large part by cooperation from the * client. In contrast to local file systems or file servers that * implement snapshots at a single point in the system, ceph's * distributed access to storage requires clients to help decide * whether a write logically occurs before or after a recently created * snapshot. * * This provides a perfect instantanous client-wide snapshot. Between * clients, however, snapshots may appear to be applied at slightly * different points in time, depending on delays in delivering the * snapshot notification. * * Snapshots are _not_ file system-wide. Instead, each snapshot * applies to the subdirectory nested beneath some directory. This * effectively divides the hierarchy into multiple "realms," where all * of the files contained by each realm share the same set of * snapshots. An individual realm's snap set contains snapshots * explicitly created on that realm, as well as any snaps in its * parent's snap set _after_ the point at which the parent became it's * parent (due to, say, a rename). Similarly, snaps from prior parents * during the time intervals during which they were the parent are included. * * The client is spared most of this detail, fortunately... it must only * maintains a hierarchy of realms reflecting the current parent/child * realm relationship, and for each realm has an explicit list of snaps * inherited from prior parents. * * A snap_realm struct is maintained for realms containing every inode * with an open cap in the system. (The needed snap realm information is * provided by the MDS whenever a cap is issued, i.e., on open.) A 'seq' * version number is used to ensure that as realm parameters change (new * snapshot, new parent, etc.) the client's realm hierarchy is updated. * * The realm hierarchy drives the generation of a 'snap context' for each * realm, which simply lists the resulting set of snaps for the realm. This * is attached to any writes sent to OSDs. */ /* * Unfortunately error handling is a bit mixed here. If we get a snap * update, but don't have enough memory to update our realm hierarchy, * it's not clear what we can do about it (besides complaining to the * console). */ /* * increase ref count for the realm * * caller must hold snap_rwsem. */ void ceph_get_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { lockdep_assert_held(&mdsc->snap_rwsem); /* * The 0->1 and 1->0 transitions must take the snap_empty_lock * atomically with the refcount change. Go ahead and bump the * nref here, unless it's 0, in which case we take the spinlock * and then do the increment and remove it from the list. */ if (atomic_inc_not_zero(&realm->nref)) return; spin_lock(&mdsc->snap_empty_lock); if (atomic_inc_return(&realm->nref) == 1) list_del_init(&realm->empty_item); spin_unlock(&mdsc->snap_empty_lock); } static void __insert_snap_realm(struct rb_root *root, struct ceph_snap_realm *new) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct ceph_snap_realm *r = NULL; while (*p) { parent = *p; r = rb_entry(parent, struct ceph_snap_realm, node); if (new->ino < r->ino) p = &(*p)->rb_left; else if (new->ino > r->ino) p = &(*p)->rb_right; else BUG(); } rb_link_node(&new->node, parent, p); rb_insert_color(&new->node, root); } /* * create and get the realm rooted at @ino and bump its ref count. * * caller must hold snap_rwsem for write. */ static struct ceph_snap_realm *ceph_create_snap_realm( struct ceph_mds_client *mdsc, u64 ino) { struct ceph_snap_realm *realm; lockdep_assert_held_write(&mdsc->snap_rwsem); realm = kzalloc(sizeof(*realm), GFP_NOFS); if (!realm) return ERR_PTR(-ENOMEM); /* Do not release the global dummy snaprealm until unmouting */ if (ino == CEPH_INO_GLOBAL_SNAPREALM) atomic_set(&realm->nref, 2); else atomic_set(&realm->nref, 1); realm->ino = ino; INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); INIT_LIST_HEAD(&realm->empty_item); INIT_LIST_HEAD(&realm->dirty_item); INIT_LIST_HEAD(&realm->rebuild_item); INIT_LIST_HEAD(&realm->inodes_with_caps); spin_lock_init(&realm->inodes_with_caps_lock); __insert_snap_realm(&mdsc->snap_realms, realm); mdsc->num_snap_realms++; doutc(mdsc->fsc->client, "%llx %p\n", realm->ino, realm); return realm; } /* * lookup the realm rooted at @ino. * * caller must hold snap_rwsem. */ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino) { struct ceph_client *cl = mdsc->fsc->client; struct rb_node *n = mdsc->snap_realms.rb_node; struct ceph_snap_realm *r; lockdep_assert_held(&mdsc->snap_rwsem); while (n) { r = rb_entry(n, struct ceph_snap_realm, node); if (ino < r->ino) n = n->rb_left; else if (ino > r->ino) n = n->rb_right; else { doutc(cl, "%llx %p\n", r->ino, r); return r; } } return NULL; } struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino) { struct ceph_snap_realm *r; r = __lookup_snap_realm(mdsc, ino); if (r) ceph_get_snap_realm(mdsc, r); return r; } static void __put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm); /* * called with snap_rwsem (write) */ static void __destroy_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { struct ceph_client *cl = mdsc->fsc->client; lockdep_assert_held_write(&mdsc->snap_rwsem); doutc(cl, "%p %llx\n", realm, realm->ino); rb_erase(&realm->node, &mdsc->snap_realms); mdsc->num_snap_realms--; if (realm->parent) { list_del_init(&realm->child_item); __put_snap_realm(mdsc, realm->parent); } kfree(realm->prior_parent_snaps); kfree(realm->snaps); ceph_put_snap_context(realm->cached_context); kfree(realm); } /* * caller holds snap_rwsem (write) */ static void __put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { lockdep_assert_held_write(&mdsc->snap_rwsem); /* * We do not require the snap_empty_lock here, as any caller that * increments the value must hold the snap_rwsem. */ if (atomic_dec_and_test(&realm->nref)) __destroy_snap_realm(mdsc, realm); } /* * See comments in ceph_get_snap_realm. Caller needn't hold any locks. */ void ceph_put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock)) return; if (down_write_trylock(&mdsc->snap_rwsem)) { spin_unlock(&mdsc->snap_empty_lock); __destroy_snap_realm(mdsc, realm); up_write(&mdsc->snap_rwsem); } else { list_add(&realm->empty_item, &mdsc->snap_empty); spin_unlock(&mdsc->snap_empty_lock); } } /* * Clean up any realms whose ref counts have dropped to zero. Note * that this does not include realms who were created but not yet * used. * * Called under snap_rwsem (write) */ static void __cleanup_empty_realms(struct ceph_mds_client *mdsc) { struct ceph_snap_realm *realm; lockdep_assert_held_write(&mdsc->snap_rwsem); spin_lock(&mdsc->snap_empty_lock); while (!list_empty(&mdsc->snap_empty)) { realm = list_first_entry(&mdsc->snap_empty, struct ceph_snap_realm, empty_item); list_del(&realm->empty_item); spin_unlock(&mdsc->snap_empty_lock); __destroy_snap_realm(mdsc, realm); spin_lock(&mdsc->snap_empty_lock); } spin_unlock(&mdsc->snap_empty_lock); } void ceph_cleanup_global_and_empty_realms(struct ceph_mds_client *mdsc) { struct ceph_snap_realm *global_realm; down_write(&mdsc->snap_rwsem); global_realm = __lookup_snap_realm(mdsc, CEPH_INO_GLOBAL_SNAPREALM); if (global_realm) ceph_put_snap_realm(mdsc, global_realm); __cleanup_empty_realms(mdsc); up_write(&mdsc->snap_rwsem); } /* * adjust the parent realm of a given @realm. adjust child list, and parent * pointers, and ref counts appropriately. * * return true if parent was changed, 0 if unchanged, <0 on error. * * caller must hold snap_rwsem for write. */ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, u64 parentino) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent; lockdep_assert_held_write(&mdsc->snap_rwsem); if (realm->parent_ino == parentino) return 0; parent = ceph_lookup_snap_realm(mdsc, parentino); if (!parent) { parent = ceph_create_snap_realm(mdsc, parentino); if (IS_ERR(parent)) return PTR_ERR(parent); } doutc(cl, "%llx %p: %llx %p -> %llx %p\n", realm->ino, realm, realm->parent_ino, realm->parent, parentino, parent); if (realm->parent) { list_del_init(&realm->child_item); ceph_put_snap_realm(mdsc, realm->parent); } realm->parent_ino = parentino; realm->parent = parent; list_add(&realm->child_item, &parent->children); return 1; } static int cmpu64_rev(const void *a, const void *b) { if (*(u64 *)a < *(u64 *)b) return 1; if (*(u64 *)a > *(u64 *)b) return -1; return 0; } /* * build the snap context for a given realm. */ static int build_snap_context(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, struct list_head *realm_queue, struct list_head *dirty_realms) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent = realm->parent; struct ceph_snap_context *snapc; int err = 0; u32 num = realm->num_prior_parent_snaps + realm->num_snaps; /* * build parent context, if it hasn't been built. * conservatively estimate that all parent snaps might be * included by us. */ if (parent) { if (!parent->cached_context) { /* add to the queue head */ list_add(&parent->rebuild_item, realm_queue); return 1; } num += parent->cached_context->num_snaps; } /* do i actually need to update? not if my context seq matches realm seq, and my parents' does to. (this works because we rebuild_snap_realms() works _downward_ in hierarchy after each update.) */ if (realm->cached_context && realm->cached_context->seq == realm->seq && (!parent || realm->cached_context->seq >= parent->cached_context->seq)) { doutc(cl, "%llx %p: %p seq %lld (%u snaps) (unchanged)\n", realm->ino, realm, realm->cached_context, realm->cached_context->seq, (unsigned int)realm->cached_context->num_snaps); return 0; } /* alloc new snap context */ err = -ENOMEM; if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) goto fail; snapc = ceph_create_snap_context(num, GFP_NOFS); if (!snapc) goto fail; /* build (reverse sorted) snap vector */ num = 0; snapc->seq = realm->seq; if (parent) { u32 i; /* include any of parent's snaps occurring _after_ my parent became my parent */ for (i = 0; i < parent->cached_context->num_snaps; i++) if (parent->cached_context->snaps[i] >= realm->parent_since) snapc->snaps[num++] = parent->cached_context->snaps[i]; if (parent->cached_context->seq > snapc->seq) snapc->seq = parent->cached_context->seq; } memcpy(snapc->snaps + num, realm->snaps, sizeof(u64)*realm->num_snaps); num += realm->num_snaps; memcpy(snapc->snaps + num, realm->prior_parent_snaps, sizeof(u64)*realm->num_prior_parent_snaps); num += realm->num_prior_parent_snaps; sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL); snapc->num_snaps = num; doutc(cl, "%llx %p: %p seq %lld (%u snaps)\n", realm->ino, realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; /* queue realm for cap_snap creation */ list_add_tail(&realm->dirty_item, dirty_realms); return 0; fail: /* * if we fail, clear old (incorrect) cached_context... hopefully * we'll have better luck building it later */ if (realm->cached_context) { ceph_put_snap_context(realm->cached_context); realm->cached_context = NULL; } pr_err_client(cl, "%llx %p fail %d\n", realm->ino, realm, err); return err; } /* * rebuild snap context for the given realm and all of its children. */ static void rebuild_snap_realms(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, struct list_head *dirty_realms) { struct ceph_client *cl = mdsc->fsc->client; LIST_HEAD(realm_queue); int last = 0; bool skip = false; list_add_tail(&realm->rebuild_item, &realm_queue); while (!list_empty(&realm_queue)) { struct ceph_snap_realm *_realm, *child; _realm = list_first_entry(&realm_queue, struct ceph_snap_realm, rebuild_item); /* * If the last building failed dues to memory * issue, just empty the realm_queue and return * to avoid infinite loop. */ if (last < 0) { list_del_init(&_realm->rebuild_item); continue; } last = build_snap_context(mdsc, _realm, &realm_queue, dirty_realms); doutc(cl, "%llx %p, %s\n", realm->ino, realm, last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); /* is any child in the list ? */ list_for_each_entry(child, &_realm->children, child_item) { if (!list_empty(&child->rebuild_item)) { skip = true; break; } } if (!skip) { list_for_each_entry(child, &_realm->children, child_item) list_add_tail(&child->rebuild_item, &realm_queue); } /* last == 1 means need to build parent first */ if (last <= 0) list_del_init(&_realm->rebuild_item); } } /* * helper to allocate and decode an array of snapids. free prior * instance, if any. */ static int dup_array(u64 **dst, __le64 *src, u32 num) { u32 i; kfree(*dst); if (num) { *dst = kcalloc(num, sizeof(u64), GFP_NOFS); if (!*dst) return -ENOMEM; for (i = 0; i < num; i++) (*dst)[i] = get_unaligned_le64(src + i); } else { *dst = NULL; } return 0; } static bool has_new_snaps(struct ceph_snap_context *o, struct ceph_snap_context *n) { if (n->num_snaps == 0) return false; /* snaps are in descending order */ return n->snaps[0] > o->seq; } /* * When a snapshot is applied, the size/mtime inode metadata is queued * in a ceph_cap_snap (one for each snapshot) until writeback * completes and the metadata can be flushed back to the MDS. * * However, if a (sync) write is currently in-progress when we apply * the snapshot, we have to wait until the write succeeds or fails * (and a final size/mtime is known). In this case the * cap_snap->writing = 1, and is said to be "pending." When the write * finishes, we __ceph_finish_cap_snap(). * * Caller must hold snap_rwsem for read (i.e., the realm topology won't * change). */ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap **pcapsnap) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *old_snapc, *new_snapc; struct ceph_cap_snap *capsnap = *pcapsnap; struct ceph_buffer *old_blob = NULL; int used, dirty; spin_lock(&ci->i_ceph_lock); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); old_snapc = ci->i_head_snapc; new_snapc = ci->i_snap_realm->cached_context; /* * If there is a write in progress, treat that as a dirty Fw, * even though it hasn't completed yet; by the time we finish * up this capsnap it will be. */ if (used & CEPH_CAP_FILE_WR) dirty |= CEPH_CAP_FILE_WR; if (__ceph_have_pending_cap_snap(ci)) { /* there is no point in queuing multiple "pending" cap_snaps, as no new writes are allowed to start when pending, so any writes in progress now were started before the previous cap_snap. lucky us. */ doutc(cl, "%p %llx.%llx already pending\n", inode, ceph_vinop(inode)); goto update_snapc; } if (ci->i_wrbuffer_ref_head == 0 && !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { doutc(cl, "%p %llx.%llx nothing dirty|writing\n", inode, ceph_vinop(inode)); goto update_snapc; } BUG_ON(!old_snapc); /* * There is no need to send FLUSHSNAP message to MDS if there is * no new snapshot. But when there is dirty pages or on-going * writes, we still need to create cap_snap. cap_snap is needed * by the write path and page writeback path. * * also see ceph_try_drop_cap_snap() */ if (has_new_snaps(old_snapc, new_snapc)) { if (dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR)) capsnap->need_flush = true; } else { if (!(used & CEPH_CAP_FILE_WR) && ci->i_wrbuffer_ref_head == 0) { doutc(cl, "%p %llx.%llx no new_snap|dirty_page|writing\n", inode, ceph_vinop(inode)); goto update_snapc; } } doutc(cl, "%p %llx.%llx cap_snap %p queuing under %p %s %s\n", inode, ceph_vinop(inode), capsnap, old_snapc, ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); ihold(inode); capsnap->follows = old_snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = dirty; capsnap->mode = inode->i_mode; capsnap->uid = inode->i_uid; capsnap->gid = inode->i_gid; if (dirty & CEPH_CAP_XATTR_EXCL) { old_blob = __ceph_build_xattrs_blob(ci); capsnap->xattr_blob = ceph_buffer_get(ci->i_xattrs.blob); capsnap->xattr_version = ci->i_xattrs.version; } else { capsnap->xattr_blob = NULL; capsnap->xattr_version = 0; } capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; /* dirty page count moved from _head to this cap_snap; all subsequent writes page dirties occur _after_ this snapshot. */ capsnap->dirty_pages = ci->i_wrbuffer_ref_head; ci->i_wrbuffer_ref_head = 0; capsnap->context = old_snapc; list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); if (used & CEPH_CAP_FILE_WR) { doutc(cl, "%p %llx.%llx cap_snap %p snapc %p seq %llu used WR," " now pending\n", inode, ceph_vinop(inode), capsnap, old_snapc, old_snapc->seq); capsnap->writing = 1; } else { /* note mtime, size NOW. */ __ceph_finish_cap_snap(ci, capsnap); } *pcapsnap = NULL; old_snapc = NULL; update_snapc: if (ci->i_wrbuffer_ref_head == 0 && ci->i_wr_ref == 0 && ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { ci->i_head_snapc = NULL; } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); doutc(cl, " new snapc is %p\n", new_snapc); } spin_unlock(&ci->i_ceph_lock); ceph_buffer_put(old_blob); ceph_put_snap_context(old_snapc); } /* * Finalize the size, mtime for a cap_snap.. that is, settle on final values * to be used for the snapshot, to be flushed back to the mds. * * If capsnap can now be flushed, add to snap_flush list, and return 1. * * Caller must hold i_ceph_lock. */ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_client *cl = mdsc->fsc->client; BUG_ON(capsnap->writing); capsnap->size = i_size_read(inode); capsnap->mtime = inode_get_mtime(inode); capsnap->atime = inode_get_atime(inode); capsnap->ctime = inode_get_ctime(inode); capsnap->btime = ci->i_btime; capsnap->change_attr = inode_peek_iversion_raw(inode); capsnap->time_warp_seq = ci->i_time_warp_seq; capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; if (capsnap->dirty_pages) { doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " "s=%llu still has %d dirty pages\n", inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), capsnap->size, capsnap->dirty_pages); return 0; } /* * Defer flushing the capsnap if the dirty buffer not flushed yet. * And trigger to flush the buffer immediately. */ if (ci->i_wrbuffer_ref) { doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " "s=%llu used WRBUFFER, delaying\n", inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), capsnap->size); ceph_queue_writeback(inode); return 0; } ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), capsnap->size); spin_lock(&mdsc->snap_flush_lock); if (list_empty(&ci->i_snap_flush_item)) { ihold(inode); list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); } spin_unlock(&mdsc->snap_flush_lock); return 1; /* caller may want to ceph_flush_snaps */ } /* * Queue cap_snaps for snap writeback for this realm and its children. * Called under snap_rwsem, so realm topology won't change. */ static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *lastinode = NULL; struct ceph_cap_snap *capsnap = NULL; doutc(cl, "%p %llx inode\n", realm, realm->ino); spin_lock(&realm->inodes_with_caps_lock); list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) { struct inode *inode = igrab(&ci->netfs.inode); if (!inode) continue; spin_unlock(&realm->inodes_with_caps_lock); iput(lastinode); lastinode = inode; /* * Allocate the capsnap memory outside of ceph_queue_cap_snap() * to reduce very possible but unnecessary frequently memory * allocate/free in this loop. */ if (!capsnap) { capsnap = kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS); if (!capsnap) { pr_err_client(cl, "ENOMEM allocating ceph_cap_snap on %p\n", inode); return; } } capsnap->cap_flush.is_capsnap = true; refcount_set(&capsnap->nref, 1); INIT_LIST_HEAD(&capsnap->cap_flush.i_list); INIT_LIST_HEAD(&capsnap->cap_flush.g_list); INIT_LIST_HEAD(&capsnap->ci_item); ceph_queue_cap_snap(ci, &capsnap); spin_lock(&realm->inodes_with_caps_lock); } spin_unlock(&realm->inodes_with_caps_lock); iput(lastinode); if (capsnap) kmem_cache_free(ceph_cap_snap_cachep, capsnap); doutc(cl, "%p %llx done\n", realm, realm->ino); } /* * Parse and apply a snapblob "snap trace" from the MDS. This specifies * the snap realm parameters from a given realm and all of its ancestors, * up to the root. * * Caller must hold snap_rwsem for write. */ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, void *p, void *e, bool deletion, struct ceph_snap_realm **realm_ret) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ struct ceph_snap_realm *realm; struct ceph_snap_realm *first_realm = NULL; struct ceph_snap_realm *realm_to_rebuild = NULL; struct ceph_client *client = mdsc->fsc->client; int rebuild_snapcs; int err = -ENOMEM; int ret; LIST_HEAD(dirty_realms); lockdep_assert_held_write(&mdsc->snap_rwsem); doutc(cl, "deletion=%d\n", deletion); more: realm = NULL; rebuild_snapcs = 0; ceph_decode_need(&p, e, sizeof(*ri), bad); ri = p; p += sizeof(*ri); ceph_decode_need(&p, e, sizeof(u64)*(le32_to_cpu(ri->num_snaps) + le32_to_cpu(ri->num_prior_parent_snaps)), bad); snaps = p; p += sizeof(u64) * le32_to_cpu(ri->num_snaps); prior_parent_snaps = p; p += sizeof(u64) * le32_to_cpu(ri->num_prior_parent_snaps); realm = ceph_lookup_snap_realm(mdsc, le64_to_cpu(ri->ino)); if (!realm) { realm = ceph_create_snap_realm(mdsc, le64_to_cpu(ri->ino)); if (IS_ERR(realm)) { err = PTR_ERR(realm); goto fail; } } /* ensure the parent is correct */ err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); if (err < 0) goto fail; rebuild_snapcs += err; if (le64_to_cpu(ri->seq) > realm->seq) { doutc(cl, "updating %llx %p %lld -> %lld\n", realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); realm->parent_since = le64_to_cpu(ri->parent_since); realm->num_snaps = le32_to_cpu(ri->num_snaps); err = dup_array(&realm->snaps, snaps, realm->num_snaps); if (err < 0) goto fail; realm->num_prior_parent_snaps = le32_to_cpu(ri->num_prior_parent_snaps); err = dup_array(&realm->prior_parent_snaps, prior_parent_snaps, realm->num_prior_parent_snaps); if (err < 0) goto fail; if (realm->seq > mdsc->last_snap_seq) mdsc->last_snap_seq = realm->seq; rebuild_snapcs = 1; } else if (!realm->cached_context) { doutc(cl, "%llx %p seq %lld new\n", realm->ino, realm, realm->seq); rebuild_snapcs = 1; } else { doutc(cl, "%llx %p seq %lld unchanged\n", realm->ino, realm, realm->seq); } doutc(cl, "done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, realm, rebuild_snapcs, p, e); /* * this will always track the uppest parent realm from which * we need to rebuild the snapshot contexts _downward_ in * hierarchy. */ if (rebuild_snapcs) realm_to_rebuild = realm; /* rebuild_snapcs when we reach the _end_ (root) of the trace */ if (realm_to_rebuild && p >= e) rebuild_snap_realms(mdsc, realm_to_rebuild, &dirty_realms); if (!first_realm) first_realm = realm; else ceph_put_snap_realm(mdsc, realm); if (p < e) goto more; /* * queue cap snaps _after_ we've built the new snap contexts, * so that i_head_snapc can be set appropriately. */ while (!list_empty(&dirty_realms)) { realm = list_first_entry(&dirty_realms, struct ceph_snap_realm, dirty_item); list_del_init(&realm->dirty_item); queue_realm_cap_snaps(mdsc, realm); } if (realm_ret) *realm_ret = first_realm; else ceph_put_snap_realm(mdsc, first_realm); __cleanup_empty_realms(mdsc); return 0; bad: err = -EIO; fail: if (realm && !IS_ERR(realm)) ceph_put_snap_realm(mdsc, realm); if (first_realm) ceph_put_snap_realm(mdsc, first_realm); pr_err_client(cl, "error %d\n", err); /* * When receiving a corrupted snap trace we don't know what * exactly has happened in MDS side. And we shouldn't continue * writing to OSD, which may corrupt the snapshot contents. * * Just try to blocklist this kclient and then this kclient * must be remounted to continue after the corrupted metadata * fixed in the MDS side. */ WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO); ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr); if (ret) pr_err_client(cl, "failed to blocklist %s: %d\n", ceph_pr_addr(&client->msgr.inst.addr), ret); WARN(1, "[client.%lld] %s %s%sdo remount to continue%s", client->monc.auth->global_id, __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), ret ? "" : " was blocklisted, ", err == -EIO ? " after corrupted snaptrace is fixed" : ""); return err; } /* * Send any cap_snaps that are queued for flush. Try to carry * s_mutex across multiple snap flushes to avoid locking overhead. * * Caller holds no locks. */ static void flush_snaps(struct ceph_mds_client *mdsc) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; struct ceph_mds_session *session = NULL; doutc(cl, "begin\n"); spin_lock(&mdsc->snap_flush_lock); while (!list_empty(&mdsc->snap_flush_list)) { ci = list_first_entry(&mdsc->snap_flush_list, struct ceph_inode_info, i_snap_flush_item); inode = &ci->netfs.inode; ihold(inode); spin_unlock(&mdsc->snap_flush_lock); ceph_flush_snaps(ci, &session); iput(inode); spin_lock(&mdsc->snap_flush_lock); } spin_unlock(&mdsc->snap_flush_lock); ceph_put_mds_session(session); doutc(cl, "done\n"); } /** * ceph_change_snap_realm - change the snap_realm for an inode * @inode: inode to move to new snap realm * @realm: new realm to move inode into (may be NULL) * * Detach an inode from its old snaprealm (if any) and attach it to * the new snaprealm (if any). The old snap realm reference held by * the inode is put. If realm is non-NULL, then the caller's reference * to it is taken over by the inode. */ void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_snap_realm *oldrealm = ci->i_snap_realm; lockdep_assert_held(&ci->i_ceph_lock); if (oldrealm) { spin_lock(&oldrealm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); if (oldrealm->ino == ci->i_vino.ino) oldrealm->inode = NULL; spin_unlock(&oldrealm->inodes_with_caps_lock); ceph_put_snap_realm(mdsc, oldrealm); } ci->i_snap_realm = realm; if (realm) { spin_lock(&realm->inodes_with_caps_lock); list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); if (realm->ino == ci->i_vino.ino) realm->inode = inode; spin_unlock(&realm->inodes_with_caps_lock); } } /* * Handle a snap notification from the MDS. * * This can take two basic forms: the simplest is just a snap creation * or deletion notification on an existing realm. This should update the * realm and its children. * * The more difficult case is realm creation, due to snap creation at a * new point in the file hierarchy, or due to a rename that moves a file or * directory into another realm. */ void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_client *cl = mdsc->fsc->client; struct super_block *sb = mdsc->fsc->sb; int mds = session->s_mds; u64 split; int op; int trace_len; struct ceph_snap_realm *realm = NULL; void *p = msg->front.iov_base; void *e = p + msg->front.iov_len; struct ceph_mds_snap_head *h; int num_split_inos, num_split_realms; __le64 *split_inos = NULL, *split_realms = NULL; int i; int locked_rwsem = 0; bool close_sessions = false; if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; /* decode */ if (msg->front.iov_len < sizeof(*h)) goto bad; h = p; op = le32_to_cpu(h->op); split = le64_to_cpu(h->split); /* non-zero if we are splitting an * existing realm */ num_split_inos = le32_to_cpu(h->num_split_inos); num_split_realms = le32_to_cpu(h->num_split_realms); trace_len = le32_to_cpu(h->trace_len); p += sizeof(*h); doutc(cl, "from mds%d op %s split %llx tracelen %d\n", mds, ceph_snap_op_name(op), split, trace_len); down_write(&mdsc->snap_rwsem); locked_rwsem = 1; if (op == CEPH_SNAP_OP_SPLIT) { struct ceph_mds_snap_realm *ri; /* * A "split" breaks part of an existing realm off into * a new realm. The MDS provides a list of inodes * (with caps) and child realms that belong to the new * child. */ split_inos = p; p += sizeof(u64) * num_split_inos; split_realms = p; p += sizeof(u64) * num_split_realms; ceph_decode_need(&p, e, sizeof(*ri), bad); /* we will peek at realm info here, but will _not_ * advance p, as the realm update will occur below in * ceph_update_snap_trace. */ ri = p; realm = ceph_lookup_snap_realm(mdsc, split); if (!realm) { realm = ceph_create_snap_realm(mdsc, split); if (IS_ERR(realm)) goto out; } doutc(cl, "splitting snap_realm %llx %p\n", realm->ino, realm); for (i = 0; i < num_split_inos; i++) { struct ceph_vino vino = { .ino = le64_to_cpu(split_inos[i]), .snap = CEPH_NOSNAP, }; struct inode *inode = ceph_find_inode(sb, vino); struct ceph_inode_info *ci; if (!inode) continue; ci = ceph_inode(inode); spin_lock(&ci->i_ceph_lock); if (!ci->i_snap_realm) goto skip_inode; /* * If this inode belongs to a realm that was * created after our new realm, we experienced * a race (due to another split notifications * arriving from a different MDS). So skip * this inode. */ if (ci->i_snap_realm->created > le64_to_cpu(ri->created)) { doutc(cl, " leaving %p %llx.%llx in newer realm %llx %p\n", inode, ceph_vinop(inode), ci->i_snap_realm->ino, ci->i_snap_realm); goto skip_inode; } doutc(cl, " will move %p %llx.%llx to split realm %llx %p\n", inode, ceph_vinop(inode), realm->ino, realm); ceph_get_snap_realm(mdsc, realm); ceph_change_snap_realm(inode, realm); spin_unlock(&ci->i_ceph_lock); iput(inode); continue; skip_inode: spin_unlock(&ci->i_ceph_lock); iput(inode); } /* we may have taken some of the old realm's children. */ for (i = 0; i < num_split_realms; i++) { struct ceph_snap_realm *child = __lookup_snap_realm(mdsc, le64_to_cpu(split_realms[i])); if (!child) continue; adjust_snap_realm_parent(mdsc, child, realm->ino); } } else { /* * In the non-split case both 'num_split_inos' and * 'num_split_realms' should be 0, making this a no-op. * However the MDS happens to populate 'split_realms' list * in one of the UPDATE op cases by mistake. * * Skip both lists just in case to ensure that 'p' is * positioned at the start of realm info, as expected by * ceph_update_snap_trace(). */ p += sizeof(u64) * num_split_inos; p += sizeof(u64) * num_split_realms; } /* * update using the provided snap trace. if we are deleting a * snap, we can avoid queueing cap_snaps. */ if (ceph_update_snap_trace(mdsc, p, e, op == CEPH_SNAP_OP_DESTROY, NULL)) { close_sessions = true; goto bad; } if (op == CEPH_SNAP_OP_SPLIT) /* we took a reference when we created the realm, above */ ceph_put_snap_realm(mdsc, realm); __cleanup_empty_realms(mdsc); up_write(&mdsc->snap_rwsem); flush_snaps(mdsc); ceph_dec_mds_stopping_blocker(mdsc); return; bad: pr_err_client(cl, "corrupt snap message from mds%d\n", mds); ceph_msg_dump(msg); out: if (locked_rwsem) up_write(&mdsc->snap_rwsem); ceph_dec_mds_stopping_blocker(mdsc); if (close_sessions) ceph_mdsc_close_sessions(mdsc); return; } struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, u64 snap) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm, *exist; struct rb_node **p, *parent; int ret; exist = NULL; spin_lock(&mdsc->snapid_map_lock); p = &mdsc->snapid_map_tree.rb_node; while (*p) { exist = rb_entry(*p, struct ceph_snapid_map, node); if (snap > exist->snap) { p = &(*p)->rb_left; } else if (snap < exist->snap) { p = &(*p)->rb_right; } else { if (atomic_inc_return(&exist->ref) == 1) list_del_init(&exist->lru); break; } exist = NULL; } spin_unlock(&mdsc->snapid_map_lock); if (exist) { doutc(cl, "found snapid map %llx -> %x\n", exist->snap, exist->dev); return exist; } sm = kmalloc(sizeof(*sm), GFP_NOFS); if (!sm) return NULL; ret = get_anon_bdev(&sm->dev); if (ret < 0) { kfree(sm); return NULL; } INIT_LIST_HEAD(&sm->lru); atomic_set(&sm->ref, 1); sm->snap = snap; exist = NULL; parent = NULL; p = &mdsc->snapid_map_tree.rb_node; spin_lock(&mdsc->snapid_map_lock); while (*p) { parent = *p; exist = rb_entry(*p, struct ceph_snapid_map, node); if (snap > exist->snap) p = &(*p)->rb_left; else if (snap < exist->snap) p = &(*p)->rb_right; else break; exist = NULL; } if (exist) { if (atomic_inc_return(&exist->ref) == 1) list_del_init(&exist->lru); } else { rb_link_node(&sm->node, parent, p); rb_insert_color(&sm->node, &mdsc->snapid_map_tree); } spin_unlock(&mdsc->snapid_map_lock); if (exist) { free_anon_bdev(sm->dev); kfree(sm); doutc(cl, "found snapid map %llx -> %x\n", exist->snap, exist->dev); return exist; } doutc(cl, "create snapid map %llx -> %x\n", sm->snap, sm->dev); return sm; } void ceph_put_snapid_map(struct ceph_mds_client* mdsc, struct ceph_snapid_map *sm) { if (!sm) return; if (atomic_dec_and_lock(&sm->ref, &mdsc->snapid_map_lock)) { if (!RB_EMPTY_NODE(&sm->node)) { sm->last_used = jiffies; list_add_tail(&sm->lru, &mdsc->snapid_map_lru); spin_unlock(&mdsc->snapid_map_lock); } else { /* already cleaned up by * ceph_cleanup_snapid_map() */ spin_unlock(&mdsc->snapid_map_lock); kfree(sm); } } } void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; unsigned long now; LIST_HEAD(to_free); spin_lock(&mdsc->snapid_map_lock); now = jiffies; while (!list_empty(&mdsc->snapid_map_lru)) { sm = list_first_entry(&mdsc->snapid_map_lru, struct ceph_snapid_map, lru); if (time_after(sm->last_used + CEPH_SNAPID_MAP_TIMEOUT, now)) break; rb_erase(&sm->node, &mdsc->snapid_map_tree); list_move(&sm->lru, &to_free); } spin_unlock(&mdsc->snapid_map_lock); while (!list_empty(&to_free)) { sm = list_first_entry(&to_free, struct ceph_snapid_map, lru); list_del(&sm->lru); doutc(cl, "trim snapid map %llx -> %x\n", sm->snap, sm->dev); free_anon_bdev(sm->dev); kfree(sm); } } void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; struct rb_node *p; LIST_HEAD(to_free); spin_lock(&mdsc->snapid_map_lock); while ((p = rb_first(&mdsc->snapid_map_tree))) { sm = rb_entry(p, struct ceph_snapid_map, node); rb_erase(p, &mdsc->snapid_map_tree); RB_CLEAR_NODE(p); list_move(&sm->lru, &to_free); } spin_unlock(&mdsc->snapid_map_lock); while (!list_empty(&to_free)) { sm = list_first_entry(&to_free, struct ceph_snapid_map, lru); list_del(&sm->lru); free_anon_bdev(sm->dev); if (WARN_ON_ONCE(atomic_read(&sm->ref))) { pr_err_client(cl, "snapid map %llx -> %x still in use\n", sm->snap, sm->dev); } kfree(sm); } }
2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 // SPDX-License-Identifier: GPL-2.0 /* * comedi/drivers/dt2801.c * Device Driver for DataTranslation DT2801 * */ /* * Driver: dt2801 * Description: Data Translation DT2801 series and DT01-EZ * Author: ds * Status: works * Devices: [Data Translation] DT2801 (dt2801), DT2801-A, DT2801/5716A, * DT2805, DT2805/5716A, DT2808, DT2818, DT2809, DT01-EZ * * This driver can autoprobe the type of board. * * Configuration options: * [0] - I/O port base address * [1] - unused * [2] - A/D reference 0=differential, 1=single-ended * [3] - A/D range * 0 = [-10, 10] * 1 = [0,10] * [4] - D/A 0 range * 0 = [-10, 10] * 1 = [-5,5] * 2 = [-2.5,2.5] * 3 = [0,10] * 4 = [0,5] * [5] - D/A 1 range (same choices) */ #include <linux/module.h> #include <linux/comedi/comedidev.h> #include <linux/delay.h> #define DT2801_TIMEOUT 1000 /* Hardware Configuration */ /* ====================== */ #define DT2801_MAX_DMA_SIZE (64 * 1024) /* define's */ /* ====================== */ /* Commands */ #define DT_C_RESET 0x0 #define DT_C_CLEAR_ERR 0x1 #define DT_C_READ_ERRREG 0x2 #define DT_C_SET_CLOCK 0x3 #define DT_C_TEST 0xb #define DT_C_STOP 0xf #define DT_C_SET_DIGIN 0x4 #define DT_C_SET_DIGOUT 0x5 #define DT_C_READ_DIG 0x6 #define DT_C_WRITE_DIG 0x7 #define DT_C_WRITE_DAIM 0x8 #define DT_C_SET_DA 0x9 #define DT_C_WRITE_DA 0xa #define DT_C_READ_ADIM 0xc #define DT_C_SET_AD 0xd #define DT_C_READ_AD 0xe /* * Command modifiers (only used with read/write), EXTTRIG can be * used with some other commands. */ #define DT_MOD_DMA BIT(4) #define DT_MOD_CONT BIT(5) #define DT_MOD_EXTCLK BIT(6) #define DT_MOD_EXTTRIG BIT(7) /* Bits in status register */ #define DT_S_DATA_OUT_READY BIT(0) #define DT_S_DATA_IN_FULL BIT(1) #define DT_S_READY BIT(2) #define DT_S_COMMAND BIT(3) #define DT_S_COMPOSITE_ERROR BIT(7) /* registers */ #define DT2801_DATA 0 #define DT2801_STATUS 1 #define DT2801_CMD 1 #if 0 /* ignore 'defined but not used' warning */ static const struct comedi_lrange range_dt2801_ai_pgh_bipolar = { 4, { BIP_RANGE(10), BIP_RANGE(5), BIP_RANGE(2.5), BIP_RANGE(1.25) } }; #endif static const struct comedi_lrange range_dt2801_ai_pgl_bipolar = { 4, { BIP_RANGE(10), BIP_RANGE(1), BIP_RANGE(0.1), BIP_RANGE(0.02) } }; #if 0 /* ignore 'defined but not used' warning */ static const struct comedi_lrange range_dt2801_ai_pgh_unipolar = { 4, { UNI_RANGE(10), UNI_RANGE(5), UNI_RANGE(2.5), UNI_RANGE(1.25) } }; #endif static const struct comedi_lrange range_dt2801_ai_pgl_unipolar = { 4, { UNI_RANGE(10), UNI_RANGE(1), UNI_RANGE(0.1), UNI_RANGE(0.02) } }; struct dt2801_board { const char *name; int boardcode; int ad_diff; int ad_chan; int adbits; int adrangetype; int dabits; }; /* * Typeid's for the different boards of the DT2801-series * (taken from the test-software, that comes with the board) */ static const struct dt2801_board boardtypes[] = { { .name = "dt2801", .boardcode = 0x09, .ad_diff = 2, .ad_chan = 16, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2801-a", .boardcode = 0x52, .ad_diff = 2, .ad_chan = 16, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2801/5716a", .boardcode = 0x82, .ad_diff = 1, .ad_chan = 16, .adbits = 16, .adrangetype = 1, .dabits = 12}, { .name = "dt2805", .boardcode = 0x12, .ad_diff = 1, .ad_chan = 16, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2805/5716a", .boardcode = 0x92, .ad_diff = 1, .ad_chan = 16, .adbits = 16, .adrangetype = 1, .dabits = 12}, { .name = "dt2808", .boardcode = 0x20, .ad_diff = 0, .ad_chan = 16, .adbits = 12, .adrangetype = 2, .dabits = 8}, { .name = "dt2818", .boardcode = 0xa2, .ad_diff = 0, .ad_chan = 4, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2809", .boardcode = 0xb0, .ad_diff = 0, .ad_chan = 8, .adbits = 12, .adrangetype = 1, .dabits = 12}, }; struct dt2801_private { const struct comedi_lrange *dac_range_types[2]; }; /* * These are the low-level routines: * writecommand: write a command to the board * writedata: write data byte * readdata: read data byte */ /* * Only checks DataOutReady-flag, not the Ready-flag as it is done * in the examples of the manual. I don't see why this should be * necessary. */ static int dt2801_readdata(struct comedi_device *dev, int *data) { int stat = 0; int timeout = DT2801_TIMEOUT; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & (DT_S_COMPOSITE_ERROR | DT_S_READY)) return stat; if (stat & DT_S_DATA_OUT_READY) { *data = inb_p(dev->iobase + DT2801_DATA); return 0; } } while (--timeout > 0); return -ETIME; } static int dt2801_readdata2(struct comedi_device *dev, int *data) { int lb = 0; int hb = 0; int ret; ret = dt2801_readdata(dev, &lb); if (ret) return ret; ret = dt2801_readdata(dev, &hb); if (ret) return ret; *data = (hb << 8) + lb; return 0; } static int dt2801_writedata(struct comedi_device *dev, unsigned int data) { int stat = 0; int timeout = DT2801_TIMEOUT; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_COMPOSITE_ERROR) return stat; if (!(stat & DT_S_DATA_IN_FULL)) { outb_p(data & 0xff, dev->iobase + DT2801_DATA); return 0; } } while (--timeout > 0); return -ETIME; } static int dt2801_writedata2(struct comedi_device *dev, unsigned int data) { int ret; ret = dt2801_writedata(dev, data & 0xff); if (ret < 0) return ret; ret = dt2801_writedata(dev, data >> 8); if (ret < 0) return ret; return 0; } static int dt2801_wait_for_ready(struct comedi_device *dev) { int timeout = DT2801_TIMEOUT; int stat; stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_READY) return 0; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_COMPOSITE_ERROR) return stat; if (stat & DT_S_READY) return 0; } while (--timeout > 0); return -ETIME; } static void dt2801_writecmd(struct comedi_device *dev, int command) { int stat; dt2801_wait_for_ready(dev); stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_COMPOSITE_ERROR) { dev_dbg(dev->class_dev, "composite-error in %s, ignoring\n", __func__); } if (!(stat & DT_S_READY)) dev_dbg(dev->class_dev, "!ready in %s, ignoring\n", __func__); outb_p(command, dev->iobase + DT2801_CMD); } static int dt2801_reset(struct comedi_device *dev) { int board_code = 0; unsigned int stat; int timeout; /* pull random data from data port */ inb_p(dev->iobase + DT2801_DATA); inb_p(dev->iobase + DT2801_DATA); inb_p(dev->iobase + DT2801_DATA); inb_p(dev->iobase + DT2801_DATA); /* dt2801_writecmd(dev,DT_C_STOP); */ outb_p(DT_C_STOP, dev->iobase + DT2801_CMD); /* dt2801_wait_for_ready(dev); */ usleep_range(100, 200); timeout = 10000; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_READY) break; } while (timeout--); if (!timeout) dev_dbg(dev->class_dev, "timeout 1 status=0x%02x\n", stat); /* dt2801_readdata(dev,&board_code); */ outb_p(DT_C_RESET, dev->iobase + DT2801_CMD); /* dt2801_writecmd(dev,DT_C_RESET); */ usleep_range(100, 200); timeout = 10000; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_READY) break; } while (timeout--); if (!timeout) dev_dbg(dev->class_dev, "timeout 2 status=0x%02x\n", stat); dt2801_readdata(dev, &board_code); return board_code; } static int probe_number_of_ai_chans(struct comedi_device *dev) { int n_chans; int stat; int data; for (n_chans = 0; n_chans < 16; n_chans++) { dt2801_writecmd(dev, DT_C_READ_ADIM); dt2801_writedata(dev, 0); dt2801_writedata(dev, n_chans); stat = dt2801_readdata2(dev, &data); if (stat) break; } dt2801_reset(dev); dt2801_reset(dev); return n_chans; } static const struct comedi_lrange *dac_range_table[] = { &range_bipolar10, &range_bipolar5, &range_bipolar2_5, &range_unipolar10, &range_unipolar5 }; static const struct comedi_lrange *dac_range_lkup(int opt) { if (opt < 0 || opt >= 5) return &range_unknown; return dac_range_table[opt]; } static const struct comedi_lrange *ai_range_lkup(int type, int opt) { switch (type) { case 0: return (opt) ? &range_dt2801_ai_pgl_unipolar : &range_dt2801_ai_pgl_bipolar; case 1: return (opt) ? &range_unipolar10 : &range_bipolar10; case 2: return &range_unipolar5; } return &range_unknown; } static int dt2801_error(struct comedi_device *dev, int stat) { if (stat < 0) { if (stat == -ETIME) dev_dbg(dev->class_dev, "timeout\n"); else dev_dbg(dev->class_dev, "error %d\n", stat); return stat; } dev_dbg(dev->class_dev, "error status 0x%02x, resetting...\n", stat); dt2801_reset(dev); dt2801_reset(dev); return -EIO; } static int dt2801_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int d; int stat; int i; for (i = 0; i < insn->n; i++) { dt2801_writecmd(dev, DT_C_READ_ADIM); dt2801_writedata(dev, CR_RANGE(insn->chanspec)); dt2801_writedata(dev, CR_CHAN(insn->chanspec)); stat = dt2801_readdata2(dev, &d); if (stat != 0) return dt2801_error(dev, stat); data[i] = d; } return i; } static int dt2801_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); dt2801_writecmd(dev, DT_C_WRITE_DAIM); dt2801_writedata(dev, chan); dt2801_writedata2(dev, data[0]); s->readback[chan] = data[0]; return 1; } static int dt2801_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int which = (s == &dev->subdevices[3]) ? 1 : 0; unsigned int val = 0; if (comedi_dio_update_state(s, data)) { dt2801_writecmd(dev, DT_C_WRITE_DIG); dt2801_writedata(dev, which); dt2801_writedata(dev, s->state); } dt2801_writecmd(dev, DT_C_READ_DIG); dt2801_writedata(dev, which); dt2801_readdata(dev, &val); data[1] = val; return insn->n; } static int dt2801_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0xff); if (ret) return ret; dt2801_writecmd(dev, s->io_bits ? DT_C_SET_DIGOUT : DT_C_SET_DIGIN); dt2801_writedata(dev, (s == &dev->subdevices[3]) ? 1 : 0); return insn->n; } /* * options: * [0] - i/o base * [1] - unused * [2] - a/d 0=differential, 1=single-ended * [3] - a/d range 0=[-10,10], 1=[0,10] * [4] - dac0 range 0=[-10,10], 1=[-5,5], 2=[-2.5,2.5] 3=[0,10], 4=[0,5] * [5] - dac1 range 0=[-10,10], 1=[-5,5], 2=[-2.5,2.5] 3=[0,10], 4=[0,5] */ static int dt2801_attach(struct comedi_device *dev, struct comedi_devconfig *it) { const struct dt2801_board *board; struct dt2801_private *devpriv; struct comedi_subdevice *s; int board_code, type; int ret = 0; int n_ai_chans; ret = comedi_request_region(dev, it->options[0], 0x2); if (ret) return ret; /* do some checking */ board_code = dt2801_reset(dev); /* heh. if it didn't work, try it again. */ if (!board_code) board_code = dt2801_reset(dev); for (type = 0; type < ARRAY_SIZE(boardtypes); type++) { if (boardtypes[type].boardcode == board_code) goto havetype; } dev_dbg(dev->class_dev, "unrecognized board code=0x%02x, contact author\n", board_code); type = 0; havetype: dev->board_ptr = boardtypes + type; board = dev->board_ptr; n_ai_chans = probe_number_of_ai_chans(dev); ret = comedi_alloc_subdevices(dev, 4); if (ret) goto out; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; dev->board_name = board->name; s = &dev->subdevices[0]; /* ai subdevice */ s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND; #if 1 s->n_chan = n_ai_chans; #else if (it->options[2]) s->n_chan = board->ad_chan; else s->n_chan = board->ad_chan / 2; #endif s->maxdata = (1 << board->adbits) - 1; s->range_table = ai_range_lkup(board->adrangetype, it->options[3]); s->insn_read = dt2801_ai_insn_read; s = &dev->subdevices[1]; /* ao subdevice */ s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE; s->n_chan = 2; s->maxdata = (1 << board->dabits) - 1; s->range_table_list = devpriv->dac_range_types; devpriv->dac_range_types[0] = dac_range_lkup(it->options[4]); devpriv->dac_range_types[1] = dac_range_lkup(it->options[5]); s->insn_write = dt2801_ao_insn_write; ret = comedi_alloc_subdev_readback(s); if (ret) return ret; s = &dev->subdevices[2]; /* 1st digital subdevice */ s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 8; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = dt2801_dio_insn_bits; s->insn_config = dt2801_dio_insn_config; s = &dev->subdevices[3]; /* 2nd digital subdevice */ s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 8; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = dt2801_dio_insn_bits; s->insn_config = dt2801_dio_insn_config; ret = 0; out: return ret; } static struct comedi_driver dt2801_driver = { .driver_name = "dt2801", .module = THIS_MODULE, .attach = dt2801_attach, .detach = comedi_legacy_detach, }; module_comedi_driver(dt2801_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi low-level driver"); MODULE_LICENSE("GPL");
10 34 34 34 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 // SPDX-License-Identifier: GPL-2.0-only #include <linux/if_bridge.h> #include <linux/in.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rhashtable.h> #include <linux/rhashtable-types.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/netlink.h> #include <net/vxlan.h> #include "vxlan_private.h" struct vxlan_mdb_entry_key { union vxlan_addr src; union vxlan_addr dst; __be32 vni; }; struct vxlan_mdb_entry { struct rhash_head rhnode; struct list_head remotes; struct vxlan_mdb_entry_key key; struct hlist_node mdb_node; struct rcu_head rcu; }; #define VXLAN_MDB_REMOTE_F_BLOCKED BIT(0) struct vxlan_mdb_remote { struct list_head list; struct vxlan_rdst __rcu *rd; u8 flags; u8 filter_mode; u8 rt_protocol; struct hlist_head src_list; struct rcu_head rcu; }; #define VXLAN_SGRP_F_DELETE BIT(0) struct vxlan_mdb_src_entry { struct hlist_node node; union vxlan_addr addr; u8 flags; }; struct vxlan_mdb_dump_ctx { long reserved; long entry_idx; long remote_idx; }; struct vxlan_mdb_config_src_entry { union vxlan_addr addr; struct list_head node; }; struct vxlan_mdb_config { struct vxlan_dev *vxlan; struct vxlan_mdb_entry_key group; struct list_head src_list; union vxlan_addr remote_ip; u32 remote_ifindex; __be32 remote_vni; __be16 remote_port; u16 nlflags; u8 flags; u8 filter_mode; u8 rt_protocol; }; struct vxlan_mdb_flush_desc { union vxlan_addr remote_ip; __be32 src_vni; __be32 remote_vni; __be16 remote_port; u8 rt_protocol; }; static const struct rhashtable_params vxlan_mdb_rht_params = { .head_offset = offsetof(struct vxlan_mdb_entry, rhnode), .key_offset = offsetof(struct vxlan_mdb_entry, key), .key_len = sizeof(struct vxlan_mdb_entry_key), .automatic_shrinking = true, }; static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack); static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack); static void vxlan_br_mdb_entry_fill(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote, struct br_mdb_entry *e) { const union vxlan_addr *dst = &mdb_entry->key.dst; memset(e, 0, sizeof(*e)); e->ifindex = vxlan->dev->ifindex; e->state = MDB_PERMANENT; if (remote->flags & VXLAN_MDB_REMOTE_F_BLOCKED) e->flags |= MDB_FLAGS_BLOCKED; switch (dst->sa.sa_family) { case AF_INET: e->addr.u.ip4 = dst->sin.sin_addr.s_addr; e->addr.proto = htons(ETH_P_IP); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: e->addr.u.ip6 = dst->sin6.sin6_addr; e->addr.proto = htons(ETH_P_IPV6); break; #endif } } static int vxlan_mdb_entry_info_fill_srcs(struct sk_buff *skb, const struct vxlan_mdb_remote *remote) { struct vxlan_mdb_src_entry *ent; struct nlattr *nest; if (hlist_empty(&remote->src_list)) return 0; nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST); if (!nest) return -EMSGSIZE; hlist_for_each_entry(ent, &remote->src_list, node) { struct nlattr *nest_ent; nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY); if (!nest_ent) goto out_cancel_err; if (vxlan_nla_put_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, &ent->addr) || nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, 0)) goto out_cancel_err; nla_nest_end(skb, nest_ent); } nla_nest_end(skb, nest); return 0; out_cancel_err: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int vxlan_mdb_entry_info_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); struct br_mdb_entry e; struct nlattr *nest; nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO); if (!nest) return -EMSGSIZE; vxlan_br_mdb_entry_fill(vxlan, mdb_entry, remote, &e); if (nla_put_nohdr(skb, sizeof(e), &e) || nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, 0)) goto nest_err; if (!vxlan_addr_any(&mdb_entry->key.src) && vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_SOURCE, &mdb_entry->key.src)) goto nest_err; if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, remote->rt_protocol) || nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, remote->filter_mode) || vxlan_mdb_entry_info_fill_srcs(skb, remote) || vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_DST, &rd->remote_ip)) goto nest_err; if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port && nla_put_u16(skb, MDBA_MDB_EATTR_DST_PORT, be16_to_cpu(rd->remote_port))) goto nest_err; if (rd->remote_vni != vxlan->default_dst.remote_vni && nla_put_u32(skb, MDBA_MDB_EATTR_VNI, be32_to_cpu(rd->remote_vni))) goto nest_err; if (rd->remote_ifindex && nla_put_u32(skb, MDBA_MDB_EATTR_IFINDEX, rd->remote_ifindex)) goto nest_err; if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && mdb_entry->key.vni && nla_put_u32(skb, MDBA_MDB_EATTR_SRC_VNI, be32_to_cpu(mdb_entry->key.vni))) goto nest_err; nla_nest_end(skb, nest); return 0; nest_err: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int vxlan_mdb_entry_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, struct vxlan_mdb_dump_ctx *ctx, const struct vxlan_mdb_entry *mdb_entry) { int remote_idx = 0, s_remote_idx = ctx->remote_idx; struct vxlan_mdb_remote *remote; struct nlattr *nest; int err = 0; nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!nest) return -EMSGSIZE; list_for_each_entry(remote, &mdb_entry->remotes, list) { if (remote_idx < s_remote_idx) goto skip; err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote); if (err) break; skip: remote_idx++; } ctx->remote_idx = err ? remote_idx : 0; nla_nest_end(skb, nest); return err; } static int vxlan_mdb_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, struct vxlan_mdb_dump_ctx *ctx) { int entry_idx = 0, s_entry_idx = ctx->entry_idx; struct vxlan_mdb_entry *mdb_entry; struct nlattr *nest; int err = 0; nest = nla_nest_start_noflag(skb, MDBA_MDB); if (!nest) return -EMSGSIZE; hlist_for_each_entry(mdb_entry, &vxlan->mdb_list, mdb_node) { if (entry_idx < s_entry_idx) goto skip; err = vxlan_mdb_entry_fill(vxlan, skb, ctx, mdb_entry); if (err) break; skip: entry_idx++; } ctx->entry_idx = err ? entry_idx : 0; nla_nest_end(skb, nest); return err; } int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { struct vxlan_mdb_dump_ctx *ctx = (void *)cb->ctx; struct vxlan_dev *vxlan = netdev_priv(dev); struct br_port_msg *bpm; struct nlmsghdr *nlh; int err; ASSERT_RTNL(); NL_ASSERT_CTX_FITS(struct vxlan_mdb_dump_ctx); nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; err = vxlan_mdb_fill(vxlan, skb, ctx); nlmsg_end(skb, nlh); cb->seq = vxlan->mdb_seq; nl_dump_check_consistent(cb, nlh); return err; } static const struct nla_policy vxlan_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = { [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), }; static const struct nla_policy vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = { [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol), }; static const struct netlink_range_validation vni_range = { .max = VXLAN_N_VID - 1, }; static const struct nla_policy vxlan_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE, MCAST_INCLUDE), [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_pol), [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 }, [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), [MDBE_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), }; static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto, struct netlink_ext_ack *extack) { switch (proto) { case htons(ETH_P_IP): if (nla_len(attr) != sizeof(struct in_addr)) { NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length"); return false; } if (ipv4_is_multicast(nla_get_in_addr(attr))) { NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed"); return false; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): { struct in6_addr src; if (nla_len(attr) != sizeof(struct in6_addr)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length"); return false; } src = nla_get_in6_addr(attr); if (ipv6_addr_is_multicast(&src)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed"); return false; } break; } #endif default: NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address"); return false; } return true; } static void vxlan_mdb_group_set(struct vxlan_mdb_entry_key *group, const struct br_mdb_entry *entry, const struct nlattr *source_attr) { switch (entry->addr.proto) { case htons(ETH_P_IP): group->dst.sa.sa_family = AF_INET; group->dst.sin.sin_addr.s_addr = entry->addr.u.ip4; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): group->dst.sa.sa_family = AF_INET6; group->dst.sin6.sin6_addr = entry->addr.u.ip6; break; #endif } if (source_attr) vxlan_nla_get_addr(&group->src, source_attr); } static bool vxlan_mdb_is_star_g(const struct vxlan_mdb_entry_key *group) { return !vxlan_addr_any(&group->dst) && vxlan_addr_any(&group->src); } static bool vxlan_mdb_is_sg(const struct vxlan_mdb_entry_key *group) { return !vxlan_addr_any(&group->dst) && !vxlan_addr_any(&group->src); } static int vxlan_mdb_config_src_entry_init(struct vxlan_mdb_config *cfg, __be16 proto, const struct nlattr *src_entry, struct netlink_ext_ack *extack) { struct nlattr *tb[MDBE_SRCATTR_MAX + 1]; struct vxlan_mdb_config_src_entry *src; int err; err = nla_parse_nested(tb, MDBE_SRCATTR_MAX, src_entry, vxlan_mdbe_src_list_entry_pol, extack); if (err) return err; if (NL_REQ_ATTR_CHECK(extack, src_entry, tb, MDBE_SRCATTR_ADDRESS)) return -EINVAL; if (!vxlan_mdb_is_valid_source(tb[MDBE_SRCATTR_ADDRESS], proto, extack)) return -EINVAL; src = kzalloc(sizeof(*src), GFP_KERNEL); if (!src) return -ENOMEM; err = vxlan_nla_get_addr(&src->addr, tb[MDBE_SRCATTR_ADDRESS]); if (err) goto err_free_src; list_add_tail(&src->node, &cfg->src_list); return 0; err_free_src: kfree(src); return err; } static void vxlan_mdb_config_src_entry_fini(struct vxlan_mdb_config_src_entry *src) { list_del(&src->node); kfree(src); } static int vxlan_mdb_config_src_list_init(struct vxlan_mdb_config *cfg, __be16 proto, const struct nlattr *src_list, struct netlink_ext_ack *extack) { struct vxlan_mdb_config_src_entry *src, *tmp; struct nlattr *src_entry; int rem, err; nla_for_each_nested(src_entry, src_list, rem) { err = vxlan_mdb_config_src_entry_init(cfg, proto, src_entry, extack); if (err) goto err_src_entry_init; } return 0; err_src_entry_init: list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node) vxlan_mdb_config_src_entry_fini(src); return err; } static void vxlan_mdb_config_src_list_fini(struct vxlan_mdb_config *cfg) { struct vxlan_mdb_config_src_entry *src, *tmp; list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node) vxlan_mdb_config_src_entry_fini(src); } static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg, const struct br_mdb_entry *entry, const struct nlattr *set_attrs, struct netlink_ext_ack *extack) { struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; int err; err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, set_attrs, vxlan_mdbe_attrs_pol, extack); if (err) return err; if (NL_REQ_ATTR_CHECK(extack, set_attrs, mdbe_attrs, MDBE_ATTR_DST)) { NL_SET_ERR_MSG_MOD(extack, "Missing remote destination IP address"); return -EINVAL; } if (mdbe_attrs[MDBE_ATTR_SOURCE] && !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE], entry->addr.proto, extack)) return -EINVAL; vxlan_mdb_group_set(&cfg->group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); /* rtnetlink code only validates that IPv4 group address is * multicast. */ if (!vxlan_addr_is_multicast(&cfg->group.dst) && !vxlan_addr_any(&cfg->group.dst)) { NL_SET_ERR_MSG_MOD(extack, "Group address is not multicast"); return -EINVAL; } if (vxlan_addr_any(&cfg->group.dst) && mdbe_attrs[MDBE_ATTR_SOURCE]) { NL_SET_ERR_MSG_MOD(extack, "Source cannot be specified for the all-zeros entry"); return -EINVAL; } if (vxlan_mdb_is_sg(&cfg->group)) cfg->filter_mode = MCAST_INCLUDE; if (mdbe_attrs[MDBE_ATTR_GROUP_MODE]) { if (!vxlan_mdb_is_star_g(&cfg->group)) { NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries"); return -EINVAL; } cfg->filter_mode = nla_get_u8(mdbe_attrs[MDBE_ATTR_GROUP_MODE]); } if (mdbe_attrs[MDBE_ATTR_SRC_LIST]) { if (!vxlan_mdb_is_star_g(&cfg->group)) { NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries"); return -EINVAL; } if (!mdbe_attrs[MDBE_ATTR_GROUP_MODE]) { NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode"); return -EINVAL; } err = vxlan_mdb_config_src_list_init(cfg, entry->addr.proto, mdbe_attrs[MDBE_ATTR_SRC_LIST], extack); if (err) return err; } if (vxlan_mdb_is_star_g(&cfg->group) && list_empty(&cfg->src_list) && cfg->filter_mode == MCAST_INCLUDE) { NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list"); return -EINVAL; } if (mdbe_attrs[MDBE_ATTR_RTPROT]) cfg->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); err = vxlan_nla_get_addr(&cfg->remote_ip, mdbe_attrs[MDBE_ATTR_DST]); if (err) { NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address"); goto err_src_list_fini; } if (mdbe_attrs[MDBE_ATTR_DST_PORT]) cfg->remote_port = cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT])); if (mdbe_attrs[MDBE_ATTR_VNI]) cfg->remote_vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI])); if (mdbe_attrs[MDBE_ATTR_IFINDEX]) { cfg->remote_ifindex = nla_get_s32(mdbe_attrs[MDBE_ATTR_IFINDEX]); if (!__dev_get_by_index(cfg->vxlan->net, cfg->remote_ifindex)) { NL_SET_ERR_MSG_MOD(extack, "Outgoing interface not found"); err = -EINVAL; goto err_src_list_fini; } } if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) cfg->group.vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); return 0; err_src_list_fini: vxlan_mdb_config_src_list_fini(cfg); return err; } static int vxlan_mdb_config_init(struct vxlan_mdb_config *cfg, struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); struct vxlan_dev *vxlan = netdev_priv(dev); memset(cfg, 0, sizeof(*cfg)); cfg->vxlan = vxlan; cfg->group.vni = vxlan->default_dst.remote_vni; INIT_LIST_HEAD(&cfg->src_list); cfg->nlflags = nlmsg_flags; cfg->filter_mode = MCAST_EXCLUDE; cfg->rt_protocol = RTPROT_STATIC; cfg->remote_vni = vxlan->default_dst.remote_vni; cfg->remote_port = vxlan->cfg.dst_port; if (entry->ifindex != dev->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Port net device must be the VXLAN net device"); return -EINVAL; } /* State is not part of the entry key and can be ignored on deletion * requests. */ if ((nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE)) && entry->state != MDB_PERMANENT) { NL_SET_ERR_MSG_MOD(extack, "MDB entry must be permanent"); return -EINVAL; } if (entry->flags) { NL_SET_ERR_MSG_MOD(extack, "Invalid MDB entry flags"); return -EINVAL; } if (entry->vid) { NL_SET_ERR_MSG_MOD(extack, "VID must not be specified"); return -EINVAL; } if (entry->addr.proto != htons(ETH_P_IP) && entry->addr.proto != htons(ETH_P_IPV6)) { NL_SET_ERR_MSG_MOD(extack, "Group address must be an IPv4 / IPv6 address"); return -EINVAL; } if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY_ATTRS)) { NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY_ATTRS attribute"); return -EINVAL; } return vxlan_mdb_config_attrs_init(cfg, entry, tb[MDBA_SET_ENTRY_ATTRS], extack); } static void vxlan_mdb_config_fini(struct vxlan_mdb_config *cfg) { vxlan_mdb_config_src_list_fini(cfg); } static struct vxlan_mdb_entry * vxlan_mdb_entry_lookup(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group) { return rhashtable_lookup_fast(&vxlan->mdb_tbl, group, vxlan_mdb_rht_params); } static struct vxlan_mdb_remote * vxlan_mdb_remote_lookup(const struct vxlan_mdb_entry *mdb_entry, const union vxlan_addr *addr) { struct vxlan_mdb_remote *remote; list_for_each_entry(remote, &mdb_entry->remotes, list) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); if (vxlan_addr_equal(addr, &rd->remote_ip)) return remote; } return NULL; } static void vxlan_mdb_rdst_free(struct rcu_head *head) { struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); dst_cache_destroy(&rd->dst_cache); kfree(rd); } static int vxlan_mdb_remote_rdst_init(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote) { struct vxlan_rdst *rd; int err; rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) return -ENOMEM; err = dst_cache_init(&rd->dst_cache, GFP_KERNEL); if (err) goto err_free_rdst; rd->remote_ip = cfg->remote_ip; rd->remote_port = cfg->remote_port; rd->remote_vni = cfg->remote_vni; rd->remote_ifindex = cfg->remote_ifindex; rcu_assign_pointer(remote->rd, rd); return 0; err_free_rdst: kfree(rd); return err; } static void vxlan_mdb_remote_rdst_fini(struct vxlan_rdst *rd) { call_rcu(&rd->rcu, vxlan_mdb_rdst_free); } static int vxlan_mdb_remote_init(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote) { int err; err = vxlan_mdb_remote_rdst_init(cfg, remote); if (err) return err; remote->flags = cfg->flags; remote->filter_mode = cfg->filter_mode; remote->rt_protocol = cfg->rt_protocol; INIT_HLIST_HEAD(&remote->src_list); return 0; } static void vxlan_mdb_remote_fini(struct vxlan_dev *vxlan, struct vxlan_mdb_remote *remote) { WARN_ON_ONCE(!hlist_empty(&remote->src_list)); vxlan_mdb_remote_rdst_fini(rtnl_dereference(remote->rd)); } static struct vxlan_mdb_src_entry * vxlan_mdb_remote_src_entry_lookup(const struct vxlan_mdb_remote *remote, const union vxlan_addr *addr) { struct vxlan_mdb_src_entry *ent; hlist_for_each_entry(ent, &remote->src_list, node) { if (vxlan_addr_equal(&ent->addr, addr)) return ent; } return NULL; } static struct vxlan_mdb_src_entry * vxlan_mdb_remote_src_entry_add(struct vxlan_mdb_remote *remote, const union vxlan_addr *addr) { struct vxlan_mdb_src_entry *ent; ent = kzalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return NULL; ent->addr = *addr; hlist_add_head(&ent->node, &remote->src_list); return ent; } static void vxlan_mdb_remote_src_entry_del(struct vxlan_mdb_src_entry *ent) { hlist_del(&ent->node); kfree(ent); } static int vxlan_mdb_remote_src_fwd_add(const struct vxlan_mdb_config *cfg, const union vxlan_addr *addr, struct netlink_ext_ack *extack) { struct vxlan_mdb_config sg_cfg; memset(&sg_cfg, 0, sizeof(sg_cfg)); sg_cfg.vxlan = cfg->vxlan; sg_cfg.group.src = *addr; sg_cfg.group.dst = cfg->group.dst; sg_cfg.group.vni = cfg->group.vni; INIT_LIST_HEAD(&sg_cfg.src_list); sg_cfg.remote_ip = cfg->remote_ip; sg_cfg.remote_ifindex = cfg->remote_ifindex; sg_cfg.remote_vni = cfg->remote_vni; sg_cfg.remote_port = cfg->remote_port; sg_cfg.nlflags = cfg->nlflags; sg_cfg.filter_mode = MCAST_INCLUDE; if (cfg->filter_mode == MCAST_EXCLUDE) sg_cfg.flags = VXLAN_MDB_REMOTE_F_BLOCKED; sg_cfg.rt_protocol = cfg->rt_protocol; return __vxlan_mdb_add(&sg_cfg, extack); } static void vxlan_mdb_remote_src_fwd_del(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group, const struct vxlan_mdb_remote *remote, const union vxlan_addr *addr) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); struct vxlan_mdb_config sg_cfg; memset(&sg_cfg, 0, sizeof(sg_cfg)); sg_cfg.vxlan = vxlan; sg_cfg.group.src = *addr; sg_cfg.group.dst = group->dst; sg_cfg.group.vni = group->vni; INIT_LIST_HEAD(&sg_cfg.src_list); sg_cfg.remote_ip = rd->remote_ip; __vxlan_mdb_del(&sg_cfg, NULL); } static int vxlan_mdb_remote_src_add(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote, const struct vxlan_mdb_config_src_entry *src, struct netlink_ext_ack *extack) { struct vxlan_mdb_src_entry *ent; int err; ent = vxlan_mdb_remote_src_entry_lookup(remote, &src->addr); if (!ent) { ent = vxlan_mdb_remote_src_entry_add(remote, &src->addr); if (!ent) return -ENOMEM; } else if (!(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Source entry already exists"); return -EEXIST; } err = vxlan_mdb_remote_src_fwd_add(cfg, &ent->addr, extack); if (err) goto err_src_del; /* Clear flags in case source entry was marked for deletion as part of * replace flow. */ ent->flags = 0; return 0; err_src_del: vxlan_mdb_remote_src_entry_del(ent); return err; } static void vxlan_mdb_remote_src_del(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group, const struct vxlan_mdb_remote *remote, struct vxlan_mdb_src_entry *ent) { vxlan_mdb_remote_src_fwd_del(vxlan, group, remote, &ent->addr); vxlan_mdb_remote_src_entry_del(ent); } static int vxlan_mdb_remote_srcs_add(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_remote *remote, struct netlink_ext_ack *extack) { struct vxlan_mdb_config_src_entry *src; struct vxlan_mdb_src_entry *ent; struct hlist_node *tmp; int err; list_for_each_entry(src, &cfg->src_list, node) { err = vxlan_mdb_remote_src_add(cfg, remote, src, extack); if (err) goto err_src_del; } return 0; err_src_del: hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) vxlan_mdb_remote_src_del(cfg->vxlan, &cfg->group, remote, ent); return err; } static void vxlan_mdb_remote_srcs_del(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group, struct vxlan_mdb_remote *remote) { struct vxlan_mdb_src_entry *ent; struct hlist_node *tmp; hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) vxlan_mdb_remote_src_del(vxlan, group, remote, ent); } static size_t vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group, const struct vxlan_mdb_remote *remote) { struct vxlan_mdb_src_entry *ent; size_t nlmsg_size; if (hlist_empty(&remote->src_list)) return 0; /* MDBA_MDB_EATTR_SRC_LIST */ nlmsg_size = nla_total_size(0); hlist_for_each_entry(ent, &remote->src_list, node) { /* MDBA_MDB_SRCLIST_ENTRY */ nlmsg_size += nla_total_size(0) + /* MDBA_MDB_SRCATTR_ADDRESS */ nla_total_size(vxlan_addr_size(&group->dst)) + /* MDBA_MDB_SRCATTR_TIMER */ nla_total_size(sizeof(u8)); } return nlmsg_size; } static size_t vxlan_mdb_nlmsg_remote_size(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote) { const struct vxlan_mdb_entry_key *group = &mdb_entry->key; struct vxlan_rdst *rd = rtnl_dereference(remote->rd); size_t nlmsg_size; /* MDBA_MDB_ENTRY_INFO */ nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) + /* MDBA_MDB_EATTR_TIMER */ nla_total_size(sizeof(u32)); /* MDBA_MDB_EATTR_SOURCE */ if (vxlan_mdb_is_sg(group)) nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst)); /* MDBA_MDB_EATTR_RTPROT */ nlmsg_size += nla_total_size(sizeof(u8)); /* MDBA_MDB_EATTR_SRC_LIST */ nlmsg_size += vxlan_mdb_nlmsg_src_list_size(group, remote); /* MDBA_MDB_EATTR_GROUP_MODE */ nlmsg_size += nla_total_size(sizeof(u8)); /* MDBA_MDB_EATTR_DST */ nlmsg_size += nla_total_size(vxlan_addr_size(&rd->remote_ip)); /* MDBA_MDB_EATTR_DST_PORT */ if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port) nlmsg_size += nla_total_size(sizeof(u16)); /* MDBA_MDB_EATTR_VNI */ if (rd->remote_vni != vxlan->default_dst.remote_vni) nlmsg_size += nla_total_size(sizeof(u32)); /* MDBA_MDB_EATTR_IFINDEX */ if (rd->remote_ifindex) nlmsg_size += nla_total_size(sizeof(u32)); /* MDBA_MDB_EATTR_SRC_VNI */ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && group->vni) nlmsg_size += nla_total_size(sizeof(u32)); return nlmsg_size; } static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote) { return NLMSG_ALIGN(sizeof(struct br_port_msg)) + /* MDBA_MDB */ nla_total_size(0) + /* MDBA_MDB_ENTRY */ nla_total_size(0) + /* Remote entry */ vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry, remote); } static int vxlan_mdb_nlmsg_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote, int type) { struct nlattr *mdb_nest, *mdb_entry_nest; struct br_port_msg *bpm; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = vxlan->dev->ifindex; mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); if (!mdb_nest) goto cancel; mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!mdb_entry_nest) goto cancel; if (vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote)) goto cancel; nla_nest_end(skb, mdb_entry_nest); nla_nest_end(skb, mdb_nest); nlmsg_end(skb, nlh); return 0; cancel: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static void vxlan_mdb_remote_notify(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_remote *remote, int type) { struct net *net = dev_net(vxlan->dev); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(vxlan_mdb_nlmsg_size(vxlan, mdb_entry, remote), GFP_KERNEL); if (!skb) goto errout; err = vxlan_mdb_nlmsg_fill(vxlan, skb, mdb_entry, remote, type); if (err) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } static int vxlan_mdb_remote_srcs_replace(const struct vxlan_mdb_config *cfg, const struct vxlan_mdb_entry *mdb_entry, struct vxlan_mdb_remote *remote, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = cfg->vxlan; struct vxlan_mdb_src_entry *ent; struct hlist_node *tmp; int err; hlist_for_each_entry(ent, &remote->src_list, node) ent->flags |= VXLAN_SGRP_F_DELETE; err = vxlan_mdb_remote_srcs_add(cfg, remote, extack); if (err) goto err_clear_delete; hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) { if (ent->flags & VXLAN_SGRP_F_DELETE) vxlan_mdb_remote_src_del(vxlan, &mdb_entry->key, remote, ent); } return 0; err_clear_delete: hlist_for_each_entry(ent, &remote->src_list, node) ent->flags &= ~VXLAN_SGRP_F_DELETE; return err; } static int vxlan_mdb_remote_replace(const struct vxlan_mdb_config *cfg, const struct vxlan_mdb_entry *mdb_entry, struct vxlan_mdb_remote *remote, struct netlink_ext_ack *extack) { struct vxlan_rdst *new_rd, *old_rd = rtnl_dereference(remote->rd); struct vxlan_dev *vxlan = cfg->vxlan; int err; err = vxlan_mdb_remote_rdst_init(cfg, remote); if (err) return err; new_rd = rtnl_dereference(remote->rd); err = vxlan_mdb_remote_srcs_replace(cfg, mdb_entry, remote, extack); if (err) goto err_rdst_reset; WRITE_ONCE(remote->flags, cfg->flags); WRITE_ONCE(remote->filter_mode, cfg->filter_mode); remote->rt_protocol = cfg->rt_protocol; vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_NEWMDB); vxlan_mdb_remote_rdst_fini(old_rd); return 0; err_rdst_reset: rcu_assign_pointer(remote->rd, old_rd); vxlan_mdb_remote_rdst_fini(new_rd); return err; } static int vxlan_mdb_remote_add(const struct vxlan_mdb_config *cfg, struct vxlan_mdb_entry *mdb_entry, struct netlink_ext_ack *extack) { struct vxlan_mdb_remote *remote; int err; remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip); if (remote) { if (!(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Replace not specified and MDB remote entry already exists"); return -EEXIST; } return vxlan_mdb_remote_replace(cfg, mdb_entry, remote, extack); } if (!(cfg->nlflags & NLM_F_CREATE)) { NL_SET_ERR_MSG_MOD(extack, "Create not specified and entry does not exist"); return -ENOENT; } remote = kzalloc(sizeof(*remote), GFP_KERNEL); if (!remote) return -ENOMEM; err = vxlan_mdb_remote_init(cfg, remote); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to initialize remote MDB entry"); goto err_free_remote; } err = vxlan_mdb_remote_srcs_add(cfg, remote, extack); if (err) goto err_remote_fini; list_add_rcu(&remote->list, &mdb_entry->remotes); vxlan_mdb_remote_notify(cfg->vxlan, mdb_entry, remote, RTM_NEWMDB); return 0; err_remote_fini: vxlan_mdb_remote_fini(cfg->vxlan, remote); err_free_remote: kfree(remote); return err; } static void vxlan_mdb_remote_del(struct vxlan_dev *vxlan, struct vxlan_mdb_entry *mdb_entry, struct vxlan_mdb_remote *remote) { vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_DELMDB); list_del_rcu(&remote->list); vxlan_mdb_remote_srcs_del(vxlan, &mdb_entry->key, remote); vxlan_mdb_remote_fini(vxlan, remote); kfree_rcu(remote, rcu); } static struct vxlan_mdb_entry * vxlan_mdb_entry_get(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry_key *group) { struct vxlan_mdb_entry *mdb_entry; int err; mdb_entry = vxlan_mdb_entry_lookup(vxlan, group); if (mdb_entry) return mdb_entry; mdb_entry = kzalloc(sizeof(*mdb_entry), GFP_KERNEL); if (!mdb_entry) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&mdb_entry->remotes); memcpy(&mdb_entry->key, group, sizeof(mdb_entry->key)); hlist_add_head(&mdb_entry->mdb_node, &vxlan->mdb_list); err = rhashtable_lookup_insert_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode, vxlan_mdb_rht_params); if (err) goto err_free_entry; if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list)) vxlan->cfg.flags |= VXLAN_F_MDB; return mdb_entry; err_free_entry: hlist_del(&mdb_entry->mdb_node); kfree(mdb_entry); return ERR_PTR(err); } static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan, struct vxlan_mdb_entry *mdb_entry) { if (!list_empty(&mdb_entry->remotes)) return; if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list)) vxlan->cfg.flags &= ~VXLAN_F_MDB; rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode, vxlan_mdb_rht_params); hlist_del(&mdb_entry->mdb_node); kfree_rcu(mdb_entry, rcu); } static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = cfg->vxlan; struct vxlan_mdb_entry *mdb_entry; int err; mdb_entry = vxlan_mdb_entry_get(vxlan, &cfg->group); if (IS_ERR(mdb_entry)) return PTR_ERR(mdb_entry); err = vxlan_mdb_remote_add(cfg, mdb_entry, extack); if (err) goto err_entry_put; vxlan->mdb_seq++; return 0; err_entry_put: vxlan_mdb_entry_put(vxlan, mdb_entry); return err; } static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = cfg->vxlan; struct vxlan_mdb_entry *mdb_entry; struct vxlan_mdb_remote *remote; mdb_entry = vxlan_mdb_entry_lookup(vxlan, &cfg->group); if (!mdb_entry) { NL_SET_ERR_MSG_MOD(extack, "Did not find MDB entry"); return -ENOENT; } remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip); if (!remote) { NL_SET_ERR_MSG_MOD(extack, "Did not find MDB remote entry"); return -ENOENT; } vxlan_mdb_remote_del(vxlan, mdb_entry, remote); vxlan_mdb_entry_put(vxlan, mdb_entry); vxlan->mdb_seq++; return 0; } int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { struct vxlan_mdb_config cfg; int err; ASSERT_RTNL(); err = vxlan_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack); if (err) return err; err = __vxlan_mdb_add(&cfg, extack); vxlan_mdb_config_fini(&cfg); return err; } int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct vxlan_mdb_config cfg; int err; ASSERT_RTNL(); err = vxlan_mdb_config_init(&cfg, dev, tb, 0, extack); if (err) return err; err = __vxlan_mdb_del(&cfg, extack); vxlan_mdb_config_fini(&cfg); return err; } static const struct nla_policy vxlan_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 }, [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT), }; static int vxlan_mdb_flush_desc_init(struct vxlan_dev *vxlan, struct vxlan_mdb_flush_desc *desc, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; int err; if (entry->ifindex && entry->ifindex != vxlan->dev->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Invalid port net device"); return -EINVAL; } if (entry->vid) { NL_SET_ERR_MSG_MOD(extack, "VID must not be specified"); return -EINVAL; } if (!tb[MDBA_SET_ENTRY_ATTRS]) return 0; err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, tb[MDBA_SET_ENTRY_ATTRS], vxlan_mdbe_attrs_del_bulk_pol, extack); if (err) return err; if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) { u8 state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]); if ((state_mask & MDB_PERMANENT) && !(entry->state & MDB_PERMANENT)) { NL_SET_ERR_MSG_MOD(extack, "Only permanent MDB entries are supported"); return -EINVAL; } } if (mdbe_attrs[MDBE_ATTR_RTPROT]) desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); if (mdbe_attrs[MDBE_ATTR_DST]) vxlan_nla_get_addr(&desc->remote_ip, mdbe_attrs[MDBE_ATTR_DST]); if (mdbe_attrs[MDBE_ATTR_DST_PORT]) desc->remote_port = cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT])); if (mdbe_attrs[MDBE_ATTR_VNI]) desc->remote_vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI])); if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) desc->src_vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); return 0; } static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan, struct vxlan_mdb_entry *mdb_entry, const struct vxlan_mdb_flush_desc *desc) { struct vxlan_mdb_remote *remote, *tmp; list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list) { struct vxlan_rdst *rd = rtnl_dereference(remote->rd); __be32 remote_vni; if (desc->remote_ip.sa.sa_family && !vxlan_addr_equal(&desc->remote_ip, &rd->remote_ip)) continue; /* Encapsulation is performed with source VNI if remote VNI * is not set. */ remote_vni = rd->remote_vni ? : mdb_entry->key.vni; if (desc->remote_vni && desc->remote_vni != remote_vni) continue; if (desc->remote_port && desc->remote_port != rd->remote_port) continue; if (desc->rt_protocol && desc->rt_protocol != remote->rt_protocol) continue; vxlan_mdb_remote_del(vxlan, mdb_entry, remote); } } static void vxlan_mdb_flush(struct vxlan_dev *vxlan, const struct vxlan_mdb_flush_desc *desc) { struct vxlan_mdb_entry *mdb_entry; struct hlist_node *tmp; /* The removal of an entry cannot trigger the removal of another entry * since entries are always added to the head of the list. */ hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) { if (desc->src_vni && desc->src_vni != mdb_entry->key.vni) continue; vxlan_mdb_remotes_flush(vxlan, mdb_entry, desc); /* Entry will only be removed if its remotes list is empty. */ vxlan_mdb_entry_put(vxlan, mdb_entry); } } int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_mdb_flush_desc desc = {}; int err; ASSERT_RTNL(); err = vxlan_mdb_flush_desc_init(vxlan, &desc, tb, extack); if (err) return err; vxlan_mdb_flush(vxlan, &desc); return 0; } static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), }; static int vxlan_mdb_get_parse(struct net_device *dev, struct nlattr *tb[], struct vxlan_mdb_entry_key *group, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]); struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; struct vxlan_dev *vxlan = netdev_priv(dev); int err; memset(group, 0, sizeof(*group)); group->vni = vxlan->default_dst.remote_vni; if (!tb[MDBA_GET_ENTRY_ATTRS]) { vxlan_mdb_group_set(group, entry, NULL); return 0; } err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, tb[MDBA_GET_ENTRY_ATTRS], vxlan_mdbe_attrs_get_pol, extack); if (err) return err; if (mdbe_attrs[MDBE_ATTR_SOURCE] && !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE], entry->addr.proto, extack)) return -EINVAL; vxlan_mdb_group_set(group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) group->vni = cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); return 0; } static struct sk_buff * vxlan_mdb_get_reply_alloc(const struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry) { struct vxlan_mdb_remote *remote; size_t nlmsg_size; nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + /* MDBA_MDB */ nla_total_size(0) + /* MDBA_MDB_ENTRY */ nla_total_size(0); list_for_each_entry(remote, &mdb_entry->remotes, list) nlmsg_size += vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry, remote); return nlmsg_new(nlmsg_size, GFP_KERNEL); } static int vxlan_mdb_get_reply_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, const struct vxlan_mdb_entry *mdb_entry, u32 portid, u32 seq) { struct nlattr *mdb_nest, *mdb_entry_nest; struct vxlan_mdb_remote *remote; struct br_port_msg *bpm; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = vxlan->dev->ifindex; mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); if (!mdb_nest) { err = -EMSGSIZE; goto cancel; } mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!mdb_entry_nest) { err = -EMSGSIZE; goto cancel; } list_for_each_entry(remote, &mdb_entry->remotes, list) { err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote); if (err) goto cancel; } nla_nest_end(skb, mdb_entry_nest); nla_nest_end(skb, mdb_nest); nlmsg_end(skb, nlh); return 0; cancel: nlmsg_cancel(skb, nlh); return err; } int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_mdb_entry *mdb_entry; struct vxlan_mdb_entry_key group; struct sk_buff *skb; int err; ASSERT_RTNL(); err = vxlan_mdb_get_parse(dev, tb, &group, extack); if (err) return err; mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); if (!mdb_entry) { NL_SET_ERR_MSG_MOD(extack, "MDB entry not found"); return -ENOENT; } skb = vxlan_mdb_get_reply_alloc(vxlan, mdb_entry); if (!skb) return -ENOMEM; err = vxlan_mdb_get_reply_fill(vxlan, skb, mdb_entry, portid, seq); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply"); goto free; } return rtnl_unicast(skb, dev_net(dev), portid); free: kfree_skb(skb); return err; } struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan, struct sk_buff *skb, __be32 src_vni) { struct vxlan_mdb_entry *mdb_entry; struct vxlan_mdb_entry_key group; if (!is_multicast_ether_addr(eth_hdr(skb)->h_dest) || is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) return NULL; /* When not in collect metadata mode, 'src_vni' is zero, but MDB * entries are stored with the VNI of the VXLAN device. */ if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) src_vni = vxlan->default_dst.remote_vni; memset(&group, 0, sizeof(group)); group.vni = src_vni; switch (skb->protocol) { case htons(ETH_P_IP): if (!pskb_may_pull(skb, sizeof(struct iphdr))) return NULL; group.dst.sa.sa_family = AF_INET; group.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr; group.src.sa.sa_family = AF_INET; group.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return NULL; group.dst.sa.sa_family = AF_INET6; group.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr; group.src.sa.sa_family = AF_INET6; group.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr; break; #endif default: return NULL; } mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); if (mdb_entry) return mdb_entry; memset(&group.src, 0, sizeof(group.src)); mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); if (mdb_entry) return mdb_entry; /* No (S, G) or (*, G) found. Look up the all-zeros entry, but only if * the destination IP address is not link-local multicast since we want * to transmit such traffic together with broadcast and unknown unicast * traffic. */ switch (skb->protocol) { case htons(ETH_P_IP): if (ipv4_is_local_multicast(group.dst.sin.sin_addr.s_addr)) return NULL; group.dst.sin.sin_addr.s_addr = 0; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): if (ipv6_addr_type(&group.dst.sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) return NULL; memset(&group.dst.sin6.sin6_addr, 0, sizeof(group.dst.sin6.sin6_addr)); break; #endif default: return NULL; } return vxlan_mdb_entry_lookup(vxlan, &group); } netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan, const struct vxlan_mdb_entry *mdb_entry, struct sk_buff *skb) { struct vxlan_mdb_remote *remote, *fremote = NULL; __be32 src_vni = mdb_entry->key.vni; list_for_each_entry_rcu(remote, &mdb_entry->remotes, list) { struct sk_buff *skb1; if ((vxlan_mdb_is_star_g(&mdb_entry->key) && READ_ONCE(remote->filter_mode) == MCAST_INCLUDE) || (READ_ONCE(remote->flags) & VXLAN_MDB_REMOTE_F_BLOCKED)) continue; if (!fremote) { fremote = remote; continue; } skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) vxlan_xmit_one(skb1, vxlan->dev, src_vni, rcu_dereference(remote->rd), false); } if (fremote) vxlan_xmit_one(skb, vxlan->dev, src_vni, rcu_dereference(fremote->rd), false); else kfree_skb_reason(skb, SKB_DROP_REASON_NO_TX_TARGET); return NETDEV_TX_OK; } static void vxlan_mdb_check_empty(void *ptr, void *arg) { WARN_ON_ONCE(1); } int vxlan_mdb_init(struct vxlan_dev *vxlan) { int err; err = rhashtable_init(&vxlan->mdb_tbl, &vxlan_mdb_rht_params); if (err) return err; INIT_HLIST_HEAD(&vxlan->mdb_list); return 0; } void vxlan_mdb_fini(struct vxlan_dev *vxlan) { struct vxlan_mdb_flush_desc desc = {}; vxlan_mdb_flush(vxlan, &desc); WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB); rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty, NULL); }
6 50 50 1 50 1 50 49 49 14 14 4 51 44 45 44 43 45 45 3 4 4 4 2 1 3 1 2 1 1 1 4 1 1 1 4 1 2 2 2 1 2 1 2 1 1 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 // SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/timer.h> #include <linux/module.h> #include <linux/udp.h> #include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <net/ip6_checksum.h> #include <net/checksum.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_log.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> static const unsigned int udp_timeouts[UDP_CT_MAX] = { [UDP_CT_UNREPLIED] = 30*HZ, [UDP_CT_REPLIED] = 120*HZ, }; static unsigned int *udp_get_timeouts(struct net *net) { return nf_udp_pernet(net)->timeouts; } static void udp_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { nf_l4proto_log_invalid(skb, state, IPPROTO_UDP, "%s", msg); } static bool udp_error(struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { unsigned int udplen = skb->len - dataoff; const struct udphdr *hdr; struct udphdr _hdr; /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (!hdr) { udp_error_log(skb, state, "short packet"); return true; } /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { udp_error_log(skb, state, "truncated/malformed packet"); return true; } /* Packet with no checksum */ if (!hdr->check) return false; /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (state->hook == NF_INET_PRE_ROUTING && state->net->ct.sysctl_checksum && nf_checksum(skb, state->hook, dataoff, IPPROTO_UDP, state->pf)) { udp_error_log(skb, state, "bad checksum"); return true; } return false; } /* Returns verdict for packet, and may modify conntracktype */ int nf_conntrack_udp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { unsigned int *timeouts; unsigned long status; if (udp_error(skb, dataoff, state)) return -NF_ACCEPT; timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = udp_get_timeouts(nf_ct_net(ct)); status = READ_ONCE(ct->status); if ((status & IPS_CONFIRMED) == 0) ct->proto.udp.stream_ts = 2 * HZ + jiffies; /* If we've seen traffic both ways, this is some kind of UDP * stream. Set Assured. */ if (status & IPS_SEEN_REPLY) { unsigned long extra = timeouts[UDP_CT_UNREPLIED]; bool stream = false; /* Still active after two seconds? Extend timeout. */ if (time_after(jiffies, ct->proto.udp.stream_ts)) { extra = timeouts[UDP_CT_REPLIED]; stream = (status & IPS_ASSURED) == 0; } nf_ct_refresh_acct(ct, ctinfo, skb, extra); /* never set ASSURED for IPS_NAT_CLASH, they time out soon */ if (unlikely((status & IPS_NAT_CLASH))) return NF_ACCEPT; /* Also, more likely to be important, and not a probe */ if (stream && !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } #ifdef CONFIG_NF_CT_PROTO_UDPLITE static void udplite_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg); } static bool udplite_error(struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { unsigned int udplen = skb->len - dataoff; const struct udphdr *hdr; struct udphdr _hdr; unsigned int cscov; /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (!hdr) { udplite_error_log(skb, state, "short packet"); return true; } cscov = ntohs(hdr->len); if (cscov == 0) { cscov = udplen; } else if (cscov < sizeof(*hdr) || cscov > udplen) { udplite_error_log(skb, state, "invalid checksum coverage"); return true; } /* UDPLITE mandates checksums */ if (!hdr->check) { udplite_error_log(skb, state, "checksum missing"); return true; } /* Checksum invalid? Ignore. */ if (state->hook == NF_INET_PRE_ROUTING && state->net->ct.sysctl_checksum && nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP, state->pf)) { udplite_error_log(skb, state, "bad checksum"); return true; } return false; } /* Returns verdict for packet, and may modify conntracktype */ int nf_conntrack_udplite_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { unsigned int *timeouts; if (udplite_error(skb, dataoff, state)) return -NF_ACCEPT; timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = udp_get_timeouts(nf_ct_net(ct)); /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_REPLIED]); if (unlikely((ct->status & IPS_NAT_CLASH))) return NF_ACCEPT; /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; struct nf_udp_net *un = nf_udp_pernet(net); if (!timeouts) timeouts = un->timeouts; /* set default timeouts for UDP. */ timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { timeouts[UDP_CT_UNREPLIED] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ; } if (tb[CTA_TIMEOUT_UDP_REPLIED]) { timeouts[UDP_CT_REPLIED] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ; } return 0; } static int udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; if (nla_put_be32(skb, CTA_TIMEOUT_UDP_UNREPLIED, htonl(timeouts[UDP_CT_UNREPLIED] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_UDP_REPLIED, htonl(timeouts[UDP_CT_REPLIED] / HZ))) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 }, [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ void nf_conntrack_udp_init_net(struct net *net) { struct nf_udp_net *un = nf_udp_pernet(net); int i; for (i = 0; i < UDP_CT_MAX; i++) un->timeouts[i] = udp_timeouts[i]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) un->offload_timeout = 30 * HZ; #endif } const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = { .l4proto = IPPROTO_UDP, .allow_clash = true, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = udp_timeout_nlattr_to_obj, .obj_to_nlattr = udp_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_UDP_MAX, .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ }; #ifdef CONFIG_NF_CT_PROTO_UDPLITE const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = { .l4proto = IPPROTO_UDPLITE, .allow_clash = true, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = udp_timeout_nlattr_to_obj, .obj_to_nlattr = udp_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_UDP_MAX, .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ }; #endif
2 3 2 6 7 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 // SPDX-License-Identifier: GPL-2.0 #ifndef IOU_FILE_TABLE_H #define IOU_FILE_TABLE_H #include <linux/file.h> #include <linux/io_uring_types.h> #include "rsrc.h" bool io_alloc_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table, unsigned nr_files); void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table); int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags, struct file *file, unsigned int file_slot); int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file, unsigned int file_slot); int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset); int io_register_file_alloc_range(struct io_ring_ctx *ctx, struct io_uring_file_index_range __user *arg); io_req_flags_t io_file_get_flags(struct file *file); static inline void io_file_bitmap_clear(struct io_file_table *table, int bit) { WARN_ON_ONCE(!test_bit(bit, table->bitmap)); __clear_bit(bit, table->bitmap); table->alloc_hint = bit; } static inline void io_file_bitmap_set(struct io_file_table *table, int bit) { WARN_ON_ONCE(test_bit(bit, table->bitmap)); __set_bit(bit, table->bitmap); table->alloc_hint = bit + 1; } #define FFS_NOWAIT 0x1UL #define FFS_ISREG 0x2UL #define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG) static inline unsigned int io_slot_flags(struct io_rsrc_node *node) { return (node->file_ptr & ~FFS_MASK) << REQ_F_SUPPORT_NOWAIT_BIT; } static inline struct file *io_slot_file(struct io_rsrc_node *node) { return (struct file *)(node->file_ptr & FFS_MASK); } static inline void io_fixed_file_set(struct io_rsrc_node *node, struct file *file) { node->file_ptr = (unsigned long)file | (io_file_get_flags(file) >> REQ_F_SUPPORT_NOWAIT_BIT); } static inline void io_file_table_set_alloc_range(struct io_ring_ctx *ctx, unsigned off, unsigned len) { ctx->file_alloc_start = off; ctx->file_alloc_end = off + len; ctx->file_table.alloc_hint = ctx->file_alloc_start; } #endif
1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _ASM_X86_GSSEG_H #define _ASM_X86_GSSEG_H #include <linux/types.h> #include <asm/asm.h> #include <asm/cpufeature.h> #include <asm/alternative.h> #include <asm/processor.h> #include <asm/nops.h> #ifdef CONFIG_X86_64 extern asmlinkage void asm_load_gs_index(u16 selector); /* Replace with "lkgs %di" once binutils support LKGS instruction */ #define LKGS_DI _ASM_BYTES(0xf2,0x0f,0x00,0xf7) static inline void native_lkgs(unsigned int selector) { u16 sel = selector; asm_inline volatile("1: " LKGS_DI _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k[sel]) : [sel] "+D" (sel)); } static inline void native_load_gs_index(unsigned int selector) { if (cpu_feature_enabled(X86_FEATURE_LKGS)) { native_lkgs(selector); } else { unsigned long flags; local_irq_save(flags); asm_load_gs_index(selector); local_irq_restore(flags); } } #endif /* CONFIG_X86_64 */ static inline void __init lkgs_init(void) { #ifdef CONFIG_PARAVIRT_XXL #ifdef CONFIG_X86_64 if (cpu_feature_enabled(X86_FEATURE_LKGS)) pv_ops.cpu.load_gs_index = native_lkgs; #endif #endif } #ifndef CONFIG_PARAVIRT_XXL static inline void load_gs_index(unsigned int selector) { #ifdef CONFIG_X86_64 native_load_gs_index(selector); #else loadsegment(gs, selector); #endif } #endif /* CONFIG_PARAVIRT_XXL */ #endif /* _ASM_X86_GSSEG_H */
5 49 68 66 6 3 3 3 1 3 3 1 3 1 1 1 70 65 3 3 51 50 49 1 1 50 2 2 2 6 19 3 19 38 37 60 63 8 5 5 5 8 62 1 1 1 63 71 67 71 5 1 4 2 70 56 68 43 70 2 1 2 2 2 1 2 71 8 8 6 64 40 39 64 51 9 63 50 6 1 6 2 1 6 5 2 5 5 6 62 63 7 7 63 1 1 1 1 71 62 3 3 3 69 64 13 13 49 50 44 6 71 71 52 52 70 71 67 3 3 67 51 51 2 2 1 51 23 5 5 5 23 28 27 2 2 27 70 23 18 19 19 1 19 4 3 3 3 3 4 4 2 4 1 4 3 1 1 1 2 2 1 6 6 2 1 1 3 1 3 1 1 21 19 2 17 17 17 17 2 1 22 22 21 2 21 2 20 20 6 3 14 17 1 16 1 17 17 17 11 17 1 17 1 17 2 17 2 17 1 17 1 17 1 17 1 17 4 17 17 16 17 20 21 1 22 21 21 17 4 17 1 17 13 13 13 10 2 2 4 4 13 13 13 13 13 13 13 13 13 13 13 13 1 1 1 1 1 12 13 1 1 1 4 4 5 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 // SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_netem.c Network emulator * * Many of the algorithms and ideas for this came from * NIST Net which is not copyrighted. * * Authors: Stephen Hemminger <shemminger@osdl.org> * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> */ #include <linux/mm.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/prandom.h> #include <linux/rtnetlink.h> #include <linux/reciprocal_div.h> #include <linux/rbtree.h> #include <net/gso.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> #define VERSION "1.3" /* Network Emulation Queuing algorithm. ==================================== Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based Network Emulation Tool [2] Luigi Rizzo, DummyNet for FreeBSD ---------------------------------------------------------------- This started out as a simple way to delay outgoing packets to test TCP but has grown to include most of the functionality of a full blown network emulator like NISTnet. It can delay packets and add random jitter (and correlation). The random distribution can be loaded from a table as well to provide normal, Pareto, or experimental curves. Packet loss, duplication, and reordering can also be emulated. This qdisc does not do classification that can be handled in layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. Correlated Loss Generator models Added generation of correlated loss according to the "Gilbert-Elliot" model, a 4-state markov model. References: [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general and intuitive loss model for packet networks and its implementation in the Netem module in the Linux kernel", available in [1] Authors: Stefano Salsano <stefano.salsano at uniroma2.it Fabio Ludovici <fabio.ludovici at yahoo.it> */ struct disttable { u32 size; s16 table[] __counted_by(size); }; struct netem_sched_data { /* internal t(ime)fifo qdisc uses t_root and sch->limit */ struct rb_root t_root; /* a linear queue; reduces rbtree rebalancing when jitter is low */ struct sk_buff *t_head; struct sk_buff *t_tail; u32 t_len; /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; struct qdisc_watchdog watchdog; s64 latency; s64 jitter; u32 loss; u32 ecn; u32 limit; u32 counter; u32 gap; u32 duplicate; u32 reorder; u32 corrupt; u64 rate; s32 packet_overhead; u32 cell_size; struct reciprocal_value cell_size_reciprocal; s32 cell_overhead; struct crndstate { u32 last; u32 rho; } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; struct prng { u64 seed; struct rnd_state prng_state; } prng; struct disttable *delay_dist; enum { CLG_RANDOM, CLG_4_STATES, CLG_GILB_ELL, } loss_model; enum { TX_IN_GAP_PERIOD = 1, TX_IN_BURST_PERIOD, LOST_IN_GAP_PERIOD, LOST_IN_BURST_PERIOD, } _4_state_model; enum { GOOD_STATE = 1, BAD_STATE, } GE_state_model; /* Correlated Loss Generation models */ struct clgstate { /* state of the Markov chain */ u8 state; /* 4-states and Gilbert-Elliot models */ u32 a1; /* p13 for 4-states or p for GE */ u32 a2; /* p31 for 4-states or r for GE */ u32 a3; /* p32 for 4-states or h for GE */ u32 a4; /* p14 for 4-states or 1-k for GE */ u32 a5; /* p23 used only in 4-states */ } clg; struct tc_netem_slot slot_config; struct slotstate { u64 slot_next; s32 packets_left; s32 bytes_left; } slot; struct disttable *slot_dist; }; /* Time stamp put into socket buffer control block * Only valid when skbs are in our internal t(ime)fifo queue. * * As skb->rbnode uses same storage than skb->next, skb->prev and skb->tstamp, * and skb->next & skb->prev are scratch space for a qdisc, * we save skb->tstamp value in skb->cb[] before destroying it. */ struct netem_skb_cb { u64 time_to_send; }; static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { /* we assume we can use skb next/prev/tstamp as storage for rb_node */ qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } /* init_crandom - initialize correlated random number generator * Use entropy source for initial seed. */ static void init_crandom(struct crndstate *state, unsigned long rho) { state->rho = rho; state->last = get_random_u32(); } /* get_crandom - correlated random number generator * Next number depends on last value. * rho is scaled to avoid floating point. */ static u32 get_crandom(struct crndstate *state, struct prng *p) { u64 value, rho; unsigned long answer; struct rnd_state *s = &p->prng_state; if (!state || state->rho == 0) /* no correlation */ return prandom_u32_state(s); value = prandom_u32_state(s); rho = (u64)state->rho + 1; answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; state->last = answer; return answer; } /* loss_4state - 4-state model loss generator * Generates losses according to the 4-state Markov chain adopted in * the GI (General and Intuitive) loss model. */ static bool loss_4state(struct netem_sched_data *q) { struct clgstate *clg = &q->clg; u32 rnd = prandom_u32_state(&q->prng.prng_state); /* * Makes a comparison between rnd and the transition * probabilities outgoing from the current state, then decides the * next state and if the next packet has to be transmitted or lost. * The four states correspond to: * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period * LOST_IN_GAP_PERIOD => isolated losses within a gap period * LOST_IN_BURST_PERIOD => lost packets within a burst period * TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period */ switch (clg->state) { case TX_IN_GAP_PERIOD: if (rnd < clg->a4) { clg->state = LOST_IN_GAP_PERIOD; return true; } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { clg->state = LOST_IN_BURST_PERIOD; return true; } else if (clg->a1 + clg->a4 < rnd) { clg->state = TX_IN_GAP_PERIOD; } break; case TX_IN_BURST_PERIOD: if (rnd < clg->a5) { clg->state = LOST_IN_BURST_PERIOD; return true; } else { clg->state = TX_IN_BURST_PERIOD; } break; case LOST_IN_BURST_PERIOD: if (rnd < clg->a3) clg->state = TX_IN_BURST_PERIOD; else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { clg->state = TX_IN_GAP_PERIOD; } else if (clg->a2 + clg->a3 < rnd) { clg->state = LOST_IN_BURST_PERIOD; return true; } break; case LOST_IN_GAP_PERIOD: clg->state = TX_IN_GAP_PERIOD; break; } return false; } /* loss_gilb_ell - Gilbert-Elliot model loss generator * Generates losses according to the Gilbert-Elliot loss model or * its special cases (Gilbert or Simple Gilbert) * * Makes a comparison between random number and the transition * probabilities outgoing from the current state, then decides the * next state. A second random number is extracted and the comparison * with the loss probability of the current state decides if the next * packet will be transmitted or lost. */ static bool loss_gilb_ell(struct netem_sched_data *q) { struct clgstate *clg = &q->clg; struct rnd_state *s = &q->prng.prng_state; switch (clg->state) { case GOOD_STATE: if (prandom_u32_state(s) < clg->a1) clg->state = BAD_STATE; if (prandom_u32_state(s) < clg->a4) return true; break; case BAD_STATE: if (prandom_u32_state(s) < clg->a2) clg->state = GOOD_STATE; if (prandom_u32_state(s) > clg->a3) return true; } return false; } static bool loss_event(struct netem_sched_data *q) { switch (q->loss_model) { case CLG_RANDOM: /* Random packet drop 0 => none, ~0 => all */ return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng); case CLG_4_STATES: /* 4state loss model algorithm (used also for GI model) * Extracts a value from the markov 4 state loss generator, * if it is 1 drops a packet and if needed writes the event in * the kernel logs */ return loss_4state(q); case CLG_GILB_ELL: /* Gilbert-Elliot loss model algorithm * Extracts a value from the Gilbert-Elliot loss generator, * if it is 1 drops a packet and if needed writes the event in * the kernel logs */ return loss_gilb_ell(q); } return false; /* not reached */ } /* tabledist - return a pseudo-randomly distributed value with mean mu and * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. */ static s64 tabledist(s64 mu, s32 sigma, struct crndstate *state, struct prng *prng, const struct disttable *dist) { s64 x; long t; u32 rnd; if (sigma == 0) return mu; rnd = get_crandom(state, prng); /* default uniform distribution */ if (dist == NULL) return ((rnd % (2 * (u32)sigma)) + mu) - sigma; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; if (x >= 0) x += NETEM_DIST_SCALE/2; else x -= NETEM_DIST_SCALE/2; return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } static u64 packet_time_ns(u64 len, const struct netem_sched_data *q) { len += q->packet_overhead; if (q->cell_size) { u32 cells = reciprocal_divide(len, q->cell_size_reciprocal); if (len > cells * q->cell_size) /* extra cell needed for remainder */ cells++; len = cells * (q->cell_size + q->cell_overhead); } return div64_u64(len * NSEC_PER_SEC, q->rate); } static void tfifo_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct rb_node *p = rb_first(&q->t_root); while (p) { struct sk_buff *skb = rb_to_skb(p); p = rb_next(p); rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } rtnl_kfree_skbs(q->t_head, q->t_tail); q->t_head = NULL; q->t_tail = NULL; q->t_len = 0; } static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); u64 tnext = netem_skb_cb(nskb)->time_to_send; if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) { if (q->t_tail) q->t_tail->next = nskb; else q->t_head = nskb; q->t_tail = nskb; } else { struct rb_node **p = &q->t_root.rb_node, *parent = NULL; while (*p) { struct sk_buff *skb; parent = *p; skb = rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&nskb->rbnode, parent, p); rb_insert_color(&nskb->rbnode, &q->t_root); } q->t_len++; sch->q.qlen++; } /* netem can't properly corrupt a megapacket (like we get from GSO), so instead * when we statistically choose to corrupt one, we instead segment it, returning * the first packet to be corrupted, and re-enqueue the remaining frames */ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct sk_buff *segs; netdev_features_t features = netif_skb_features(skb); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { qdisc_drop(skb, sch, to_free); return NULL; } consume_skb(skb); return segs; } /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. * NET_XMIT_DROP: queue length didn't change. * NET_XMIT_SUCCESS: one skb was queued. */ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct netem_sched_data *q = qdisc_priv(sch); /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2 = NULL; struct sk_buff *segs = NULL; unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) ++count; /* Drop packet? */ if (loss_event(q)) { if (q->ecn && INET_ECN_set_ce(skb)) qdisc_qstats_drop(sch); /* mark packet */ else --count; } if (count == 0) { qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } /* If a delay is expected, orphan the skb. (orphaning usually takes * place at TX completion time, so _before_ the link transit delay) */ if (q->latency || q->jitter || q->rate) skb_orphan_partial(skb); /* * If we need to duplicate packet, then clone it before * original is modified. */ if (count > 1) skb2 = skb_clone(skb, GFP_ATOMIC); /* * Randomized packet corruption. * Make copy if needed since we are modifying * If packet is going to be hardware checksummed, then * do it now in software before we mangle it. */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) { if (skb_is_gso(skb)) { skb = netem_segment(skb, sch, to_free); if (!skb) goto finish_segs; segs = skb->next; skb_mark_not_on_list(skb); qdisc_skb_cb(skb)->pkt_len = skb->len; } skb = skb_unshare(skb, GFP_ATOMIC); if (unlikely(!skb)) { qdisc_qstats_drop(sch); goto finish_segs; } if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) { qdisc_drop(skb, sch, to_free); skb = NULL; goto finish_segs; } skb->data[get_random_u32_below(skb_headlen(skb))] ^= 1<<get_random_u32_below(8); } if (unlikely(q->t_len >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); if (skb2) __qdisc_drop(skb2, to_free); return NET_XMIT_DROP; } /* * If doing duplication then re-insert at top of the * qdisc tree, since parent queuer expects that only one * skb will be queued. */ if (skb2) { struct Qdisc *rootq = qdisc_root_bh(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; rootq->enqueue(skb2, rootq, to_free); q->duplicate = dupsave; skb2 = NULL; } qdisc_qstats_backlog_inc(sch, skb); cb = netem_skb_cb(skb); if (q->gap == 0 || /* not doing reordering */ q->counter < q->gap - 1 || /* inside last reordering gap */ q->reorder < get_crandom(&q->reorder_cor, &q->prng)) { u64 now; s64 delay; delay = tabledist(q->latency, q->jitter, &q->delay_cor, &q->prng, q->delay_dist); now = ktime_get_ns(); if (q->rate) { struct netem_skb_cb *last = NULL; if (sch->q.tail) last = netem_skb_cb(sch->q.tail); if (q->t_root.rb_node) { struct sk_buff *t_skb; struct netem_skb_cb *t_last; t_skb = skb_rb_last(&q->t_root); t_last = netem_skb_cb(t_skb); if (!last || t_last->time_to_send > last->time_to_send) last = t_last; } if (q->t_tail) { struct netem_skb_cb *t_last = netem_skb_cb(q->t_tail); if (!last || t_last->time_to_send > last->time_to_send) last = t_last; } if (last) { /* * Last packet in queue is reference point (now), * calculate this time bonus and subtract * from delay. */ delay -= last->time_to_send - now; delay = max_t(s64, 0, delay); now = last->time_to_send; } delay += packet_time_ns(qdisc_pkt_len(skb), q); } cb->time_to_send = now + delay; ++q->counter; tfifo_enqueue(skb, sch); } else { /* * Do re-ordering by putting one out of N packets at the front * of the queue. */ cb->time_to_send = ktime_get_ns(); q->counter = 0; __qdisc_enqueue_head(skb, &sch->q); sch->qstats.requeues++; } finish_segs: if (skb2) __qdisc_drop(skb2, to_free); if (segs) { unsigned int len, last_len; int rc, nb; len = skb ? skb->len : 0; nb = skb ? 1 : 0; while (segs) { skb2 = segs->next; skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; last_len = segs->len; rc = qdisc_enqueue(segs, sch, to_free); if (rc != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(rc)) qdisc_qstats_drop(sch); } else { nb++; len += last_len; } segs = skb2; } /* Parent qdiscs accounted for 1 skb of size @prev_len */ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); } else if (!skb) { return NET_XMIT_DROP; } return NET_XMIT_SUCCESS; } /* Delay the next round with a new future slot with a * correct number of bytes and packets. */ static void get_slot_next(struct netem_sched_data *q, u64 now) { s64 next_delay; if (!q->slot_dist) next_delay = q->slot_config.min_delay + (get_random_u32() * (q->slot_config.max_delay - q->slot_config.min_delay) >> 32); else next_delay = tabledist(q->slot_config.dist_delay, (s32)(q->slot_config.dist_jitter), NULL, &q->prng, q->slot_dist); q->slot.slot_next = now + next_delay; q->slot.packets_left = q->slot_config.max_packets; q->slot.bytes_left = q->slot_config.max_bytes; } static struct sk_buff *netem_peek(struct netem_sched_data *q) { struct sk_buff *skb = skb_rb_first(&q->t_root); u64 t1, t2; if (!skb) return q->t_head; if (!q->t_head) return skb; t1 = netem_skb_cb(skb)->time_to_send; t2 = netem_skb_cb(q->t_head)->time_to_send; if (t1 < t2) return skb; return q->t_head; } static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb) { if (skb == q->t_head) { q->t_head = skb->next; if (!q->t_head) q->t_tail = NULL; } else { rb_erase(&skb->rbnode, &q->t_root); } } static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; tfifo_dequeue: skb = __qdisc_dequeue_head(&sch->q); if (skb) { deliver: qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); return skb; } skb = netem_peek(q); if (skb) { u64 time_to_send; u64 now = ktime_get_ns(); /* if more time remaining? */ time_to_send = netem_skb_cb(skb)->time_to_send; if (q->slot.slot_next && q->slot.slot_next < time_to_send) get_slot_next(q, now); if (time_to_send <= now && q->slot.slot_next <= now) { netem_erase_head(q, skb); q->t_len--; skb->next = NULL; skb->prev = NULL; /* skb->dev shares skb->rbnode area, * we need to restore its value. */ skb->dev = qdisc_dev(sch); if (q->slot.slot_next) { q->slot.packets_left--; q->slot.bytes_left -= qdisc_pkt_len(skb); if (q->slot.packets_left <= 0 || q->slot.bytes_left <= 0) get_slot_next(q, now); } if (q->qdisc) { unsigned int pkt_len = qdisc_pkt_len(skb); struct sk_buff *to_free = NULL; int err; err = qdisc_enqueue(skb, q->qdisc, &to_free); kfree_skb_list(to_free); if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); sch->qstats.backlog -= pkt_len; sch->q.qlen--; qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } sch->q.qlen--; goto deliver; } if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); if (skb) { sch->q.qlen--; goto deliver; } } qdisc_watchdog_schedule_ns(&q->watchdog, max(time_to_send, q->slot.slot_next)); } if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); if (skb) { sch->q.qlen--; goto deliver; } } return NULL; } static void netem_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); qdisc_reset_queue(sch); tfifo_reset(sch); if (q->qdisc) qdisc_reset(q->qdisc); qdisc_watchdog_cancel(&q->watchdog); } static void dist_free(struct disttable *d) { kvfree(d); } /* * Distribution data is a variable size payload containing * signed 16 bit values. */ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) { size_t n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); struct disttable *d; int i; if (!n || n > NETEM_DIST_MAX) return -EINVAL; d = kvmalloc(struct_size(d, table, n), GFP_KERNEL); if (!d) return -ENOMEM; d->size = n; for (i = 0; i < n; i++) d->table[i] = data[i]; *tbl = d; return 0; } static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_slot *c = nla_data(attr); q->slot_config = *c; if (q->slot_config.max_packets == 0) q->slot_config.max_packets = INT_MAX; if (q->slot_config.max_bytes == 0) q->slot_config.max_bytes = INT_MAX; /* capping dist_jitter to the range acceptable by tabledist() */ q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter)); q->slot.packets_left = q->slot_config.max_packets; q->slot.bytes_left = q->slot_config.max_bytes; if (q->slot_config.min_delay | q->slot_config.max_delay | q->slot_config.dist_jitter) q->slot.slot_next = ktime_get_ns(); else q->slot.slot_next = 0; } static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_corr *c = nla_data(attr); init_crandom(&q->delay_cor, c->delay_corr); init_crandom(&q->loss_cor, c->loss_corr); init_crandom(&q->dup_cor, c->dup_corr); } static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_reorder *r = nla_data(attr); q->reorder = r->probability; init_crandom(&q->reorder_cor, r->correlation); } static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_corrupt *r = nla_data(attr); q->corrupt = r->probability; init_crandom(&q->corrupt_cor, r->correlation); } static void get_rate(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_rate *r = nla_data(attr); q->rate = r->rate; q->packet_overhead = r->packet_overhead; q->cell_size = r->cell_size; q->cell_overhead = r->cell_overhead; if (q->cell_size) q->cell_size_reciprocal = reciprocal_value(q->cell_size); else q->cell_size_reciprocal = (struct reciprocal_value) { 0 }; } static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr) { const struct nlattr *la; int rem; nla_for_each_nested(la, attr, rem) { u16 type = nla_type(la); switch (type) { case NETEM_LOSS_GI: { const struct tc_netem_gimodel *gi = nla_data(la); if (nla_len(la) < sizeof(struct tc_netem_gimodel)) { pr_info("netem: incorrect gi model size\n"); return -EINVAL; } q->loss_model = CLG_4_STATES; q->clg.state = TX_IN_GAP_PERIOD; q->clg.a1 = gi->p13; q->clg.a2 = gi->p31; q->clg.a3 = gi->p32; q->clg.a4 = gi->p14; q->clg.a5 = gi->p23; break; } case NETEM_LOSS_GE: { const struct tc_netem_gemodel *ge = nla_data(la); if (nla_len(la) < sizeof(struct tc_netem_gemodel)) { pr_info("netem: incorrect ge model size\n"); return -EINVAL; } q->loss_model = CLG_GILB_ELL; q->clg.state = GOOD_STATE; q->clg.a1 = ge->p; q->clg.a2 = ge->r; q->clg.a3 = ge->h; q->clg.a4 = ge->k1; break; } default: pr_info("netem: unknown loss type %u\n", type); return -EINVAL; } } return 0; } static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, [TCA_NETEM_ECN] = { .type = NLA_U32 }, [TCA_NETEM_RATE64] = { .type = NLA_U64 }, [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) }, [TCA_NETEM_PRNG_SEED] = { .type = NLA_U64 }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, const struct nla_policy *policy, int len) { int nested_len = nla_len(nla) - NLA_ALIGN(len); if (nested_len < 0) { pr_info("netem: invalid attributes len %d\n", nested_len); return -EINVAL; } if (nested_len >= nla_attr_size(0)) return nla_parse_deprecated(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; } static const struct Qdisc_class_ops netem_class_ops; static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, struct netlink_ext_ack *extack) { struct Qdisc *root, *q; unsigned int i; root = qdisc_root_sleeping(sch); if (sch != root && root->ops->cl_ops == &netem_class_ops) { if (duplicates || ((struct netem_sched_data *)qdisc_priv(root))->duplicate) goto err; } if (!qdisc_dev(root)) return 0; hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) { if (sch != q && q->ops->cl_ops == &netem_class_ops) { if (duplicates || ((struct netem_sched_data *)qdisc_priv(q))->duplicate) goto err; } } return 0; err: NL_SET_ERR_MSG(extack, "netem: cannot mix duplicating netems with other netems in tree"); return -EINVAL; } /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct netem_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_NETEM_MAX + 1]; struct disttable *delay_dist = NULL; struct disttable *slot_dist = NULL; struct tc_netem_qopt *qopt; struct clgstate old_clg; int old_loss_model = CLG_RANDOM; int ret; qopt = nla_data(opt); ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); if (ret < 0) return ret; if (tb[TCA_NETEM_DELAY_DIST]) { ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); if (ret) goto table_free; } if (tb[TCA_NETEM_SLOT_DIST]) { ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); if (ret) goto table_free; } sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; old_loss_model = q->loss_model; if (tb[TCA_NETEM_LOSS]) { ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); if (ret) { q->loss_model = old_loss_model; q->clg = old_clg; goto unlock; } } else { q->loss_model = CLG_RANDOM; } if (delay_dist) swap(q->delay_dist, delay_dist); if (slot_dist) swap(q->slot_dist, slot_dist); sch->limit = qopt->limit; q->latency = PSCHED_TICKS2NS(qopt->latency); q->jitter = PSCHED_TICKS2NS(qopt->jitter); q->limit = qopt->limit; q->gap = qopt->gap; q->counter = 0; q->loss = qopt->loss; ret = check_netem_in_tree(sch, qopt->duplicate, extack); if (ret) goto unlock; q->duplicate = qopt->duplicate; /* for compatibility with earlier versions. * if gap is set, need to assume 100% probability */ if (q->gap) q->reorder = ~0; if (tb[TCA_NETEM_CORR]) get_correlation(q, tb[TCA_NETEM_CORR]); if (tb[TCA_NETEM_REORDER]) get_reorder(q, tb[TCA_NETEM_REORDER]); if (tb[TCA_NETEM_CORRUPT]) get_corrupt(q, tb[TCA_NETEM_CORRUPT]); if (tb[TCA_NETEM_RATE]) get_rate(q, tb[TCA_NETEM_RATE]); if (tb[TCA_NETEM_RATE64]) q->rate = max_t(u64, q->rate, nla_get_u64(tb[TCA_NETEM_RATE64])); if (tb[TCA_NETEM_LATENCY64]) q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]); if (tb[TCA_NETEM_JITTER64]) q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]); if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); if (tb[TCA_NETEM_SLOT]) get_slot(q, tb[TCA_NETEM_SLOT]); /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); if (tb[TCA_NETEM_PRNG_SEED]) q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); else q->prng.seed = get_random_u64(); prandom_seed_state(&q->prng.prng_state, q->prng.seed); unlock: sch_tree_unlock(sch); table_free: dist_free(delay_dist); dist_free(slot_dist); return ret; } static int netem_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct netem_sched_data *q = qdisc_priv(sch); int ret; qdisc_watchdog_init(&q->watchdog, sch); if (!opt) return -EINVAL; q->loss_model = CLG_RANDOM; ret = netem_change(sch, opt, extack); if (ret) pr_info("netem: change failed\n"); return ret; } static void netem_destroy(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); if (q->qdisc) qdisc_put(q->qdisc); dist_free(q->delay_dist); dist_free(q->slot_dist); } static int dump_loss_model(const struct netem_sched_data *q, struct sk_buff *skb) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); if (nest == NULL) goto nla_put_failure; switch (q->loss_model) { case CLG_RANDOM: /* legacy loss model */ nla_nest_cancel(skb, nest); return 0; /* no data */ case CLG_4_STATES: { struct tc_netem_gimodel gi = { .p13 = q->clg.a1, .p31 = q->clg.a2, .p32 = q->clg.a3, .p14 = q->clg.a4, .p23 = q->clg.a5, }; if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi)) goto nla_put_failure; break; } case CLG_GILB_ELL: { struct tc_netem_gemodel ge = { .p = q->clg.a1, .r = q->clg.a2, .h = q->clg.a3, .k1 = q->clg.a4, }; if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge)) goto nla_put_failure; break; } } nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) { const struct netem_sched_data *q = qdisc_priv(sch); struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); struct tc_netem_qopt qopt; struct tc_netem_corr cor; struct tc_netem_reorder reorder; struct tc_netem_corrupt corrupt; struct tc_netem_rate rate; struct tc_netem_slot slot; qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), UINT_MAX); qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; qopt.gap = q->gap; qopt.duplicate = q->duplicate; if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) goto nla_put_failure; if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency)) goto nla_put_failure; if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter)) goto nla_put_failure; cor.delay_corr = q->delay_cor.rho; cor.loss_corr = q->loss_cor.rho; cor.dup_corr = q->dup_cor.rho; if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor)) goto nla_put_failure; reorder.probability = q->reorder; reorder.correlation = q->reorder_cor.rho; if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder)) goto nla_put_failure; corrupt.probability = q->corrupt; corrupt.correlation = q->corrupt_cor.rho; if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) goto nla_put_failure; if (q->rate >= (1ULL << 32)) { if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate, TCA_NETEM_PAD)) goto nla_put_failure; rate.rate = ~0U; } else { rate.rate = q->rate; } rate.packet_overhead = q->packet_overhead; rate.cell_size = q->cell_size; rate.cell_overhead = q->cell_overhead; if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate)) goto nla_put_failure; if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn)) goto nla_put_failure; if (dump_loss_model(q, skb) != 0) goto nla_put_failure; if (q->slot_config.min_delay | q->slot_config.max_delay | q->slot_config.dist_jitter) { slot = q->slot_config; if (slot.max_packets == INT_MAX) slot.max_packets = 0; if (slot.max_bytes == INT_MAX) slot.max_bytes = 0; if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot)) goto nla_put_failure; } if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed, TCA_NETEM_PAD)) goto nla_put_failure; return nla_nest_end(skb, nla); nla_put_failure: nlmsg_trim(skb, nla); return -1; } static int netem_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct netem_sched_data *q = qdisc_priv(sch); if (cl != 1 || !q->qdisc) /* only one class */ return -ENOENT; tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; } static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct netem_sched_data *q = qdisc_priv(sch); *old = qdisc_replace(sch, new, &q->qdisc); return 0; } static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) { struct netem_sched_data *q = qdisc_priv(sch); return q->qdisc; } static unsigned long netem_find(struct Qdisc *sch, u32 classid) { return 1; } static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { if (!tc_qdisc_stats_dump(sch, 1, walker)) return; } } static const struct Qdisc_class_ops netem_class_ops = { .graft = netem_graft, .leaf = netem_leaf, .find = netem_find, .walk = netem_walk, .dump = netem_dump_class, }; static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .id = "netem", .cl_ops = &netem_class_ops, .priv_size = sizeof(struct netem_sched_data), .enqueue = netem_enqueue, .dequeue = netem_dequeue, .peek = qdisc_peek_dequeued, .init = netem_init, .reset = netem_reset, .destroy = netem_destroy, .change = netem_change, .dump = netem_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("netem"); static int __init netem_module_init(void) { pr_info("netem: version " VERSION "\n"); return register_qdisc(&netem_qdisc_ops); } static void __exit netem_module_exit(void) { unregister_qdisc(&netem_qdisc_ops); } module_init(netem_module_init) module_exit(netem_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Network characteristics emulator qdisc");
13 8 7 1 12 11 1 10 3 3 3 1 2 1 6 2 2 2 1 7 6 6 6 1 6 1 6 6 6 1 13 1 4 4 4 4 4 4 4 1 1 4 1 4 4 4 4 4 2 1 10 10 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 // SPDX-License-Identifier: GPL-2.0+ /* net/sched/act_ctinfo.c netfilter ctinfo connmark actions * * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/pkt_cls.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_ctinfo.h> #include <net/tc_act/tc_ctinfo.h> #include <net/tc_wrapper.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_zones.h> static struct tc_action_ops act_ctinfo_ops; static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, struct sk_buff *skb, int wlen, int proto) { u8 dscp, newdscp; newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ~INET_ECN_MASK; switch (proto) { case NFPROTO_IPV4: dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK; if (dscp != newdscp) { if (likely(!skb_try_make_writable(skb, wlen))) { ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, newdscp); atomic64_inc(&ca->stats_dscp_set); } else { atomic64_inc(&ca->stats_dscp_error); } } break; case NFPROTO_IPV6: dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK; if (dscp != newdscp) { if (likely(!skb_try_make_writable(skb, wlen))) { ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, newdscp); atomic64_inc(&ca->stats_dscp_set); } else { atomic64_inc(&ca->stats_dscp_error); } } break; default: break; } } static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, struct sk_buff *skb) { atomic64_inc(&ca->stats_cpmark_set); skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash = NULL; struct tcf_ctinfo *ca = to_ctinfo(a); struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; enum ip_conntrack_info ctinfo; struct tcf_ctinfo_params *cp; struct nf_conn *ct; int proto, wlen; cp = rcu_dereference_bh(ca->params); tcf_lastuse_update(&ca->tcf_tm); tcf_action_update_bstats(&ca->common, skb); wlen = skb_network_offset(skb); switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV4; break; case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV6; break; default: goto out; } ct = nf_ct_get(skb, &ctinfo); if (!ct) { /* look harder, usually ingress */ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, cp->net, &tuple)) goto out; zone.id = cp->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; thash = nf_conntrack_find_get(cp->net, &zone, &tuple); if (!thash) goto out; ct = nf_ct_tuplehash_to_ctrack(thash); } if (cp->mode & CTINFO_MODE_DSCP) if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); if (cp->mode & CTINFO_MODE_CPMARK) tcf_ctinfo_cpmark_set(ct, ca, cp, skb); if (thash) nf_ct_put(ct); out: return cp->action; } static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = { [TCA_CTINFO_ACT] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_ctinfo)), [TCA_CTINFO_ZONE] = { .type = NLA_U16 }, [TCA_CTINFO_PARMS_DSCP_MASK] = { .type = NLA_U32 }, [TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 }, [TCA_CTINFO_PARMS_CPMARK_MASK] = { .type = NLA_U32 }, }; static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; struct tcf_ctinfo_params *cp_new; struct tcf_chain *goto_ch = NULL; struct tc_ctinfo *actparm; struct tcf_ctinfo *ci; u8 dscpmaskshift; int ret = 0, err; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "ctinfo requires attributes to be passed"); return -EINVAL; } err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, extack); if (err < 0) return err; if (!tb[TCA_CTINFO_ACT]) { NL_SET_ERR_MSG_MOD(extack, "Missing required TCA_CTINFO_ACT attribute"); return -EINVAL; } actparm = nla_data(tb[TCA_CTINFO_ACT]); /* do some basic validation here before dynamically allocating things */ /* that we would otherwise have to clean up. */ if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) { dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]); /* need contiguous 6 bit mask */ dscpmaskshift = dscpmask ? __ffs(dscpmask) : 0; if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CTINFO_PARMS_DSCP_MASK], "dscp mask must be 6 contiguous bits"); return -EINVAL; } dscpstatemask = nla_get_u32_default(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK], 0); /* mask & statemask must not overlap */ if (dscpmask & dscpstatemask) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CTINFO_PARMS_DSCP_STATEMASK], "dscp statemask must not overlap dscp mask"); return -EINVAL; } } /* done the validation:now to the actual action allocation */ index = actparm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_ctinfo_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (err > 0) { if (bind) /* don't override defaults */ return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } } else { return err; } err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; ci = to_ctinfo(*a); cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL); if (unlikely(!cp_new)) { err = -ENOMEM; goto put_chain; } cp_new->net = net; cp_new->zone = nla_get_u16_default(tb[TCA_CTINFO_ZONE], 0); if (dscpmask) { cp_new->dscpmask = dscpmask; cp_new->dscpmaskshift = dscpmaskshift; cp_new->dscpstatemask = dscpstatemask; cp_new->mode |= CTINFO_MODE_DSCP; } if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) { cp_new->cpmarkmask = nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]); cp_new->mode |= CTINFO_MODE_CPMARK; } cp_new->action = actparm->action; spin_lock_bh(&ci->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch); cp_new = rcu_replace_pointer(ci->params, cp_new, lockdep_is_held(&ci->tcf_lock)); spin_unlock_bh(&ci->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (cp_new) kfree_rcu(cp_new, rcu); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { const struct tcf_ctinfo *ci = to_ctinfo(a); unsigned char *b = skb_tail_pointer(skb); const struct tcf_ctinfo_params *cp; struct tc_ctinfo opt = { .index = ci->tcf_index, .refcnt = refcount_read(&ci->tcf_refcnt) - ref, .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, }; struct tcf_t t; rcu_read_lock(); cp = rcu_dereference(ci->params); tcf_tm_dump(&t, &ci->tcf_tm); if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD)) goto nla_put_failure; opt.action = cp->action; if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt)) goto nla_put_failure; if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone)) goto nla_put_failure; if (cp->mode & CTINFO_MODE_DSCP) { if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK, cp->dscpmask)) goto nla_put_failure; if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK, cp->dscpstatemask)) goto nla_put_failure; } if (cp->mode & CTINFO_MODE_CPMARK) { if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK, cp->cpmarkmask)) goto nla_put_failure; } if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET, atomic64_read(&ci->stats_dscp_set), TCA_CTINFO_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR, atomic64_read(&ci->stats_dscp_error), TCA_CTINFO_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET, atomic64_read(&ci->stats_cpmark_set), TCA_CTINFO_PAD)) goto nla_put_failure; rcu_read_unlock(); return skb->len; nla_put_failure: rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } static void tcf_ctinfo_cleanup(struct tc_action *a) { struct tcf_ctinfo *ci = to_ctinfo(a); struct tcf_ctinfo_params *cp; cp = rcu_dereference_protected(ci->params, 1); if (cp) kfree_rcu(cp, rcu); } static struct tc_action_ops act_ctinfo_ops = { .kind = "ctinfo", .id = TCA_ID_CTINFO, .owner = THIS_MODULE, .act = tcf_ctinfo_act, .dump = tcf_ctinfo_dump, .init = tcf_ctinfo_init, .cleanup= tcf_ctinfo_cleanup, .size = sizeof(struct tcf_ctinfo), }; MODULE_ALIAS_NET_ACT("ctinfo"); static __net_init int ctinfo_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); return tc_action_net_init(net, tn, &act_ctinfo_ops); } static void __net_exit ctinfo_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_ctinfo_ops.net_id); } static struct pernet_operations ctinfo_net_ops = { .init = ctinfo_init_net, .exit_batch = ctinfo_exit_net, .id = &act_ctinfo_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init ctinfo_init_module(void) { return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops); } static void __exit ctinfo_cleanup_module(void) { tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops); } module_init(ctinfo_init_module); module_exit(ctinfo_cleanup_module); MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>"); MODULE_DESCRIPTION("Connection tracking mark actions"); MODULE_LICENSE("GPL");
1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 // SPDX-License-Identifier: GPL-2.0 /* * (C) 2001 Clemson University and The University of Chicago * (C) 2011 Omnibond Systems * * Changes by Acxiom Corporation to implement generic service_operation() * function, Copyright Acxiom Corporation, 2005. * * See COPYING in top-level directory. */ /* * In-kernel waitqueue operations. */ #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-bufmap.h" static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, long timeout, int flags) __acquires(op->lock); static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) __releases(op->lock); /* * What we do in this function is to walk the list of operations that are * present in the request queue and mark them as purged. * NOTE: This is called from the device close after client-core has * guaranteed that no new operations could appear on the list since the * client-core is anyway going to exit. */ void purge_waiting_ops(void) { struct orangefs_kernel_op_s *op, *tmp; spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), get_opname_string(op)); set_op_state_purged(op); gossip_debug(GOSSIP_DEV_DEBUG, "%s: op:%s: op_state:%d: process:%s:\n", __func__, get_opname_string(op), op->op_state, current->comm); } spin_unlock(&orangefs_request_list_lock); } /* * submits a ORANGEFS operation and waits for it to complete * * Note op->downcall.status will contain the status of the operation (in * errno format), whether provided by pvfs2-client or a result of failure to * service the operation. If the caller wishes to distinguish, then * op->state can be checked to see if it was serviced or not. * * Returns contents of op->downcall.status for convenience */ int service_operation(struct orangefs_kernel_op_s *op, const char *op_name, int flags) { long timeout = MAX_SCHEDULE_TIMEOUT; int ret = 0; DEFINE_WAIT(wait_entry); op->upcall.tgid = current->tgid; op->upcall.pid = current->pid; retry_servicing: op->downcall.status = 0; gossip_debug(GOSSIP_WAIT_DEBUG, "%s: %s op:%p: process:%s: pid:%d:\n", __func__, op_name, op, current->comm, current->pid); /* * If ORANGEFS_OP_NO_MUTEX was set in flags, we need to avoid * acquiring the request_mutex because we're servicing a * high priority remount operation and the request_mutex is * already taken. */ if (!(flags & ORANGEFS_OP_NO_MUTEX)) { if (flags & ORANGEFS_OP_INTERRUPTIBLE) ret = mutex_lock_interruptible(&orangefs_request_mutex); else ret = mutex_lock_killable(&orangefs_request_mutex); /* * check to see if we were interrupted while waiting for * mutex */ if (ret < 0) { op->downcall.status = ret; gossip_debug(GOSSIP_WAIT_DEBUG, "%s: service_operation interrupted.\n", __func__); return ret; } } /* queue up the operation */ spin_lock(&orangefs_request_list_lock); spin_lock(&op->lock); set_op_state_waiting(op); gossip_debug(GOSSIP_DEV_DEBUG, "%s: op:%s: op_state:%d: process:%s:\n", __func__, get_opname_string(op), op->op_state, current->comm); /* add high priority remount op to the front of the line. */ if (flags & ORANGEFS_OP_PRIORITY) list_add(&op->list, &orangefs_request_list); else list_add_tail(&op->list, &orangefs_request_list); spin_unlock(&op->lock); wake_up_interruptible(&orangefs_request_list_waitq); if (!__is_daemon_in_service()) { gossip_debug(GOSSIP_WAIT_DEBUG, "%s:client core is NOT in service.\n", __func__); /* * Don't wait for the userspace component to return if * the filesystem is being umounted anyway. */ if (op->upcall.type == ORANGEFS_VFS_OP_FS_UMOUNT) timeout = 0; else timeout = op_timeout_secs * HZ; } spin_unlock(&orangefs_request_list_lock); if (!(flags & ORANGEFS_OP_NO_MUTEX)) mutex_unlock(&orangefs_request_mutex); ret = wait_for_matching_downcall(op, timeout, flags); gossip_debug(GOSSIP_WAIT_DEBUG, "%s: wait_for_matching_downcall returned %d for %p\n", __func__, ret, op); /* got matching downcall; make sure status is in errno format */ if (!ret) { spin_unlock(&op->lock); op->downcall.status = orangefs_normalize_to_errno(op->downcall.status); ret = op->downcall.status; goto out; } /* failed to get matching downcall */ if (ret == -ETIMEDOUT) { gossip_err("%s: %s -- wait timed out; aborting attempt.\n", __func__, op_name); } /* * remove a waiting op from the request list or * remove an in-progress op from the in-progress list. */ orangefs_clean_up_interrupted_operation(op); op->downcall.status = ret; /* retry if operation has not been serviced and if requested */ if (ret == -EAGAIN) { op->attempts++; timeout = op_timeout_secs * HZ; gossip_debug(GOSSIP_WAIT_DEBUG, "orangefs: tag %llu (%s)" " -- operation to be retried (%d attempt)\n", llu(op->tag), op_name, op->attempts); /* * io ops (ops that use the shared memory buffer) have * to be returned to their caller for a retry. Other ops * can just be recycled here. */ if (!op->uses_shared_memory) goto retry_servicing; } out: gossip_debug(GOSSIP_WAIT_DEBUG, "%s: %s returning: %d for %p.\n", __func__, op_name, ret, op); return ret; } /* This can get called on an I/O op if it had a bad service_operation. */ bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op) { u64 tag = op->tag; if (!op_state_in_progress(op)) return false; op->slot_to_free = op->upcall.req.io.buf_index; memset(&op->upcall, 0, sizeof(op->upcall)); memset(&op->downcall, 0, sizeof(op->downcall)); op->upcall.type = ORANGEFS_VFS_OP_CANCEL; op->upcall.req.cancel.op_tag = tag; op->downcall.type = ORANGEFS_VFS_OP_INVALID; op->downcall.status = -1; orangefs_new_tag(op); spin_lock(&orangefs_request_list_lock); /* orangefs_request_list_lock is enough of a barrier here */ if (!__is_daemon_in_service()) { spin_unlock(&orangefs_request_list_lock); return false; } spin_lock(&op->lock); set_op_state_waiting(op); gossip_debug(GOSSIP_DEV_DEBUG, "%s: op:%s: op_state:%d: process:%s:\n", __func__, get_opname_string(op), op->op_state, current->comm); list_add(&op->list, &orangefs_request_list); spin_unlock(&op->lock); spin_unlock(&orangefs_request_list_lock); gossip_debug(GOSSIP_WAIT_DEBUG, "Attempting ORANGEFS operation cancellation of tag %llu\n", llu(tag)); return true; } /* * Change an op to the "given up" state and remove it from its list. */ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) __releases(op->lock) { /* * handle interrupted cases depending on what state we were in when * the interruption is detected. * * Called with op->lock held. */ /* * List manipulation code elsewhere will ignore ops that * have been given up upon. */ op->op_state |= OP_VFS_STATE_GIVEN_UP; if (list_empty(&op->list)) { /* caught copying to/from daemon */ BUG_ON(op_state_serviced(op)); spin_unlock(&op->lock); wait_for_completion(&op->waitq); } else if (op_state_waiting(op)) { /* * upcall hasn't been read; remove op from upcall request * list. */ spin_unlock(&op->lock); spin_lock(&orangefs_request_list_lock); list_del_init(&op->list); spin_unlock(&orangefs_request_list_lock); gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p from request_list\n", op); } else if (op_state_in_progress(op)) { /* op must be removed from the in progress htable */ spin_unlock(&op->lock); spin_lock(&orangefs_htable_ops_in_progress_lock); list_del_init(&op->list); spin_unlock(&orangefs_htable_ops_in_progress_lock); gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p" " from htable_ops_in_progress\n", op); } else { spin_unlock(&op->lock); gossip_err("interrupted operation is in a weird state 0x%x\n", op->op_state); } reinit_completion(&op->waitq); } /* * Sleeps on waitqueue waiting for matching downcall. * If client-core finishes servicing, then we are good to go. * else if client-core exits, we get woken up here, and retry with a timeout * * When this call returns to the caller, the specified op will no * longer be in either the in_progress hash table or on the request list. * * Returns 0 on success and -errno on failure * Errors are: * EAGAIN in case we want the caller to requeue and try again.. * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this * operation since client-core seems to be exiting too often * or if we were interrupted. * * Returns with op->lock taken. */ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, long timeout, int flags) __acquires(op->lock) { long n; int writeback = flags & ORANGEFS_OP_WRITEBACK, interruptible = flags & ORANGEFS_OP_INTERRUPTIBLE; /* * There's a "schedule_timeout" inside of these wait * primitives, during which the op is out of the hands of the * user process that needs something done and is being * manipulated by the client-core process. */ if (writeback) n = wait_for_completion_io_timeout(&op->waitq, timeout); else if (!writeback && interruptible) n = wait_for_completion_interruptible_timeout(&op->waitq, timeout); else /* !writeback && !interruptible but compiler complains */ n = wait_for_completion_killable_timeout(&op->waitq, timeout); spin_lock(&op->lock); if (op_state_serviced(op)) return 0; if (unlikely(n < 0)) { gossip_debug(GOSSIP_WAIT_DEBUG, "%s: operation interrupted, tag %llu, %p\n", __func__, llu(op->tag), op); return -EINTR; } if (op_state_purged(op)) { gossip_debug(GOSSIP_WAIT_DEBUG, "%s: operation purged, tag %llu, %p, %d\n", __func__, llu(op->tag), op, op->attempts); return (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? -EAGAIN : -EIO; } /* must have timed out, then... */ gossip_debug(GOSSIP_WAIT_DEBUG, "%s: operation timed out, tag %llu, %p, %d)\n", __func__, llu(op->tag), op, op->attempts); return -ETIMEDOUT; }
27 25 25 24 25 24 25 20 19 19 20 19 27 27 26 27 27 27 27 26 26 13 27 2 26 27 24 1 24 16 1 16 1 16 27 20 20 20 19 20 17 16 16 20 18 2 2 2 20 19 12 20 20 20 13 13 9 9 9 8 13 3 4 4 4 31 31 31 23 27 27 26 27 26 27 26 26 27 23 27 6 27 6 2 27 27 27 27 27 27 26 27 25 25 25 25 25 24 25 27 24 27 25 25 25 26 27 27 27 26 41 35 41 42 42 42 42 42 43 27 9 9 9 8 8 9 25 25 24 25 25 9 8 25 24 24 25 24 25 9 8 9 8 9 9 8 9 9 8 8 9 25 25 25 25 25 25 25 25 25 25 25 2 23 47 27 43 40 71 1 43 44 73 73 6 6 48 9 8 9 9 8 8 9 7 9 9 9 9 9 9 25 4 4 27 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 // SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/driver.c - most of the driver model stuff for usb * * (C) Copyright 2005 Greg Kroah-Hartman <gregkh@suse.de> * * based on drivers/usb/usb.c which had the following copyrights: * (C) Copyright Linus Torvalds 1999 * (C) Copyright Johannes Erdfelt 1999-2001 * (C) Copyright Andreas Gal 1999 * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 (new USB architecture) * (C) Copyright Randy Dunlap 2000 * (C) Copyright David Brownell 2000-2004 * (C) Copyright Yggdrasil Computing, Inc. 2000 * (usb_device_id matching changes by Adam J. Richter) * (C) Copyright Greg Kroah-Hartman 2002-2003 * * Released under the GPLv2 only. * * NOTE! This is not actually a driver at all, rather this is * just a collection of helper routines that implement the * matching, probing, releasing, suspending and resuming for * real drivers. * */ #include <linux/device.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/usb.h> #include <linux/usb/quirks.h> #include <linux/usb/hcd.h> #include "usb.h" /* * Adds a new dynamic USBdevice ID to this driver, * and cause the driver to probe for all devices again. */ ssize_t usb_store_new_id(struct usb_dynids *dynids, const struct usb_device_id *id_table, struct device_driver *driver, const char *buf, size_t count) { struct usb_dynid *dynid; u32 idVendor = 0; u32 idProduct = 0; unsigned int bInterfaceClass = 0; u32 refVendor, refProduct; int fields = 0; int retval = 0; fields = sscanf(buf, "%x %x %x %x %x", &idVendor, &idProduct, &bInterfaceClass, &refVendor, &refProduct); if (fields < 2) return -EINVAL; dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); if (!dynid) return -ENOMEM; INIT_LIST_HEAD(&dynid->node); dynid->id.idVendor = idVendor; dynid->id.idProduct = idProduct; dynid->id.match_flags = USB_DEVICE_ID_MATCH_DEVICE; if (fields > 2 && bInterfaceClass) { if (bInterfaceClass > 255) { retval = -EINVAL; goto fail; } dynid->id.bInterfaceClass = (u8)bInterfaceClass; dynid->id.match_flags |= USB_DEVICE_ID_MATCH_INT_CLASS; } if (fields > 4) { const struct usb_device_id *id = id_table; if (!id) { retval = -ENODEV; goto fail; } for (; id->match_flags; id++) if (id->idVendor == refVendor && id->idProduct == refProduct) break; if (id->match_flags) { dynid->id.driver_info = id->driver_info; } else { retval = -ENODEV; goto fail; } } mutex_lock(&usb_dynids_lock); list_add_tail(&dynid->node, &dynids->list); mutex_unlock(&usb_dynids_lock); retval = driver_attach(driver); if (retval) return retval; return count; fail: kfree(dynid); return retval; } EXPORT_SYMBOL_GPL(usb_store_new_id); ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf) { struct usb_dynid *dynid; size_t count = 0; guard(mutex)(&usb_dynids_lock); list_for_each_entry(dynid, &dynids->list, node) if (dynid->id.bInterfaceClass != 0) count += sysfs_emit_at(buf, count, "%04x %04x %02x\n", dynid->id.idVendor, dynid->id.idProduct, dynid->id.bInterfaceClass); else count += sysfs_emit_at(buf, count, "%04x %04x\n", dynid->id.idVendor, dynid->id.idProduct); return count; } EXPORT_SYMBOL_GPL(usb_show_dynids); static ssize_t new_id_show(struct device_driver *driver, char *buf) { struct usb_driver *usb_drv = to_usb_driver(driver); return usb_show_dynids(&usb_drv->dynids, buf); } static ssize_t new_id_store(struct device_driver *driver, const char *buf, size_t count) { struct usb_driver *usb_drv = to_usb_driver(driver); return usb_store_new_id(&usb_drv->dynids, usb_drv->id_table, driver, buf, count); } static DRIVER_ATTR_RW(new_id); /* * Remove a USB device ID from this driver */ static ssize_t remove_id_store(struct device_driver *driver, const char *buf, size_t count) { struct usb_dynid *dynid, *n; struct usb_driver *usb_driver = to_usb_driver(driver); u32 idVendor; u32 idProduct; int fields; fields = sscanf(buf, "%x %x", &idVendor, &idProduct); if (fields < 2) return -EINVAL; guard(mutex)(&usb_dynids_lock); list_for_each_entry_safe(dynid, n, &usb_driver->dynids.list, node) { struct usb_device_id *id = &dynid->id; if ((id->idVendor == idVendor) && (id->idProduct == idProduct)) { list_del(&dynid->node); kfree(dynid); break; } } return count; } static ssize_t remove_id_show(struct device_driver *driver, char *buf) { return new_id_show(driver, buf); } static DRIVER_ATTR_RW(remove_id); static int usb_create_newid_files(struct usb_driver *usb_drv) { int error = 0; if (usb_drv->no_dynamic_id) goto exit; if (usb_drv->probe != NULL) { error = driver_create_file(&usb_drv->driver, &driver_attr_new_id); if (error == 0) { error = driver_create_file(&usb_drv->driver, &driver_attr_remove_id); if (error) driver_remove_file(&usb_drv->driver, &driver_attr_new_id); } } exit: return error; } static void usb_remove_newid_files(struct usb_driver *usb_drv) { if (usb_drv->no_dynamic_id) return; if (usb_drv->probe != NULL) { driver_remove_file(&usb_drv->driver, &driver_attr_remove_id); driver_remove_file(&usb_drv->driver, &driver_attr_new_id); } } static void usb_free_dynids(struct usb_driver *usb_drv) { struct usb_dynid *dynid, *n; guard(mutex)(&usb_dynids_lock); list_for_each_entry_safe(dynid, n, &usb_drv->dynids.list, node) { list_del(&dynid->node); kfree(dynid); } } static const struct usb_device_id *usb_match_dynamic_id(struct usb_interface *intf, const struct usb_driver *drv) { struct usb_dynid *dynid; guard(mutex)(&usb_dynids_lock); list_for_each_entry(dynid, &drv->dynids.list, node) { if (usb_match_one_id(intf, &dynid->id)) { return &dynid->id; } } return NULL; } /* called from driver core with dev locked */ static int usb_probe_device(struct device *dev) { struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); struct usb_device *udev = to_usb_device(dev); int error = 0; dev_dbg(dev, "%s\n", __func__); /* TODO: Add real matching code */ /* The device should always appear to be in use * unless the driver supports autosuspend. */ if (!udriver->supports_autosuspend) error = usb_autoresume_device(udev); if (error) return error; if (udriver->generic_subclass) error = usb_generic_driver_probe(udev); if (error) return error; /* Probe the USB device with the driver in hand, but only * defer to a generic driver in case the current USB * device driver has an id_table or a match function; i.e., * when the device driver was explicitly matched against * a device. * * If the device driver does not have either of these, * then we assume that it can bind to any device and is * not truly a more specialized/non-generic driver, so a * return value of -ENODEV should not force the device * to be handled by the generic USB driver, as there * can still be another, more specialized, device driver. * * This accommodates the usbip driver. * * TODO: What if, in the future, there are multiple * specialized USB device drivers for a particular device? * In such cases, there is a need to try all matching * specialised device drivers prior to setting the * use_generic_driver bit. */ if (udriver->probe) error = udriver->probe(udev); else if (!udriver->generic_subclass) error = -EINVAL; if (error == -ENODEV && udriver != &usb_generic_driver && (udriver->id_table || udriver->match)) { udev->use_generic_driver = 1; return -EPROBE_DEFER; } return error; } /* called from driver core with dev locked */ static int usb_unbind_device(struct device *dev) { struct usb_device *udev = to_usb_device(dev); struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); if (udriver->disconnect) udriver->disconnect(udev); if (udriver->generic_subclass) usb_generic_driver_disconnect(udev); if (!udriver->supports_autosuspend) usb_autosuspend_device(udev); return 0; } /* called from driver core with dev locked */ static int usb_probe_interface(struct device *dev) { struct usb_driver *driver = to_usb_driver(dev->driver); struct usb_interface *intf = to_usb_interface(dev); struct usb_device *udev = interface_to_usbdev(intf); const struct usb_device_id *id; int error = -ENODEV; int lpm_disable_error = -ENODEV; dev_dbg(dev, "%s\n", __func__); intf->needs_binding = 0; if (usb_device_is_owned(udev)) return error; if (udev->authorized == 0) { dev_info(&intf->dev, "Device is not authorized for usage\n"); return error; } else if (intf->authorized == 0) { dev_info(&intf->dev, "Interface %d is not authorized for usage\n", intf->altsetting->desc.bInterfaceNumber); return error; } id = usb_match_dynamic_id(intf, driver); if (!id) id = usb_match_id(intf, driver->id_table); if (!id) return error; dev_dbg(dev, "%s - got id\n", __func__); error = usb_autoresume_device(udev); if (error) return error; intf->condition = USB_INTERFACE_BINDING; /* Probed interfaces are initially active. They are * runtime-PM-enabled only if the driver has autosuspend support. * They are sensitive to their children's power states. */ pm_runtime_set_active(dev); pm_suspend_ignore_children(dev, false); if (driver->supports_autosuspend) pm_runtime_enable(dev); /* If the new driver doesn't allow hub-initiated LPM, and we can't * disable hub-initiated LPM, then fail the probe. * * Otherwise, leaving LPM enabled should be harmless, because the * endpoint intervals should remain the same, and the U1/U2 timeouts * should remain the same. * * If we need to install alt setting 0 before probe, or another alt * setting during probe, that should also be fine. usb_set_interface() * will attempt to disable LPM, and fail if it can't disable it. */ if (driver->disable_hub_initiated_lpm) { lpm_disable_error = usb_unlocked_disable_lpm(udev); if (lpm_disable_error) { dev_err(&intf->dev, "%s Failed to disable LPM for driver %s\n", __func__, driver->name); error = lpm_disable_error; goto err; } } /* Carry out a deferred switch to altsetting 0 */ if (intf->needs_altsetting0) { error = usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); if (error < 0) goto err; intf->needs_altsetting0 = 0; } error = driver->probe(intf, id); if (error) goto err; intf->condition = USB_INTERFACE_BOUND; /* If the LPM disable succeeded, balance the ref counts. */ if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); usb_autosuspend_device(udev); return error; err: usb_set_intfdata(intf, NULL); intf->needs_remote_wakeup = 0; intf->condition = USB_INTERFACE_UNBOUND; /* If the LPM disable succeeded, balance the ref counts. */ if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); /* Unbound interfaces are always runtime-PM-disabled and -suspended */ if (driver->supports_autosuspend) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); usb_autosuspend_device(udev); return error; } /* called from driver core with dev locked */ static int usb_unbind_interface(struct device *dev) { struct usb_driver *driver = to_usb_driver(dev->driver); struct usb_interface *intf = to_usb_interface(dev); struct usb_host_endpoint *ep, **eps = NULL; struct usb_device *udev; int i, j, error, r; int lpm_disable_error = -ENODEV; intf->condition = USB_INTERFACE_UNBINDING; /* Autoresume for set_interface call below */ udev = interface_to_usbdev(intf); error = usb_autoresume_device(udev); /* If hub-initiated LPM policy may change, attempt to disable LPM until * the driver is unbound. If LPM isn't disabled, that's fine because it * wouldn't be enabled unless all the bound interfaces supported * hub-initiated LPM. */ if (driver->disable_hub_initiated_lpm) lpm_disable_error = usb_unlocked_disable_lpm(udev); /* * Terminate all URBs for this interface unless the driver * supports "soft" unbinding and the device is still present. */ if (!driver->soft_unbind || udev->state == USB_STATE_NOTATTACHED) usb_disable_interface(udev, intf, false); driver->disconnect(intf); /* Free streams */ for (i = 0, j = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { ep = &intf->cur_altsetting->endpoint[i]; if (ep->streams == 0) continue; if (j == 0) { eps = kmalloc_array(USB_MAXENDPOINTS, sizeof(void *), GFP_KERNEL); if (!eps) break; } eps[j++] = ep; } if (j) { usb_free_streams(intf, eps, j, GFP_KERNEL); kfree(eps); } /* Reset other interface state. * We cannot do a Set-Interface if the device is suspended or * if it is prepared for a system sleep (since installing a new * altsetting means creating new endpoint device entries). * When either of these happens, defer the Set-Interface. */ if (intf->cur_altsetting->desc.bAlternateSetting == 0) { /* Already in altsetting 0 so skip Set-Interface. * Just re-enable it without affecting the endpoint toggles. */ usb_enable_interface(udev, intf, false); } else if (!error && !intf->dev.power.is_prepared) { r = usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); if (r < 0) intf->needs_altsetting0 = 1; } else { intf->needs_altsetting0 = 1; } usb_set_intfdata(intf, NULL); intf->condition = USB_INTERFACE_UNBOUND; intf->needs_remote_wakeup = 0; /* Attempt to re-enable USB3 LPM, if the disable succeeded. */ if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); /* Unbound interfaces are always runtime-PM-disabled and -suspended */ if (driver->supports_autosuspend) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); if (!error) usb_autosuspend_device(udev); return 0; } static void usb_shutdown_interface(struct device *dev) { struct usb_interface *intf = to_usb_interface(dev); struct usb_driver *driver; if (!dev->driver) return; driver = to_usb_driver(dev->driver); if (driver->shutdown) driver->shutdown(intf); } /** * usb_driver_claim_interface - bind a driver to an interface * @driver: the driver to be bound * @iface: the interface to which it will be bound; must be in the * usb device's active configuration * @data: driver data associated with that interface * * This is used by usb device drivers that need to claim more than one * interface on a device when probing (audio and acm are current examples). * No device driver should directly modify internal usb_interface or * usb_device structure members. * * Callers must own the device lock, so driver probe() entries don't need * extra locking, but other call contexts may need to explicitly claim that * lock. * * Return: 0 on success. */ int usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void *data) { struct device *dev; int retval = 0; if (!iface) return -ENODEV; dev = &iface->dev; if (dev->driver) return -EBUSY; /* reject claim if interface is not authorized */ if (!iface->authorized) return -ENODEV; dev->driver = &driver->driver; usb_set_intfdata(iface, data); iface->needs_binding = 0; iface->condition = USB_INTERFACE_BOUND; /* Claimed interfaces are initially inactive (suspended) and * runtime-PM-enabled, but only if the driver has autosuspend * support. Otherwise they are marked active, to prevent the * device from being autosuspended, but left disabled. In either * case they are sensitive to their children's power states. */ pm_suspend_ignore_children(dev, false); if (driver->supports_autosuspend) pm_runtime_enable(dev); else pm_runtime_set_active(dev); /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() */ if (device_is_registered(dev)) retval = device_bind_driver(dev); if (retval) { dev->driver = NULL; usb_set_intfdata(iface, NULL); iface->needs_remote_wakeup = 0; iface->condition = USB_INTERFACE_UNBOUND; /* * Unbound interfaces are always runtime-PM-disabled * and runtime-PM-suspended */ if (driver->supports_autosuspend) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); } return retval; } EXPORT_SYMBOL_GPL(usb_driver_claim_interface); /** * usb_driver_release_interface - unbind a driver from an interface * @driver: the driver to be unbound * @iface: the interface from which it will be unbound * * This can be used by drivers to release an interface without waiting * for their disconnect() methods to be called. In typical cases this * also causes the driver disconnect() method to be called. * * This call is synchronous, and may not be used in an interrupt context. * Callers must own the device lock, so driver disconnect() entries don't * need extra locking, but other call contexts may need to explicitly claim * that lock. */ void usb_driver_release_interface(struct usb_driver *driver, struct usb_interface *iface) { struct device *dev = &iface->dev; /* this should never happen, don't release something that's not ours */ if (!dev->driver || dev->driver != &driver->driver) return; /* don't release from within disconnect() */ if (iface->condition != USB_INTERFACE_BOUND) return; iface->condition = USB_INTERFACE_UNBINDING; /* Release via the driver core only if the interface * has already been registered */ if (device_is_registered(dev)) { device_release_driver(dev); } else { device_lock(dev); usb_unbind_interface(dev); dev->driver = NULL; device_unlock(dev); } } EXPORT_SYMBOL_GPL(usb_driver_release_interface); /* returns 0 if no match, 1 if match */ int usb_match_device(struct usb_device *dev, const struct usb_device_id *id) { if ((id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) && id->idVendor != le16_to_cpu(dev->descriptor.idVendor)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_PRODUCT) && id->idProduct != le16_to_cpu(dev->descriptor.idProduct)) return 0; /* No need to test id->bcdDevice_lo != 0, since 0 is never greater than any unsigned number. */ if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO) && (id->bcdDevice_lo > le16_to_cpu(dev->descriptor.bcdDevice))) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI) && (id->bcdDevice_hi < le16_to_cpu(dev->descriptor.bcdDevice))) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_CLASS) && (id->bDeviceClass != dev->descriptor.bDeviceClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_SUBCLASS) && (id->bDeviceSubClass != dev->descriptor.bDeviceSubClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_PROTOCOL) && (id->bDeviceProtocol != dev->descriptor.bDeviceProtocol)) return 0; return 1; } /* returns 0 if no match, 1 if match */ int usb_match_one_id_intf(struct usb_device *dev, struct usb_host_interface *intf, const struct usb_device_id *id) { /* The interface class, subclass, protocol and number should never be * checked for a match if the device class is Vendor Specific, * unless the match record specifies the Vendor ID. */ if (dev->descriptor.bDeviceClass == USB_CLASS_VENDOR_SPEC && !(id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) && (id->match_flags & (USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS | USB_DEVICE_ID_MATCH_INT_PROTOCOL | USB_DEVICE_ID_MATCH_INT_NUMBER))) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_CLASS) && (id->bInterfaceClass != intf->desc.bInterfaceClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_SUBCLASS) && (id->bInterfaceSubClass != intf->desc.bInterfaceSubClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_PROTOCOL) && (id->bInterfaceProtocol != intf->desc.bInterfaceProtocol)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER) && (id->bInterfaceNumber != intf->desc.bInterfaceNumber)) return 0; return 1; } /* returns 0 if no match, 1 if match */ int usb_match_one_id(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_host_interface *intf; struct usb_device *dev; /* proc_connectinfo in devio.c may call us with id == NULL. */ if (id == NULL) return 0; intf = interface->cur_altsetting; dev = interface_to_usbdev(interface); if (!usb_match_device(dev, id)) return 0; return usb_match_one_id_intf(dev, intf, id); } EXPORT_SYMBOL_GPL(usb_match_one_id); /** * usb_match_id - find first usb_device_id matching device or interface * @interface: the interface of interest * @id: array of usb_device_id structures, terminated by zero entry * * usb_match_id searches an array of usb_device_id's and returns * the first one matching the device or interface, or null. * This is used when binding (or rebinding) a driver to an interface. * Most USB device drivers will use this indirectly, through the usb core, * but some layered driver frameworks use it directly. * These device tables are exported with MODULE_DEVICE_TABLE, through * modutils, to support the driver loading functionality of USB hotplugging. * * Return: The first matching usb_device_id, or %NULL. * * What Matches: * * The "match_flags" element in a usb_device_id controls which * members are used. If the corresponding bit is set, the * value in the device_id must match its corresponding member * in the device or interface descriptor, or else the device_id * does not match. * * "driver_info" is normally used only by device drivers, * but you can create a wildcard "matches anything" usb_device_id * as a driver's "modules.usbmap" entry if you provide an id with * only a nonzero "driver_info" field. If you do this, the USB device * driver's probe() routine should use additional intelligence to * decide whether to bind to the specified interface. * * What Makes Good usb_device_id Tables: * * The match algorithm is very simple, so that intelligence in * driver selection must come from smart driver id records. * Unless you have good reasons to use another selection policy, * provide match elements only in related groups, and order match * specifiers from specific to general. Use the macros provided * for that purpose if you can. * * The most specific match specifiers use device descriptor * data. These are commonly used with product-specific matches; * the USB_DEVICE macro lets you provide vendor and product IDs, * and you can also match against ranges of product revisions. * These are widely used for devices with application or vendor * specific bDeviceClass values. * * Matches based on device class/subclass/protocol specifications * are slightly more general; use the USB_DEVICE_INFO macro, or * its siblings. These are used with single-function devices * where bDeviceClass doesn't specify that each interface has * its own class. * * Matches based on interface class/subclass/protocol are the * most general; they let drivers bind to any interface on a * multiple-function device. Use the USB_INTERFACE_INFO * macro, or its siblings, to match class-per-interface style * devices (as recorded in bInterfaceClass). * * Note that an entry created by USB_INTERFACE_INFO won't match * any interface if the device class is set to Vendor-Specific. * This is deliberate; according to the USB spec the meanings of * the interface class/subclass/protocol for these devices are also * vendor-specific, and hence matching against a standard product * class wouldn't work anyway. If you really want to use an * interface-based match for such a device, create a match record * that also specifies the vendor ID. (Unforunately there isn't a * standard macro for creating records like this.) * * Within those groups, remember that not all combinations are * meaningful. For example, don't give a product version range * without vendor and product IDs; or specify a protocol without * its associated class and subclass. */ const struct usb_device_id *usb_match_id(struct usb_interface *interface, const struct usb_device_id *id) { /* proc_connectinfo in devio.c may call us with id == NULL. */ if (id == NULL) return NULL; /* It is important to check that id->driver_info is nonzero, since an entry that is all zeroes except for a nonzero id->driver_info is the way to create an entry that indicates that the driver want to examine every device and interface. */ for (; id->idVendor || id->idProduct || id->bDeviceClass || id->bInterfaceClass || id->driver_info; id++) { if (usb_match_one_id(interface, id)) return id; } return NULL; } EXPORT_SYMBOL_GPL(usb_match_id); const struct usb_device_id *usb_device_match_id(struct usb_device *udev, const struct usb_device_id *id) { if (!id) return NULL; for (; id->idVendor || id->idProduct ; id++) { if (usb_match_device(udev, id)) return id; } return NULL; } EXPORT_SYMBOL_GPL(usb_device_match_id); bool usb_driver_applicable(struct usb_device *udev, const struct usb_device_driver *udrv) { if (udrv->id_table && udrv->match) return usb_device_match_id(udev, udrv->id_table) != NULL && udrv->match(udev); if (udrv->id_table) return usb_device_match_id(udev, udrv->id_table) != NULL; if (udrv->match) return udrv->match(udev); return false; } static int usb_device_match(struct device *dev, const struct device_driver *drv) { /* devices and interfaces are handled separately */ if (is_usb_device(dev)) { struct usb_device *udev; const struct usb_device_driver *udrv; /* interface drivers never match devices */ if (!is_usb_device_driver(drv)) return 0; udev = to_usb_device(dev); udrv = to_usb_device_driver(drv); /* If the device driver under consideration does not have a * id_table or a match function, then let the driver's probe * function decide. */ if (!udrv->id_table && !udrv->match) return 1; return usb_driver_applicable(udev, udrv); } else if (is_usb_interface(dev)) { struct usb_interface *intf; const struct usb_driver *usb_drv; const struct usb_device_id *id; /* device drivers never match interfaces */ if (is_usb_device_driver(drv)) return 0; intf = to_usb_interface(dev); usb_drv = to_usb_driver(drv); id = usb_match_id(intf, usb_drv->id_table); if (id) return 1; id = usb_match_dynamic_id(intf, usb_drv); if (id) return 1; } return 0; } static int usb_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct usb_device *usb_dev; if (is_usb_device(dev)) { usb_dev = to_usb_device(dev); } else if (is_usb_interface(dev)) { const struct usb_interface *intf = to_usb_interface(dev); usb_dev = interface_to_usbdev(intf); } else { return 0; } if (usb_dev->devnum < 0) { /* driver is often null here; dev_dbg() would oops */ pr_debug("usb %s: already deleted?\n", dev_name(dev)); return -ENODEV; } if (!usb_dev->bus) { pr_debug("usb %s: bus removed?\n", dev_name(dev)); return -ENODEV; } /* per-device configurations are common */ if (add_uevent_var(env, "PRODUCT=%x/%x/%x", le16_to_cpu(usb_dev->descriptor.idVendor), le16_to_cpu(usb_dev->descriptor.idProduct), le16_to_cpu(usb_dev->descriptor.bcdDevice))) return -ENOMEM; /* class-based driver binding models */ if (add_uevent_var(env, "TYPE=%d/%d/%d", usb_dev->descriptor.bDeviceClass, usb_dev->descriptor.bDeviceSubClass, usb_dev->descriptor.bDeviceProtocol)) return -ENOMEM; return 0; } static int __usb_bus_reprobe_drivers(struct device *dev, void *data) { struct usb_device_driver *new_udriver = data; struct usb_device *udev; int ret; /* Don't reprobe if current driver isn't usb_generic_driver */ if (dev->driver != &usb_generic_driver.driver) return 0; udev = to_usb_device(dev); if (!usb_driver_applicable(udev, new_udriver)) return 0; ret = device_reprobe(dev); if (ret && ret != -EPROBE_DEFER) dev_err(dev, "Failed to reprobe device (error %d)\n", ret); return 0; } bool is_usb_device_driver(const struct device_driver *drv) { return drv->probe == usb_probe_device; } /** * usb_register_device_driver - register a USB device (not interface) driver * @new_udriver: USB operations for the device driver * @owner: module owner of this driver. * * Registers a USB device driver with the USB core. The list of * unattached devices will be rescanned whenever a new driver is * added, allowing the new driver to attach to any recognized devices. * * Return: A negative error code on failure and 0 on success. */ int usb_register_device_driver(struct usb_device_driver *new_udriver, struct module *owner) { int retval = 0; if (usb_disabled()) return -ENODEV; new_udriver->driver.name = new_udriver->name; new_udriver->driver.bus = &usb_bus_type; new_udriver->driver.probe = usb_probe_device; new_udriver->driver.remove = usb_unbind_device; new_udriver->driver.owner = owner; new_udriver->driver.dev_groups = new_udriver->dev_groups; retval = driver_register(&new_udriver->driver); if (!retval) { pr_info("%s: registered new device driver %s\n", usbcore_name, new_udriver->name); /* * Check whether any device could be better served with * this new driver */ bus_for_each_dev(&usb_bus_type, NULL, new_udriver, __usb_bus_reprobe_drivers); } else { pr_err("%s: error %d registering device driver %s\n", usbcore_name, retval, new_udriver->name); } return retval; } EXPORT_SYMBOL_GPL(usb_register_device_driver); /** * usb_deregister_device_driver - unregister a USB device (not interface) driver * @udriver: USB operations of the device driver to unregister * Context: must be able to sleep * * Unlinks the specified driver from the internal USB driver list. */ void usb_deregister_device_driver(struct usb_device_driver *udriver) { pr_info("%s: deregistering device driver %s\n", usbcore_name, udriver->name); driver_unregister(&udriver->driver); } EXPORT_SYMBOL_GPL(usb_deregister_device_driver); /** * usb_register_driver - register a USB interface driver * @new_driver: USB operations for the interface driver * @owner: module owner of this driver. * @mod_name: module name string * * Registers a USB interface driver with the USB core. The list of * unattached interfaces will be rescanned whenever a new driver is * added, allowing the new driver to attach to any recognized interfaces. * * Return: A negative error code on failure and 0 on success. * * NOTE: if you want your driver to use the USB major number, you must call * usb_register_dev() to enable that functionality. This function no longer * takes care of that. */ int usb_register_driver(struct usb_driver *new_driver, struct module *owner, const char *mod_name) { int retval = 0; if (usb_disabled()) return -ENODEV; new_driver->driver.name = new_driver->name; new_driver->driver.bus = &usb_bus_type; new_driver->driver.probe = usb_probe_interface; new_driver->driver.remove = usb_unbind_interface; new_driver->driver.shutdown = usb_shutdown_interface; new_driver->driver.owner = owner; new_driver->driver.mod_name = mod_name; new_driver->driver.dev_groups = new_driver->dev_groups; INIT_LIST_HEAD(&new_driver->dynids.list); retval = driver_register(&new_driver->driver); if (retval) goto out; retval = usb_create_newid_files(new_driver); if (retval) goto out_newid; pr_info("%s: registered new interface driver %s\n", usbcore_name, new_driver->name); return 0; out_newid: driver_unregister(&new_driver->driver); out: pr_err("%s: error %d registering interface driver %s\n", usbcore_name, retval, new_driver->name); return retval; } EXPORT_SYMBOL_GPL(usb_register_driver); /** * usb_deregister - unregister a USB interface driver * @driver: USB operations of the interface driver to unregister * Context: must be able to sleep * * Unlinks the specified driver from the internal USB driver list. * * NOTE: If you called usb_register_dev(), you still need to call * usb_deregister_dev() to clean up your driver's allocated minor numbers, * this * call will no longer do it for you. */ void usb_deregister(struct usb_driver *driver) { pr_info("%s: deregistering interface driver %s\n", usbcore_name, driver->name); usb_remove_newid_files(driver); driver_unregister(&driver->driver); usb_free_dynids(driver); } EXPORT_SYMBOL_GPL(usb_deregister); /* Forced unbinding of a USB interface driver, either because * it doesn't support pre_reset/post_reset/reset_resume or * because it doesn't support suspend/resume. * * The caller must hold @intf's device's lock, but not @intf's lock. */ void usb_forced_unbind_intf(struct usb_interface *intf) { struct usb_driver *driver = to_usb_driver(intf->dev.driver); dev_dbg(&intf->dev, "forced unbind\n"); usb_driver_release_interface(driver, intf); /* Mark the interface for later rebinding */ intf->needs_binding = 1; } /* * Unbind drivers for @udev's marked interfaces. These interfaces have * the needs_binding flag set, for example by usb_resume_interface(). * * The caller must hold @udev's device lock. */ static void unbind_marked_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; if (intf->dev.driver && intf->needs_binding) usb_forced_unbind_intf(intf); } } } /* Delayed forced unbinding of a USB interface driver and scan * for rebinding. * * The caller must hold @intf's device's lock, but not @intf's lock. * * Note: Rebinds will be skipped if a system sleep transition is in * progress and the PM "complete" callback hasn't occurred yet. */ static void usb_rebind_intf(struct usb_interface *intf) { int rc; /* Delayed unbind of an existing driver */ if (intf->dev.driver) usb_forced_unbind_intf(intf); /* Try to rebind the interface */ if (!intf->dev.power.is_prepared) { intf->needs_binding = 0; rc = device_attach(&intf->dev); if (rc < 0 && rc != -EPROBE_DEFER) dev_warn(&intf->dev, "rebind failed: %d\n", rc); } } /* * Rebind drivers to @udev's marked interfaces. These interfaces have * the needs_binding flag set. * * The caller must hold @udev's device lock. */ static void rebind_marked_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; if (intf->needs_binding) usb_rebind_intf(intf); } } } /* * Unbind all of @udev's marked interfaces and then rebind all of them. * This ordering is necessary because some drivers claim several interfaces * when they are first probed. * * The caller must hold @udev's device lock. */ void usb_unbind_and_rebind_marked_interfaces(struct usb_device *udev) { unbind_marked_interfaces(udev); rebind_marked_interfaces(udev); } #ifdef CONFIG_PM /* Unbind drivers for @udev's interfaces that don't support suspend/resume * There is no check for reset_resume here because it can be determined * only during resume whether reset_resume is needed. * * The caller must hold @udev's device lock. */ static void unbind_no_pm_drivers_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; struct usb_driver *drv; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; if (intf->dev.driver) { drv = to_usb_driver(intf->dev.driver); if (!drv->suspend || !drv->resume) usb_forced_unbind_intf(intf); } } } } static int usb_suspend_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) goto done; /* For devices that don't have a driver, we do a generic suspend. */ if (udev->dev.driver) udriver = to_usb_device_driver(udev->dev.driver); else { udev->do_remote_wakeup = 0; udriver = &usb_generic_driver; } if (udriver->suspend) status = udriver->suspend(udev, msg); if (status == 0 && udriver->generic_subclass) status = usb_generic_driver_suspend(udev, msg); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } static int usb_resume_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED) goto done; /* Can't resume it if it doesn't have a driver. */ if (udev->dev.driver == NULL) { status = -ENOTCONN; goto done; } /* Non-root devices on a full/low-speed bus must wait for their * companion high-speed root hub, in case a handoff is needed. */ if (!PMSG_IS_AUTO(msg) && udev->parent && udev->bus->hs_companion) device_pm_wait_for_dev(&udev->dev, &udev->bus->hs_companion->root_hub->dev); if (udev->quirks & USB_QUIRK_RESET_RESUME) udev->reset_resume = 1; udriver = to_usb_device_driver(udev->dev.driver); if (udriver->generic_subclass) status = usb_generic_driver_resume(udev, msg); if (status == 0 && udriver->resume) status = udriver->resume(udev, msg); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } static int usb_suspend_interface(struct usb_device *udev, struct usb_interface *intf, pm_message_t msg) { struct usb_driver *driver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED || intf->condition == USB_INTERFACE_UNBOUND) goto done; driver = to_usb_driver(intf->dev.driver); /* at this time we know the driver supports suspend */ status = driver->suspend(intf, msg); if (status && !PMSG_IS_AUTO(msg)) dev_err(&intf->dev, "suspend error %d\n", status); done: dev_vdbg(&intf->dev, "%s: status %d\n", __func__, status); return status; } static int usb_resume_interface(struct usb_device *udev, struct usb_interface *intf, pm_message_t msg, int reset_resume) { struct usb_driver *driver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED) goto done; /* Don't let autoresume interfere with unbinding */ if (intf->condition == USB_INTERFACE_UNBINDING) goto done; /* Can't resume it if it doesn't have a driver. */ if (intf->condition == USB_INTERFACE_UNBOUND) { /* Carry out a deferred switch to altsetting 0 */ if (intf->needs_altsetting0 && !intf->dev.power.is_prepared) { usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); intf->needs_altsetting0 = 0; } goto done; } /* Don't resume if the interface is marked for rebinding */ if (intf->needs_binding) goto done; driver = to_usb_driver(intf->dev.driver); if (reset_resume) { if (driver->reset_resume) { status = driver->reset_resume(intf); if (status) dev_err(&intf->dev, "%s error %d\n", "reset_resume", status); } else { intf->needs_binding = 1; dev_dbg(&intf->dev, "no reset_resume for driver %s?\n", driver->name); } } else { status = driver->resume(intf); if (status) dev_err(&intf->dev, "resume error %d\n", status); } done: dev_vdbg(&intf->dev, "%s: status %d\n", __func__, status); /* Later we will unbind the driver and/or reprobe, if necessary */ return status; } /** * usb_suspend_both - suspend a USB device and its interfaces * @udev: the usb_device to suspend * @msg: Power Management message describing this state transition * * This is the central routine for suspending USB devices. It calls the * suspend methods for all the interface drivers in @udev and then calls * the suspend method for @udev itself. When the routine is called in * autosuspend, if an error occurs at any stage, all the interfaces * which were suspended are resumed so that they remain in the same * state as the device, but when called from system sleep, all error * from suspend methods of interfaces and the non-root-hub device itself * are simply ignored, so all suspended interfaces are only resumed * to the device's state when @udev is root-hub and its suspend method * returns failure. * * Autosuspend requests originating from a child device or an interface * driver may be made without the protection of @udev's device lock, but * all other suspend calls will hold the lock. Usbcore will insure that * method calls do not arrive during bind, unbind, or reset operations. * However drivers must be prepared to handle suspend calls arriving at * unpredictable times. * * This routine can run only in process context. * * Return: 0 if the suspend succeeded. */ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) { int status = 0; int i = 0, n = 0; struct usb_interface *intf; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) goto done; if (msg.event == PM_EVENT_SUSPEND && usb_offload_check(udev)) { dev_dbg(&udev->dev, "device offloaded, skip suspend.\n"); udev->offload_at_suspend = 1; } /* Suspend all the interfaces and then udev itself */ if (udev->actconfig) { n = udev->actconfig->desc.bNumInterfaces; for (i = n - 1; i >= 0; --i) { intf = udev->actconfig->interface[i]; /* * Don't suspend interfaces with remote wakeup while * the controller is active. This preserves pending * interrupt urbs, allowing interrupt events to be * handled during system suspend. */ if (udev->offload_at_suspend && intf->needs_remote_wakeup) { dev_dbg(&intf->dev, "device offloaded, skip suspend.\n"); continue; } status = usb_suspend_interface(udev, intf, msg); /* Ignore errors during system sleep transitions */ if (!PMSG_IS_AUTO(msg)) status = 0; if (status != 0) break; } } if (status == 0) { if (!udev->offload_at_suspend) status = usb_suspend_device(udev, msg); /* * Ignore errors from non-root-hub devices during * system sleep transitions. For the most part, * these devices should go to low power anyway when * the entire bus is suspended. */ if (udev->parent && !PMSG_IS_AUTO(msg)) status = 0; /* * If the device is inaccessible, don't try to resume * suspended interfaces and just return the error. */ if (status && status != -EBUSY) { int err; u16 devstat; err = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, &devstat); if (err) { dev_err(&udev->dev, "Failed to suspend device, error %d\n", status); goto done; } } } /* If the suspend failed, resume interfaces that did get suspended */ if (status != 0) { if (udev->actconfig) { msg.event ^= (PM_EVENT_SUSPEND | PM_EVENT_RESUME); while (++i < n) { intf = udev->actconfig->interface[i]; usb_resume_interface(udev, intf, msg, 0); } } /* If the suspend succeeded then prevent any more URB submissions * and flush any outstanding URBs. */ } else { udev->can_submit = 0; if (!udev->offload_at_suspend) { for (i = 0; i < 16; ++i) { usb_hcd_flush_endpoint(udev, udev->ep_out[i]); usb_hcd_flush_endpoint(udev, udev->ep_in[i]); } } } done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } /** * usb_resume_both - resume a USB device and its interfaces * @udev: the usb_device to resume * @msg: Power Management message describing this state transition * * This is the central routine for resuming USB devices. It calls the * resume method for @udev and then calls the resume methods for all * the interface drivers in @udev. * * Autoresume requests originating from a child device or an interface * driver may be made without the protection of @udev's device lock, but * all other resume calls will hold the lock. Usbcore will insure that * method calls do not arrive during bind, unbind, or reset operations. * However drivers must be prepared to handle resume calls arriving at * unpredictable times. * * This routine can run only in process context. * * Return: 0 on success. */ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) { int status = 0; int i; struct usb_interface *intf; if (udev->state == USB_STATE_NOTATTACHED) { status = -ENODEV; goto done; } udev->can_submit = 1; /* Resume the device */ if (udev->state == USB_STATE_SUSPENDED || udev->reset_resume) { if (!udev->offload_at_suspend) status = usb_resume_device(udev, msg); else dev_dbg(&udev->dev, "device offloaded, skip resume.\n"); } /* Resume the interfaces */ if (status == 0 && udev->actconfig) { for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { intf = udev->actconfig->interface[i]; /* * Interfaces with remote wakeup aren't suspended * while the controller is active. This preserves * pending interrupt urbs, allowing interrupt events * to be handled during system suspend. */ if (udev->offload_at_suspend && intf->needs_remote_wakeup) { dev_dbg(&intf->dev, "device offloaded, skip resume.\n"); continue; } usb_resume_interface(udev, intf, msg, udev->reset_resume); } } udev->offload_at_suspend = 0; usb_mark_last_busy(udev); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); if (!status) udev->reset_resume = 0; return status; } static void choose_wakeup(struct usb_device *udev, pm_message_t msg) { int w; /* * For FREEZE/QUIESCE, disable remote wakeups so no interrupts get * generated. */ if (msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_QUIESCE) { w = 0; } else { /* * Enable remote wakeup if it is allowed, even if no interface * drivers actually want it. */ w = device_may_wakeup(&udev->dev); } /* * If the device is autosuspended with the wrong wakeup setting, * autoresume now so the setting can be changed. */ if (udev->state == USB_STATE_SUSPENDED && w != udev->do_remote_wakeup) pm_runtime_resume(&udev->dev); udev->do_remote_wakeup = w; } /* The device lock is held by the PM core */ int usb_suspend(struct device *dev, pm_message_t msg) { struct usb_device *udev = to_usb_device(dev); int r; unbind_no_pm_drivers_interfaces(udev); /* From now on we are sure all drivers support suspend/resume * but not necessarily reset_resume() * so we may still need to unbind and rebind upon resume */ choose_wakeup(udev, msg); r = usb_suspend_both(udev, msg); if (r) return r; if (udev->quirks & USB_QUIRK_DISCONNECT_SUSPEND) usb_port_disable(udev); return 0; } /* The device lock is held by the PM core */ int usb_resume_complete(struct device *dev) { struct usb_device *udev = to_usb_device(dev); /* For PM complete calls, all we do is rebind interfaces * whose needs_binding flag is set */ if (udev->state != USB_STATE_NOTATTACHED) rebind_marked_interfaces(udev); return 0; } /* The device lock is held by the PM core */ int usb_resume(struct device *dev, pm_message_t msg) { struct usb_device *udev = to_usb_device(dev); int status; /* For all calls, take the device back to full power and * tell the PM core in case it was autosuspended previously. * Unbind the interfaces that will need rebinding later, * because they fail to support reset_resume. * (This can't be done in usb_resume_interface() * above because it doesn't own the right set of locks.) */ status = usb_resume_both(udev, msg); if (status == 0) { pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); unbind_marked_interfaces(udev); } /* Avoid PM error messages for devices disconnected while suspended * as we'll display regular disconnect messages just a bit later. */ if (status == -ENODEV || status == -ESHUTDOWN) status = 0; return status; } /** * usb_enable_autosuspend - allow a USB device to be autosuspended * @udev: the USB device which may be autosuspended * * This routine allows @udev to be autosuspended. An autosuspend won't * take place until the autosuspend_delay has elapsed and all the other * necessary conditions are satisfied. * * The caller must hold @udev's device lock. */ void usb_enable_autosuspend(struct usb_device *udev) { pm_runtime_allow(&udev->dev); } EXPORT_SYMBOL_GPL(usb_enable_autosuspend); /** * usb_disable_autosuspend - prevent a USB device from being autosuspended * @udev: the USB device which may not be autosuspended * * This routine prevents @udev from being autosuspended and wakes it up * if it is already autosuspended. * * The caller must hold @udev's device lock. */ void usb_disable_autosuspend(struct usb_device *udev) { pm_runtime_forbid(&udev->dev); } EXPORT_SYMBOL_GPL(usb_disable_autosuspend); /** * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces * @udev: the usb_device to autosuspend * * This routine should be called when a core subsystem is finished using * @udev and wants to allow it to autosuspend. Examples would be when * @udev's device file in usbfs is closed or after a configuration change. * * @udev's usage counter is decremented; if it drops to 0 and all the * interfaces are inactive then a delayed autosuspend will be attempted. * The attempt may fail (see autosuspend_check()). * * The caller must hold @udev's device lock. * * This routine can run only in process context. */ void usb_autosuspend_device(struct usb_device *udev) { int status; usb_mark_last_busy(udev); status = pm_runtime_put_sync_autosuspend(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), status); } /** * usb_autoresume_device - immediately autoresume a USB device and its interfaces * @udev: the usb_device to autoresume * * This routine should be called when a core subsystem wants to use @udev * and needs to guarantee that it is not suspended. No autosuspend will * occur until usb_autosuspend_device() is called. (Note that this will * not prevent suspend events originating in the PM core.) Examples would * be when @udev's device file in usbfs is opened or when a remote-wakeup * request is received. * * @udev's usage counter is incremented to prevent subsequent autosuspends. * However if the autoresume fails then the usage counter is re-decremented. * * The caller must hold @udev's device lock. * * This routine can run only in process context. * * Return: 0 on success. A negative error code otherwise. */ int usb_autoresume_device(struct usb_device *udev) { int status; status = pm_runtime_resume_and_get(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), status); return status; } /** * usb_autopm_put_interface - decrement a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be decremented * * This routine should be called by an interface driver when it is * finished using @intf and wants to allow it to autosuspend. A typical * example would be a character-device driver when its device file is * closed. * * The routine decrements @intf's usage counter. When the counter reaches * 0, a delayed autosuspend request for @intf's device is attempted. The * attempt may fail (see autosuspend_check()). * * This routine can run only in process context. */ void usb_autopm_put_interface(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); int status; usb_mark_last_busy(udev); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface); /** * usb_autopm_put_interface_async - decrement a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be decremented * * This routine does much the same thing as usb_autopm_put_interface(): * It decrements @intf's usage counter and schedules a delayed * autosuspend request if the counter is <= 0. The difference is that it * does not perform any synchronization; callers should hold a private * lock and handle all synchronization issues themselves. * * Typically a driver would call this routine during an URB's completion * handler, if no more URBs were pending. * * This routine can run in atomic context. */ void usb_autopm_put_interface_async(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); int status; usb_mark_last_busy(udev); status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async); /** * usb_autopm_put_interface_no_suspend - decrement a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be decremented * * This routine decrements @intf's usage counter but does not carry out an * autosuspend. * * This routine can run in atomic context. */ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); pm_runtime_put_noidle(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); /** * usb_autopm_get_interface - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented * * This routine should be called by an interface driver when it wants to * use @intf and needs to guarantee that it is not suspended. In addition, * the routine prevents @intf from being autosuspended subsequently. (Note * that this will not prevent suspend events originating in the PM core.) * This prevention will persist until usb_autopm_put_interface() is called * or @intf is unbound. A typical example would be a character-device * driver when its device file is opened. * * @intf's usage counter is incremented to prevent subsequent autosuspends. * However if the autoresume fails then the counter is re-decremented. * * This routine can run only in process context. * * Return: 0 on success. */ int usb_autopm_get_interface(struct usb_interface *intf) { int status; status = pm_runtime_resume_and_get(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); return status; } EXPORT_SYMBOL_GPL(usb_autopm_get_interface); /** * usb_autopm_get_interface_async - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented * * This routine does much the same thing as * usb_autopm_get_interface(): It increments @intf's usage counter and * queues an autoresume request if the device is suspended. The * differences are that it does not perform any synchronization (callers * should hold a private lock and handle all synchronization issues * themselves), and it does not autoresume the device directly (it only * queues a request). After a successful call, the device may not yet be * resumed. * * This routine can run in atomic context. * * Return: 0 on success. A negative error code otherwise. */ int usb_autopm_get_interface_async(struct usb_interface *intf) { int status; status = pm_runtime_get(&intf->dev); if (status < 0 && status != -EINPROGRESS) pm_runtime_put_noidle(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); if (status > 0 || status == -EINPROGRESS) status = 0; return status; } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async); /** * usb_autopm_get_interface_no_resume - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented * * This routine increments @intf's usage counter but does not carry out an * autoresume. * * This routine can run in atomic context. */ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); pm_runtime_get_noresume(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); /* Internal routine to check whether we may autosuspend a device. */ static int autosuspend_check(struct usb_device *udev) { int w, i; struct usb_interface *intf; if (udev->state == USB_STATE_NOTATTACHED) return -ENODEV; /* Fail if autosuspend is disabled, or any interfaces are in use, or * any interface drivers require remote wakeup but it isn't available. */ w = 0; if (udev->actconfig) { for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { intf = udev->actconfig->interface[i]; /* We don't need to check interfaces that are * disabled for runtime PM. Either they are unbound * or else their drivers don't support autosuspend * and so they are permanently active. */ if (intf->dev.power.disable_depth) continue; if (atomic_read(&intf->dev.power.usage_count) > 0) return -EBUSY; w |= intf->needs_remote_wakeup; /* Don't allow autosuspend if the device will need * a reset-resume and any of its interface drivers * doesn't include support or needs remote wakeup. */ if (udev->quirks & USB_QUIRK_RESET_RESUME) { struct usb_driver *driver; driver = to_usb_driver(intf->dev.driver); if (!driver->reset_resume || intf->needs_remote_wakeup) return -EOPNOTSUPP; } } } if (w && !device_can_wakeup(&udev->dev)) { dev_dbg(&udev->dev, "remote wakeup needed for autosuspend\n"); return -EOPNOTSUPP; } /* * If the device is a direct child of the root hub and the HCD * doesn't handle wakeup requests, don't allow autosuspend when * wakeup is needed. */ if (w && udev->parent == udev->bus->root_hub && bus_to_hcd(udev->bus)->cant_recv_wakeups) { dev_dbg(&udev->dev, "HCD doesn't handle wakeup requests\n"); return -EOPNOTSUPP; } udev->do_remote_wakeup = w; return 0; } int usb_runtime_suspend(struct device *dev) { struct usb_device *udev = to_usb_device(dev); int status; /* A USB device can be suspended if it passes the various autosuspend * checks. Runtime suspend for a USB device means suspending all the * interfaces and then the device itself. */ if (autosuspend_check(udev) != 0) return -EAGAIN; status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND); /* Allow a retry if autosuspend failed temporarily */ if (status == -EAGAIN || status == -EBUSY) usb_mark_last_busy(udev); /* * The PM core reacts badly unless the return code is 0, * -EAGAIN, or -EBUSY, so always return -EBUSY on an error * (except for root hubs, because they don't suspend through * an upstream port like other USB devices). */ if (status != 0 && udev->parent) return -EBUSY; return status; } int usb_runtime_resume(struct device *dev) { struct usb_device *udev = to_usb_device(dev); int status; /* Runtime resume for a USB device means resuming both the device * and all its interfaces. */ status = usb_resume_both(udev, PMSG_AUTO_RESUME); return status; } int usb_runtime_idle(struct device *dev) { struct usb_device *udev = to_usb_device(dev); /* An idle USB device can be suspended if it passes the various * autosuspend checks. */ if (autosuspend_check(udev) == 0) pm_runtime_autosuspend(dev); /* Tell the core not to suspend it, though. */ return -EBUSY; } static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); int ret = -EPERM; if (hcd->driver->set_usb2_hw_lpm) { ret = hcd->driver->set_usb2_hw_lpm(hcd, udev, enable); if (!ret) udev->usb2_hw_lpm_enabled = enable; } return ret; } int usb_enable_usb2_hardware_lpm(struct usb_device *udev) { if (!udev->usb2_hw_lpm_capable || !udev->usb2_hw_lpm_allowed || udev->usb2_hw_lpm_enabled) return 0; return usb_set_usb2_hardware_lpm(udev, 1); } int usb_disable_usb2_hardware_lpm(struct usb_device *udev) { if (!udev->usb2_hw_lpm_enabled) return 0; return usb_set_usb2_hardware_lpm(udev, 0); } #endif /* CONFIG_PM */ const struct bus_type usb_bus_type = { .name = "usb", .match = usb_device_match, .uevent = usb_uevent, .need_parent_lock = true, };
5 17 17 17 17 17 17 17 16 4 3 1 4 27 28 28 10 10 10 10 10 3 3 1 9 9 8 8 8 10 10 10 1 4 3 3 2 2 1 1 16 16 144 144 145 15 16 16 16 10 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 // SPDX-License-Identifier: GPL-2.0-only /* Event cache for netfilter. */ /* * (C) 2005 Harald Welte <laforge@gnumonks.org> * (C) 2005 Patrick McHardy <kaber@trash.net> * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/netfilter.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/export.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> static DEFINE_MUTEX(nf_ct_ecache_mutex); #define DYING_NULLS_VAL ((1 << 30) + 1) #define ECACHE_MAX_JIFFIES msecs_to_jiffies(10) #define ECACHE_RETRY_JIFFIES msecs_to_jiffies(10) enum retry_state { STATE_CONGESTED, STATE_RESTART, STATE_DONE, }; struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); return &cnet->ecache; } #if IS_MODULE(CONFIG_NF_CT_NETLINK) EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache); #endif static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet) { unsigned long stop = jiffies + ECACHE_MAX_JIFFIES; struct hlist_nulls_head evicted_list; enum retry_state ret = STATE_DONE; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; unsigned int sent; INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL); next: sent = 0; spin_lock_bh(&cnet->ecache.dying_lock); hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); /* The worker owns all entries, ct remains valid until nf_ct_put * in the loop below. */ if (nf_conntrack_event(IPCT_DESTROY, ct)) { ret = STATE_CONGESTED; break; } hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list); if (time_after(stop, jiffies)) { ret = STATE_RESTART; break; } if (sent++ > 16) { spin_unlock_bh(&cnet->ecache.dying_lock); cond_resched(); goto next; } } spin_unlock_bh(&cnet->ecache.dying_lock); hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); nf_ct_put(ct); cond_resched(); } return ret; } static void ecache_work(struct work_struct *work) { struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work); int ret, delay = -1; ret = ecache_work_evict_list(cnet); switch (ret) { case STATE_CONGESTED: delay = ECACHE_RETRY_JIFFIES; break; case STATE_RESTART: delay = 0; break; case STATE_DONE: break; } if (delay >= 0) schedule_delayed_work(&cnet->ecache.dwork, delay); } static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, const u32 events, const u32 missed, const struct nf_ct_event *item) { struct net *net = nf_ct_net(item->ct); struct nf_ct_event_notifier *notify; u32 old, want; int ret; if (!((events | missed) & e->ctmask)) return 0; rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) { rcu_read_unlock(); return 0; } ret = notify->ct_event(events | missed, item); rcu_read_unlock(); if (likely(ret >= 0 && missed == 0)) return 0; do { old = READ_ONCE(e->missed); if (ret < 0) want = old | events; else want = old & ~missed; } while (cmpxchg(&e->missed, old, want) != old); return ret; } static void nf_ct_ecache_tstamp_refresh(struct nf_conntrack_ecache *e) { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP if (local64_read(&e->timestamp)) local64_set(&e->timestamp, ktime_get_real_ns()); #endif } int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, u32 portid, int report) { struct nf_conntrack_ecache *e; struct nf_ct_event item; unsigned int missed; int ret; if (!nf_ct_is_confirmed(ct)) return 0; e = nf_ct_ecache_find(ct); if (!e) return 0; memset(&item, 0, sizeof(item)); item.ct = ct; item.portid = e->portid ? e->portid : portid; item.report = report; /* This is a resent of a destroy event? If so, skip missed */ missed = e->portid ? 0 : e->missed; nf_ct_ecache_tstamp_refresh(e); ret = __nf_conntrack_eventmask_report(e, events, missed, &item); if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) { /* This is a destroy event that has been triggered by a process, * we store the PORTID to include it in the retransmission. */ if (e->portid == 0 && portid != 0) e->portid = portid; } return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) { struct nf_conntrack_ecache *e; struct nf_ct_event item; unsigned int events; if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) return; e = nf_ct_ecache_find(ct); if (e == NULL) return; events = xchg(&e->cache, 0); item.ct = ct; item.portid = 0; item.report = 0; /* We make a copy of the missed event cache without taking * the lock, thus we may send missed events twice. However, * this does not harm and it happens very rarely. */ __nf_conntrack_eventmask_report(e, events, e->missed, &item); } EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report) { struct net *net = nf_ct_exp_net(exp); struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) goto out_unlock; e = nf_ct_ecache_find(exp->master); if (!e) goto out_unlock; if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, .portid = portid, .report = report }; notify->exp_event(1 << event, &item); } out_unlock: rcu_read_unlock(); } void nf_conntrack_register_notifier(struct net *net, const struct nf_ct_event_notifier *new) { struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); WARN_ON_ONCE(notify); rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); void nf_conntrack_unregister_notifier(struct net *net) { mutex_lock(&nf_ct_ecache_mutex); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); /* synchronize_rcu() is called after netns pre_exit */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); if (state == NFCT_ECACHE_DESTROY_FAIL && !delayed_work_pending(&cnet->ecache.dwork)) { schedule_delayed_work(&cnet->ecache.dwork, HZ); net->ct.ecache_dwork_pending = true; } else if (state == NFCT_ECACHE_DESTROY_SENT) { if (!hlist_nulls_empty(&cnet->ecache.dying_list)) mod_delayed_work(system_percpu_wq, &cnet->ecache.dwork, 0); else net->ct.ecache_dwork_pending = false; } } static void nf_ct_ecache_tstamp_new(const struct nf_conn *ct, struct nf_conntrack_ecache *e) { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP u64 ts = 0; if (nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) ts = ktime_get_real_ns(); local64_set(&e->timestamp, ts); #endif } bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; switch (net->ct.sysctl_events) { case 0: /* assignment via template / ruleset? ignore sysctl. */ if (ctmask || expmask) break; return true; case 2: /* autodetect: no event listener, don't allocate extension. */ if (!READ_ONCE(nf_ctnetlink_has_listener)) return true; fallthrough; case 1: /* always allocate an extension. */ if (!ctmask && !expmask) { ctmask = ~0; expmask = ~0; } break; default: WARN_ON_ONCE(1); return true; } e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); if (e) { nf_ct_ecache_tstamp_new(ct, e); e->ctmask = ctmask; e->expmask = expmask; } return e != NULL; } EXPORT_SYMBOL_GPL(nf_ct_ecache_ext_add); #define NF_CT_EVENTS_DEFAULT 2 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; void nf_conntrack_ecache_pernet_init(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); net->ct.sysctl_events = nf_ct_events; INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work); INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL); spin_lock_init(&cnet->ecache.dying_lock); BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */ } void nf_conntrack_ecache_pernet_fini(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); cancel_delayed_work_sync(&cnet->ecache.dwork); }
23 23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 /* * Copyright (c) 2006-2008 Intel Corporation * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> * * DRM core CRTC related functions * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * * Authors: * Keith Packard * Eric Anholt <eric@anholt.net> * Dave Airlie <airlied@linux.ie> * Jesse Barnes <jesse.barnes@intel.com> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/dynamic_debug.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_uapi.h> #include <drm/drm_bridge.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include "drm_crtc_helper_internal.h" DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", "DRM_UT_DRIVER", "DRM_UT_KMS", "DRM_UT_PRIME", "DRM_UT_ATOMIC", "DRM_UT_VBL", "DRM_UT_STATE", "DRM_UT_LEASE", "DRM_UT_DP", "DRM_UT_DRMRES"); /** * DOC: overview * * The CRTC modeset helper library provides a default set_config implementation * in drm_crtc_helper_set_config(). Plus a few other convenience functions using * the same callbacks which drivers can use to e.g. restore the modeset * configuration on resume with drm_helper_resume_force_mode(). * * Note that this helper library doesn't track the current power state of CRTCs * and encoders. It can call callbacks like &drm_encoder_helper_funcs.dpms even * though the hardware is already in the desired state. This deficiency has been * fixed in the atomic helpers. * * The driver callbacks are mostly compatible with the atomic modeset helpers, * except for the handling of the primary plane: Atomic helpers require that the * primary plane is implemented as a real standalone plane and not directly tied * to the CRTC state. For easier transition this library provides functions to * implement the old semantics required by the CRTC helpers using the new plane * and atomic helper callbacks. * * Drivers are strongly urged to convert to the atomic helpers (by way of first * converting to the plane helpers). New drivers must not use these functions * but need to implement the atomic interface instead, potentially using the * atomic helpers for that. * * These legacy modeset helpers use the same function table structures as * all other modesetting helpers. See the documentation for struct * &drm_crtc_helper_funcs, &struct drm_encoder_helper_funcs and struct * &drm_connector_helper_funcs. */ /** * drm_helper_encoder_in_use - check if a given encoder is in use * @encoder: encoder to check * * Checks whether @encoder is with the current mode setting output configuration * in use by any connector. This doesn't mean that it is actually enabled since * the DPMS state is tracked separately. * * Returns: * True if @encoder is used, false otherwise. */ bool drm_helper_encoder_in_use(struct drm_encoder *encoder) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = encoder->dev; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); /* * We can expect this mutex to be locked if we are not panicking. * Locking is currently fubar in the panic handler. */ if (!oops_in_progress) { drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex)); drm_WARN_ON(dev, !drm_modeset_is_locked(&dev->mode_config.connection_mutex)); } drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->encoder == encoder) { drm_connector_list_iter_end(&conn_iter); return true; } } drm_connector_list_iter_end(&conn_iter); return false; } EXPORT_SYMBOL(drm_helper_encoder_in_use); /** * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config * @crtc: CRTC to check * * Checks whether @crtc is with the current mode setting output configuration * in use by any connector. This doesn't mean that it is actually enabled since * the DPMS state is tracked separately. * * Returns: * True if @crtc is used, false otherwise. */ bool drm_helper_crtc_in_use(struct drm_crtc *crtc) { struct drm_encoder *encoder; struct drm_device *dev = crtc->dev; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); /* * We can expect this mutex to be locked if we are not panicking. * Locking is currently fubar in the panic handler. */ if (!oops_in_progress) drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex)); drm_for_each_encoder(encoder, dev) if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder)) return true; return false; } EXPORT_SYMBOL(drm_helper_crtc_in_use); static void drm_encoder_disable(struct drm_encoder *encoder) { const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; if (!encoder_funcs) return; if (encoder_funcs->disable) (*encoder_funcs->disable)(encoder); else if (encoder_funcs->dpms) (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); } static void __drm_helper_disable_unused_functions(struct drm_device *dev) { struct drm_encoder *encoder; struct drm_crtc *crtc; drm_warn_on_modeset_not_all_locked(dev); drm_for_each_encoder(encoder, dev) { if (!drm_helper_encoder_in_use(encoder)) { drm_encoder_disable(encoder); /* disconnect encoder from any connector */ encoder->crtc = NULL; } } drm_for_each_crtc(crtc, dev) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; crtc->enabled = drm_helper_crtc_in_use(crtc); if (!crtc->enabled) { if (crtc_funcs->disable) (*crtc_funcs->disable)(crtc); else (*crtc_funcs->dpms)(crtc, DRM_MODE_DPMS_OFF); crtc->primary->fb = NULL; } } } /** * drm_helper_disable_unused_functions - disable unused objects * @dev: DRM device * * This function walks through the entire mode setting configuration of @dev. It * will remove any CRTC links of unused encoders and encoder links of * disconnected connectors. Then it will disable all unused encoders and CRTCs * either by calling their disable callback if available or by calling their * dpms callback with DRM_MODE_DPMS_OFF. * * NOTE: * * This function is part of the legacy modeset helper library and will cause * major confusion with atomic drivers. This is because atomic helpers guarantee * to never call ->disable() hooks on a disabled function, or ->enable() hooks * on an enabled functions. drm_helper_disable_unused_functions() on the other * hand throws such guarantees into the wind and calls disable hooks * unconditionally on unused functions. */ void drm_helper_disable_unused_functions(struct drm_device *dev) { drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_modeset_lock_all(dev); __drm_helper_disable_unused_functions(dev); drm_modeset_unlock_all(dev); } EXPORT_SYMBOL(drm_helper_disable_unused_functions); /* * Check the CRTC we're going to map each output to vs. its current * CRTC. If they don't match, we have to disable the output and the CRTC * since the driver will have to re-route things. */ static void drm_crtc_prepare_encoders(struct drm_device *dev) { const struct drm_encoder_helper_funcs *encoder_funcs; struct drm_encoder *encoder; drm_for_each_encoder(encoder, dev) { encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; /* Disable unused encoders */ if (encoder->crtc == NULL) drm_encoder_disable(encoder); } } /** * drm_crtc_helper_set_mode - internal helper to set a mode * @crtc: CRTC to program * @mode: mode to use * @x: horizontal offset into the surface * @y: vertical offset into the surface * @old_fb: old framebuffer, for cleanup * * Try to set @mode on @crtc. Give @crtc and its associated connectors a chance * to fixup or reject the mode prior to trying to set it. This is an internal * helper that drivers could e.g. use to update properties that require the * entire output pipe to be disabled and re-enabled in a new configuration. For * example for changing whether audio is enabled on a hdmi link or for changing * panel fitter or dither attributes. It is also called by the * drm_crtc_helper_set_config() helper function to drive the mode setting * sequence. * * Returns: * True if the mode was set successfully, false otherwise. */ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, int x, int y, struct drm_framebuffer *old_fb) { struct drm_device *dev = crtc->dev; struct drm_display_mode *adjusted_mode, saved_mode, saved_hwmode; const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; const struct drm_encoder_helper_funcs *encoder_funcs; int saved_x, saved_y; bool saved_enabled; struct drm_encoder *encoder; bool ret = true; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_warn_on_modeset_not_all_locked(dev); saved_enabled = crtc->enabled; crtc->enabled = drm_helper_crtc_in_use(crtc); if (!crtc->enabled) return true; adjusted_mode = drm_mode_duplicate(dev, mode); if (!adjusted_mode) { crtc->enabled = saved_enabled; return false; } drm_mode_init(&saved_mode, &crtc->mode); drm_mode_init(&saved_hwmode, &crtc->hwmode); saved_x = crtc->x; saved_y = crtc->y; /* Update crtc values up front so the driver can rely on them for mode * setting. */ drm_mode_copy(&crtc->mode, mode); crtc->x = x; crtc->y = y; /* Pass our mode to the connectors and the CRTC to give them a chance to * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. */ drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; if (encoder_funcs->mode_fixup) { if (!(ret = encoder_funcs->mode_fixup(encoder, mode, adjusted_mode))) { drm_dbg_kms(dev, "[ENCODER:%d:%s] mode fixup failed\n", encoder->base.id, encoder->name); goto done; } } } if (crtc_funcs->mode_fixup) { if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) { drm_dbg_kms(dev, "[CRTC:%d:%s] mode fixup failed\n", crtc->base.id, crtc->name); goto done; } } drm_dbg_kms(dev, "[CRTC:%d:%s]\n", crtc->base.id, crtc->name); drm_mode_copy(&crtc->hwmode, adjusted_mode); /* Prepare the encoders and CRTCs before setting the mode. */ drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; /* Disable the encoders as the first thing we do. */ if (encoder_funcs->prepare) encoder_funcs->prepare(encoder); } drm_crtc_prepare_encoders(dev); crtc_funcs->prepare(crtc); /* Set up the DPLL and any encoders state that needs to adjust or depend * on the DPLL. */ ret = !crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y, old_fb); if (!ret) goto done; drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; drm_dbg_kms(dev, "[ENCODER:%d:%s] set [MODE:%s]\n", encoder->base.id, encoder->name, mode->name); if (encoder_funcs->mode_set) encoder_funcs->mode_set(encoder, mode, adjusted_mode); } /* Now enable the clocks, plane, pipe, and connectors that we set up. */ crtc_funcs->commit(crtc); drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; if (encoder_funcs->commit) encoder_funcs->commit(encoder); } /* Calculate and store various constants which * are later needed by vblank and swap-completion * timestamping. They are derived from true hwmode. */ drm_calc_timestamping_constants(crtc, &crtc->hwmode); /* FIXME: add subpixel order */ done: drm_mode_destroy(dev, adjusted_mode); if (!ret) { crtc->enabled = saved_enabled; drm_mode_copy(&crtc->mode, &saved_mode); drm_mode_copy(&crtc->hwmode, &saved_hwmode); crtc->x = saved_x; crtc->y = saved_y; } return ret; } EXPORT_SYMBOL(drm_crtc_helper_set_mode); /** * drm_crtc_helper_atomic_check() - Helper to check CRTC atomic-state * @crtc: CRTC to check * @state: atomic state object * * Provides a default CRTC-state check handler for CRTCs that only have * one primary plane attached to it. This is often the case for the CRTC * of simple framebuffers. * * RETURNS: * Zero on success, or an errno code otherwise. */ int drm_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *new_crtc_state = drm_atomic_get_new_crtc_state(state, crtc); if (!new_crtc_state->enable) return 0; return drm_atomic_helper_check_crtc_primary_plane(new_crtc_state); } EXPORT_SYMBOL(drm_crtc_helper_atomic_check); static void drm_crtc_helper_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_connector *connector; struct drm_encoder *encoder; /* Decouple all encoders and their attached connectors from this crtc */ drm_for_each_encoder(encoder, dev) { struct drm_connector_list_iter conn_iter; if (encoder->crtc != crtc) continue; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->encoder != encoder) continue; connector->encoder = NULL; /* * drm_helper_disable_unused_functions() ought to be * doing this, but since we've decoupled the encoder * from the connector above, the required connection * between them is henceforth no longer available. */ connector->dpms = DRM_MODE_DPMS_OFF; /* we keep a reference while the encoder is bound */ drm_connector_put(connector); } drm_connector_list_iter_end(&conn_iter); } __drm_helper_disable_unused_functions(dev); } /* * For connectors that support multiple encoders, either the * .atomic_best_encoder() or .best_encoder() operation must be implemented. */ struct drm_encoder * drm_connector_get_single_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; drm_WARN_ON(connector->dev, hweight32(connector->possible_encoders) > 1); drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; } /** * drm_crtc_helper_set_config - set a new config from userspace * @set: mode set configuration * @ctx: lock acquire context, not used here * * The drm_crtc_helper_set_config() helper function implements the of * &drm_crtc_funcs.set_config callback for drivers using the legacy CRTC * helpers. * * It first tries to locate the best encoder for each connector by calling the * connector @drm_connector_helper_funcs.best_encoder helper operation. * * After locating the appropriate encoders, the helper function will call the * mode_fixup encoder and CRTC helper operations to adjust the requested mode, * or reject it completely in which case an error will be returned to the * application. If the new configuration after mode adjustment is identical to * the current configuration the helper function will return without performing * any other operation. * * If the adjusted mode is identical to the current mode but changes to the * frame buffer need to be applied, the drm_crtc_helper_set_config() function * will call the CRTC &drm_crtc_helper_funcs.mode_set_base helper operation. * * If the adjusted mode differs from the current mode, or if the * ->mode_set_base() helper operation is not provided, the helper function * performs a full mode set sequence by calling the ->prepare(), ->mode_set() * and ->commit() CRTC and encoder helper operations, in that order. * Alternatively it can also use the dpms and disable helper operations. For * details see &struct drm_crtc_helper_funcs and struct * &drm_encoder_helper_funcs. * * This function is deprecated. New drivers must implement atomic modeset * support, for which this function is unsuitable. Instead drivers should use * drm_atomic_helper_set_config(). * * Returns: * Returns 0 on success, negative errno numbers on failure. */ int drm_crtc_helper_set_config(struct drm_mode_set *set, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev; struct drm_crtc **save_encoder_crtcs, *new_crtc; struct drm_encoder **save_connector_encoders, *new_encoder, *encoder; bool mode_changed = false; /* if true do a full mode set */ bool fb_changed = false; /* if true and !mode_changed just do a flip */ struct drm_connector *connector; struct drm_connector_list_iter conn_iter; int count = 0, ro, fail = 0; const struct drm_crtc_helper_funcs *crtc_funcs; struct drm_mode_set save_set; int ret; int i; BUG_ON(!set); BUG_ON(!set->crtc); BUG_ON(!set->crtc->helper_private); /* Enforce sane interface api - has been abused by the fb helper. */ BUG_ON(!set->mode && set->fb); BUG_ON(set->fb && set->num_connectors == 0); crtc_funcs = set->crtc->helper_private; dev = set->crtc->dev; drm_dbg_kms(dev, "\n"); drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); if (!set->mode) set->fb = NULL; if (set->fb) { drm_dbg_kms(dev, "[CRTC:%d:%s] [FB:%d] #connectors=%d (x y) (%i %i)\n", set->crtc->base.id, set->crtc->name, set->fb->base.id, (int)set->num_connectors, set->x, set->y); } else { drm_dbg_kms(dev, "[CRTC:%d:%s] [NOFB]\n", set->crtc->base.id, set->crtc->name); drm_crtc_helper_disable(set->crtc); return 0; } drm_warn_on_modeset_not_all_locked(dev); /* * Allocate space for the backup of all (non-pointer) encoder and * connector data. */ save_encoder_crtcs = kcalloc(dev->mode_config.num_encoder, sizeof(struct drm_crtc *), GFP_KERNEL); if (!save_encoder_crtcs) return -ENOMEM; save_connector_encoders = kcalloc(dev->mode_config.num_connector, sizeof(struct drm_encoder *), GFP_KERNEL); if (!save_connector_encoders) { kfree(save_encoder_crtcs); return -ENOMEM; } /* * Copy data. Note that driver private data is not affected. * Should anything bad happen only the expected state is * restored, not the drivers personal bookkeeping. */ count = 0; drm_for_each_encoder(encoder, dev) { save_encoder_crtcs[count++] = encoder->crtc; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) save_connector_encoders[count++] = connector->encoder; drm_connector_list_iter_end(&conn_iter); save_set.crtc = set->crtc; save_set.mode = &set->crtc->mode; save_set.x = set->crtc->x; save_set.y = set->crtc->y; save_set.fb = set->crtc->primary->fb; /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ if (set->crtc->primary->fb != set->fb) { /* If we have no fb then treat it as a full mode set */ if (set->crtc->primary->fb == NULL) { drm_dbg_kms(dev, "[CRTC:%d:%s] no fb, full mode set\n", set->crtc->base.id, set->crtc->name); mode_changed = true; } else if (set->fb->format != set->crtc->primary->fb->format) { mode_changed = true; } else fb_changed = true; } if (set->x != set->crtc->x || set->y != set->crtc->y) fb_changed = true; if (!drm_mode_equal(set->mode, &set->crtc->mode)) { drm_dbg_kms(dev, "[CRTC:%d:%s] modes are different, full mode set:\n", set->crtc->base.id, set->crtc->name); drm_dbg_kms(dev, DRM_MODE_FMT "\n", DRM_MODE_ARG(&set->crtc->mode)); drm_dbg_kms(dev, DRM_MODE_FMT "\n", DRM_MODE_ARG(set->mode)); mode_changed = true; } /* take a reference on all unbound connectors in set, reuse the * already taken reference for bound connectors */ for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro]->encoder) continue; drm_connector_get(set->connectors[ro]); } /* a) traverse passed in connector list and get encoders for them */ count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; new_encoder = connector->encoder; for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == connector) { if (connector_funcs->best_encoder) new_encoder = connector_funcs->best_encoder(connector); else new_encoder = drm_connector_get_single_encoder(connector); /* if we can't get an encoder for a connector we are setting now - then fail */ if (new_encoder == NULL) /* don't break so fail path works correct */ fail = 1; if (connector->dpms != DRM_MODE_DPMS_ON) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] DPMS not on, full mode switch\n", connector->base.id, connector->name); mode_changed = true; } break; } } if (new_encoder != connector->encoder) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] encoder changed, full mode switch\n", connector->base.id, connector->name); mode_changed = true; /* If the encoder is reused for another connector, then * the appropriate crtc will be set later. */ if (connector->encoder) connector->encoder->crtc = NULL; connector->encoder = new_encoder; } } drm_connector_list_iter_end(&conn_iter); if (fail) { ret = -EINVAL; goto fail; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (!connector->encoder) continue; if (connector->encoder->crtc == set->crtc) new_crtc = NULL; else new_crtc = connector->encoder->crtc; for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == connector) new_crtc = set->crtc; } /* Make sure the new CRTC will work with the encoder */ if (new_crtc && !drm_encoder_crtc_ok(connector->encoder, new_crtc)) { ret = -EINVAL; drm_connector_list_iter_end(&conn_iter); goto fail; } if (new_crtc != connector->encoder->crtc) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] CRTC changed, full mode switch\n", connector->base.id, connector->name); mode_changed = true; connector->encoder->crtc = new_crtc; } if (new_crtc) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] to [CRTC:%d:%s]\n", connector->base.id, connector->name, new_crtc->base.id, new_crtc->name); } else { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] to [NOCRTC]\n", connector->base.id, connector->name); } } drm_connector_list_iter_end(&conn_iter); /* mode_set_base is not a required function */ if (fb_changed && !crtc_funcs->mode_set_base) mode_changed = true; if (mode_changed) { if (drm_helper_crtc_in_use(set->crtc)) { drm_dbg_kms(dev, "[CRTC:%d:%s] attempting to set mode from userspace: " DRM_MODE_FMT "\n", set->crtc->base.id, set->crtc->name, DRM_MODE_ARG(set->mode)); set->crtc->primary->fb = set->fb; if (!drm_crtc_helper_set_mode(set->crtc, set->mode, set->x, set->y, save_set.fb)) { drm_err(dev, "[CRTC:%d:%s] failed to set mode\n", set->crtc->base.id, set->crtc->name); set->crtc->primary->fb = save_set.fb; ret = -EINVAL; goto fail; } drm_dbg_kms(dev, "[CRTC:%d:%s] Setting connector DPMS state to on\n", set->crtc->base.id, set->crtc->name); for (i = 0; i < set->num_connectors; i++) { drm_dbg_kms(dev, "\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id, set->connectors[i]->name); set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON); } } __drm_helper_disable_unused_functions(dev); } else if (fb_changed) { set->crtc->x = set->x; set->crtc->y = set->y; set->crtc->primary->fb = set->fb; ret = crtc_funcs->mode_set_base(set->crtc, set->x, set->y, save_set.fb); if (ret != 0) { set->crtc->x = save_set.x; set->crtc->y = save_set.y; set->crtc->primary->fb = save_set.fb; goto fail; } } kfree(save_connector_encoders); kfree(save_encoder_crtcs); return 0; fail: /* Restore all previous data. */ count = 0; drm_for_each_encoder(encoder, dev) { encoder->crtc = save_encoder_crtcs[count++]; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) connector->encoder = save_connector_encoders[count++]; drm_connector_list_iter_end(&conn_iter); /* after fail drop reference on all unbound connectors in set, let * bound connectors keep their reference */ for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro]->encoder) continue; drm_connector_put(set->connectors[ro]); } /* Try to restore the config */ if (mode_changed && !drm_crtc_helper_set_mode(save_set.crtc, save_set.mode, save_set.x, save_set.y, save_set.fb)) drm_err(dev, "failed to restore config after modeset failure\n"); kfree(save_connector_encoders); kfree(save_encoder_crtcs); return ret; } EXPORT_SYMBOL(drm_crtc_helper_set_config); static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder) { int dpms = DRM_MODE_DPMS_OFF; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = encoder->dev; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->encoder == encoder) if (connector->dpms < dpms) dpms = connector->dpms; drm_connector_list_iter_end(&conn_iter); return dpms; } /* Helper which handles bridge ordering around encoder dpms */ static void drm_helper_encoder_dpms(struct drm_encoder *encoder, int mode) { const struct drm_encoder_helper_funcs *encoder_funcs; encoder_funcs = encoder->helper_private; if (!encoder_funcs) return; if (encoder_funcs->dpms) encoder_funcs->dpms(encoder, mode); } static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc) { int dpms = DRM_MODE_DPMS_OFF; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = crtc->dev; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->encoder && connector->encoder->crtc == crtc) if (connector->dpms < dpms) dpms = connector->dpms; drm_connector_list_iter_end(&conn_iter); return dpms; } /** * drm_helper_connector_dpms() - connector dpms helper implementation * @connector: affected connector * @mode: DPMS mode * * The drm_helper_connector_dpms() helper function implements the * &drm_connector_funcs.dpms callback for drivers using the legacy CRTC * helpers. * * This is the main helper function provided by the CRTC helper framework for * implementing the DPMS connector attribute. It computes the new desired DPMS * state for all encoders and CRTCs in the output mesh and calls the * &drm_crtc_helper_funcs.dpms and &drm_encoder_helper_funcs.dpms callbacks * provided by the driver. * * This function is deprecated. New drivers must implement atomic modeset * support, where DPMS is handled in the DRM core. * * Returns: * Always returns 0. */ int drm_helper_connector_dpms(struct drm_connector *connector, int mode) { struct drm_encoder *encoder = connector->encoder; struct drm_crtc *crtc = encoder ? encoder->crtc : NULL; int old_dpms, encoder_dpms = DRM_MODE_DPMS_OFF; drm_WARN_ON(connector->dev, drm_drv_uses_atomic_modeset(connector->dev)); if (mode == connector->dpms) return 0; old_dpms = connector->dpms; connector->dpms = mode; if (encoder) encoder_dpms = drm_helper_choose_encoder_dpms(encoder); /* from off to on, do crtc then encoder */ if (mode < old_dpms) { if (crtc) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } if (encoder) drm_helper_encoder_dpms(encoder, encoder_dpms); } /* from on to off, do encoder then crtc */ if (mode > old_dpms) { if (encoder) drm_helper_encoder_dpms(encoder, encoder_dpms); if (crtc) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } } return 0; } EXPORT_SYMBOL(drm_helper_connector_dpms); /** * drm_helper_resume_force_mode - force-restore mode setting configuration * @dev: drm_device which should be restored * * Drivers which use the mode setting helpers can use this function to * force-restore the mode setting configuration e.g. on resume or when something * else might have trampled over the hw state (like some overzealous old BIOSen * tended to do). * * This helper doesn't provide a error return value since restoring the old * config should never fail due to resource allocation issues since the driver * has successfully set the restored configuration already. Hence this should * boil down to the equivalent of a few dpms on calls, which also don't provide * an error code. * * Drivers where simply restoring an old configuration again might fail (e.g. * due to slight differences in allocating shared resources when the * configuration is restored in a different order than when userspace set it up) * need to use their own restore logic. * * This function is deprecated. New drivers should implement atomic mode- * setting and use the atomic suspend/resume helpers. * * See also: * drm_atomic_helper_suspend(), drm_atomic_helper_resume() */ void drm_helper_resume_force_mode(struct drm_device *dev) { struct drm_crtc *crtc; struct drm_encoder *encoder; const struct drm_crtc_helper_funcs *crtc_funcs; int encoder_dpms; bool ret; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_modeset_lock_all(dev); drm_for_each_crtc(crtc, dev) { if (!crtc->enabled) continue; ret = drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x, crtc->y, crtc->primary->fb); /* Restoring the old config should never fail! */ if (ret == false) drm_err(dev, "failed to set mode on crtc %p\n", crtc); /* Turn off outputs that were already powered off */ if (drm_helper_choose_crtc_dpms(crtc)) { drm_for_each_encoder(encoder, dev) { if(encoder->crtc != crtc) continue; encoder_dpms = drm_helper_choose_encoder_dpms( encoder); drm_helper_encoder_dpms(encoder, encoder_dpms); } crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } } /* disable the unused connectors while restoring the modesetting */ __drm_helper_disable_unused_functions(dev); drm_modeset_unlock_all(dev); } EXPORT_SYMBOL(drm_helper_resume_force_mode); /** * drm_helper_force_disable_all - Forcibly turn off all enabled CRTCs * @dev: DRM device whose CRTCs to turn off * * Drivers may want to call this on unload to ensure that all displays are * unlit and the GPU is in a consistent, low power state. Takes modeset locks. * * Note: This should only be used by non-atomic legacy drivers. For an atomic * version look at drm_atomic_helper_shutdown(). * * Returns: * Zero on success, error code on failure. */ int drm_helper_force_disable_all(struct drm_device *dev) { struct drm_crtc *crtc; int ret = 0; drm_modeset_lock_all(dev); drm_for_each_crtc(crtc, dev) if (crtc->enabled) { struct drm_mode_set set = { .crtc = crtc, }; ret = drm_mode_set_config_internal(&set); if (ret) goto out; } out: drm_modeset_unlock_all(dev); return ret; } EXPORT_SYMBOL(drm_helper_force_disable_all);
2 2 2 1 1 1 1 2 2 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2018 Samsung Electronics Co., Ltd. */ #include <linux/jhash.h> #include <linux/slab.h> #include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/wait.h> #include <linux/hashtable.h> #include <net/net_namespace.h> #include <net/genetlink.h> #include <linux/socket.h> #include <linux/workqueue.h> #include "vfs_cache.h" #include "transport_ipc.h" #include "server.h" #include "smb_common.h" #include "mgmt/user_config.h" #include "mgmt/share_config.h" #include "mgmt/user_session.h" #include "mgmt/tree_connect.h" #include "mgmt/ksmbd_ida.h" #include "connection.h" #include "transport_tcp.h" #include "transport_rdma.h" #define IPC_WAIT_TIMEOUT (2 * HZ) #define IPC_MSG_HASH_BITS 3 static DEFINE_HASHTABLE(ipc_msg_table, IPC_MSG_HASH_BITS); static DECLARE_RWSEM(ipc_msg_table_lock); static DEFINE_MUTEX(startup_lock); static DEFINE_IDA(ipc_ida); static unsigned int ksmbd_tools_pid; static bool ksmbd_ipc_validate_version(struct genl_info *m) { if (m->genlhdr->version != KSMBD_GENL_VERSION) { pr_err("%s. ksmbd: %d, kernel module: %d. %s.\n", "Daemon and kernel module version mismatch", m->genlhdr->version, KSMBD_GENL_VERSION, "User-space ksmbd should terminate"); return false; } return true; } struct ksmbd_ipc_msg { unsigned int type; unsigned int sz; unsigned char payload[]; }; struct ipc_msg_table_entry { unsigned int handle; unsigned int type; wait_queue_head_t wait; struct hlist_node ipc_table_hlist; void *response; unsigned int msg_sz; }; static struct delayed_work ipc_timer_work; static int handle_startup_event(struct sk_buff *skb, struct genl_info *info); static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info); static int handle_generic_event(struct sk_buff *skb, struct genl_info *info); static int ksmbd_ipc_heartbeat_request(void); static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = { [KSMBD_EVENT_UNSPEC] = { .len = 0, }, [KSMBD_EVENT_HEARTBEAT_REQUEST] = { .len = sizeof(struct ksmbd_heartbeat), }, [KSMBD_EVENT_STARTING_UP] = { .len = sizeof(struct ksmbd_startup_request), }, [KSMBD_EVENT_SHUTTING_DOWN] = { .len = sizeof(struct ksmbd_shutdown_request), }, [KSMBD_EVENT_LOGIN_REQUEST] = { .len = sizeof(struct ksmbd_login_request), }, [KSMBD_EVENT_LOGIN_RESPONSE] = { .len = sizeof(struct ksmbd_login_response), }, [KSMBD_EVENT_SHARE_CONFIG_REQUEST] = { .len = sizeof(struct ksmbd_share_config_request), }, [KSMBD_EVENT_SHARE_CONFIG_RESPONSE] = { .len = sizeof(struct ksmbd_share_config_response), }, [KSMBD_EVENT_TREE_CONNECT_REQUEST] = { .len = sizeof(struct ksmbd_tree_connect_request), }, [KSMBD_EVENT_TREE_CONNECT_RESPONSE] = { .len = sizeof(struct ksmbd_tree_connect_response), }, [KSMBD_EVENT_TREE_DISCONNECT_REQUEST] = { .len = sizeof(struct ksmbd_tree_disconnect_request), }, [KSMBD_EVENT_LOGOUT_REQUEST] = { .len = sizeof(struct ksmbd_logout_request), }, [KSMBD_EVENT_RPC_REQUEST] = { }, [KSMBD_EVENT_RPC_RESPONSE] = { }, [KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST] = { }, [KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE] = { }, [KSMBD_EVENT_LOGIN_REQUEST_EXT] = { .len = sizeof(struct ksmbd_login_request), }, [KSMBD_EVENT_LOGIN_RESPONSE_EXT] = { .len = sizeof(struct ksmbd_login_response_ext), }, }; static struct genl_ops ksmbd_genl_ops[] = { { .cmd = KSMBD_EVENT_UNSPEC, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_HEARTBEAT_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_STARTING_UP, .doit = handle_startup_event, }, { .cmd = KSMBD_EVENT_SHUTTING_DOWN, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_LOGIN_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_LOGIN_RESPONSE, .doit = handle_generic_event, }, { .cmd = KSMBD_EVENT_SHARE_CONFIG_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_SHARE_CONFIG_RESPONSE, .doit = handle_generic_event, }, { .cmd = KSMBD_EVENT_TREE_CONNECT_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_TREE_CONNECT_RESPONSE, .doit = handle_generic_event, }, { .cmd = KSMBD_EVENT_TREE_DISCONNECT_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_LOGOUT_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_RPC_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_RPC_RESPONSE, .doit = handle_generic_event, }, { .cmd = KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE, .doit = handle_generic_event, }, { .cmd = KSMBD_EVENT_LOGIN_REQUEST_EXT, .doit = handle_unsupported_event, }, { .cmd = KSMBD_EVENT_LOGIN_RESPONSE_EXT, .doit = handle_generic_event, }, }; static struct genl_family ksmbd_genl_family = { .name = KSMBD_GENL_NAME, .version = KSMBD_GENL_VERSION, .hdrsize = 0, .maxattr = KSMBD_EVENT_MAX, .netnsok = true, .module = THIS_MODULE, .ops = ksmbd_genl_ops, .n_ops = ARRAY_SIZE(ksmbd_genl_ops), .resv_start_op = KSMBD_EVENT_LOGIN_RESPONSE_EXT + 1, }; static void ksmbd_nl_init_fixup(void) { int i; for (i = 0; i < ARRAY_SIZE(ksmbd_genl_ops); i++) ksmbd_genl_ops[i].validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP; ksmbd_genl_family.policy = ksmbd_nl_policy; } static int rpc_context_flags(struct ksmbd_session *sess) { if (user_guest(sess->user)) return KSMBD_RPC_RESTRICTED_CONTEXT; return 0; } static void ipc_update_last_active(void) { if (server_conf.ipc_timeout) server_conf.ipc_last_active = jiffies; } static struct ksmbd_ipc_msg *ipc_msg_alloc(size_t sz) { struct ksmbd_ipc_msg *msg; size_t msg_sz = sz + sizeof(struct ksmbd_ipc_msg); msg = kvzalloc(msg_sz, KSMBD_DEFAULT_GFP); if (msg) msg->sz = sz; return msg; } static void ipc_msg_free(struct ksmbd_ipc_msg *msg) { kvfree(msg); } static void ipc_msg_handle_free(int handle) { if (handle >= 0) ksmbd_release_id(&ipc_ida, handle); } static int handle_response(int type, void *payload, size_t sz) { unsigned int handle; struct ipc_msg_table_entry *entry; int ret = 0; /* Prevent 4-byte read beyond declared payload size */ if (sz < sizeof(unsigned int)) return -EINVAL; handle = *(unsigned int *)payload; ipc_update_last_active(); down_read(&ipc_msg_table_lock); hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) { if (handle != entry->handle) continue; entry->response = NULL; /* * Response message type value should be equal to * request message type + 1. */ if (entry->type + 1 != type) { pr_err("Waiting for IPC type %d, got %d. Ignore.\n", entry->type + 1, type); continue; } entry->response = kvzalloc(sz, KSMBD_DEFAULT_GFP); if (!entry->response) { ret = -ENOMEM; break; } memcpy(entry->response, payload, sz); entry->msg_sz = sz; wake_up_interruptible(&entry->wait); ret = 0; break; } up_read(&ipc_msg_table_lock); return ret; } static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) { int ret; ksmbd_set_fd_limit(req->file_max); server_conf.flags = req->flags; server_conf.signing = req->signing; server_conf.tcp_port = req->tcp_port; server_conf.ipc_timeout = req->ipc_timeout * HZ; if (check_mul_overflow(req->deadtime, SMB_ECHO_INTERVAL, &server_conf.deadtime)) { ret = -EINVAL; goto out; } server_conf.share_fake_fscaps = req->share_fake_fscaps; ksmbd_init_domain(req->sub_auth); if (req->smb2_max_read) init_smb2_max_read_size(req->smb2_max_read); if (req->smb2_max_write) init_smb2_max_write_size(req->smb2_max_write); if (req->smb2_max_trans) init_smb2_max_trans_size(req->smb2_max_trans); if (req->smb2_max_credits) { init_smb2_max_credits(req->smb2_max_credits); server_conf.max_inflight_req = req->smb2_max_credits; } if (req->smbd_max_io_size) init_smbd_max_io_size(req->smbd_max_io_size); if (req->max_connections) server_conf.max_connections = req->max_connections; if (req->max_ip_connections) server_conf.max_ip_connections = req->max_ip_connections; ret = ksmbd_set_netbios_name(req->netbios_name); ret |= ksmbd_set_server_string(req->server_string); ret |= ksmbd_set_work_group(req->work_group); server_conf.bind_interfaces_only = req->bind_interfaces_only; ret |= ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req), req->ifc_list_sz); out: if (ret) { pr_err("Server configuration error: %s %s %s\n", req->netbios_name, req->server_string, req->work_group); return ret; } if (req->min_prot[0]) { ret = ksmbd_lookup_protocol_idx(req->min_prot); if (ret >= 0) server_conf.min_protocol = ret; } if (req->max_prot[0]) { ret = ksmbd_lookup_protocol_idx(req->max_prot); if (ret >= 0) server_conf.max_protocol = ret; } if (server_conf.ipc_timeout) schedule_delayed_work(&ipc_timer_work, server_conf.ipc_timeout); return 0; } static int handle_startup_event(struct sk_buff *skb, struct genl_info *info) { int ret = 0; #ifdef CONFIG_SMB_SERVER_CHECK_CAP_NET_ADMIN if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; #endif if (!ksmbd_ipc_validate_version(info)) return -EINVAL; if (!info->attrs[KSMBD_EVENT_STARTING_UP]) return -EINVAL; mutex_lock(&startup_lock); if (!ksmbd_server_configurable()) { mutex_unlock(&startup_lock); pr_err("Server reset is in progress, can't start daemon\n"); return -EINVAL; } if (ksmbd_tools_pid) { if (ksmbd_ipc_heartbeat_request() == 0) { ret = -EINVAL; goto out; } pr_err("Reconnect to a new user space daemon\n"); } else { struct ksmbd_startup_request *req; req = nla_data(info->attrs[info->genlhdr->cmd]); ret = ipc_server_config_on_startup(req); if (ret) goto out; server_queue_ctrl_init_work(); } ksmbd_tools_pid = info->snd_portid; ipc_update_last_active(); out: mutex_unlock(&startup_lock); return ret; } static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info) { pr_err("Unknown IPC event: %d, ignore.\n", info->genlhdr->cmd); return -EINVAL; } static int handle_generic_event(struct sk_buff *skb, struct genl_info *info) { void *payload; int sz; int type = info->genlhdr->cmd; #ifdef CONFIG_SMB_SERVER_CHECK_CAP_NET_ADMIN if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; #endif if (type > KSMBD_EVENT_MAX) { WARN_ON(1); return -EINVAL; } if (!ksmbd_ipc_validate_version(info)) return -EINVAL; if (!info->attrs[type]) return -EINVAL; payload = nla_data(info->attrs[info->genlhdr->cmd]); sz = nla_len(info->attrs[info->genlhdr->cmd]); return handle_response(type, payload, sz); } static int ipc_msg_send(struct ksmbd_ipc_msg *msg) { struct genlmsghdr *nlh; struct sk_buff *skb; int ret = -EINVAL; if (!ksmbd_tools_pid) return ret; skb = genlmsg_new(msg->sz, KSMBD_DEFAULT_GFP); if (!skb) return -ENOMEM; nlh = genlmsg_put(skb, 0, 0, &ksmbd_genl_family, 0, msg->type); if (!nlh) goto out; ret = nla_put(skb, msg->type, msg->sz, msg->payload); if (ret) { genlmsg_cancel(skb, nlh); goto out; } genlmsg_end(skb, nlh); ret = genlmsg_unicast(&init_net, skb, ksmbd_tools_pid); if (!ret) ipc_update_last_active(); return ret; out: nlmsg_free(skb); return ret; } static int ipc_validate_msg(struct ipc_msg_table_entry *entry) { unsigned int msg_sz = entry->msg_sz; switch (entry->type) { case KSMBD_EVENT_RPC_REQUEST: { struct ksmbd_rpc_command *resp = entry->response; msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; break; } case KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST: { struct ksmbd_spnego_authen_response *resp = entry->response; msg_sz = sizeof(struct ksmbd_spnego_authen_response) + resp->session_key_len + resp->spnego_blob_len; break; } case KSMBD_EVENT_SHARE_CONFIG_REQUEST: { struct ksmbd_share_config_response *resp = entry->response; if (resp->payload_sz) { if (resp->payload_sz < resp->veto_list_sz) return -EINVAL; msg_sz = sizeof(struct ksmbd_share_config_response) + resp->payload_sz; } break; } case KSMBD_EVENT_LOGIN_REQUEST_EXT: { struct ksmbd_login_response_ext *resp = entry->response; if (resp->ngroups) { msg_sz = sizeof(struct ksmbd_login_response_ext) + resp->ngroups * sizeof(gid_t); } } } return entry->msg_sz != msg_sz ? -EINVAL : 0; } static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) { struct ipc_msg_table_entry entry; int ret; if ((int)handle < 0) return NULL; entry.type = msg->type; entry.response = NULL; init_waitqueue_head(&entry.wait); down_write(&ipc_msg_table_lock); entry.handle = handle; hash_add(ipc_msg_table, &entry.ipc_table_hlist, entry.handle); up_write(&ipc_msg_table_lock); ret = ipc_msg_send(msg); if (ret) goto out; ret = wait_event_interruptible_timeout(entry.wait, entry.response != NULL, IPC_WAIT_TIMEOUT); if (entry.response) { ret = ipc_validate_msg(&entry); if (ret) { kvfree(entry.response); entry.response = NULL; } } out: down_write(&ipc_msg_table_lock); hash_del(&entry.ipc_table_hlist); up_write(&ipc_msg_table_lock); return entry.response; } static int ksmbd_ipc_heartbeat_request(void) { struct ksmbd_ipc_msg *msg; int ret; msg = ipc_msg_alloc(sizeof(struct ksmbd_heartbeat)); if (!msg) return -EINVAL; msg->type = KSMBD_EVENT_HEARTBEAT_REQUEST; ret = ipc_msg_send(msg); ipc_msg_free(msg); return ret; } struct ksmbd_login_response *ksmbd_ipc_login_request(const char *account) { struct ksmbd_ipc_msg *msg; struct ksmbd_login_request *req; struct ksmbd_login_response *resp; if (strlen(account) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ) return NULL; msg = ipc_msg_alloc(sizeof(struct ksmbd_login_request)); if (!msg) return NULL; msg->type = KSMBD_EVENT_LOGIN_REQUEST; req = (struct ksmbd_login_request *)msg->payload; req->handle = ksmbd_acquire_id(&ipc_ida); strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_handle_free(req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_login_response_ext *ksmbd_ipc_login_request_ext(const char *account) { struct ksmbd_ipc_msg *msg; struct ksmbd_login_request *req; struct ksmbd_login_response_ext *resp; if (strlen(account) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ) return NULL; msg = ipc_msg_alloc(sizeof(struct ksmbd_login_request)); if (!msg) return NULL; msg->type = KSMBD_EVENT_LOGIN_REQUEST_EXT; req = (struct ksmbd_login_request *)msg->payload; req->handle = ksmbd_acquire_id(&ipc_ida); strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_handle_free(req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_spnego_authen_response * ksmbd_ipc_spnego_authen_request(const char *spnego_blob, int blob_len) { struct ksmbd_ipc_msg *msg; struct ksmbd_spnego_authen_request *req; struct ksmbd_spnego_authen_response *resp; if (blob_len > KSMBD_IPC_MAX_PAYLOAD) return NULL; msg = ipc_msg_alloc(sizeof(struct ksmbd_spnego_authen_request) + blob_len + 1); if (!msg) return NULL; msg->type = KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST; req = (struct ksmbd_spnego_authen_request *)msg->payload; req->handle = ksmbd_acquire_id(&ipc_ida); req->spnego_blob_len = blob_len; memcpy(req->spnego_blob, spnego_blob, blob_len); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_handle_free(req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_tree_connect_response * ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess, struct ksmbd_share_config *share, struct ksmbd_tree_connect *tree_conn, struct sockaddr *peer_addr) { struct ksmbd_ipc_msg *msg; struct ksmbd_tree_connect_request *req; struct ksmbd_tree_connect_response *resp; if (strlen(user_name(sess->user)) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ) return NULL; if (strlen(share->name) >= KSMBD_REQ_MAX_SHARE_NAME) return NULL; msg = ipc_msg_alloc(sizeof(struct ksmbd_tree_connect_request)); if (!msg) return NULL; msg->type = KSMBD_EVENT_TREE_CONNECT_REQUEST; req = (struct ksmbd_tree_connect_request *)msg->payload; req->handle = ksmbd_acquire_id(&ipc_ida); req->account_flags = sess->user->flags; req->session_id = sess->id; req->connect_id = tree_conn->id; strscpy(req->account, user_name(sess->user), KSMBD_REQ_MAX_ACCOUNT_NAME_SZ); strscpy(req->share, share->name, KSMBD_REQ_MAX_SHARE_NAME); snprintf(req->peer_addr, sizeof(req->peer_addr), "%pIS", peer_addr); if (peer_addr->sa_family == AF_INET6) req->flags |= KSMBD_TREE_CONN_FLAG_REQUEST_IPV6; if (test_session_flag(sess, CIFDS_SESSION_FLAG_SMB2)) req->flags |= KSMBD_TREE_CONN_FLAG_REQUEST_SMB2; resp = ipc_msg_send_request(msg, req->handle); ipc_msg_handle_free(req->handle); ipc_msg_free(msg); return resp; } int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id, unsigned long long connect_id) { struct ksmbd_ipc_msg *msg; struct ksmbd_tree_disconnect_request *req; int ret; msg = ipc_msg_alloc(sizeof(struct ksmbd_tree_disconnect_request)); if (!msg) return -ENOMEM; msg->type = KSMBD_EVENT_TREE_DISCONNECT_REQUEST; req = (struct ksmbd_tree_disconnect_request *)msg->payload; req->session_id = session_id; req->connect_id = connect_id; ret = ipc_msg_send(msg); ipc_msg_free(msg); return ret; } int ksmbd_ipc_logout_request(const char *account, int flags) { struct ksmbd_ipc_msg *msg; struct ksmbd_logout_request *req; int ret; if (strlen(account) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ) return -EINVAL; msg = ipc_msg_alloc(sizeof(struct ksmbd_logout_request)); if (!msg) return -ENOMEM; msg->type = KSMBD_EVENT_LOGOUT_REQUEST; req = (struct ksmbd_logout_request *)msg->payload; req->account_flags = flags; strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ); ret = ipc_msg_send(msg); ipc_msg_free(msg); return ret; } struct ksmbd_share_config_response * ksmbd_ipc_share_config_request(const char *name) { struct ksmbd_ipc_msg *msg; struct ksmbd_share_config_request *req; struct ksmbd_share_config_response *resp; if (strlen(name) >= KSMBD_REQ_MAX_SHARE_NAME) return NULL; msg = ipc_msg_alloc(sizeof(struct ksmbd_share_config_request)); if (!msg) return NULL; msg->type = KSMBD_EVENT_SHARE_CONFIG_REQUEST; req = (struct ksmbd_share_config_request *)msg->payload; req->handle = ksmbd_acquire_id(&ipc_ida); strscpy(req->share_name, name, KSMBD_REQ_MAX_SHARE_NAME); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_handle_free(req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_rpc_command *ksmbd_rpc_open(struct ksmbd_session *sess, int handle) { struct ksmbd_ipc_msg *msg; struct ksmbd_rpc_command *req; struct ksmbd_rpc_command *resp; msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command)); if (!msg) return NULL; msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; req->flags = ksmbd_session_rpc_method(sess, handle); req->flags |= KSMBD_RPC_OPEN_METHOD; req->payload_sz = 0; resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_rpc_command *ksmbd_rpc_close(struct ksmbd_session *sess, int handle) { struct ksmbd_ipc_msg *msg; struct ksmbd_rpc_command *req; struct ksmbd_rpc_command *resp; msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command)); if (!msg) return NULL; msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; req->flags = ksmbd_session_rpc_method(sess, handle); req->flags |= KSMBD_RPC_CLOSE_METHOD; req->payload_sz = 0; resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle, void *payload, size_t payload_sz) { struct ksmbd_ipc_msg *msg; struct ksmbd_rpc_command *req; struct ksmbd_rpc_command *resp; if (payload_sz > KSMBD_IPC_MAX_PAYLOAD) return NULL; msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1); if (!msg) return NULL; lockdep_assert_not_held(&sess->rpc_lock); down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; req->flags = ksmbd_session_rpc_method(sess, handle); req->flags |= rpc_context_flags(sess); req->flags |= KSMBD_RPC_WRITE_METHOD; req->payload_sz = payload_sz; memcpy(req->payload, payload, payload_sz); up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle) { struct ksmbd_ipc_msg *msg; struct ksmbd_rpc_command *req; struct ksmbd_rpc_command *resp; msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command)); if (!msg) return NULL; lockdep_assert_not_held(&sess->rpc_lock); down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; req->flags = ksmbd_session_rpc_method(sess, handle); req->flags |= rpc_context_flags(sess); req->flags |= KSMBD_RPC_READ_METHOD; req->payload_sz = 0; up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); return resp; } struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle, void *payload, size_t payload_sz) { struct ksmbd_ipc_msg *msg; struct ksmbd_rpc_command *req; struct ksmbd_rpc_command *resp; if (payload_sz > KSMBD_IPC_MAX_PAYLOAD) return NULL; msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1); if (!msg) return NULL; lockdep_assert_not_held(&sess->rpc_lock); down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; req->flags = ksmbd_session_rpc_method(sess, handle); req->flags |= rpc_context_flags(sess); req->flags |= KSMBD_RPC_IOCTL_METHOD; req->payload_sz = payload_sz; memcpy(req->payload, payload, payload_sz); up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); return resp; } static int __ipc_heartbeat(void) { unsigned long delta; if (!ksmbd_server_running()) return 0; if (time_after(jiffies, server_conf.ipc_last_active)) { delta = (jiffies - server_conf.ipc_last_active); } else { ipc_update_last_active(); schedule_delayed_work(&ipc_timer_work, server_conf.ipc_timeout); return 0; } if (delta < server_conf.ipc_timeout) { schedule_delayed_work(&ipc_timer_work, server_conf.ipc_timeout - delta); return 0; } if (ksmbd_ipc_heartbeat_request() == 0) { schedule_delayed_work(&ipc_timer_work, server_conf.ipc_timeout); return 0; } mutex_lock(&startup_lock); WRITE_ONCE(server_conf.state, SERVER_STATE_RESETTING); server_conf.ipc_last_active = 0; ksmbd_tools_pid = 0; pr_err("No IPC daemon response for %lus\n", delta / HZ); mutex_unlock(&startup_lock); return -EINVAL; } static void ipc_timer_heartbeat(struct work_struct *w) { if (__ipc_heartbeat()) server_queue_ctrl_reset_work(); } int ksmbd_ipc_id_alloc(void) { return ksmbd_acquire_id(&ipc_ida); } void ksmbd_rpc_id_free(int handle) { ksmbd_release_id(&ipc_ida, handle); } void ksmbd_ipc_release(void) { cancel_delayed_work_sync(&ipc_timer_work); genl_unregister_family(&ksmbd_genl_family); } void ksmbd_ipc_soft_reset(void) { mutex_lock(&startup_lock); ksmbd_tools_pid = 0; cancel_delayed_work_sync(&ipc_timer_work); mutex_unlock(&startup_lock); } int ksmbd_ipc_init(void) { int ret = 0; ksmbd_nl_init_fixup(); INIT_DELAYED_WORK(&ipc_timer_work, ipc_timer_heartbeat); ret = genl_register_family(&ksmbd_genl_family); if (ret) { pr_err("Failed to register KSMBD netlink interface %d\n", ret); cancel_delayed_work_sync(&ipc_timer_work); } return ret; }
323 355 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ #include <linux/static_key.h> #include <linux/objtool.h> #include <linux/linkage.h> #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/msr-index.h> #include <asm/unwind_hints.h> #include <asm/percpu.h> /* * Call depth tracking for Intel SKL CPUs to address the RSB underflow * issue in software. * * The tracking does not use a counter. It uses uses arithmetic shift * right on call entry and logical shift left on return. * * The depth tracking variable is initialized to 0x8000.... when the call * depth is zero. The arithmetic shift right sign extends the MSB and * saturates after the 12th call. The shift count is 5 for both directions * so the tracking covers 12 nested calls. * * Call * 0: 0x8000000000000000 0x0000000000000000 * 1: 0xfc00000000000000 0xf000000000000000 * ... * 11: 0xfffffffffffffff8 0xfffffffffffffc00 * 12: 0xffffffffffffffff 0xffffffffffffffe0 * * After a return buffer fill the depth is credited 12 calls before the * next stuffing has to take place. * * There is a inaccuracy for situations like this: * * 10 calls * 5 returns * 3 calls * 4 returns * 3 calls * .... * * The shift count might cause this to be off by one in either direction, * but there is still a cushion vs. the RSB depth. The algorithm does not * claim to be perfect and it can be speculated around by the CPU, but it * is considered that it obfuscates the problem enough to make exploitation * extremely difficult. */ #define RET_DEPTH_SHIFT 5 #define RSB_RET_STUFF_LOOPS 16 #define RET_DEPTH_INIT 0x8000000000000000ULL #define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL #define RET_DEPTH_CREDIT 0xffffffffffffffffULL #ifdef CONFIG_CALL_THUNKS_DEBUG # define CALL_THUNKS_DEBUG_INC_CALLS \ incq PER_CPU_VAR(__x86_call_count); # define CALL_THUNKS_DEBUG_INC_RETS \ incq PER_CPU_VAR(__x86_ret_count); # define CALL_THUNKS_DEBUG_INC_STUFFS \ incq PER_CPU_VAR(__x86_stuffs_count); # define CALL_THUNKS_DEBUG_INC_CTXSW \ incq PER_CPU_VAR(__x86_ctxsw_count); #else # define CALL_THUNKS_DEBUG_INC_CALLS # define CALL_THUNKS_DEBUG_INC_RETS # define CALL_THUNKS_DEBUG_INC_STUFFS # define CALL_THUNKS_DEBUG_INC_CTXSW #endif #if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) #include <asm/asm-offsets.h> #define CREDIT_CALL_DEPTH \ movq $-1, PER_CPU_VAR(__x86_call_depth); #define RESET_CALL_DEPTH \ xor %eax, %eax; \ bts $63, %rax; \ movq %rax, PER_CPU_VAR(__x86_call_depth); #define RESET_CALL_DEPTH_FROM_CALL \ movb $0xfc, %al; \ shl $56, %rax; \ movq %rax, PER_CPU_VAR(__x86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #define INCREMENT_CALL_DEPTH \ sarq $5, PER_CPU_VAR(__x86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #else #define CREDIT_CALL_DEPTH #define RESET_CALL_DEPTH #define RESET_CALL_DEPTH_FROM_CALL #define INCREMENT_CALL_DEPTH #endif /* * Fill the CPU return stack buffer. * * Each entry in the RSB, if used for a speculative 'ret', contains an * infinite 'pause; lfence; jmp' loop to capture speculative execution. * * This is required in various cases for retpoline and IBRS-based * mitigations for the Spectre variant 2 vulnerability. Sometimes to * eliminate potentially bogus entries from the RSB, and sometimes * purely to ensure that it doesn't get empty, which on some CPUs would * allow predictions from other (unwanted!) sources to be used. * * We define a CPP macro such that it can be used from both .S files and * inline assembly. It's possible to do a .macro and then include that * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. */ #define RETPOLINE_THUNK_SIZE 32 #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ /* * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. */ #define __FILL_RETURN_SLOT \ ANNOTATE_INTRA_FUNCTION_CALL; \ call 772f; \ int3; \ 772: /* * Stuff the entire RSB. * * Google experimented with loop-unrolling and this turned out to be * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ #ifdef CONFIG_X86_64 #define __FILL_RETURN_BUFFER(reg, nr) \ mov $(nr/2), reg; \ 771: \ __FILL_RETURN_SLOT \ __FILL_RETURN_SLOT \ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ dec reg; \ jnz 771b; \ /* barrier for jnz misprediction */ \ lfence; \ CREDIT_CALL_DEPTH \ CALL_THUNKS_DEBUG_INC_CTXSW #else /* * i386 doesn't unconditionally have LFENCE, as such it can't * do a loop. */ #define __FILL_RETURN_BUFFER(reg, nr) \ .rept nr; \ __FILL_RETURN_SLOT; \ .endr; \ add $(BITS_PER_LONG/8) * nr, %_ASM_SP; #endif /* * Stuff a single RSB slot. * * To mitigate Post-Barrier RSB speculation, one CALL instruction must be * forced to retire before letting a RET instruction execute. * * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed * before this point. */ #define __FILL_ONE_RETURN \ __FILL_RETURN_SLOT \ add $(BITS_PER_LONG/8), %_ASM_SP; \ lfence; #ifdef __ASSEMBLER__ /* * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions * vs RETBleed validation. */ #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE /* * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should * eventually turn into its own annotation. */ .macro VALIDATE_UNRET_END #if defined(CONFIG_NOINSTR_VALIDATION) && \ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) ANNOTATE_RETPOLINE_SAFE nop #endif .endm /* * Emits a conditional CS prefix that is compatible with * -mindirect-branch-cs-prefix. */ .macro __CS_PREFIX reg:req .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 .ifc \reg,\rs .byte 0x2e .endif .endr .endm /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. * * NOTE: these do not take kCFI into account and are thus not comparable to C * indirect calls, take care when using. The target of these should be an ENDBR * instruction irrespective of kCFI. */ .macro JMP_NOSPEC reg:req #ifdef CONFIG_MITIGATION_RETPOLINE __CS_PREFIX \reg jmp __x86_indirect_thunk_\reg #else jmp *%\reg int3 #endif .endm .macro CALL_NOSPEC reg:req #ifdef CONFIG_MITIGATION_RETPOLINE __CS_PREFIX \reg call __x86_indirect_thunk_\reg #else call *%\reg #endif .endm /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2 .Lskip_rsb_\@: .endm /* * The CALL to srso_alias_untrain_ret() must be patched in directly at * the spot where untraining must be done, ie., srso_alias_untrain_ret() * must be the target of a CALL instruction instead of indirectly * jumping to a wrapper which then calls it. Therefore, this macro is * called outside of __UNTRAIN_RET below, for the time being, before the * kernel can support nested alternatives with arbitrary nesting. */ .macro CALL_UNTRAIN_RET #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif .endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * * While retbleed_untrain_ret() doesn't clobber anything but requires stack, * write_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. */ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END CALL_UNTRAIN_RET ALTERNATIVE_2 "", \ "call write_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif .endm #define UNTRAIN_RET \ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) #define UNTRAIN_RET_VM \ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) #define UNTRAIN_RET_FROM_CALL \ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) .macro CALL_DEPTH_ACCOUNT #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING ALTERNATIVE "", \ __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif .endm /* * Macro to execute VERW insns that mitigate transient data sampling * attacks such as MDS or TSA. On affected systems a microcode update * overloaded VERW insns to also clear the CPU buffers. VERW clobbers * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ .macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm #define CLEAR_CPU_BUFFERS \ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF #define VM_CLEAR_CPU_BUFFERS \ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP .endm .macro CLEAR_BRANCH_HISTORY_VMEXIT ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT .endm #else #define CLEAR_BRANCH_HISTORY #define CLEAR_BRANCH_HISTORY_VMEXIT #endif #else /* __ASSEMBLER__ */ #define ITS_THUNK_SIZE 64 typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; typedef u8 its_thunk_t[ITS_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; extern its_thunk_t __x86_indirect_its_thunk_array[]; #ifdef CONFIG_MITIGATION_RETHUNK extern void __x86_return_thunk(void); #else static inline void __x86_return_thunk(void) {} #endif #ifdef CONFIG_MITIGATION_UNRET_ENTRY extern void retbleed_return_thunk(void); #else static inline void retbleed_return_thunk(void) {} #endif extern void srso_alias_untrain_ret(void); #ifdef CONFIG_MITIGATION_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); #else static inline void srso_return_thunk(void) {} static inline void srso_alias_return_thunk(void) {} #endif #ifdef CONFIG_MITIGATION_ITS extern void its_return_thunk(void); #else static inline void its_return_thunk(void) {} #endif extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void write_ibpb(void); #ifdef CONFIG_X86_64 extern void clear_bhb_loop(void); #endif extern void (*x86_return_thunk)(void); extern void __warn_thunk(void); #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING extern void call_depth_return_thunk(void); #define CALL_DEPTH_ACCOUNT \ ALTERNATIVE("", \ __stringify(INCREMENT_CALL_DEPTH), \ X86_FEATURE_CALL_DEPTH) DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); #ifdef CONFIG_CALL_THUNKS_DEBUG DECLARE_PER_CPU(u64, __x86_call_count); DECLARE_PER_CPU(u64, __x86_ret_count); DECLARE_PER_CPU(u64, __x86_stuffs_count); DECLARE_PER_CPU(u64, __x86_ctxsw_count); #endif #else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ static inline void call_depth_return_thunk(void) {} #define CALL_DEPTH_ACCOUNT "" #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ #ifdef CONFIG_MITIGATION_RETPOLINE #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; #include <asm/GEN-for-each-reg.h> #undef GEN #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; #include <asm/GEN-for-each-reg.h> #undef GEN #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; #include <asm/GEN-for-each-reg.h> #undef GEN #ifdef CONFIG_X86_64 /* * Emits a conditional CS prefix that is compatible with * -mindirect-branch-cs-prefix. */ #define __CS_PREFIX(reg) \ ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ ".ifc \\rs," reg "\n" \ ".byte 0x2e\n" \ ".endif\n" \ ".endr\n" /* * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ #define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ "call __x86_indirect_thunk_%V[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ /* * For i386 we use the original ret-equivalent retpoline, because * otherwise we'll run out of registers. We don't care about CET * here, anyway. */ # define CALL_NOSPEC \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ "901: call 903f;\n" \ "902: pause;\n" \ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ "904: call 901b;\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif #else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, SPECTRE_V2_RETPOLINE, SPECTRE_V2_LFENCE, SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, SPECTRE_V2_USER_STRICT_PREFERRED, SPECTRE_V2_USER_PRCTL, SPECTRE_V2_USER_SECCOMP, }; /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_AUTO, SPEC_STORE_BYPASS_DISABLE, SPEC_STORE_BYPASS_PRCTL, SPEC_STORE_BYPASS_SECCOMP, }; static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)), [feature] "i" (feature) : "memory"); } DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); static inline void indirect_branch_prediction_barrier(void) { asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) : ASM_CALL_CONSTRAINT :: "rax", "rcx", "rdx", "memory"); } /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. * * (Implemented as CPP macros due to header hell.) */ #define firmware_restrict_branch_speculation_start() \ do { \ preempt_disable(); \ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); extern u16 x86_verw_sel; #include <asm/segment.h> /** * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; /* * Has to be the memory-operand variant because only that * guarantees the CPU buffer flush functionality according to * documentation. The register-operand variant does not. * Works with any segment selector, but a valid writable * data segment is the fastest variant. * * "cc" clobber is required because VERW modifies ZF. */ asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } /** * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ static __always_inline void x86_idle_clear_cpu_buffers(void) { if (static_branch_likely(&cpu_buf_idle_clear)) x86_clear_cpu_buffers(); } #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 #ifndef LLC_CONN_H #define LLC_CONN_H /* * Copyright (c) 1997 by Procom Technology, Inc. * 2001, 2002 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/timer.h> #include <net/llc_if.h> #include <net/sock.h> #include <linux/llc.h> #define LLC_EVENT 1 #define LLC_PACKET 2 #define LLC2_P_TIME 2 #define LLC2_ACK_TIME 1 #define LLC2_REJ_TIME 3 #define LLC2_BUSY_TIME 3 struct llc_timer { struct timer_list timer; unsigned long expire; /* timer expire time */ }; struct llc_sock { /* struct sock must be the first member of llc_sock */ struct sock sk; struct sockaddr_llc addr; /* address sock is bound to */ u8 state; /* state of connection */ struct llc_sap *sap; /* pointer to parent SAP */ struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ netdevice_tracker dev_tracker; u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; u8 first_pdu_Ns; u8 npta; struct llc_timer ack_timer; struct llc_timer pf_cycle_timer; struct llc_timer rej_sent_timer; struct llc_timer busy_state_timer; /* ind busy clr at remote LLC */ u8 vS; /* seq# next in-seq I-PDU tx'd*/ u8 vR; /* seq# next in-seq I-PDU rx'd*/ u32 n2; /* max nbr re-tx's for timeout*/ u32 n1; /* max nbr octets in I PDU */ u8 k; /* tx window size; max = 127 */ u8 rw; /* rx window size; max = 127 */ u8 p_flag; /* state flags */ u8 f_flag; u8 s_flag; u8 data_flag; u8 remote_busy_flag; u8 cause_flag; struct sk_buff_head pdu_unack_q; /* PUDs sent/waiting ack */ u16 link; /* network layer link number */ u8 X; /* a temporary variable */ u8 ack_pf; /* this flag indicates what is the P-bit of acknowledge */ u8 failed_data_req; /* recognize that already exist a failed llc_data_req_handler (tx_buffer_full or unacceptable state */ u8 dec_step; u8 inc_cntr; u8 dec_cntr; u8 connect_step; u8 last_nr; /* NR of last pdu received */ u32 rx_pdu_hdr; /* used for saving header of last pdu received and caused sending FRMR. Used for resending FRMR */ u32 cmsg_flags; struct hlist_node dev_hash_node; }; static inline struct llc_sock *llc_sk(const struct sock *sk) { return (struct llc_sock *)sk; } static __inline__ void llc_set_backlog_type(struct sk_buff *skb, char type) { skb->cb[sizeof(skb->cb) - 1] = type; } static __inline__ char llc_backlog_type(struct sk_buff *skb) { return skb->cb[sizeof(skb->cb) - 1]; } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void llc_sk_stop_all_timers(struct sock *sk, bool sync); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, u16 *how_many_unacked); struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, struct llc_addr *laddr, const struct net *net); void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk); void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk); u8 llc_data_accept_state(u8 state); void llc_build_offset_table(void); #endif /* LLC_CONN_H */
8 8 8 8 8 8 8 8 8 8 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar * * This file contains the /proc/irq/ handling code. */ #include <linux/irq.h> #include <linux/gfp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/mutex.h> #include "internals.h" /* * Access rules: * * procfs protects read/write of /proc/irq/N/ files against a * concurrent free of the interrupt descriptor. remove_proc_entry() * immediately prevents new read/writes to happen and waits for * already running read/write functions to complete. * * We remove the proc entries first and then delete the interrupt * descriptor from the radix tree and free it. So it is guaranteed * that irq_to_desc(N) is valid as long as the read/writes are * permitted by procfs. * * The read from /proc/interrupts is a different problem because there * is no protection. So the lookup and the access to irqdesc * information must be protected by sparse_irq_lock. */ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP enum { AFFINITY, AFFINITY_LIST, EFFECTIVE, EFFECTIVE_LIST, }; static int show_irq_affinity(int type, struct seq_file *m) { struct irq_desc *desc = irq_to_desc((long)m->private); const struct cpumask *mask; switch (type) { case AFFINITY: case AFFINITY_LIST: mask = desc->irq_common_data.affinity; if (irq_move_pending(&desc->irq_data)) mask = irq_desc_get_pending_mask(desc); break; case EFFECTIVE: case EFFECTIVE_LIST: #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK mask = irq_data_get_effective_affinity_mask(&desc->irq_data); break; #endif default: return -EINVAL; } switch (type) { case AFFINITY_LIST: case EFFECTIVE_LIST: seq_printf(m, "%*pbl\n", cpumask_pr_args(mask)); break; case AFFINITY: case EFFECTIVE: seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); break; } return 0; } static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); cpumask_var_t mask; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; scoped_guard(raw_spinlock_irq, &desc->lock) { if (desc->affinity_hint) cpumask_copy(mask, desc->affinity_hint); } seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); return 0; } int no_irq_affinity; static int irq_affinity_proc_show(struct seq_file *m, void *v) { return show_irq_affinity(AFFINITY, m); } static int irq_affinity_list_proc_show(struct seq_file *m, void *v) { return show_irq_affinity(AFFINITY_LIST, m); } #ifndef CONFIG_AUTO_IRQ_AFFINITY static inline int irq_select_affinity_usr(unsigned int irq) { /* * If the interrupt is started up already then this fails. The * interrupt is assigned to an online CPU already. There is no * point to move it around randomly. Tell user space that the * selected mask is bogus. * * If not then any change to the affinity is pointless because the * startup code invokes irq_setup_affinity() which will select * a online CPU anyway. */ return -EINVAL; } #else /* ALPHA magic affinity auto selector. Keep it for historical reasons. */ static inline int irq_select_affinity_usr(unsigned int irq) { return irq_select_affinity(irq); } #endif static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)pde_data(file_inode(file)); cpumask_var_t new_value; int err; if (!irq_can_set_affinity_usr(irq) || no_irq_affinity) return -EPERM; if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; if (type) err = cpumask_parselist_user(buffer, count, new_value); else err = cpumask_parse_user(buffer, count, new_value); if (err) goto free_cpumask; /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ if (!cpumask_intersects(new_value, cpu_online_mask)) { /* * Special case for empty set - allow the architecture code * to set default SMP affinity. */ err = irq_select_affinity_usr(irq) ? -EINVAL : count; } else { err = irq_set_affinity(irq, new_value); if (!err) err = count; } free_cpumask: free_cpumask_var(new_value); return err; } static ssize_t irq_affinity_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { return write_irq_affinity(0, file, buffer, count, pos); } static ssize_t irq_affinity_list_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { return write_irq_affinity(1, file, buffer, count, pos); } static int irq_affinity_proc_open(struct inode *inode, struct file *file) { return single_open(file, irq_affinity_proc_show, pde_data(inode)); } static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) { return single_open(file, irq_affinity_list_proc_show, pde_data(inode)); } static const struct proc_ops irq_affinity_proc_ops = { .proc_open = irq_affinity_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = single_release, .proc_write = irq_affinity_proc_write, }; static const struct proc_ops irq_affinity_list_proc_ops = { .proc_open = irq_affinity_list_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = single_release, .proc_write = irq_affinity_list_proc_write, }; #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK static int irq_effective_aff_proc_show(struct seq_file *m, void *v) { return show_irq_affinity(EFFECTIVE, m); } static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v) { return show_irq_affinity(EFFECTIVE_LIST, m); } #endif static int default_affinity_show(struct seq_file *m, void *v) { seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity)); return 0; } static ssize_t default_affinity_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { cpumask_var_t new_value; int err; if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(buffer, count, new_value); if (err) goto out; /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ if (!cpumask_intersects(new_value, cpu_online_mask)) { err = -EINVAL; goto out; } cpumask_copy(irq_default_affinity, new_value); err = count; out: free_cpumask_var(new_value); return err; } static int default_affinity_open(struct inode *inode, struct file *file) { return single_open(file, default_affinity_show, pde_data(inode)); } static const struct proc_ops default_affinity_proc_ops = { .proc_open = default_affinity_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = single_release, .proc_write = default_affinity_write, }; static int irq_node_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long) m->private); seq_printf(m, "%d\n", irq_desc_get_node(desc)); return 0; } #endif static int irq_spurious_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long) m->private); seq_printf(m, "count %u\n" "unhandled %u\n" "last_unhandled %u ms\n", desc->irq_count, desc->irqs_unhandled, jiffies_to_msecs(desc->last_unhandled)); return 0; } #define MAX_NAMELEN 128 static bool name_unique(unsigned int irq, struct irqaction *new_action) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; guard(raw_spinlock_irq)(&desc->lock); for_each_action_of_desc(desc, action) { if ((action != new_action) && action->name && !strcmp(new_action->name, action->name)) return false; } return true; } void register_handler_proc(unsigned int irq, struct irqaction *action) { char name[MAX_NAMELEN]; struct irq_desc *desc = irq_to_desc(irq); if (!desc->dir || action->dir || !action->name || !name_unique(irq, action)) return; snprintf(name, MAX_NAMELEN, "%s", action->name); /* create /proc/irq/1234/handler/ */ action->dir = proc_mkdir(name, desc->dir); } #undef MAX_NAMELEN #define MAX_NAMELEN 10 void register_irq_proc(unsigned int irq, struct irq_desc *desc) { static DEFINE_MUTEX(register_lock); void __maybe_unused *irqp = (void *)(unsigned long) irq; char name [MAX_NAMELEN]; if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) return; /* * irq directories are registered only when a handler is * added, not when the descriptor is created, so multiple * tasks might try to register at the same time. */ guard(mutex)(&register_lock); if (desc->dir) return; /* create /proc/irq/1234 */ sprintf(name, "%u", irq); desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) return; #ifdef CONFIG_SMP umode_t umode = S_IRUGO; if (irq_can_set_affinity_usr(desc->irq_data.irq)) umode |= S_IWUSR; /* create /proc/irq/<irq>/smp_affinity */ proc_create_data("smp_affinity", umode, desc->dir, &irq_affinity_proc_ops, irqp); /* create /proc/irq/<irq>/affinity_hint */ proc_create_single_data("affinity_hint", 0444, desc->dir, irq_affinity_hint_proc_show, irqp); /* create /proc/irq/<irq>/smp_affinity_list */ proc_create_data("smp_affinity_list", umode, desc->dir, &irq_affinity_list_proc_ops, irqp); proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, irqp); # ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK proc_create_single_data("effective_affinity", 0444, desc->dir, irq_effective_aff_proc_show, irqp); proc_create_single_data("effective_affinity_list", 0444, desc->dir, irq_effective_aff_list_proc_show, irqp); # endif #endif proc_create_single_data("spurious", 0444, desc->dir, irq_spurious_proc_show, (void *)(long)irq); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { char name [MAX_NAMELEN]; if (!root_irq_dir || !desc->dir) return; #ifdef CONFIG_SMP remove_proc_entry("smp_affinity", desc->dir); remove_proc_entry("affinity_hint", desc->dir); remove_proc_entry("smp_affinity_list", desc->dir); remove_proc_entry("node", desc->dir); # ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK remove_proc_entry("effective_affinity", desc->dir); remove_proc_entry("effective_affinity_list", desc->dir); # endif #endif remove_proc_entry("spurious", desc->dir); sprintf(name, "%u", irq); remove_proc_entry(name, root_irq_dir); } #undef MAX_NAMELEN void unregister_handler_proc(unsigned int irq, struct irqaction *action) { proc_remove(action->dir); } static void register_default_affinity_proc(void) { #ifdef CONFIG_SMP proc_create("irq/default_smp_affinity", 0644, NULL, &default_affinity_proc_ops); #endif } void init_irq_proc(void) { unsigned int irq; struct irq_desc *desc; /* create /proc/irq */ root_irq_dir = proc_mkdir("irq", NULL); if (!root_irq_dir) return; register_default_affinity_proc(); /* * Create entries for all existing IRQs. */ for_each_irq_desc(irq, desc) register_irq_proc(irq, desc); } #ifdef CONFIG_GENERIC_IRQ_SHOW int __weak arch_show_interrupts(struct seq_file *p, int prec) { return 0; } #ifndef ACTUAL_NR_IRQS # define ACTUAL_NR_IRQS irq_get_nr_irqs() #endif int show_interrupts(struct seq_file *p, void *v) { const unsigned int nr_irqs = irq_get_nr_irqs(); static int prec; int i = *(loff_t *) v, j; struct irqaction *action; struct irq_desc *desc; if (i > ACTUAL_NR_IRQS) return 0; if (i == ACTUAL_NR_IRQS) return arch_show_interrupts(p, prec); /* print header and calculate the width of the first column */ if (i == 0) { for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) j *= 10; seq_printf(p, "%*s", prec + 8, ""); for_each_online_cpu(j) seq_printf(p, "CPU%-8d", j); seq_putc(p, '\n'); } guard(rcu)(); desc = irq_to_desc(i); if (!desc || irq_settings_is_hidden(desc)) return 0; if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs) return 0; seq_printf(p, "%*d:", prec, i); for_each_online_cpu(j) { unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0; seq_put_decimal_ull_width(p, " ", cnt, 10); } seq_putc(p, ' '); guard(raw_spinlock_irq)(&desc->lock); if (desc->irq_data.chip) { if (desc->irq_data.chip->irq_print_chip) desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); else if (desc->irq_data.chip->name) seq_printf(p, "%8s", desc->irq_data.chip->name); else seq_printf(p, "%8s", "-"); } else { seq_printf(p, "%8s", "None"); } if (desc->irq_data.domain) seq_printf(p, " %*lu", prec, desc->irq_data.hwirq); else seq_printf(p, " %*s", prec, ""); #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); #endif if (desc->name) seq_printf(p, "-%-8s", desc->name); action = desc->action; if (action) { seq_printf(p, " %s", action->name); while ((action = action->next) != NULL) seq_printf(p, ", %s", action->name); } seq_putc(p, '\n'); return 0; } #endif
5 5 5 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 // SPDX-License-Identifier: GPL-2.0-or-later /* * libata-eh.c - libata error handling * * Copyright 2006 Tejun Heo <htejun@gmail.com> * * libata documentation is available via 'make {ps|pdf}docs', * as Documentation/driver-api/libata.rst * * Hardware documentation available from http://www.t13.org/ and * http://www.sata-io.org/ */ #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/export.h> #include <linux/pci.h> #include <scsi/scsi.h> #include <scsi/scsi_host.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_device.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include "../scsi/scsi_transport_api.h" #include <linux/libata.h> #include <trace/events/libata.h> #include "libata.h" enum { /* speed down verdicts */ ATA_EH_SPDN_NCQ_OFF = (1 << 0), ATA_EH_SPDN_SPEED_DOWN = (1 << 1), ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), /* error flags */ ATA_EFLAG_IS_IO = (1 << 0), ATA_EFLAG_DUBIOUS_XFER = (1 << 1), ATA_EFLAG_OLD_ER = (1 << 31), /* error categories */ ATA_ECAT_NONE = 0, ATA_ECAT_ATA_BUS = 1, ATA_ECAT_TOUT_HSM = 2, ATA_ECAT_UNK_DEV = 3, ATA_ECAT_DUBIOUS_NONE = 4, ATA_ECAT_DUBIOUS_ATA_BUS = 5, ATA_ECAT_DUBIOUS_TOUT_HSM = 6, ATA_ECAT_DUBIOUS_UNK_DEV = 7, ATA_ECAT_NR = 8, ATA_EH_CMD_DFL_TIMEOUT = 5000, /* always put at least this amount of time between resets */ ATA_EH_RESET_COOL_DOWN = 5000, /* Waiting in ->prereset can never be reliable. It's * sometimes nice to wait there but it can't be depended upon; * otherwise, we wouldn't be resetting. Just give it enough * time for most drives to spin up. */ ATA_EH_PRERESET_TIMEOUT = 10000, ATA_EH_FASTDRAIN_INTERVAL = 3000, ATA_EH_UA_TRIES = 5, /* probe speed down parameters, see ata_eh_schedule_probe() */ ATA_EH_PROBE_TRIAL_INTERVAL = 60000, /* 1 min */ ATA_EH_PROBE_TRIALS = 2, }; /* The following table determines how we sequence resets. Each entry * represents timeout for that try. The first try can be soft or * hardreset. All others are hardreset if available. In most cases * the first reset w/ 10sec timeout should succeed. Following entries * are mostly for error handling, hotplug and those outlier devices that * take an exceptionally long time to recover from reset. */ static const unsigned int ata_eh_reset_timeouts[] = { 10000, /* most drives spin up by 10sec */ 10000, /* > 99% working drives spin up before 20sec */ 35000, /* give > 30 secs of idleness for outlier devices */ 5000, /* and sweet one last chance */ UINT_MAX, /* > 1 min has elapsed, give up */ }; static const unsigned int ata_eh_identify_timeouts[] = { 5000, /* covers > 99% of successes and not too boring on failures */ 10000, /* combined time till here is enough even for media access */ 30000, /* for true idiots */ UINT_MAX, }; static const unsigned int ata_eh_revalidate_timeouts[] = { 15000, /* Some drives are slow to read log pages when waking-up */ 15000, /* combined time till here is enough even for media access */ UINT_MAX, }; static const unsigned int ata_eh_flush_timeouts[] = { 15000, /* be generous with flush */ 15000, /* ditto */ 30000, /* and even more generous */ UINT_MAX, }; static const unsigned int ata_eh_other_timeouts[] = { 5000, /* same rationale as identify timeout */ 10000, /* ditto */ /* but no merciful 30sec for other commands, it just isn't worth it */ UINT_MAX, }; struct ata_eh_cmd_timeout_ent { const u8 *commands; const unsigned int *timeouts; }; /* The following table determines timeouts to use for EH internal * commands. Each table entry is a command class and matches the * commands the entry applies to and the timeout table to use. * * On the retry after a command timed out, the next timeout value from * the table is used. If the table doesn't contain further entries, * the last value is used. * * ehc->cmd_timeout_idx keeps track of which timeout to use per * command class, so if SET_FEATURES times out on the first try, the * next try will use the second timeout value only for that class. */ #define CMDS(cmds...) (const u8 []){ cmds, 0 } static const struct ata_eh_cmd_timeout_ent ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), .timeouts = ata_eh_identify_timeouts, }, { .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT), .timeouts = ata_eh_revalidate_timeouts, }, { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_SET_FEATURES), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), .timeouts = ata_eh_flush_timeouts }, { .commands = CMDS(ATA_CMD_VERIFY), .timeouts = ata_eh_reset_timeouts }, }; #undef CMDS static void __ata_port_freeze(struct ata_port *ap); #ifdef CONFIG_PM static void ata_eh_handle_port_suspend(struct ata_port *ap); static void ata_eh_handle_port_resume(struct ata_port *ap); #else /* CONFIG_PM */ static void ata_eh_handle_port_suspend(struct ata_port *ap) { } static void ata_eh_handle_port_resume(struct ata_port *ap) { } #endif /* CONFIG_PM */ static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, va_list args) { ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, ATA_EH_DESC_LEN - ehi->desc_len, fmt, args); } /** * __ata_ehi_push_desc - push error description without adding separator * @ehi: target EHI * @fmt: printf format string * * Format string according to @fmt and append it to @ehi->desc. * * LOCKING: * spin_lock_irqsave(host lock) */ void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) { va_list args; va_start(args, fmt); __ata_ehi_pushv_desc(ehi, fmt, args); va_end(args); } EXPORT_SYMBOL_GPL(__ata_ehi_push_desc); /** * ata_ehi_push_desc - push error description with separator * @ehi: target EHI * @fmt: printf format string * * Format string according to @fmt and append it to @ehi->desc. * If @ehi->desc is not empty, ", " is added in-between. * * LOCKING: * spin_lock_irqsave(host lock) */ void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) { va_list args; if (ehi->desc_len) __ata_ehi_push_desc(ehi, ", "); va_start(args, fmt); __ata_ehi_pushv_desc(ehi, fmt, args); va_end(args); } EXPORT_SYMBOL_GPL(ata_ehi_push_desc); /** * ata_ehi_clear_desc - clean error description * @ehi: target EHI * * Clear @ehi->desc. * * LOCKING: * spin_lock_irqsave(host lock) */ void ata_ehi_clear_desc(struct ata_eh_info *ehi) { ehi->desc[0] = '\0'; ehi->desc_len = 0; } EXPORT_SYMBOL_GPL(ata_ehi_clear_desc); /** * ata_port_desc - append port description * @ap: target ATA port * @fmt: printf format string * * Format string according to @fmt and append it to port * description. If port description is not empty, " " is added * in-between. This function is to be used while initializing * ata_host. The description is printed on host registration. * * LOCKING: * None. */ void ata_port_desc(struct ata_port *ap, const char *fmt, ...) { va_list args; WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); if (ap->link.eh_info.desc_len) __ata_ehi_push_desc(&ap->link.eh_info, " "); va_start(args, fmt); __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); va_end(args); } EXPORT_SYMBOL_GPL(ata_port_desc); #ifdef CONFIG_PCI /** * ata_port_pbar_desc - append PCI BAR description * @ap: target ATA port * @bar: target PCI BAR * @offset: offset into PCI BAR * @name: name of the area * * If @offset is negative, this function formats a string which * contains the name, address, size and type of the BAR and * appends it to the port description. If @offset is zero or * positive, only name and offsetted address is appended. * * LOCKING: * None. */ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name) { struct pci_dev *pdev = to_pci_dev(ap->host->dev); char *type = ""; unsigned long long start, len; if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) type = "m"; else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) type = "i"; start = (unsigned long long)pci_resource_start(pdev, bar); len = (unsigned long long)pci_resource_len(pdev, bar); if (offset < 0) ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); else ata_port_desc(ap, "%s 0x%llx", name, start + (unsigned long long)offset); } EXPORT_SYMBOL_GPL(ata_port_pbar_desc); #endif /* CONFIG_PCI */ static int ata_lookup_timeout_table(u8 cmd) { int i; for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { const u8 *cur; for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) if (*cur == cmd) return i; } return -1; } /** * ata_internal_cmd_timeout - determine timeout for an internal command * @dev: target device * @cmd: internal command to be issued * * Determine timeout for internal command @cmd for @dev. * * LOCKING: * EH context. * * RETURNS: * Determined timeout. */ unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) { struct ata_eh_context *ehc = &dev->link->eh_context; int ent = ata_lookup_timeout_table(cmd); int idx; if (ent < 0) return ATA_EH_CMD_DFL_TIMEOUT; idx = ehc->cmd_timeout_idx[dev->devno][ent]; return ata_eh_cmd_timeout_table[ent].timeouts[idx]; } /** * ata_internal_cmd_timed_out - notification for internal command timeout * @dev: target device * @cmd: internal command which timed out * * Notify EH that internal command @cmd for @dev timed out. This * function should be called only for commands whose timeouts are * determined using ata_internal_cmd_timeout(). * * LOCKING: * EH context. */ void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) { struct ata_eh_context *ehc = &dev->link->eh_context; int ent = ata_lookup_timeout_table(cmd); int idx; if (ent < 0) return; idx = ehc->cmd_timeout_idx[dev->devno][ent]; if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != UINT_MAX) ehc->cmd_timeout_idx[dev->devno][ent]++; } static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, unsigned int err_mask) { struct ata_ering_entry *ent; WARN_ON(!err_mask); ering->cursor++; ering->cursor %= ATA_ERING_SIZE; ent = &ering->ring[ering->cursor]; ent->eflags = eflags; ent->err_mask = err_mask; ent->timestamp = get_jiffies_64(); } static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) { struct ata_ering_entry *ent = &ering->ring[ering->cursor]; if (ent->err_mask) return ent; return NULL; } int ata_ering_map(struct ata_ering *ering, int (*map_fn)(struct ata_ering_entry *, void *), void *arg) { int idx, rc = 0; struct ata_ering_entry *ent; idx = ering->cursor; do { ent = &ering->ring[idx]; if (!ent->err_mask) break; rc = map_fn(ent, arg); if (rc) break; idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; } while (idx != ering->cursor); return rc; } static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg) { ent->eflags |= ATA_EFLAG_OLD_ER; return 0; } static void ata_ering_clear(struct ata_ering *ering) { ata_ering_map(ering, ata_ering_clear_cb, NULL); } static unsigned int ata_eh_dev_action(struct ata_device *dev) { struct ata_eh_context *ehc = &dev->link->eh_context; return ehc->i.action | ehc->i.dev_action[dev->devno]; } static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, struct ata_eh_info *ehi, unsigned int action) { struct ata_device *tdev; if (!dev) { ehi->action &= ~action; ata_for_each_dev(tdev, link, ALL) ehi->dev_action[tdev->devno] &= ~action; } else { /* doesn't make sense for port-wide EH actions */ WARN_ON(!(action & ATA_EH_PERDEV_MASK)); /* break ehi->action into ehi->dev_action */ if (ehi->action & action) { ata_for_each_dev(tdev, link, ALL) ehi->dev_action[tdev->devno] |= ehi->action & action; ehi->action &= ~action; } /* turn off the specified per-dev action */ ehi->dev_action[dev->devno] &= ~action; } } /** * ata_eh_acquire - acquire EH ownership * @ap: ATA port to acquire EH ownership for * * Acquire EH ownership for @ap. This is the basic exclusion * mechanism for ports sharing a host. Only one port hanging off * the same host can claim the ownership of EH. * * LOCKING: * EH context. */ void ata_eh_acquire(struct ata_port *ap) { mutex_lock(&ap->host->eh_mutex); WARN_ON_ONCE(ap->host->eh_owner); ap->host->eh_owner = current; } /** * ata_eh_release - release EH ownership * @ap: ATA port to release EH ownership for * * Release EH ownership for @ap if the caller. The caller must * have acquired EH ownership using ata_eh_acquire() previously. * * LOCKING: * EH context. */ void ata_eh_release(struct ata_port *ap) { WARN_ON_ONCE(ap->host->eh_owner != current); ap->host->eh_owner = NULL; mutex_unlock(&ap->host->eh_mutex); } static void ata_eh_dev_disable(struct ata_device *dev) { ata_acpi_on_disable(dev); ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); dev->class++; /* * From now till the next successful probe, ering is used to * track probe failures. Clear accumulated device error info. */ ata_ering_clear(&dev->ering); ata_dev_free_resources(dev); } static void ata_eh_unload(struct ata_port *ap) { struct ata_link *link; struct ata_device *dev; unsigned long flags; /* * Unless we are restarting, transition all enabled devices to * standby power mode. */ if (system_state != SYSTEM_RESTART) { ata_for_each_link(link, ap, PMP_FIRST) { ata_for_each_dev(dev, link, ENABLED) ata_dev_power_set_standby(dev); } } /* * Restore SControl IPM and SPD for the next driver and * disable attached devices. */ ata_for_each_link(link, ap, PMP_FIRST) { sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); ata_for_each_dev(dev, link, ENABLED) ata_eh_dev_disable(dev); } /* freeze and set UNLOADED */ spin_lock_irqsave(ap->lock, flags); ata_port_freeze(ap); /* won't be thawed */ ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */ ap->pflags |= ATA_PFLAG_UNLOADED; spin_unlock_irqrestore(ap->lock, flags); } /** * ata_scsi_error - SCSI layer error handler callback * @host: SCSI host on which error occurred * * Handles SCSI-layer-thrown error events. * * LOCKING: * Inherited from SCSI layer (none, can sleep) * * RETURNS: * Zero. */ void ata_scsi_error(struct Scsi_Host *host) { struct ata_port *ap = ata_shost_to_port(host); unsigned long flags; LIST_HEAD(eh_work_q); spin_lock_irqsave(host->host_lock, flags); list_splice_init(&host->eh_cmd_q, &eh_work_q); spin_unlock_irqrestore(host->host_lock, flags); ata_scsi_cmd_error_handler(host, ap, &eh_work_q); /* If we timed raced normal completion and there is nothing to recover nr_timedout == 0 why exactly are we doing error recovery ? */ ata_scsi_port_error_handler(host, ap); /* finish or retry handled scmd's and clean up */ WARN_ON(!list_empty(&eh_work_q)); } /** * ata_scsi_cmd_error_handler - error callback for a list of commands * @host: scsi host containing the port * @ap: ATA port within the host * @eh_work_q: list of commands to process * * process the given list of commands and return those finished to the * ap->eh_done_q. This function is the first part of the libata error * handler which processes a given list of failed commands. */ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_work_q) { int i; unsigned long flags; struct scsi_cmnd *scmd, *tmp; int nr_timedout = 0; /* make sure sff pio task is not running */ ata_sff_flush_pio_task(ap); /* synchronize with host lock and sort out timeouts */ /* * For EH, all qcs are finished in one of three ways - * normal completion, error completion, and SCSI timeout. * Both completions can race against SCSI timeout. When normal * completion wins, the qc never reaches EH. When error * completion wins, the qc has ATA_QCFLAG_EH set. * * When SCSI timeout wins, things are a bit more complex. * Normal or error completion can occur after the timeout but * before this point. In such cases, both types of * completions are honored. A scmd is determined to have * timed out iff its associated qc is active and not failed. */ spin_lock_irqsave(ap->lock, flags); /* * This must occur under the ap->lock as we don't want * a polled recovery to race the real interrupt handler * * The lost_interrupt handler checks for any completed but * non-notified command and completes much like an IRQ handler. * * We then fall into the error recovery code which will treat * this as if normal completion won the race */ if (ap->ops->lost_interrupt) ap->ops->lost_interrupt(ap); list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) { struct ata_queued_cmd *qc; /* * If the scmd was added to EH, via ata_qc_schedule_eh() -> * scsi_timeout() -> scsi_eh_scmd_add(), scsi_timeout() will * have set DID_TIME_OUT (since libata does not have an abort * handler). Thus, to clear DID_TIME_OUT, clear the host byte. */ set_host_byte(scmd, DID_OK); ata_qc_for_each_raw(ap, qc, i) { if (qc->flags & ATA_QCFLAG_ACTIVE && qc->scsicmd == scmd) break; } if (i < ATA_MAX_QUEUE) { /* the scmd has an associated qc */ if (!(qc->flags & ATA_QCFLAG_EH)) { /* which hasn't failed yet, timeout */ set_host_byte(scmd, DID_TIME_OUT); qc->err_mask |= AC_ERR_TIMEOUT; qc->flags |= ATA_QCFLAG_EH; nr_timedout++; } } else { /* Normal completion occurred after * SCSI timeout but before this point. * Successfully complete it. */ scmd->retries = scmd->allowed; scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } } /* * If we have timed out qcs. They belong to EH from * this point but the state of the controller is * unknown. Freeze the port to make sure the IRQ * handler doesn't diddle with those qcs. This must * be done atomically w.r.t. setting ATA_QCFLAG_EH. */ if (nr_timedout) __ata_port_freeze(ap); /* initialize eh_tries */ ap->eh_tries = ATA_EH_MAX_TRIES; spin_unlock_irqrestore(ap->lock, flags); } EXPORT_SYMBOL(ata_scsi_cmd_error_handler); /** * ata_scsi_port_error_handler - recover the port after the commands * @host: SCSI host containing the port * @ap: the ATA port * * Handle the recovery of the port @ap after all the commands * have been recovered. */ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) { unsigned long flags; struct ata_link *link; /* acquire EH ownership */ ata_eh_acquire(ap); repeat: /* kill fast drain timer */ timer_delete_sync(&ap->fastdrain_timer); /* process port resume request */ ata_eh_handle_port_resume(ap); /* fetch & clear EH info */ spin_lock_irqsave(ap->lock, flags); ata_for_each_link(link, ap, HOST_FIRST) { struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev; memset(&link->eh_context, 0, sizeof(link->eh_context)); link->eh_context.i = link->eh_info; memset(&link->eh_info, 0, sizeof(link->eh_info)); ata_for_each_dev(dev, link, ENABLED) { int devno = dev->devno; ehc->saved_xfer_mode[devno] = dev->xfer_mode; if (ata_ncq_enabled(dev)) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ if (ap->pflags & ATA_PFLAG_RESUMING) { dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; } } } ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; ap->pflags &= ~ATA_PFLAG_EH_PENDING; ap->excl_link = NULL; /* don't maintain exclusion over EH */ spin_unlock_irqrestore(ap->lock, flags); /* invoke EH, skip if unloading or suspended */ if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) ap->ops->error_handler(ap); else { /* if unloading, commence suicide */ if ((ap->pflags & ATA_PFLAG_UNLOADING) && !(ap->pflags & ATA_PFLAG_UNLOADED)) ata_eh_unload(ap); ata_eh_finish(ap); } /* process port suspend request */ ata_eh_handle_port_suspend(ap); /* * Exception might have happened after ->error_handler recovered the * port but before this point. Repeat EH in such case. */ spin_lock_irqsave(ap->lock, flags); if (ap->pflags & ATA_PFLAG_EH_PENDING) { if (--ap->eh_tries) { spin_unlock_irqrestore(ap->lock, flags); goto repeat; } ata_port_err(ap, "EH pending after %d tries, giving up\n", ATA_EH_MAX_TRIES); ap->pflags &= ~ATA_PFLAG_EH_PENDING; } /* this run is complete, make sure EH info is clear */ ata_for_each_link(link, ap, HOST_FIRST) memset(&link->eh_info, 0, sizeof(link->eh_info)); /* * end eh (clear host_eh_scheduled) while holding ap->lock such that if * exception occurs after this point but before EH completion, SCSI * midlayer will re-initiate EH. */ ap->ops->end_eh(ap); spin_unlock_irqrestore(ap->lock, flags); ata_eh_release(ap); scsi_eh_flush_done_q(&ap->eh_done_q); /* clean up */ spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_RESUMING; if (ap->pflags & ATA_PFLAG_LOADING) ap->pflags &= ~ATA_PFLAG_LOADING; else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) && !(ap->flags & ATA_FLAG_SAS_HOST)) schedule_delayed_work(&ap->hotplug_task, 0); if (ap->pflags & ATA_PFLAG_RECOVERED) ata_port_info(ap, "EH complete\n"); ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); /* tell wait_eh that we're done */ ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; wake_up_all(&ap->eh_wait_q); spin_unlock_irqrestore(ap->lock, flags); } EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler); /** * ata_port_wait_eh - Wait for the currently pending EH to complete * @ap: Port to wait EH for * * Wait until the currently pending EH is complete. * * LOCKING: * Kernel thread context (may sleep). */ void ata_port_wait_eh(struct ata_port *ap) { unsigned long flags; DEFINE_WAIT(wait); retry: spin_lock_irqsave(ap->lock, flags); while (ata_port_eh_scheduled(ap)) { prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(ap->lock, flags); schedule(); spin_lock_irqsave(ap->lock, flags); } finish_wait(&ap->eh_wait_q, &wait); spin_unlock_irqrestore(ap->lock, flags); /* make sure SCSI EH is complete */ if (scsi_host_in_recovery(ap->scsi_host)) { ata_msleep(ap, 10); goto retry; } } EXPORT_SYMBOL_GPL(ata_port_wait_eh); static unsigned int ata_eh_nr_in_flight(struct ata_port *ap) { struct ata_queued_cmd *qc; unsigned int tag; unsigned int nr = 0; /* count only non-internal commands */ ata_qc_for_each(ap, qc, tag) { if (qc) nr++; } return nr; } void ata_eh_fastdrain_timerfn(struct timer_list *t) { struct ata_port *ap = timer_container_of(ap, t, fastdrain_timer); unsigned long flags; unsigned int cnt; spin_lock_irqsave(ap->lock, flags); cnt = ata_eh_nr_in_flight(ap); /* are we done? */ if (!cnt) goto out_unlock; if (cnt == ap->fastdrain_cnt) { struct ata_queued_cmd *qc; unsigned int tag; /* No progress during the last interval, tag all * in-flight qcs as timed out and freeze the port. */ ata_qc_for_each(ap, qc, tag) { if (qc) qc->err_mask |= AC_ERR_TIMEOUT; } ata_port_freeze(ap); } else { /* some qcs have finished, give it another chance */ ap->fastdrain_cnt = cnt; ap->fastdrain_timer.expires = ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); add_timer(&ap->fastdrain_timer); } out_unlock: spin_unlock_irqrestore(ap->lock, flags); } /** * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain * @ap: target ATA port * @fastdrain: activate fast drain * * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain * is non-zero and EH wasn't pending before. Fast drain ensures * that EH kicks in in timely manner. * * LOCKING: * spin_lock_irqsave(host lock) */ static void ata_eh_set_pending(struct ata_port *ap, bool fastdrain) { unsigned int cnt; /* already scheduled? */ if (ap->pflags & ATA_PFLAG_EH_PENDING) return; ap->pflags |= ATA_PFLAG_EH_PENDING; if (!fastdrain) return; /* do we have in-flight qcs? */ cnt = ata_eh_nr_in_flight(ap); if (!cnt) return; /* activate fast drain */ ap->fastdrain_cnt = cnt; ap->fastdrain_timer.expires = ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); add_timer(&ap->fastdrain_timer); } /** * ata_qc_schedule_eh - schedule qc for error handling * @qc: command to schedule error handling for * * Schedule error handling for @qc. EH will kick in as soon as * other commands are drained. * * LOCKING: * spin_lock_irqsave(host lock) */ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; qc->flags |= ATA_QCFLAG_EH; ata_eh_set_pending(ap, true); /* The following will fail if timeout has already expired. * ata_scsi_error() takes care of such scmds on EH entry. * Note that ATA_QCFLAG_EH is unconditionally set after * this function completes. */ blk_abort_request(scsi_cmd_to_rq(qc->scsicmd)); } /** * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine * @ap: ATA port to schedule EH for * * LOCKING: inherited from ata_port_schedule_eh * spin_lock_irqsave(host lock) */ void ata_std_sched_eh(struct ata_port *ap) { if (ap->pflags & ATA_PFLAG_INITIALIZING) return; ata_eh_set_pending(ap, true); scsi_schedule_eh(ap->scsi_host); trace_ata_std_sched_eh(ap); } EXPORT_SYMBOL_GPL(ata_std_sched_eh); /** * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine * @ap: ATA port to end EH for * * In the libata object model there is a 1:1 mapping of ata_port to * shost, so host fields can be directly manipulated under ap->lock, in * the libsas case we need to hold a lock at the ha->level to coordinate * these events. * * LOCKING: * spin_lock_irqsave(host lock) */ void ata_std_end_eh(struct ata_port *ap) { struct Scsi_Host *host = ap->scsi_host; host->host_eh_scheduled = 0; } EXPORT_SYMBOL(ata_std_end_eh); /** * ata_port_schedule_eh - schedule error handling without a qc * @ap: ATA port to schedule EH for * * Schedule error handling for @ap. EH will kick in as soon as * all commands are drained. * * LOCKING: * spin_lock_irqsave(host lock) */ void ata_port_schedule_eh(struct ata_port *ap) { /* see: ata_std_sched_eh, unless you know better */ ap->ops->sched_eh(ap); } EXPORT_SYMBOL_GPL(ata_port_schedule_eh); static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) { struct ata_queued_cmd *qc; int tag, nr_aborted = 0; /* we're gonna abort all commands, no need for fast drain */ ata_eh_set_pending(ap, false); /* include internal tag in iteration */ ata_qc_for_each_with_internal(ap, qc, tag) { if (qc && (!link || qc->dev->link == link)) { qc->flags |= ATA_QCFLAG_EH; ata_qc_complete(qc); nr_aborted++; } } if (!nr_aborted) ata_port_schedule_eh(ap); return nr_aborted; } /** * ata_link_abort - abort all qc's on the link * @link: ATA link to abort qc's for * * Abort all active qc's active on @link and schedule EH. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Number of aborted qc's. */ int ata_link_abort(struct ata_link *link) { return ata_do_link_abort(link->ap, link); } EXPORT_SYMBOL_GPL(ata_link_abort); /** * ata_port_abort - abort all qc's on the port * @ap: ATA port to abort qc's for * * Abort all active qc's of @ap and schedule EH. * * LOCKING: * spin_lock_irqsave(host_set lock) * * RETURNS: * Number of aborted qc's. */ int ata_port_abort(struct ata_port *ap) { return ata_do_link_abort(ap, NULL); } EXPORT_SYMBOL_GPL(ata_port_abort); /** * __ata_port_freeze - freeze port * @ap: ATA port to freeze * * This function is called when HSM violation or some other * condition disrupts normal operation of the port. Frozen port * is not allowed to perform any operation until the port is * thawed, which usually follows a successful reset. * * ap->ops->freeze() callback can be used for freezing the port * hardware-wise (e.g. mask interrupt and stop DMA engine). If a * port cannot be frozen hardware-wise, the interrupt handler * must ack and clear interrupts unconditionally while the port * is frozen. * * LOCKING: * spin_lock_irqsave(host lock) */ static void __ata_port_freeze(struct ata_port *ap) { if (ap->ops->freeze) ap->ops->freeze(ap); ap->pflags |= ATA_PFLAG_FROZEN; trace_ata_port_freeze(ap); } /** * ata_port_freeze - abort & freeze port * @ap: ATA port to freeze * * Abort and freeze @ap. The freeze operation must be called * first, because some hardware requires special operations * before the taskfile registers are accessible. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Number of aborted commands. */ int ata_port_freeze(struct ata_port *ap) { __ata_port_freeze(ap); return ata_port_abort(ap); } EXPORT_SYMBOL_GPL(ata_port_freeze); /** * ata_eh_freeze_port - EH helper to freeze port * @ap: ATA port to freeze * * Freeze @ap. * * LOCKING: * None. */ void ata_eh_freeze_port(struct ata_port *ap) { unsigned long flags; spin_lock_irqsave(ap->lock, flags); __ata_port_freeze(ap); spin_unlock_irqrestore(ap->lock, flags); } EXPORT_SYMBOL_GPL(ata_eh_freeze_port); /** * ata_eh_thaw_port - EH helper to thaw port * @ap: ATA port to thaw * * Thaw frozen port @ap. * * LOCKING: * None. */ void ata_eh_thaw_port(struct ata_port *ap) { unsigned long flags; spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_FROZEN; if (ap->ops->thaw) ap->ops->thaw(ap); spin_unlock_irqrestore(ap->lock, flags); trace_ata_port_thaw(ap); } static void ata_eh_scsidone(struct scsi_cmnd *scmd) { /* nada */ } static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct scsi_cmnd *scmd = qc->scsicmd; unsigned long flags; spin_lock_irqsave(ap->lock, flags); qc->scsidone = ata_eh_scsidone; __ata_qc_complete(qc); WARN_ON(ata_tag_valid(qc->tag)); spin_unlock_irqrestore(ap->lock, flags); scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } /** * ata_eh_qc_complete - Complete an active ATA command from EH * @qc: Command to complete * * Indicate to the mid and upper layers that an ATA command has * completed. To be used from EH. */ void ata_eh_qc_complete(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; scmd->retries = scmd->allowed; __ata_eh_qc_complete(qc); } /** * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH * @qc: Command to retry * * Indicate to the mid and upper layers that an ATA command * should be retried. To be used from EH. * * SCSI midlayer limits the number of retries to scmd->allowed. * scmd->allowed is incremented for commands which get retried * due to unrelated failures (qc->err_mask is zero). */ void ata_eh_qc_retry(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; if (!qc->err_mask) scmd->allowed++; __ata_eh_qc_complete(qc); } /** * ata_dev_disable - disable ATA device * @dev: ATA device to disable * * Disable @dev. * * Locking: * EH context. */ void ata_dev_disable(struct ata_device *dev) { if (!ata_dev_enabled(dev)) return; ata_dev_warn(dev, "disable device\n"); ata_eh_dev_disable(dev); } EXPORT_SYMBOL_GPL(ata_dev_disable); /** * ata_eh_detach_dev - detach ATA device * @dev: ATA device to detach * * Detach @dev. * * LOCKING: * None. */ void ata_eh_detach_dev(struct ata_device *dev) { struct ata_link *link = dev->link; struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; unsigned long flags; /* * If the device is still enabled, transition it to standby power mode * (i.e. spin down HDDs) and disable it. */ if (ata_dev_enabled(dev)) { ata_dev_power_set_standby(dev); ata_eh_dev_disable(dev); } spin_lock_irqsave(ap->lock, flags); dev->flags &= ~ATA_DFLAG_DETACH; if (ata_scsi_offline_dev(dev)) { dev->flags |= ATA_DFLAG_DETACHED; ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; } /* clear per-dev EH info */ ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); ehc->saved_xfer_mode[dev->devno] = 0; ehc->saved_ncq_enabled &= ~(1 << dev->devno); spin_unlock_irqrestore(ap->lock, flags); } /** * ata_eh_about_to_do - about to perform eh_action * @link: target ATA link * @dev: target ATA dev for per-dev action (can be NULL) * @action: action about to be performed * * Called just before performing EH actions to clear related bits * in @link->eh_info such that eh actions are not unnecessarily * repeated. * * LOCKING: * None. */ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, unsigned int action) { struct ata_port *ap = link->ap; struct ata_eh_info *ehi = &link->eh_info; struct ata_eh_context *ehc = &link->eh_context; unsigned long flags; trace_ata_eh_about_to_do(link, dev ? dev->devno : 0, action); spin_lock_irqsave(ap->lock, flags); ata_eh_clear_action(link, dev, ehi, action); /* About to take EH action, set RECOVERED. Ignore actions on * slave links as master will do them again. */ if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link) ap->pflags |= ATA_PFLAG_RECOVERED; spin_unlock_irqrestore(ap->lock, flags); } /** * ata_eh_done - EH action complete * @link: ATA link for which EH actions are complete * @dev: target ATA dev for per-dev action (can be NULL) * @action: action just completed * * Called right after performing EH actions to clear related bits * in @link->eh_context. * * LOCKING: * None. */ void ata_eh_done(struct ata_link *link, struct ata_device *dev, unsigned int action) { struct ata_eh_context *ehc = &link->eh_context; trace_ata_eh_done(link, dev ? dev->devno : 0, action); ata_eh_clear_action(link, dev, &ehc->i, action); } /** * ata_err_string - convert err_mask to descriptive string * @err_mask: error mask to convert to string * * Convert @err_mask to descriptive string. Errors are * prioritized according to severity and only the most severe * error is reported. * * LOCKING: * None. * * RETURNS: * Descriptive string for @err_mask */ static const char *ata_err_string(unsigned int err_mask) { if (err_mask & AC_ERR_HOST_BUS) return "host bus error"; if (err_mask & AC_ERR_ATA_BUS) return "ATA bus error"; if (err_mask & AC_ERR_TIMEOUT) return "timeout"; if (err_mask & AC_ERR_HSM) return "HSM violation"; if (err_mask & AC_ERR_SYSTEM) return "internal error"; if (err_mask & AC_ERR_MEDIA) return "media error"; if (err_mask & AC_ERR_INVALID) return "invalid argument"; if (err_mask & AC_ERR_DEV) return "device error"; if (err_mask & AC_ERR_NCQ) return "NCQ error"; if (err_mask & AC_ERR_NODEV_HINT) return "Polling detection error"; return "unknown error"; } /** * atapi_eh_tur - perform ATAPI TEST_UNIT_READY * @dev: target ATAPI device * @r_sense_key: out parameter for sense_key * * Perform ATAPI TEST_UNIT_READY. * * LOCKING: * EH context (may sleep). * * RETURNS: * 0 on success, AC_ERR_* mask on failure. */ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) { u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; struct ata_taskfile tf; unsigned int err_mask; ata_tf_init(dev, &tf); tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; tf.protocol = ATAPI_PROT_NODATA; err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); if (err_mask == AC_ERR_DEV) *r_sense_key = tf.error >> 4; return err_mask; } /** * ata_eh_decide_disposition - Disposition a qc based on sense data * @qc: qc to examine * * For a regular SCSI command, the SCSI completion callback (scsi_done()) * will call scsi_complete(), which will call scsi_decide_disposition(), * which will call scsi_check_sense(). scsi_complete() finally calls * scsi_finish_command(). This is fine for SCSI, since any eventual sense * data is usually returned in the completion itself (without invoking SCSI * EH). However, for a QC, we always need to fetch the sense data * explicitly using SCSI EH. * * A command that is completed via SCSI EH will instead be completed using * scsi_eh_flush_done_q(), which will call scsi_finish_command() directly * (without ever calling scsi_check_sense()). * * For a command that went through SCSI EH, it is the responsibility of the * SCSI EH strategy handler to call scsi_decide_disposition(), see e.g. how * scsi_eh_get_sense() calls scsi_decide_disposition() for SCSI LLDDs that * do not get the sense data as part of the completion. * * Thus, for QC commands that went via SCSI EH, we need to call * scsi_check_sense() ourselves, similar to how scsi_eh_get_sense() calls * scsi_decide_disposition(), which calls scsi_check_sense(), in order to * set the correct SCSI ML byte (if any). * * LOCKING: * EH context. * * RETURNS: * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE */ enum scsi_disposition ata_eh_decide_disposition(struct ata_queued_cmd *qc) { return scsi_check_sense(qc->scsicmd); } /** * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT * @qc: qc to perform REQUEST_SENSE_SENSE_DATA_EXT to * * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK * SENSE. This function is an EH helper. * * LOCKING: * Kernel thread context (may sleep). * * RETURNS: * true if sense data could be fetched, false otherwise. */ static bool ata_eh_request_sense(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; struct ata_device *dev = qc->dev; struct ata_taskfile tf; unsigned int err_mask; if (ata_port_is_frozen(qc->ap)) { ata_dev_warn(dev, "sense data available but port frozen\n"); return false; } if (!ata_id_sense_reporting_enabled(dev->id)) { ata_dev_warn(qc->dev, "sense data reporting disabled\n"); return false; } ata_tf_init(dev, &tf); tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; tf.command = ATA_CMD_REQ_SENSE_DATA; tf.protocol = ATA_PROT_NODATA; err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); /* Ignore err_mask; ATA_ERR might be set */ if (tf.status & ATA_SENSE) { if (ata_scsi_sense_is_valid(tf.lbah, tf.lbam, tf.lbal)) { /* Set sense without also setting scsicmd->result */ scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE, cmd->sense_buffer, tf.lbah, tf.lbam, tf.lbal); qc->flags |= ATA_QCFLAG_SENSE_VALID; return true; } } else { ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", tf.status, err_mask); } return false; } /** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) * @dfl_sense_key: default sense key to use * * Perform ATAPI REQUEST_SENSE after the device reported CHECK * SENSE. This function is EH helper. * * LOCKING: * Kernel thread context (may sleep). * * RETURNS: * 0 on success, AC_ERR_* mask on failure */ unsigned int atapi_eh_request_sense(struct ata_device *dev, u8 *sense_buf, u8 dfl_sense_key) { u8 cdb[ATAPI_CDB_LEN] = { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 }; struct ata_port *ap = dev->link->ap; struct ata_taskfile tf; memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); /* initialize sense_buf with the error register, * for the case where they are -not- overwritten */ sense_buf[0] = 0x70; sense_buf[2] = dfl_sense_key; /* some devices time out if garbage left in tf */ ata_tf_init(dev, &tf); tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; /* * Do not use DMA if the connected device only supports PIO, even if the * port prefers PIO commands via DMA. * * Ideally, we should call atapi_check_dma() to check if it is safe for * the LLD to use DMA for REQUEST_SENSE, but we don't have a qc. * Since we can't check the command, perhaps we should only use pio? */ if ((ap->flags & ATA_FLAG_PIO_DMA) && !(dev->flags & ATA_DFLAG_PIO)) { tf.protocol = ATAPI_PROT_DMA; tf.feature |= ATAPI_PKT_DMA; } else { tf.protocol = ATAPI_PROT_PIO; tf.lbam = SCSI_SENSE_BUFFERSIZE; tf.lbah = 0; } return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, sense_buf, SCSI_SENSE_BUFFERSIZE, 0); } /** * ata_eh_analyze_serror - analyze SError for a failed port * @link: ATA link to analyze SError for * * Analyze SError if available and further determine cause of * failure. * * LOCKING: * None. */ static void ata_eh_analyze_serror(struct ata_link *link) { struct ata_eh_context *ehc = &link->eh_context; u32 serror = ehc->i.serror; unsigned int err_mask = 0, action = 0; u32 hotplug_mask; if (serror & (SERR_PERSISTENT | SERR_DATA)) { err_mask |= AC_ERR_ATA_BUS; action |= ATA_EH_RESET; } if (serror & SERR_PROTOCOL) { err_mask |= AC_ERR_HSM; action |= ATA_EH_RESET; } if (serror & SERR_INTERNAL) { err_mask |= AC_ERR_SYSTEM; action |= ATA_EH_RESET; } /* Determine whether a hotplug event has occurred. Both * SError.N/X are considered hotplug events for enabled or * host links. For disabled PMP links, only N bit is * considered as X bit is left at 1 for link plugging. */ if (link->lpm_policy > ATA_LPM_MAX_POWER) hotplug_mask = 0; /* hotplug doesn't work w/ LPM */ else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; else hotplug_mask = SERR_PHYRDY_CHG; if (serror & hotplug_mask) ata_ehi_hotplugged(&ehc->i); ehc->i.err_mask |= err_mask; ehc->i.action |= action; } /** * ata_eh_analyze_tf - analyze taskfile of a failed qc * @qc: qc to analyze * * Analyze taskfile of @qc and further determine cause of * failure. This function also requests ATAPI sense data if * available. * * LOCKING: * Kernel thread context (may sleep). * * RETURNS: * Determined recovery action */ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc) { const struct ata_taskfile *tf = &qc->result_tf; unsigned int tmp, action = 0; u8 stat = tf->status, err = tf->error; if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { qc->err_mask |= AC_ERR_HSM; return ATA_EH_RESET; } if (stat & (ATA_ERR | ATA_DF)) { qc->err_mask |= AC_ERR_DEV; /* * Sense data reporting does not work if the * device fault bit is set. */ if (stat & ATA_DF) stat &= ~ATA_SENSE; } else { return 0; } switch (qc->dev->class) { case ATA_DEV_ATA: case ATA_DEV_ZAC: /* * Fetch the sense data explicitly if: * -It was a non-NCQ command that failed, or * -It was a NCQ command that failed, but the sense data * was not included in the NCQ command error log * (i.e. NCQ autosense is not supported by the device). */ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID) && (stat & ATA_SENSE) && ata_eh_request_sense(qc)) set_status_byte(qc->scsicmd, SAM_STAT_CHECK_CONDITION); if (err & ATA_ICRC) qc->err_mask |= AC_ERR_ATA_BUS; if (err & (ATA_UNC | ATA_AMNF)) qc->err_mask |= AC_ERR_MEDIA; if (err & ATA_IDNF) qc->err_mask |= AC_ERR_INVALID; break; case ATA_DEV_ATAPI: if (!ata_port_is_frozen(qc->ap)) { tmp = atapi_eh_request_sense(qc->dev, qc->scsicmd->sense_buffer, qc->result_tf.error >> 4); if (!tmp) qc->flags |= ATA_QCFLAG_SENSE_VALID; else qc->err_mask |= tmp; } } if (qc->flags & ATA_QCFLAG_SENSE_VALID) { enum scsi_disposition ret = ata_eh_decide_disposition(qc); /* * SUCCESS here means that the sense code could be * evaluated and should be passed to the upper layers * for correct evaluation. * FAILED means the sense code could not be interpreted * and the device would need to be reset. * NEEDS_RETRY and ADD_TO_MLQUEUE means that the * command would need to be retried. */ if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) { qc->flags |= ATA_QCFLAG_RETRY; qc->err_mask |= AC_ERR_OTHER; } else if (ret != SUCCESS) { qc->err_mask |= AC_ERR_HSM; } } if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) action |= ATA_EH_RESET; return action; } static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, int *xfer_ok) { int base = 0; if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) *xfer_ok = 1; if (!*xfer_ok) base = ATA_ECAT_DUBIOUS_NONE; if (err_mask & AC_ERR_ATA_BUS) return base + ATA_ECAT_ATA_BUS; if (err_mask & AC_ERR_TIMEOUT) return base + ATA_ECAT_TOUT_HSM; if (eflags & ATA_EFLAG_IS_IO) { if (err_mask & AC_ERR_HSM) return base + ATA_ECAT_TOUT_HSM; if ((err_mask & (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) return base + ATA_ECAT_UNK_DEV; } return 0; } struct speed_down_verdict_arg { u64 since; int xfer_ok; int nr_errors[ATA_ECAT_NR]; }; static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) { struct speed_down_verdict_arg *arg = void_arg; int cat; if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since)) return -1; cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, &arg->xfer_ok); arg->nr_errors[cat]++; return 0; } /** * ata_eh_speed_down_verdict - Determine speed down verdict * @dev: Device of interest * * This function examines error ring of @dev and determines * whether NCQ needs to be turned off, transfer speed should be * stepped down, or falling back to PIO is necessary. * * ECAT_ATA_BUS : ATA_BUS error for any command * * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for * IO commands * * ECAT_UNK_DEV : Unknown DEV error for IO commands * * ECAT_DUBIOUS_* : Identical to above three but occurred while * data transfer hasn't been verified. * * Verdicts are * * NCQ_OFF : Turn off NCQ. * * SPEED_DOWN : Speed down transfer speed but don't fall back * to PIO. * * FALLBACK_TO_PIO : Fall back to PIO. * * Even if multiple verdicts are returned, only one action is * taken per error. An action triggered by non-DUBIOUS errors * clears ering, while one triggered by DUBIOUS_* errors doesn't. * This is to expedite speed down decisions right after device is * initially configured. * * The following are speed down rules. #1 and #2 deal with * DUBIOUS errors. * * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO. * * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors * occurred during last 5 mins, NCQ_OFF. * * 3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors * occurred during last 5 mins, FALLBACK_TO_PIO * * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred * during last 10 mins, NCQ_OFF. * * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN. * * LOCKING: * Inherited from caller. * * RETURNS: * OR of ATA_EH_SPDN_* flags. */ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) { const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; u64 j64 = get_jiffies_64(); struct speed_down_verdict_arg arg; unsigned int verdict = 0; /* scan past 5 mins of error history */ memset(&arg, 0, sizeof(arg)); arg.since = j64 - min(j64, j5mins); ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) verdict |= ATA_EH_SPDN_SPEED_DOWN | ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; if (arg.nr_errors[ATA_ECAT_ATA_BUS] + arg.nr_errors[ATA_ECAT_TOUT_HSM] + arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; /* scan past 10 mins of error history */ memset(&arg, 0, sizeof(arg)); arg.since = j64 - min(j64, j10mins); ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) verdict |= ATA_EH_SPDN_NCQ_OFF; if (arg.nr_errors[ATA_ECAT_ATA_BUS] + arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) verdict |= ATA_EH_SPDN_SPEED_DOWN; return verdict; } /** * ata_eh_speed_down - record error and speed down if necessary * @dev: Failed device * @eflags: mask of ATA_EFLAG_* flags * @err_mask: err_mask of the error * * Record error and examine error history to determine whether * adjusting transmission speed is necessary. It also sets * transmission limits appropriately if such adjustment is * necessary. * * LOCKING: * Kernel thread context (may sleep). * * RETURNS: * Determined recovery action. */ static unsigned int ata_eh_speed_down(struct ata_device *dev, unsigned int eflags, unsigned int err_mask) { struct ata_link *link = ata_dev_phys_link(dev); int xfer_ok = 0; unsigned int verdict; unsigned int action = 0; /* don't bother if Cat-0 error */ if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) return 0; /* record error and determine whether speed down is necessary */ ata_ering_record(&dev->ering, eflags, err_mask); verdict = ata_eh_speed_down_verdict(dev); /* turn off NCQ? */ if ((verdict & ATA_EH_SPDN_NCQ_OFF) && ata_ncq_enabled(dev)) { dev->flags |= ATA_DFLAG_NCQ_OFF; ata_dev_warn(dev, "NCQ disabled due to excessive errors\n"); goto done; } /* speed down? */ if (verdict & ATA_EH_SPDN_SPEED_DOWN) { /* speed down SATA link speed if possible */ if (sata_down_spd_limit(link, 0) == 0) { action |= ATA_EH_RESET; goto done; } /* lower transfer mode */ if (dev->spdn_cnt < 2) { static const int dma_dnxfer_sel[] = { ATA_DNXFER_DMA, ATA_DNXFER_40C }; static const int pio_dnxfer_sel[] = { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; int sel; if (dev->xfer_shift != ATA_SHIFT_PIO) sel = dma_dnxfer_sel[dev->spdn_cnt]; else sel = pio_dnxfer_sel[dev->spdn_cnt]; dev->spdn_cnt++; if (ata_down_xfermask_limit(dev, sel) == 0) { action |= ATA_EH_RESET; goto done; } } } /* Fall back to PIO? Slowing down to PIO is meaningless for * SATA ATA devices. Consider it only for PATA and SATAPI. */ if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && (dev->xfer_shift != ATA_SHIFT_PIO)) { if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { dev->spdn_cnt = 0; action |= ATA_EH_RESET; goto done; } } return 0; done: /* device has been slowed down, blow error history */ if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) ata_ering_clear(&dev->ering); return action; } /** * ata_eh_worth_retry - analyze error and decide whether to retry * @qc: qc to possibly retry * * Look at the cause of the error and decide if a retry * might be useful or not. We don't want to retry media errors * because the drive itself has probably already taken 10-30 seconds * doing its own internal retries before reporting the failure. */ static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc) { if (qc->err_mask & AC_ERR_MEDIA) return 0; /* don't retry media errors */ if (qc->flags & ATA_QCFLAG_IO) return 1; /* otherwise retry anything from fs stack */ if (qc->err_mask & AC_ERR_INVALID) return 0; /* don't retry these */ return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */ } /** * ata_eh_quiet - check if we need to be quiet about a command error * @qc: qc to check * * Look at the qc flags anbd its scsi command request flags to determine * if we need to be quiet about the command failure. */ static inline bool ata_eh_quiet(struct ata_queued_cmd *qc) { if (qc->scsicmd && scsi_cmd_to_rq(qc->scsicmd)->rq_flags & RQF_QUIET) qc->flags |= ATA_QCFLAG_QUIET; return qc->flags & ATA_QCFLAG_QUIET; } static int ata_eh_get_non_ncq_success_sense(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_queued_cmd *qc; qc = __ata_qc_from_tag(ap, link->active_tag); if (!qc) return -EIO; if (!(qc->flags & ATA_QCFLAG_EH) || !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || qc->err_mask) return -EIO; if (!ata_eh_request_sense(qc)) return -EIO; /* * No point in checking the return value, since the command has already * completed successfully. */ ata_eh_decide_disposition(qc); return 0; } static void ata_eh_get_success_sense(struct ata_link *link) { struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev = link->device; struct ata_port *ap = link->ap; struct ata_queued_cmd *qc; int tag, ret = 0; if (!(ehc->i.dev_action[dev->devno] & ATA_EH_GET_SUCCESS_SENSE)) return; /* if frozen, we can't do much */ if (ata_port_is_frozen(ap)) { ata_dev_warn(dev, "successful sense data available but port frozen\n"); goto out; } /* * If the link has sactive set, then we have outstanding NCQ commands * and have to read the Successful NCQ Commands log to get the sense * data. Otherwise, we are dealing with a non-NCQ command and use * request sense ext command to retrieve the sense data. */ if (link->sactive) ret = ata_eh_get_ncq_success_sense(link); else ret = ata_eh_get_non_ncq_success_sense(link); if (ret) goto out; ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); return; out: /* * If we failed to get sense data for a successful command that ought to * have sense data, we cannot simply return BLK_STS_OK to user space. * This is because we can't know if the sense data that we couldn't get * was actually "DATA CURRENTLY UNAVAILABLE". Reporting such a command * as success to user space would result in a silent data corruption. * Thus, add a bogus ABORTED_COMMAND sense data to such commands, such * that SCSI will report these commands as BLK_STS_IOERR to user space. */ ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH) || !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || qc->err_mask || ata_dev_phys_link(qc->dev) != link) continue; /* We managed to get sense for this success command, skip. */ if (qc->flags & ATA_QCFLAG_SENSE_VALID) continue; /* This success command did not have any sense data, skip. */ if (!(qc->result_tf.status & ATA_SENSE)) continue; /* This success command had sense data, but we failed to get. */ ata_scsi_set_sense(dev, qc->scsicmd, ABORTED_COMMAND, 0, 0); qc->flags |= ATA_QCFLAG_SENSE_VALID; } ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); } /* * Check if a link is established. This is a relaxed version of * ata_phys_link_online() which accounts for the fact that this is potentially * called after changing the link power management policy, which may not be * reflected immediately in the SStatus register (e.g., we may still be seeing * the PHY in partial, slumber or devsleep Partial power management state. * So check that: * - A device is still present, that is, DET is 1h (Device presence detected * but Phy communication not established) or 3h (Device presence detected and * Phy communication established) * - Communication is established, that is, IPM is not 0h, indicating that PHY * is online or in a low power state. */ static bool ata_eh_link_established(struct ata_link *link) { u32 sstatus; u8 det, ipm; /* * For old IDE/PATA adapters that do not have a valid scr_read method, * or if reading the SStatus register fails, assume that the device is * present. Device probe will determine if that is really the case. */ if (sata_scr_read(link, SCR_STATUS, &sstatus)) return true; det = sstatus & 0x0f; ipm = (sstatus >> 8) & 0x0f; return (det & 0x01) && ipm; } /** * ata_eh_link_set_lpm - configure SATA interface power management * @link: link to configure * @policy: the link power management policy * @r_failed_dev: out parameter for failed device * * Enable SATA Interface power management. This will enable * Device Interface Power Management (DIPM) for min_power and * medium_power_with_dipm policies, and then call driver specific * callbacks for enabling Host Initiated Power management. * * LOCKING: * EH context. * * RETURNS: * 0 on success, -errno on failure. */ static int ata_eh_link_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, struct ata_device **r_failed_dev) { struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; enum ata_lpm_policy old_policy = link->lpm_policy; bool host_has_dipm = !(link->ap->flags & ATA_FLAG_NO_DIPM); unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; unsigned int err_mask; int rc; /* if the link or host doesn't do LPM, noop */ if (!IS_ENABLED(CONFIG_SATA_HOST) || (link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) return 0; /* * This function currently assumes that it will never be supplied policy * ATA_LPM_UNKNOWN. */ if (WARN_ON_ONCE(policy == ATA_LPM_UNKNOWN)) return 0; ata_link_dbg(link, "Set LPM policy: %d -> %d\n", old_policy, policy); /* * DIPM is enabled only for ATA_LPM_MIN_POWER, * ATA_LPM_MIN_POWER_WITH_PARTIAL, and ATA_LPM_MED_POWER_WITH_DIPM, as * some devices misbehave when the host NACKs transition to SLUMBER. */ ata_for_each_dev(dev, link, ENABLED) { bool dev_has_hipm = ata_id_has_hipm(dev->id); bool dev_has_dipm = ata_id_has_dipm(dev->id); /* find the first enabled and LPM enabled devices */ if (!link_dev) link_dev = dev; if (!lpm_dev && (dev_has_hipm || (dev_has_dipm && host_has_dipm))) lpm_dev = dev; hints &= ~ATA_LPM_EMPTY; if (!dev_has_hipm) hints &= ~ATA_LPM_HIPM; /* disable DIPM before changing link config */ if (dev_has_dipm) { err_mask = ata_dev_set_feature(dev, SETFEATURES_SATA_DISABLE, SATA_DIPM); if (err_mask && err_mask != AC_ERR_DEV) { ata_dev_warn(dev, "failed to disable DIPM, Emask 0x%x\n", err_mask); rc = -EIO; goto fail; } } } if (ap) { rc = ap->ops->set_lpm(link, policy, hints); if (!rc && ap->slave_link) rc = ap->ops->set_lpm(ap->slave_link, policy, hints); } else rc = sata_pmp_set_lpm(link, policy, hints); /* * Attribute link config failure to the first (LPM) enabled * device on the link. */ if (rc) { if (rc == -EOPNOTSUPP) { link->flags |= ATA_LFLAG_NO_LPM; return 0; } dev = lpm_dev ? lpm_dev : link_dev; goto fail; } /* * Low level driver acked the transition. Issue DIPM command * with the new policy set. */ link->lpm_policy = policy; if (ap && ap->slave_link) ap->slave_link->lpm_policy = policy; /* * Host config updated, enable DIPM if transitioning to * ATA_LPM_MIN_POWER, ATA_LPM_MIN_POWER_WITH_PARTIAL, or * ATA_LPM_MED_POWER_WITH_DIPM. */ ata_for_each_dev(dev, link, ENABLED) { bool dev_has_dipm = ata_id_has_dipm(dev->id); if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && host_has_dipm && dev_has_dipm) { err_mask = ata_dev_set_feature(dev, SETFEATURES_SATA_ENABLE, SATA_DIPM); if (err_mask && err_mask != AC_ERR_DEV) { ata_dev_warn(dev, "failed to enable DIPM, Emask 0x%x\n", err_mask); rc = -EIO; goto fail; } } } link->last_lpm_change = jiffies; link->flags |= ATA_LFLAG_CHANGED; return 0; fail: /* restore the old policy */ link->lpm_policy = old_policy; if (ap && ap->slave_link) ap->slave_link->lpm_policy = old_policy; /* if no device or only one more chance is left, disable LPM */ if (!dev || ehc->tries[dev->devno] <= 2) { ata_link_warn(link, "disabling LPM on the link\n"); link->flags |= ATA_LFLAG_NO_LPM; } if (r_failed_dev) *r_failed_dev = dev; return rc; } /** * ata_eh_link_autopsy - analyze error and determine recovery action * @link: host link to perform autopsy on * * Analyze why @link failed and determine which recovery actions * are needed. This function also sets more detailed AC_ERR_* * values and fills sense data for ATAPI CHECK SENSE. * * LOCKING: * Kernel thread context (may sleep). */ static void ata_eh_link_autopsy(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; struct ata_queued_cmd *qc; struct ata_device *dev; unsigned int all_err_mask = 0, eflags = 0; int tag, nr_failed = 0, nr_quiet = 0; u32 serror; int rc; if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) return; /* obtain and analyze SError */ rc = sata_scr_read(link, SCR_ERROR, &serror); if (rc == 0) { ehc->i.serror |= serror; ata_eh_analyze_serror(link); } else if (rc != -EOPNOTSUPP) { /* SError read failed, force reset and probing */ ehc->i.probe_mask |= ATA_ALL_DEVICES; ehc->i.action |= ATA_EH_RESET; ehc->i.err_mask |= AC_ERR_OTHER; } /* analyze NCQ failure */ ata_eh_analyze_ncq_error(link); /* * Check if this was a successful command that simply needs sense data. * Since the sense data is not part of the completion, we need to fetch * it using an additional command. Since this can't be done from irq * context, the sense data for successful commands are fetched by EH. */ ata_eh_get_success_sense(link); /* any real error trumps AC_ERR_OTHER */ if (ehc->i.err_mask & ~AC_ERR_OTHER) ehc->i.err_mask &= ~AC_ERR_OTHER; all_err_mask |= ehc->i.err_mask; ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH) || qc->flags & ATA_QCFLAG_RETRY || qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD || ata_dev_phys_link(qc->dev) != link) continue; /* inherit upper level err_mask */ qc->err_mask |= ehc->i.err_mask; /* analyze TF */ ehc->i.action |= ata_eh_analyze_tf(qc); /* DEV errors are probably spurious in case of ATA_BUS error */ if (qc->err_mask & AC_ERR_ATA_BUS) qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | AC_ERR_INVALID); /* any real error trumps unknown error */ if (qc->err_mask & ~AC_ERR_OTHER) qc->err_mask &= ~AC_ERR_OTHER; /* * SENSE_VALID trumps dev/unknown error and revalidation. Upper * layers will determine whether the command is worth retrying * based on the sense data and device class/type. Otherwise, * determine directly if the command is worth retrying using its * error mask and flags. */ if (qc->flags & ATA_QCFLAG_SENSE_VALID) qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); else if (ata_eh_worth_retry(qc)) qc->flags |= ATA_QCFLAG_RETRY; /* accumulate error info */ ehc->i.dev = qc->dev; all_err_mask |= qc->err_mask; if (qc->flags & ATA_QCFLAG_IO) eflags |= ATA_EFLAG_IS_IO; trace_ata_eh_link_autopsy_qc(qc); /* Count quiet errors */ if (ata_eh_quiet(qc)) nr_quiet++; nr_failed++; } /* If all failed commands requested silence, then be quiet */ if (nr_quiet == nr_failed) ehc->i.flags |= ATA_EHI_QUIET; /* enforce default EH actions */ if (ata_port_is_frozen(ap) || all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) ehc->i.action |= ATA_EH_RESET; else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) ehc->i.action |= ATA_EH_REVALIDATE; /* If we have offending qcs and the associated failed device, * perform per-dev EH action only on the offending device. */ if (ehc->i.dev) { ehc->i.dev_action[ehc->i.dev->devno] |= ehc->i.action & ATA_EH_PERDEV_MASK; ehc->i.action &= ~ATA_EH_PERDEV_MASK; } /* propagate timeout to host link */ if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; /* record error and consider speeding down */ dev = ehc->i.dev; if (!dev && ((ata_link_max_devices(link) == 1 && ata_dev_enabled(link->device)))) dev = link->device; if (dev) { if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) eflags |= ATA_EFLAG_DUBIOUS_XFER; ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); } } /** * ata_eh_autopsy - analyze error and determine recovery action * @ap: host port to perform autopsy on * * Analyze all links of @ap and determine why they failed and * which recovery actions are needed. * * LOCKING: * Kernel thread context (may sleep). */ void ata_eh_autopsy(struct ata_port *ap) { struct ata_link *link; ata_for_each_link(link, ap, EDGE) ata_eh_link_autopsy(link); /* Handle the frigging slave link. Autopsy is done similarly * but actions and flags are transferred over to the master * link and handled from there. */ if (ap->slave_link) { struct ata_eh_context *mehc = &ap->link.eh_context; struct ata_eh_context *sehc = &ap->slave_link->eh_context; /* transfer control flags from master to slave */ sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; /* perform autopsy on the slave link */ ata_eh_link_autopsy(ap->slave_link); /* transfer actions from slave to master and clear slave */ ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); mehc->i.action |= sehc->i.action; mehc->i.dev_action[1] |= sehc->i.dev_action[1]; mehc->i.flags |= sehc->i.flags; ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); } /* Autopsy of fanout ports can affect host link autopsy. * Perform host link autopsy last. */ if (sata_pmp_attached(ap)) ata_eh_link_autopsy(&ap->link); } /** * ata_get_cmd_name - get name for ATA command * @command: ATA command code to get name for * * Return a textual name of the given command or "unknown" * * LOCKING: * None */ const char *ata_get_cmd_name(u8 command) { #ifdef CONFIG_ATA_VERBOSE_ERROR static const struct { u8 command; const char *text; } cmd_descr[] = { { ATA_CMD_DEV_RESET, "DEVICE RESET" }, { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, { ATA_CMD_STANDBY, "STANDBY" }, { ATA_CMD_IDLE, "IDLE" }, { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, { ATA_CMD_NOP, "NOP" }, { ATA_CMD_FLUSH, "FLUSH CACHE" }, { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, { ATA_CMD_SERVICE, "SERVICE" }, { ATA_CMD_READ, "READ DMA" }, { ATA_CMD_READ_EXT, "READ DMA EXT" }, { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, { ATA_CMD_WRITE, "WRITE DMA" }, { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, { ATA_CMD_NCQ_NON_DATA, "NCQ NON-DATA" }, { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, { ATA_CMD_SET_FEATURES, "SET FEATURES" }, { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, { ATA_CMD_SLEEP, "SLEEP" }, { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, { ATA_CMD_PMP_READ, "READ BUFFER" }, { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, { ATA_CMD_SMART, "SMART" }, { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, { ATA_CMD_RESTORE, "RECALIBRATE" }, { 0, NULL } /* terminate list */ }; unsigned int i; for (i = 0; cmd_descr[i].text; i++) if (cmd_descr[i].command == command) return cmd_descr[i].text; #endif return "unknown"; } EXPORT_SYMBOL_GPL(ata_get_cmd_name); /** * ata_eh_link_report - report error handling to user * @link: ATA link EH is going on * * Report EH to user. * * LOCKING: * None. */ static void ata_eh_link_report(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; struct ata_queued_cmd *qc; const char *frozen, *desc; char tries_buf[16] = ""; int tag, nr_failed = 0; if (ehc->i.flags & ATA_EHI_QUIET) return; desc = NULL; if (ehc->i.desc[0] != '\0') desc = ehc->i.desc; ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH) || ata_dev_phys_link(qc->dev) != link || ((qc->flags & ATA_QCFLAG_QUIET) && qc->err_mask == AC_ERR_DEV)) continue; if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) continue; nr_failed++; } if (!nr_failed && !ehc->i.err_mask) return; frozen = ""; if (ata_port_is_frozen(ap)) frozen = " frozen"; if (ap->eh_tries < ATA_EH_MAX_TRIES) snprintf(tries_buf, sizeof(tries_buf), " t%d", ap->eh_tries); if (ehc->i.dev) { ata_dev_err(ehc->i.dev, "exception Emask 0x%x " "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", ehc->i.err_mask, link->sactive, ehc->i.serror, ehc->i.action, frozen, tries_buf); if (desc) ata_dev_err(ehc->i.dev, "%s\n", desc); } else { ata_link_err(link, "exception Emask 0x%x " "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", ehc->i.err_mask, link->sactive, ehc->i.serror, ehc->i.action, frozen, tries_buf); if (desc) ata_link_err(link, "%s\n", desc); } #ifdef CONFIG_ATA_VERBOSE_ERROR if (ehc->i.serror) ata_link_err(link, "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", ehc->i.serror & SERR_DATA ? "UnrecovData " : "", ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", ehc->i.serror & SERR_CRC ? "BadCRC " : "", ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); #endif ata_qc_for_each_raw(ap, qc, tag) { struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; char data_buf[20] = ""; char cdb_buf[70] = ""; if (!(qc->flags & ATA_QCFLAG_EH) || ata_dev_phys_link(qc->dev) != link || !qc->err_mask) continue; if (qc->dma_dir != DMA_NONE) { static const char *dma_str[] = { [DMA_BIDIRECTIONAL] = "bidi", [DMA_TO_DEVICE] = "out", [DMA_FROM_DEVICE] = "in", }; const char *prot_str = NULL; switch (qc->tf.protocol) { case ATA_PROT_UNKNOWN: prot_str = "unknown"; break; case ATA_PROT_NODATA: prot_str = "nodata"; break; case ATA_PROT_PIO: prot_str = "pio"; break; case ATA_PROT_DMA: prot_str = "dma"; break; case ATA_PROT_NCQ: prot_str = "ncq dma"; break; case ATA_PROT_NCQ_NODATA: prot_str = "ncq nodata"; break; case ATAPI_PROT_NODATA: prot_str = "nodata"; break; case ATAPI_PROT_PIO: prot_str = "pio"; break; case ATAPI_PROT_DMA: prot_str = "dma"; break; } snprintf(data_buf, sizeof(data_buf), " %s %u %s", prot_str, qc->nbytes, dma_str[qc->dma_dir]); } if (ata_is_atapi(qc->tf.protocol)) { const u8 *cdb = qc->cdb; size_t cdb_len = qc->dev->cdb_len; if (qc->scsicmd) { cdb = qc->scsicmd->cmnd; cdb_len = qc->scsicmd->cmd_len; } __scsi_format_command(cdb_buf, sizeof(cdb_buf), cdb, cdb_len); } else ata_dev_err(qc->dev, "failed command: %s\n", ata_get_cmd_name(cmd->command)); ata_dev_err(qc->dev, "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " "tag %d%s\n %s" "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " "Emask 0x%x (%s)%s\n", cmd->command, cmd->feature, cmd->nsect, cmd->lbal, cmd->lbam, cmd->lbah, cmd->hob_feature, cmd->hob_nsect, cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, cmd->device, qc->tag, data_buf, cdb_buf, res->status, res->error, res->nsect, res->lbal, res->lbam, res->lbah, res->hob_feature, res->hob_nsect, res->hob_lbal, res->hob_lbam, res->hob_lbah, res->device, qc->err_mask, ata_err_string(qc->err_mask), qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); #ifdef CONFIG_ATA_VERBOSE_ERROR if (res->status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_SENSE | ATA_ERR)) { if (res->status & ATA_BUSY) ata_dev_err(qc->dev, "status: { Busy }\n"); else ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", res->status & ATA_DRDY ? "DRDY " : "", res->status & ATA_DF ? "DF " : "", res->status & ATA_DRQ ? "DRQ " : "", res->status & ATA_SENSE ? "SENSE " : "", res->status & ATA_ERR ? "ERR " : ""); } if (cmd->command != ATA_CMD_PACKET && (res->error & (ATA_ICRC | ATA_UNC | ATA_AMNF | ATA_IDNF | ATA_ABORTED))) ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", res->error & ATA_ICRC ? "ICRC " : "", res->error & ATA_UNC ? "UNC " : "", res->error & ATA_AMNF ? "AMNF " : "", res->error & ATA_IDNF ? "IDNF " : "", res->error & ATA_ABORTED ? "ABRT " : ""); #endif } } /** * ata_eh_report - report error handling to user * @ap: ATA port to report EH about * * Report EH to user. * * LOCKING: * None. */ void ata_eh_report(struct ata_port *ap) { struct ata_link *link; ata_for_each_link(link, ap, HOST_FIRST) ata_eh_link_report(link); } static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, unsigned int *classes, unsigned long deadline, bool clear_classes) { struct ata_device *dev; if (clear_classes) ata_for_each_dev(dev, link, ALL) classes[dev->devno] = ATA_DEV_UNKNOWN; return reset(link, classes, deadline); } static bool ata_eh_followup_srst_needed(struct ata_link *link, int rc) { if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) return false; if (rc == -EAGAIN) return true; if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) return true; return false; } int ata_eh_reset(struct ata_link *link, int classify, struct ata_reset_operations *reset_ops) { struct ata_port *ap = link->ap; struct ata_link *slave = ap->slave_link; struct ata_eh_context *ehc = &link->eh_context; struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; ata_reset_fn_t hardreset = reset_ops->hardreset; ata_reset_fn_t softreset = reset_ops->softreset; ata_prereset_fn_t prereset = reset_ops->prereset; ata_postreset_fn_t postreset = reset_ops->postreset; unsigned int *classes = ehc->classes; unsigned int lflags = link->flags; int verbose = !(ehc->i.flags & ATA_EHI_QUIET); int max_tries = 0, try = 0; struct ata_link *failed_link; struct ata_device *dev; unsigned long deadline, now; ata_reset_fn_t reset; unsigned long flags; u32 sstatus; int nr_unknown, rc; /* * Prepare to reset */ while (ata_eh_reset_timeouts[max_tries] != UINT_MAX) max_tries++; if (link->flags & ATA_LFLAG_RST_ONCE) max_tries = 1; if (link->flags & ATA_LFLAG_NO_HRST) hardreset = NULL; if (link->flags & ATA_LFLAG_NO_SRST) softreset = NULL; /* make sure each reset attempt is at least COOL_DOWN apart */ if (ehc->i.flags & ATA_EHI_DID_RESET) { now = jiffies; WARN_ON(time_after(ehc->last_reset, now)); deadline = ata_deadline(ehc->last_reset, ATA_EH_RESET_COOL_DOWN); if (time_before(now, deadline)) schedule_timeout_uninterruptible(deadline - now); } spin_lock_irqsave(ap->lock, flags); ap->pflags |= ATA_PFLAG_RESETTING; spin_unlock_irqrestore(ap->lock, flags); ata_eh_about_to_do(link, NULL, ATA_EH_RESET); ata_for_each_dev(dev, link, ALL) { /* If we issue an SRST then an ATA drive (not ATAPI) * may change configuration and be in PIO0 timing. If * we do a hard reset (or are coming from power on) * this is true for ATA or ATAPI. Until we've set a * suitable controller mode we should not touch the * bus as we may be talking too fast. */ dev->pio_mode = XFER_PIO_0; dev->dma_mode = 0xff; /* If the controller has a pio mode setup function * then use it to set the chipset to rights. Don't * touch the DMA setup as that will be dealt with when * configuring devices. */ if (ap->ops->set_piomode) ap->ops->set_piomode(ap, dev); } /* prefer hardreset */ reset = NULL; ehc->i.action &= ~ATA_EH_RESET; if (hardreset) { reset = hardreset; ehc->i.action |= ATA_EH_HARDRESET; } else if (softreset) { reset = softreset; ehc->i.action |= ATA_EH_SOFTRESET; } if (prereset) { unsigned long deadline = ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT); if (slave) { sehc->i.action &= ~ATA_EH_RESET; sehc->i.action |= ehc->i.action; } rc = prereset(link, deadline); /* If present, do prereset on slave link too. Reset * is skipped iff both master and slave links report * -ENOENT or clear ATA_EH_RESET. */ if (slave && (rc == 0 || rc == -ENOENT)) { int tmp; tmp = prereset(slave, deadline); if (tmp != -ENOENT) rc = tmp; ehc->i.action |= sehc->i.action; } if (rc) { if (rc == -ENOENT) { ata_link_dbg(link, "port disabled--ignoring\n"); ehc->i.action &= ~ATA_EH_RESET; ata_for_each_dev(dev, link, ALL) classes[dev->devno] = ATA_DEV_NONE; rc = 0; } else ata_link_err(link, "prereset failed (errno=%d)\n", rc); goto out; } /* prereset() might have cleared ATA_EH_RESET. If so, * bang classes, thaw and return. */ if (reset && !(ehc->i.action & ATA_EH_RESET)) { ata_for_each_dev(dev, link, ALL) classes[dev->devno] = ATA_DEV_NONE; if (ata_port_is_frozen(ap) && ata_is_host_link(link)) ata_eh_thaw_port(ap); rc = 0; goto out; } } retry: /* * Perform reset */ if (ata_is_host_link(link)) ata_eh_freeze_port(ap); deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); if (reset) { if (verbose) ata_link_info(link, "%s resetting link\n", reset == softreset ? "soft" : "hard"); /* mark that this EH session started with reset */ ehc->last_reset = jiffies; if (reset == hardreset) { ehc->i.flags |= ATA_EHI_DID_HARDRESET; trace_ata_link_hardreset_begin(link, classes, deadline); } else { ehc->i.flags |= ATA_EHI_DID_SOFTRESET; trace_ata_link_softreset_begin(link, classes, deadline); } rc = ata_do_reset(link, reset, classes, deadline, true); if (reset == hardreset) trace_ata_link_hardreset_end(link, classes, rc); else trace_ata_link_softreset_end(link, classes, rc); if (rc && rc != -EAGAIN) { failed_link = link; goto fail; } /* hardreset slave link if existent */ if (slave && reset == hardreset) { int tmp; if (verbose) ata_link_info(slave, "hard resetting link\n"); ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); trace_ata_slave_hardreset_begin(slave, classes, deadline); tmp = ata_do_reset(slave, reset, classes, deadline, false); trace_ata_slave_hardreset_end(slave, classes, tmp); switch (tmp) { case -EAGAIN: rc = -EAGAIN; break; case 0: break; default: failed_link = slave; rc = tmp; goto fail; } } /* perform follow-up SRST if necessary */ if (reset == hardreset && ata_eh_followup_srst_needed(link, rc)) { reset = softreset; if (!reset) { ata_link_err(link, "follow-up softreset required but no softreset available\n"); failed_link = link; rc = -EINVAL; goto fail; } ata_eh_about_to_do(link, NULL, ATA_EH_RESET); trace_ata_link_softreset_begin(link, classes, deadline); rc = ata_do_reset(link, reset, classes, deadline, true); trace_ata_link_softreset_end(link, classes, rc); if (rc) { failed_link = link; goto fail; } } } else { if (verbose) ata_link_info(link, "no reset method available, skipping reset\n"); if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) lflags |= ATA_LFLAG_ASSUME_ATA; } /* * Post-reset processing */ ata_for_each_dev(dev, link, ALL) { /* After the reset, the device state is PIO 0 and the * controller state is undefined. Reset also wakes up * drives from sleeping mode. */ dev->pio_mode = XFER_PIO_0; dev->flags &= ~ATA_DFLAG_SLEEPING; if (ata_phys_link_offline(ata_dev_phys_link(dev))) continue; /* apply class override */ if (lflags & ATA_LFLAG_ASSUME_ATA) classes[dev->devno] = ATA_DEV_ATA; else if (lflags & ATA_LFLAG_ASSUME_SEMB) classes[dev->devno] = ATA_DEV_SEMB_UNSUP; } /* record current link speed */ if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) link->sata_spd = (sstatus >> 4) & 0xf; if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) slave->sata_spd = (sstatus >> 4) & 0xf; /* thaw the port */ if (ata_is_host_link(link)) ata_eh_thaw_port(ap); /* postreset() should clear hardware SError. Although SError * is cleared during link resume, clearing SError here is * necessary as some PHYs raise hotplug events after SRST. * This introduces race condition where hotplug occurs between * reset and here. This race is mediated by cross checking * link onlineness and classification result later. */ if (postreset) { postreset(link, classes); trace_ata_link_postreset(link, classes, rc); if (slave) { postreset(slave, classes); trace_ata_slave_postreset(slave, classes, rc); } } /* clear cached SError */ spin_lock_irqsave(link->ap->lock, flags); link->eh_info.serror = 0; if (slave) slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); /* * Make sure onlineness and classification result correspond. * Hotplug could have happened during reset and some * controllers fail to wait while a drive is spinning up after * being hotplugged causing misdetection. By cross checking * link on/offlineness and classification result, those * conditions can be reliably detected and retried. */ nr_unknown = 0; ata_for_each_dev(dev, link, ALL) { if (ata_phys_link_online(ata_dev_phys_link(dev))) { if (classes[dev->devno] == ATA_DEV_UNKNOWN) { ata_dev_dbg(dev, "link online but device misclassified\n"); classes[dev->devno] = ATA_DEV_NONE; nr_unknown++; } } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { if (ata_class_enabled(classes[dev->devno])) ata_dev_dbg(dev, "link offline, clearing class %d to NONE\n", classes[dev->devno]); classes[dev->devno] = ATA_DEV_NONE; } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { ata_dev_dbg(dev, "link status unknown, clearing UNKNOWN to NONE\n"); classes[dev->devno] = ATA_DEV_NONE; } } if (classify && nr_unknown) { if (try < max_tries) { ata_link_warn(link, "link online but %d devices misclassified, retrying\n", nr_unknown); failed_link = link; rc = -EAGAIN; goto fail; } ata_link_warn(link, "link online but %d devices misclassified, " "device detection might fail\n", nr_unknown); } /* reset successful, schedule revalidation */ ata_eh_done(link, NULL, ATA_EH_RESET); if (slave) ata_eh_done(slave, NULL, ATA_EH_RESET); ehc->last_reset = jiffies; /* update to completion time */ ehc->i.action |= ATA_EH_REVALIDATE; link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ rc = 0; out: /* clear hotplug flag */ ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; if (slave) sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_RESETTING; spin_unlock_irqrestore(ap->lock, flags); return rc; fail: /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ if (!ata_is_host_link(link) && sata_scr_read(link, SCR_STATUS, &sstatus)) rc = -ERESTART; if (try >= max_tries) { /* * Thaw host port even if reset failed, so that the port * can be retried on the next phy event. This risks * repeated EH runs but seems to be a better tradeoff than * shutting down a port after a botched hotplug attempt. */ if (ata_is_host_link(link)) ata_eh_thaw_port(ap); ata_link_warn(link, "%s failed\n", reset == hardreset ? "hardreset" : "softreset"); goto out; } now = jiffies; if (time_before(now, deadline)) { unsigned long delta = deadline - now; ata_link_warn(failed_link, "reset failed (errno=%d), retrying in %u secs\n", rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); ata_eh_release(ap); while (delta) delta = schedule_timeout_uninterruptible(delta); ata_eh_acquire(ap); } /* * While disks spinup behind PMP, some controllers fail sending SRST. * They need to be reset - as well as the PMP - before retrying. */ if (rc == -ERESTART) { if (ata_is_host_link(link)) ata_eh_thaw_port(ap); goto out; } if (try == max_tries - 1) { sata_down_spd_limit(link, 0); if (slave) sata_down_spd_limit(slave, 0); } else if (rc == -EPIPE) sata_down_spd_limit(failed_link, 0); if (hardreset) reset = hardreset; goto retry; } static inline void ata_eh_pull_park_action(struct ata_port *ap) { struct ata_link *link; struct ata_device *dev; unsigned long flags; /* * This function can be thought of as an extended version of * ata_eh_about_to_do() specially crafted to accommodate the * requirements of ATA_EH_PARK handling. Since the EH thread * does not leave the do {} while () loop in ata_eh_recover as * long as the timeout for a park request to *one* device on * the port has not expired, and since we still want to pick * up park requests to other devices on the same port or * timeout updates for the same device, we have to pull * ATA_EH_PARK actions from eh_info into eh_context.i * ourselves at the beginning of each pass over the loop. * * Additionally, all write accesses to &ap->park_req_pending * through reinit_completion() (see below) or complete_all() * (see ata_scsi_park_store()) are protected by the host lock. * As a result we have that park_req_pending.done is zero on * exit from this function, i.e. when ATA_EH_PARK actions for * *all* devices on port ap have been pulled into the * respective eh_context structs. If, and only if, * park_req_pending.done is non-zero by the time we reach * wait_for_completion_timeout(), another ATA_EH_PARK action * has been scheduled for at least one of the devices on port * ap and we have to cycle over the do {} while () loop in * ata_eh_recover() again. */ spin_lock_irqsave(ap->lock, flags); reinit_completion(&ap->park_req_pending); ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ALL) { struct ata_eh_info *ehi = &link->eh_info; link->eh_context.i.dev_action[dev->devno] |= ehi->dev_action[dev->devno] & ATA_EH_PARK; ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); } } spin_unlock_irqrestore(ap->lock, flags); } static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) { struct ata_eh_context *ehc = &dev->link->eh_context; struct ata_taskfile tf; unsigned int err_mask; ata_tf_init(dev, &tf); if (park) { ehc->unloaded_mask |= 1 << dev->devno; tf.command = ATA_CMD_IDLEIMMEDIATE; tf.feature = 0x44; tf.lbal = 0x4c; tf.lbam = 0x4e; tf.lbah = 0x55; } else { ehc->unloaded_mask &= ~(1 << dev->devno); tf.command = ATA_CMD_CHK_POWER; } tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; tf.protocol = ATA_PROT_NODATA; err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); if (park && (err_mask || tf.lbal != 0xc4)) { ata_dev_err(dev, "head unload failed!\n"); ehc->unloaded_mask &= ~(1 << dev->devno); } } static int ata_eh_revalidate_and_attach(struct ata_link *link, struct ata_device **r_failed_dev) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev; unsigned int new_mask = 0; unsigned long flags; int rc = 0; /* For PATA drive side cable detection to work, IDENTIFY must * be done backwards such that PDIAG- is released by the slave * device before the master device is identified. */ ata_for_each_dev(dev, link, ALL_REVERSE) { unsigned int action = ata_eh_dev_action(dev); unsigned int readid_flags = 0; if (ehc->i.flags & ATA_EHI_DID_RESET) readid_flags |= ATA_READID_POSTRESET; if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { WARN_ON(dev->class == ATA_DEV_PMP); /* * The link may be in a deep sleep, wake it up. * * If the link is in deep sleep, ata_phys_link_offline() * will return true, causing the revalidation to fail, * which leads to a (potentially) needless hard reset. * * ata_eh_recover() will later restore the link policy * to ap->target_lpm_policy after revalidation is done. */ if (link->lpm_policy > ATA_LPM_MAX_POWER) { rc = ata_eh_link_set_lpm(link, ATA_LPM_MAX_POWER, r_failed_dev); if (rc) goto err; } if (!ata_eh_link_established(ata_dev_phys_link(dev))) { rc = -EIO; goto err; } ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], readid_flags); if (rc) goto err; ata_eh_done(link, dev, ATA_EH_REVALIDATE); /* Configuration may have changed, reconfigure * transfer mode. */ ehc->i.flags |= ATA_EHI_SETMODE; /* schedule the scsi_rescan_device() here */ schedule_delayed_work(&ap->scsi_rescan_task, 0); } else if (dev->class == ATA_DEV_UNKNOWN && ehc->tries[dev->devno] && ata_class_enabled(ehc->classes[dev->devno])) { /* Temporarily set dev->class, it will be * permanently set once all configurations are * complete. This is necessary because new * device configuration is done in two * separate loops. */ dev->class = ehc->classes[dev->devno]; if (dev->class == ATA_DEV_PMP) rc = sata_pmp_attach(dev); else rc = ata_dev_read_id(dev, &dev->class, readid_flags, dev->id); /* read_id might have changed class, store and reset */ ehc->classes[dev->devno] = dev->class; dev->class = ATA_DEV_UNKNOWN; switch (rc) { case 0: /* clear error info accumulated during probe */ ata_ering_clear(&dev->ering); new_mask |= 1 << dev->devno; break; case -ENOENT: /* IDENTIFY was issued to non-existent * device. No need to reset. Just * thaw and ignore the device. */ ata_eh_thaw_port(ap); break; default: goto err; } } } /* PDIAG- should have been released, ask cable type if post-reset */ if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { if (ap->ops->cable_detect) ap->cbl = ap->ops->cable_detect(ap); ata_force_cbl(ap); } /* Configure new devices forward such that user doesn't see * device detection messages backwards. */ ata_for_each_dev(dev, link, ALL) { if (!(new_mask & (1 << dev->devno))) continue; dev->class = ehc->classes[dev->devno]; if (dev->class == ATA_DEV_PMP) continue; ehc->i.flags |= ATA_EHI_PRINTINFO; rc = ata_dev_configure(dev); ehc->i.flags &= ~ATA_EHI_PRINTINFO; if (rc) { dev->class = ATA_DEV_UNKNOWN; goto err; } spin_lock_irqsave(ap->lock, flags); ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; spin_unlock_irqrestore(ap->lock, flags); /* new device discovered, configure xfermode */ ehc->i.flags |= ATA_EHI_SETMODE; } return 0; err: dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } /** * ata_eh_set_mode - Program timings and issue SET FEATURES - XFER * @link: link on which timings will be programmed * @r_failed_dev: out parameter for failed device * * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If * ata_eh_set_mode() fails, pointer to the failing device is * returned in @r_failed_dev. * * LOCKING: * PCI/etc. bus probe sem. * * RETURNS: * 0 on success, negative errno otherwise */ static int ata_eh_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) { struct ata_port *ap = link->ap; struct ata_device *dev; int rc; /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ ata_for_each_dev(dev, link, ENABLED) { if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { struct ata_ering_entry *ent; ent = ata_ering_top(&dev->ering); if (ent) ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; } } /* has private set_mode? */ if (ap->ops->set_mode) rc = ap->ops->set_mode(link, r_failed_dev); else rc = ata_set_mode(link, r_failed_dev); /* if transfer mode has changed, set DUBIOUS_XFER on device */ ata_for_each_dev(dev, link, ENABLED) { struct ata_eh_context *ehc = &link->eh_context; u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); if (dev->xfer_mode != saved_xfer_mode || ata_ncq_enabled(dev) != saved_ncq) dev->flags |= ATA_DFLAG_DUBIOUS_XFER; } return rc; } /** * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset * @dev: ATAPI device to clear UA for * * Resets and other operations can make an ATAPI device raise * UNIT ATTENTION which causes the next operation to fail. This * function clears UA. * * LOCKING: * EH context (may sleep). * * RETURNS: * 0 on success, -errno on failure. */ static int atapi_eh_clear_ua(struct ata_device *dev) { int i; for (i = 0; i < ATA_EH_UA_TRIES; i++) { u8 *sense_buffer = dev->sector_buf; u8 sense_key = 0; unsigned int err_mask; err_mask = atapi_eh_tur(dev, &sense_key); if (err_mask != 0 && err_mask != AC_ERR_DEV) { ata_dev_warn(dev, "TEST_UNIT_READY failed (err_mask=0x%x)\n", err_mask); return -EIO; } if (!err_mask || sense_key != UNIT_ATTENTION) return 0; err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); if (err_mask) { ata_dev_warn(dev, "failed to clear " "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); return -EIO; } } ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES); return 0; } /** * ata_eh_maybe_retry_flush - Retry FLUSH if necessary * @dev: ATA device which may need FLUSH retry * * If @dev failed FLUSH, it needs to be reported upper layer * immediately as it means that @dev failed to remap and already * lost at least a sector and further FLUSH retrials won't make * any difference to the lost sector. However, if FLUSH failed * for other reasons, for example transmission error, FLUSH needs * to be retried. * * This function determines whether FLUSH failure retry is * necessary and performs it if so. * * RETURNS: * 0 if EH can continue, -errno if EH needs to be repeated. */ static int ata_eh_maybe_retry_flush(struct ata_device *dev) { struct ata_link *link = dev->link; struct ata_port *ap = link->ap; struct ata_queued_cmd *qc; struct ata_taskfile tf; unsigned int err_mask; int rc = 0; /* did flush fail for this device? */ if (!ata_tag_valid(link->active_tag)) return 0; qc = __ata_qc_from_tag(ap, link->active_tag); if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && qc->tf.command != ATA_CMD_FLUSH)) return 0; /* if the device failed it, it should be reported to upper layers */ if (qc->err_mask & AC_ERR_DEV) return 0; /* flush failed for some other reason, give it another shot */ ata_tf_init(dev, &tf); tf.command = qc->tf.command; tf.flags |= ATA_TFLAG_DEVICE; tf.protocol = ATA_PROT_NODATA; ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", tf.command, qc->err_mask); err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); if (!err_mask) { /* * FLUSH is complete but there's no way to * successfully complete a failed command from EH. * Making sure retry is allowed at least once and * retrying it should do the trick - whatever was in * the cache is already on the platter and this won't * cause infinite loop. */ qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); } else { ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", err_mask); rc = -EIO; /* if device failed it, report it to upper layers */ if (err_mask & AC_ERR_DEV) { qc->err_mask |= AC_ERR_DEV; qc->result_tf = tf; if (!ata_port_is_frozen(ap)) rc = 0; } } return rc; } int ata_link_nr_enabled(struct ata_link *link) { struct ata_device *dev; int cnt = 0; ata_for_each_dev(dev, link, ENABLED) cnt++; return cnt; } static int ata_link_nr_vacant(struct ata_link *link) { struct ata_device *dev; int cnt = 0; ata_for_each_dev(dev, link, ALL) if (dev->class == ATA_DEV_UNKNOWN) cnt++; return cnt; } static int ata_eh_skip_recovery(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev; /* skip disabled links */ if (link->flags & ATA_LFLAG_DISABLED) return 1; /* skip if explicitly requested */ if (ehc->i.flags & ATA_EHI_NO_RECOVERY) return 1; /* thaw frozen port and recover failed devices */ if (ata_port_is_frozen(ap) || ata_link_nr_enabled(link)) return 0; /* reset at least once if reset is requested */ if ((ehc->i.action & ATA_EH_RESET) && !(ehc->i.flags & ATA_EHI_DID_RESET)) return 0; /* skip if class codes for all vacant slots are ATA_DEV_NONE */ ata_for_each_dev(dev, link, ALL) { if (dev->class == ATA_DEV_UNKNOWN && ehc->classes[dev->devno] != ATA_DEV_NONE) return 0; } return 1; } static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) { u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); u64 now = get_jiffies_64(); int *trials = void_arg; if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < now - min(now, interval))) return -1; (*trials)++; return 0; } static int ata_eh_schedule_probe(struct ata_device *dev) { struct ata_eh_context *ehc = &dev->link->eh_context; struct ata_link *link = ata_dev_phys_link(dev); int trials = 0; if (!(ehc->i.probe_mask & (1 << dev->devno)) || (ehc->did_probe_mask & (1 << dev->devno))) return 0; ata_eh_detach_dev(dev); ata_dev_init(dev); ehc->did_probe_mask |= (1 << dev->devno); ehc->i.action |= ATA_EH_RESET; ehc->saved_xfer_mode[dev->devno] = 0; ehc->saved_ncq_enabled &= ~(1 << dev->devno); /* the link maybe in a deep sleep, wake it up */ if (link->lpm_policy > ATA_LPM_MAX_POWER) { if (ata_is_host_link(link)) link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, ATA_LPM_EMPTY); else sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, ATA_LPM_EMPTY); } /* Record and count probe trials on the ering. The specific * error mask used is irrelevant. Because a successful device * detection clears the ering, this count accumulates only if * there are consecutive failed probes. * * If the count is equal to or higher than ATA_EH_PROBE_TRIALS * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is * forced to 1.5Gbps. * * This is to work around cases where failed link speed * negotiation results in device misdetection leading to * infinite DEVXCHG or PHRDY CHG events. */ ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); if (trials > ATA_EH_PROBE_TRIALS) sata_down_spd_limit(link, 1); return 1; } static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) { struct ata_eh_context *ehc = &dev->link->eh_context; /* -EAGAIN from EH routine indicates retry without prejudice. * The requester is responsible for ensuring forward progress. */ if (err != -EAGAIN) ehc->tries[dev->devno]--; switch (err) { case -ENODEV: /* device missing or wrong IDENTIFY data, schedule probing */ ehc->i.probe_mask |= (1 << dev->devno); fallthrough; case -EINVAL: /* give it just one more chance */ ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); fallthrough; case -EIO: if (ehc->tries[dev->devno] == 1) { /* This is the last chance, better to slow * down than lose it. */ sata_down_spd_limit(ata_dev_phys_link(dev), 0); if (dev->pio_mode > XFER_PIO_0) ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); } } if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { /* disable device if it has used up all its chances */ ata_dev_disable(dev); /* detach if offline */ if (ata_phys_link_offline(ata_dev_phys_link(dev))) ata_eh_detach_dev(dev); /* schedule probe if necessary */ if (ata_eh_schedule_probe(dev)) { ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; memset(ehc->cmd_timeout_idx[dev->devno], 0, sizeof(ehc->cmd_timeout_idx[dev->devno])); } return 1; } else { ehc->i.action |= ATA_EH_RESET; return 0; } } /** * ata_eh_recover - recover host port after error * @ap: host port to recover * @reset_ops: The set of reset operations to use * @r_failed_link: out parameter for failed link * * This is the alpha and omega, eum and yang, heart and soul of * libata exception handling. On entry, actions required to * recover each link and hotplug requests are recorded in the * link's eh_context. This function executes all the operations * with appropriate retrials and fallbacks to resurrect failed * devices, detach goners and greet newcomers. * * LOCKING: * Kernel thread context (may sleep). * * RETURNS: * 0 on success, -errno on failure. */ int ata_eh_recover(struct ata_port *ap, struct ata_reset_operations *reset_ops, struct ata_link **r_failed_link) { struct ata_link *link; struct ata_device *dev; int rc, nr_fails; unsigned long flags, deadline; /* prep for recovery */ ata_for_each_link(link, ap, EDGE) { struct ata_eh_context *ehc = &link->eh_context; /* re-enable link? */ if (ehc->i.action & ATA_EH_ENABLE_LINK) { ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); spin_lock_irqsave(ap->lock, flags); link->flags &= ~ATA_LFLAG_DISABLED; spin_unlock_irqrestore(ap->lock, flags); ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); } ata_for_each_dev(dev, link, ALL) { if (link->flags & ATA_LFLAG_NO_RETRY) ehc->tries[dev->devno] = 1; else ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; /* collect port action mask recorded in dev actions */ ehc->i.action |= ehc->i.dev_action[dev->devno] & ~ATA_EH_PERDEV_MASK; ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; /* process hotplug request */ if (dev->flags & ATA_DFLAG_DETACH) ata_eh_detach_dev(dev); /* schedule probe if necessary */ if (!ata_dev_enabled(dev)) ata_eh_schedule_probe(dev); } } retry: rc = 0; /* if UNLOADING, finish immediately */ if (ap->pflags & ATA_PFLAG_UNLOADING) goto out; /* prep for EH */ ata_for_each_link(link, ap, EDGE) { struct ata_eh_context *ehc = &link->eh_context; /* skip EH if possible. */ if (ata_eh_skip_recovery(link)) ehc->i.action = 0; ata_for_each_dev(dev, link, ALL) ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; } /* reset */ ata_for_each_link(link, ap, EDGE) { struct ata_eh_context *ehc = &link->eh_context; if (!(ehc->i.action & ATA_EH_RESET)) continue; rc = ata_eh_reset(link, ata_link_nr_vacant(link), reset_ops); if (rc) { ata_link_err(link, "reset failed, giving up\n"); goto out; } } do { unsigned long now; /* * clears ATA_EH_PARK in eh_info and resets * ap->park_req_pending */ ata_eh_pull_park_action(ap); deadline = jiffies; ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ALL) { struct ata_eh_context *ehc = &link->eh_context; unsigned long tmp; if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC) continue; if (!(ehc->i.dev_action[dev->devno] & ATA_EH_PARK)) continue; tmp = dev->unpark_deadline; if (time_before(deadline, tmp)) deadline = tmp; else if (time_before_eq(tmp, jiffies)) continue; if (ehc->unloaded_mask & (1 << dev->devno)) continue; ata_eh_park_issue_cmd(dev, 1); } } now = jiffies; if (time_before_eq(deadline, now)) break; ata_eh_release(ap); deadline = wait_for_completion_timeout(&ap->park_req_pending, deadline - now); ata_eh_acquire(ap); } while (deadline); ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ALL) { if (!(link->eh_context.unloaded_mask & (1 << dev->devno))) continue; ata_eh_park_issue_cmd(dev, 0); ata_eh_done(link, dev, ATA_EH_PARK); } } /* the rest */ nr_fails = 0; ata_for_each_link(link, ap, PMP_FIRST) { struct ata_eh_context *ehc = &link->eh_context; if (sata_pmp_attached(ap) && ata_is_host_link(link)) goto config_lpm; /* revalidate existing devices and attach new ones */ rc = ata_eh_revalidate_and_attach(link, &dev); if (rc) goto rest_fail; /* if PMP got attached, return, pmp EH will take care of it */ if (link->device->class == ATA_DEV_PMP) { ehc->i.action = 0; return 0; } /* configure transfer mode if necessary */ if (ehc->i.flags & ATA_EHI_SETMODE) { rc = ata_eh_set_mode(link, &dev); if (rc) goto rest_fail; ehc->i.flags &= ~ATA_EHI_SETMODE; } /* If reset has been issued, clear UA to avoid * disrupting the current users of the device. */ if (ehc->i.flags & ATA_EHI_DID_RESET) { ata_for_each_dev(dev, link, ALL) { if (dev->class != ATA_DEV_ATAPI) continue; rc = atapi_eh_clear_ua(dev); if (rc) goto rest_fail; if (zpodd_dev_enabled(dev)) zpodd_post_poweron(dev); } } /* * Make sure to transition devices to the active power mode * if needed (e.g. if we were scheduled on system resume). */ ata_for_each_dev(dev, link, ENABLED) { if (ehc->i.dev_action[dev->devno] & ATA_EH_SET_ACTIVE) { ata_dev_power_set_active(dev); ata_eh_done(link, dev, ATA_EH_SET_ACTIVE); } } /* retry flush if necessary */ ata_for_each_dev(dev, link, ALL) { if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC) continue; rc = ata_eh_maybe_retry_flush(dev); if (rc) goto rest_fail; } config_lpm: /* configure link power saving */ if (link->lpm_policy != ap->target_lpm_policy) { rc = ata_eh_link_set_lpm(link, ap->target_lpm_policy, &dev); if (rc) goto rest_fail; } /* this link is okay now */ ehc->i.flags = 0; continue; rest_fail: nr_fails++; if (dev) ata_eh_handle_dev_fail(dev, rc); if (ata_port_is_frozen(ap)) { /* PMP reset requires working host port. * Can't retry if it's frozen. */ if (sata_pmp_attached(ap)) goto out; break; } } if (nr_fails) goto retry; out: if (rc && r_failed_link) *r_failed_link = link; return rc; } /** * ata_eh_finish - finish up EH * @ap: host port to finish EH for * * Recovery is complete. Clean up EH states and retry or finish * failed qcs. * * LOCKING: * None. */ void ata_eh_finish(struct ata_port *ap) { struct ata_queued_cmd *qc; int tag; /* retry or finish qcs */ ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH)) continue; if (qc->err_mask) { /* FIXME: Once EH migration is complete, * generate sense data in this function, * considering both err_mask and tf. */ if (qc->flags & ATA_QCFLAG_RETRY) { /* * Since qc->err_mask is set, ata_eh_qc_retry() * will not increment scmd->allowed, so upper * layer will only retry the command if it has * not already been retried too many times. */ ata_eh_qc_retry(qc); } else { ata_eh_qc_complete(qc); } } else { if (qc->flags & ATA_QCFLAG_SENSE_VALID || qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) { ata_eh_qc_complete(qc); } else { /* feed zero TF to sense generation */ memset(&qc->result_tf, 0, sizeof(qc->result_tf)); /* * Since qc->err_mask is not set, * ata_eh_qc_retry() will increment * scmd->allowed, so upper layer is guaranteed * to retry the command. */ ata_eh_qc_retry(qc); } } } /* make sure nr_active_links is zero after EH */ WARN_ON(ap->nr_active_links); ap->nr_active_links = 0; } /** * ata_std_error_handler - standard error handler * @ap: host port to handle error for * * Perform standard error handling sequence. * * LOCKING: * Kernel thread context (may sleep). */ void ata_std_error_handler(struct ata_port *ap) { struct ata_reset_operations *reset_ops = &ap->ops->reset; struct ata_link *link = &ap->link; int rc; /* Ignore built-in hardresets if SCR access is not available */ if ((reset_ops->hardreset == sata_std_hardreset || reset_ops->hardreset == sata_sff_hardreset) && !sata_scr_valid(link)) link->flags |= ATA_LFLAG_NO_HRST; ata_eh_autopsy(ap); ata_eh_report(ap); rc = ata_eh_recover(ap, reset_ops, NULL); if (rc) { struct ata_device *dev; ata_for_each_dev(dev, link, ALL) ata_dev_disable(dev); } ata_eh_finish(ap); } EXPORT_SYMBOL_GPL(ata_std_error_handler); #ifdef CONFIG_PM /** * ata_eh_handle_port_suspend - perform port suspend operation * @ap: port to suspend * * Suspend @ap. * * LOCKING: * Kernel thread context (may sleep). */ static void ata_eh_handle_port_suspend(struct ata_port *ap) { unsigned long flags; int rc = 0; struct ata_device *dev; struct ata_link *link; /* are we suspending? */ spin_lock_irqsave(ap->lock, flags); if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || ap->pm_mesg.event & PM_EVENT_RESUME) { spin_unlock_irqrestore(ap->lock, flags); return; } spin_unlock_irqrestore(ap->lock, flags); WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); /* * We will reach this point for all of the PM events: * PM_EVENT_SUSPEND (if runtime pm, PM_EVENT_AUTO will also be set) * PM_EVENT_FREEZE, and PM_EVENT_HIBERNATE. * * We do not want to perform disk spin down for PM_EVENT_FREEZE. * (Spin down will be performed by the subsequent PM_EVENT_HIBERNATE.) */ if (!(ap->pm_mesg.event & PM_EVENT_FREEZE)) { /* Set all devices attached to the port in standby mode */ ata_for_each_link(link, ap, HOST_FIRST) { ata_for_each_dev(dev, link, ENABLED) ata_dev_power_set_standby(dev); } } /* * If we have a ZPODD attached, check its zero * power ready status before the port is frozen. * Only needed for runtime suspend. */ if (PMSG_IS_AUTO(ap->pm_mesg)) { ata_for_each_dev(dev, &ap->link, ENABLED) { if (zpodd_dev_enabled(dev)) zpodd_on_suspend(dev); } } /* suspend */ ata_eh_freeze_port(ap); if (ap->ops->port_suspend) rc = ap->ops->port_suspend(ap, ap->pm_mesg); ata_acpi_set_state(ap, ap->pm_mesg); /* update the flags */ spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_PM_PENDING; if (rc == 0) ap->pflags |= ATA_PFLAG_SUSPENDED; else if (ata_port_is_frozen(ap)) ata_port_schedule_eh(ap); spin_unlock_irqrestore(ap->lock, flags); return; } /** * ata_eh_handle_port_resume - perform port resume operation * @ap: port to resume * * Resume @ap. * * LOCKING: * Kernel thread context (may sleep). */ static void ata_eh_handle_port_resume(struct ata_port *ap) { struct ata_link *link; struct ata_device *dev; unsigned long flags; /* are we resuming? */ spin_lock_irqsave(ap->lock, flags); if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || !(ap->pm_mesg.event & PM_EVENT_RESUME)) { spin_unlock_irqrestore(ap->lock, flags); return; } spin_unlock_irqrestore(ap->lock, flags); WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); /* * Error timestamps are in jiffies which doesn't run while * suspended and PHY events during resume isn't too uncommon. * When the two are combined, it can lead to unnecessary speed * downs if the machine is suspended and resumed repeatedly. * Clear error history. */ ata_for_each_link(link, ap, HOST_FIRST) ata_for_each_dev(dev, link, ALL) ata_ering_clear(&dev->ering); ata_acpi_set_state(ap, ap->pm_mesg); if (ap->ops->port_resume) ap->ops->port_resume(ap); /* tell ACPI that we're resuming */ ata_acpi_on_resume(ap); /* update the flags */ spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); ap->pflags |= ATA_PFLAG_RESUMING; spin_unlock_irqrestore(ap->lock, flags); } #endif /* CONFIG_PM */
3 3 3 2 2 1 1 1 1 1 1 3 3 2 2 2 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 // SPDX-License-Identifier: GPL-2.0-or-later /* ----------------------------------------------------------------------- * * * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved * Copyright 2009 Intel Corporation; author: H. Peter Anvin * * ----------------------------------------------------------------------- */ /* * x86 MSR access device * * This device is accessed by lseek() to the appropriate register number * and then read/write in chunks of 8 bytes. A larger size means multiple * reads or writes of the same register. * * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on * an SMP box will direct the access to CPU %d. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/smp.h> #include <linux/major.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/uaccess.h> #include <linux/gfp.h> #include <linux/security.h> #include <asm/cpufeature.h> #include <asm/msr.h> static enum cpuhp_state cpuhp_msr_state; enum allow_write_msrs { MSR_WRITES_ON, MSR_WRITES_OFF, MSR_WRITES_DEFAULT, }; static enum allow_write_msrs allow_writes = MSR_WRITES_DEFAULT; static ssize_t msr_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { u32 __user *tmp = (u32 __user *) buf; u32 data[2]; u32 reg = *ppos; int cpu = iminor(file_inode(file)); int err = 0; ssize_t bytes = 0; if (count % 8) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 8) { err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); if (err) break; if (copy_to_user(tmp, &data, 8)) { err = -EFAULT; break; } tmp += 2; bytes += 8; } return bytes ? bytes : err; } static int filter_write(u32 reg) { /* * MSRs writes usually happen all at once, and can easily saturate kmsg. * Only allow one message every 30 seconds. * * It's possible to be smarter here and do it (for example) per-MSR, but * it would certainly be more complex, and this is enough at least to * avoid saturating the ring buffer. */ static DEFINE_RATELIMIT_STATE(fw_rs, 30 * HZ, 1); switch (allow_writes) { case MSR_WRITES_ON: return 0; case MSR_WRITES_OFF: return -EPERM; default: break; } if (!__ratelimit(&fw_rs)) return 0; pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d).\n", reg, current->comm, current->pid); pr_warn("See https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about for details.\n"); return 0; } static ssize_t msr_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { const u32 __user *tmp = (const u32 __user *)buf; u32 data[2]; u32 reg = *ppos; int cpu = iminor(file_inode(file)); int err = 0; ssize_t bytes = 0; err = security_locked_down(LOCKDOWN_MSR); if (err) return err; err = filter_write(reg); if (err) return err; if (count % 8) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 8) { if (copy_from_user(&data, tmp, 8)) { err = -EFAULT; break; } add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); if (err) break; tmp += 2; bytes += 8; } return bytes ? bytes : err; } static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) { u32 __user *uregs = (u32 __user *)arg; u32 regs[8]; int cpu = iminor(file_inode(file)); int err; switch (ioc) { case X86_IOC_RDMSR_REGS: if (!(file->f_mode & FMODE_READ)) { err = -EBADF; break; } if (copy_from_user(&regs, uregs, sizeof(regs))) { err = -EFAULT; break; } err = rdmsr_safe_regs_on_cpu(cpu, regs); if (err) break; if (copy_to_user(uregs, &regs, sizeof(regs))) err = -EFAULT; break; case X86_IOC_WRMSR_REGS: if (!(file->f_mode & FMODE_WRITE)) { err = -EBADF; break; } if (copy_from_user(&regs, uregs, sizeof(regs))) { err = -EFAULT; break; } err = security_locked_down(LOCKDOWN_MSR); if (err) break; err = filter_write(regs[1]); if (err) return err; add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); err = wrmsr_safe_regs_on_cpu(cpu, regs); if (err) break; if (copy_to_user(uregs, &regs, sizeof(regs))) err = -EFAULT; break; default: err = -ENOTTY; break; } return err; } static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file_inode(file)); struct cpuinfo_x86 *c; if (!capable(CAP_SYS_RAWIO)) return -EPERM; if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ c = &cpu_data(cpu); if (!cpu_has(c, X86_FEATURE_MSR)) return -EIO; /* MSR not supported */ return 0; } /* * File operations we support */ static const struct file_operations msr_fops = { .owner = THIS_MODULE, .llseek = no_seek_end_llseek, .read = msr_read, .write = msr_write, .open = msr_open, .unlocked_ioctl = msr_ioctl, .compat_ioctl = msr_ioctl, }; static char *msr_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); } static const struct class msr_class = { .name = "msr", .devnode = msr_devnode, }; static int msr_device_create(unsigned int cpu) { struct device *dev; dev = device_create(&msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL, "msr%d", cpu); return PTR_ERR_OR_ZERO(dev); } static int msr_device_destroy(unsigned int cpu) { device_destroy(&msr_class, MKDEV(MSR_MAJOR, cpu)); return 0; } static int __init msr_init(void) { int err; if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { pr_err("unable to get major %d for msr\n", MSR_MAJOR); return -EBUSY; } err = class_register(&msr_class); if (err) goto out_chrdev; err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/msr:online", msr_device_create, msr_device_destroy); if (err < 0) goto out_class; cpuhp_msr_state = err; return 0; out_class: class_unregister(&msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); return err; } module_init(msr_init); static void __exit msr_exit(void) { cpuhp_remove_state(cpuhp_msr_state); class_unregister(&msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); } module_exit(msr_exit) static int set_allow_writes(const char *val, const struct kernel_param *cp) { /* val is NUL-terminated, see kernfs_fop_write() */ char *s = strstrip((char *)val); if (!strcmp(s, "on")) allow_writes = MSR_WRITES_ON; else if (!strcmp(s, "off")) allow_writes = MSR_WRITES_OFF; else allow_writes = MSR_WRITES_DEFAULT; return 0; } static int get_allow_writes(char *buf, const struct kernel_param *kp) { const char *res; switch (allow_writes) { case MSR_WRITES_ON: res = "on"; break; case MSR_WRITES_OFF: res = "off"; break; default: res = "default"; break; } return sprintf(buf, "%s\n", res); } static const struct kernel_param_ops allow_writes_ops = { .set = set_allow_writes, .get = get_allow_writes }; module_param_cb(allow_writes, &allow_writes_ops, NULL, 0600); MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>"); MODULE_DESCRIPTION("x86 generic MSR driver"); MODULE_LICENSE("GPL");
5 5 5 5 5 5 5 5 5 1 4 5 1 1 1 1 1 5 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include <linux/phy.h> #include <linux/phylib_stubs.h> struct linkstate_req_info { struct ethnl_req_info base; }; struct linkstate_reply_data { struct ethnl_reply_data base; int link; int sqi; int sqi_max; struct ethtool_link_ext_stats link_stats; bool link_ext_state_provided; struct ethtool_link_ext_state_info ethtool_link_ext_state_info; }; #define LINKSTATE_REPDATA(__reply_base) \ container_of(__reply_base, struct linkstate_reply_data, base) const struct nla_policy ethnl_linkstate_get_policy[] = { [ETHTOOL_A_LINKSTATE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), }; static int linkstate_get_sqi(struct phy_device *phydev) { int ret; if (!phydev) return -EOPNOTSUPP; mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi) ret = -EOPNOTSUPP; else if (!phydev->link) ret = -ENETDOWN; else ret = phydev->drv->get_sqi(phydev); mutex_unlock(&phydev->lock); return ret; } static int linkstate_get_sqi_max(struct phy_device *phydev) { int ret; if (!phydev) return -EOPNOTSUPP; mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi_max) ret = -EOPNOTSUPP; else if (!phydev->link) ret = -ENETDOWN; else ret = phydev->drv->get_sqi_max(phydev); mutex_unlock(&phydev->lock); return ret; }; static bool linkstate_sqi_critical_error(int sqi) { return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN; } static bool linkstate_sqi_valid(struct linkstate_reply_data *data) { return data->sqi >= 0 && data->sqi_max >= 0 && data->sqi <= data->sqi_max; } static int linkstate_get_link_ext_state(struct net_device *dev, struct linkstate_reply_data *data) { int err; if (!dev->ethtool_ops->get_link_ext_state) return -EOPNOTSUPP; err = dev->ethtool_ops->get_link_ext_state(dev, &data->ethtool_link_ext_state_info); if (err) return err; data->link_ext_state_provided = true; return 0; } static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); struct net_device *dev = reply_base->dev; struct nlattr **tb = info->attrs; struct phy_device *phydev; int ret; phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); if (IS_ERR(phydev)) { ret = PTR_ERR(phydev); goto out; } ret = ethnl_ops_begin(dev); if (ret < 0) return ret; data->link = __ethtool_get_link(dev); ret = linkstate_get_sqi(phydev); if (linkstate_sqi_critical_error(ret)) goto out; data->sqi = ret; ret = linkstate_get_sqi_max(phydev); if (linkstate_sqi_critical_error(ret)) goto out; data->sqi_max = ret; if (dev->flags & IFF_UP) { ret = linkstate_get_link_ext_state(dev, data); if (ret < 0 && ret != -EOPNOTSUPP && ret != -ENODATA) goto out; } ethtool_stats_init((u64 *)&data->link_stats, sizeof(data->link_stats) / 8); if (req_base->flags & ETHTOOL_FLAG_STATS) { if (phydev) phy_ethtool_get_link_ext_stats(phydev, &data->link_stats); if (dev->ethtool_ops->get_link_ext_stats) dev->ethtool_ops->get_link_ext_stats(dev, &data->link_stats); } ret = 0; out: ethnl_ops_complete(dev); return ret; } static int linkstate_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); int len; len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; if (linkstate_sqi_valid(data)) { len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */ } if (data->link_ext_state_provided) len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */ if (data->ethtool_link_ext_state_info.__link_ext_substate) len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_SUBSTATE */ if (data->link_stats.link_down_events != ETHTOOL_STAT_NOT_SET) len += nla_total_size(sizeof(u32)); return len; } static int linkstate_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); if (data->link >= 0 && nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; if (linkstate_sqi_valid(data)) { if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) return -EMSGSIZE; } if (data->link_ext_state_provided) { if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE, data->ethtool_link_ext_state_info.link_ext_state)) return -EMSGSIZE; if (data->ethtool_link_ext_state_info.__link_ext_substate && nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, data->ethtool_link_ext_state_info.__link_ext_substate)) return -EMSGSIZE; } if (data->link_stats.link_down_events != ETHTOOL_STAT_NOT_SET) if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, data->link_stats.link_down_events)) return -EMSGSIZE; return 0; } const struct ethnl_request_ops ethnl_linkstate_request_ops = { .request_cmd = ETHTOOL_MSG_LINKSTATE_GET, .reply_cmd = ETHTOOL_MSG_LINKSTATE_GET_REPLY, .hdr_attr = ETHTOOL_A_LINKSTATE_HEADER, .req_info_size = sizeof(struct linkstate_req_info), .reply_data_size = sizeof(struct linkstate_reply_data), .prepare_data = linkstate_prepare_data, .reply_size = linkstate_reply_size, .fill_reply = linkstate_fill_reply, };
2 1 1 1 1 1 1 1 1 1 3 3 2 2 2 2 4 4 1 3 2 2 2 1 1 1 1 3 4 4 4 3 1 3 2 2 2 1 2 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 3 3 3 1 1 1 1 1 2 3 2 3 2 2 1 3 1 2 2 1 1 1 1 1 1 7 1 4 4 4 4 4 4 3 4 1 3 1 2 3 3 4 3 3 3 2 1 3 1 1 1 1 2 1 2 3 1 1 1 1 1 1 1 1 1 94 93 1 86 10 10 1 1 3 3 1 1 1 1 1 1 1 1 1 2 2 2 2 1 1 2 2 4 2 2 4 9 1 1 1 1 3 3 5 2 4 4 1 1 15 15 1 1 1 1 3 3 1 1 1 7 8 7 3 3 4 4 1 1 1 92 2 2 1 95 1 93 1 92 1 1 1 1 1 1 1 6 1 5 101 97 100 6 94 1 93 96 92 87 94 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 // SPDX-License-Identifier: GPL-2.0 /* * Code related to the io_uring_register() syscall * * Copyright (C) 2023 Jens Axboe */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/syscalls.h> #include <linux/refcount.h> #include <linux/bits.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/nospec.h> #include <linux/compat.h> #include <linux/io_uring.h> #include <linux/io_uring_types.h> #include "filetable.h" #include "io_uring.h" #include "opdef.h" #include "tctx.h" #include "rsrc.h" #include "sqpoll.h" #include "register.h" #include "cancel.h" #include "kbuf.h" #include "napi.h" #include "eventfd.h" #include "msg_ring.h" #include "memmap.h" #include "zcrx.h" #include "query.h" #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ IORING_REGISTER_LAST + IORING_OP_LAST) static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { struct io_uring_probe *p; size_t size; int i, ret; if (nr_args > IORING_OP_LAST) nr_args = IORING_OP_LAST; size = struct_size(p, ops, nr_args); p = memdup_user(arg, size); if (IS_ERR(p)) return PTR_ERR(p); ret = -EINVAL; if (memchr_inv(p, 0, size)) goto out; p->last_op = IORING_OP_LAST - 1; for (i = 0; i < nr_args; i++) { p->ops[i].op = i; if (io_uring_op_supported(i)) p->ops[i].flags = IO_URING_OP_SUPPORTED; } p->ops_len = i; ret = 0; if (copy_to_user(arg, p, size)) ret = -EFAULT; out: kfree(p); return ret; } int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) { const struct cred *creds; creds = xa_erase(&ctx->personalities, id); if (creds) { put_cred(creds); return 0; } return -EINVAL; } static int io_register_personality(struct io_ring_ctx *ctx) { const struct cred *creds; u32 id; int ret; creds = get_current_cred(); ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); if (ret < 0) { put_cred(creds); return ret; } return id; } static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args, struct io_restriction *restrictions) { struct io_uring_restriction *res; size_t size; int i, ret; if (!arg || nr_args > IORING_MAX_RESTRICTIONS) return -EINVAL; size = array_size(nr_args, sizeof(*res)); if (size == SIZE_MAX) return -EOVERFLOW; res = memdup_user(arg, size); if (IS_ERR(res)) return PTR_ERR(res); ret = -EINVAL; for (i = 0; i < nr_args; i++) { switch (res[i].opcode) { case IORING_RESTRICTION_REGISTER_OP: if (res[i].register_op >= IORING_REGISTER_LAST) goto err; __set_bit(res[i].register_op, restrictions->register_op); break; case IORING_RESTRICTION_SQE_OP: if (res[i].sqe_op >= IORING_OP_LAST) goto err; __set_bit(res[i].sqe_op, restrictions->sqe_op); break; case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: restrictions->sqe_flags_allowed = res[i].sqe_flags; break; case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: restrictions->sqe_flags_required = res[i].sqe_flags; break; default: goto err; } } ret = 0; err: kfree(res); return ret; } static __cold int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, unsigned int nr_args) { int ret; /* Restrictions allowed only if rings started disabled */ if (!(ctx->flags & IORING_SETUP_R_DISABLED)) return -EBADFD; /* We allow only a single restrictions registration */ if (ctx->restrictions.registered) return -EBUSY; ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions); /* Reset all restrictions if an error happened */ if (ret != 0) memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); else ctx->restrictions.registered = true; return ret; } static int io_register_enable_rings(struct io_ring_ctx *ctx) { if (!(ctx->flags & IORING_SETUP_R_DISABLED)) return -EBADFD; if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) { WRITE_ONCE(ctx->submitter_task, get_task_struct(current)); /* * Lazy activation attempts would fail if it was polled before * submitter_task is set. */ if (wq_has_sleeper(&ctx->poll_wq)) io_activate_pollwq(ctx); } if (ctx->restrictions.registered) ctx->restricted = 1; ctx->flags &= ~IORING_SETUP_R_DISABLED; if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); return 0; } static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx, cpumask_var_t new_mask) { int ret; if (!(ctx->flags & IORING_SETUP_SQPOLL)) { ret = io_wq_cpu_affinity(current->io_uring, new_mask); } else { mutex_unlock(&ctx->uring_lock); ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask); mutex_lock(&ctx->uring_lock); } return ret; } static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, unsigned len) { cpumask_var_t new_mask; int ret; if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) return -ENOMEM; cpumask_clear(new_mask); if (len > cpumask_size()) len = cpumask_size(); #ifdef CONFIG_COMPAT if (in_compat_syscall()) ret = compat_get_bitmap(cpumask_bits(new_mask), (const compat_ulong_t __user *)arg, len * 8 /* CHAR_BIT */); else #endif ret = copy_from_user(new_mask, arg, len); if (ret) { free_cpumask_var(new_mask); return -EFAULT; } ret = __io_register_iowq_aff(ctx, new_mask); free_cpumask_var(new_mask); return ret; } static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx) { return __io_register_iowq_aff(ctx, NULL); } static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, void __user *arg) __must_hold(&ctx->uring_lock) { struct io_tctx_node *node; struct io_uring_task *tctx = NULL; struct io_sq_data *sqd = NULL; __u32 new_count[2]; int i, ret; if (copy_from_user(new_count, arg, sizeof(new_count))) return -EFAULT; for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i] > INT_MAX) return -EINVAL; if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { struct task_struct *tsk; /* * Observe the correct sqd->lock -> ctx->uring_lock * ordering. Fine to drop uring_lock here, we hold * a ref to the ctx. */ refcount_inc(&sqd->refs); mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); tsk = sqpoll_task_locked(sqd); if (tsk) tctx = tsk->io_uring; } } else { tctx = current->io_uring; } BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i]) ctx->iowq_limits[i] = new_count[i]; ctx->iowq_limits_set = true; if (tctx && tctx->io_wq) { ret = io_wq_max_workers(tctx->io_wq, new_count); if (ret) goto err; } else { memset(new_count, 0, sizeof(new_count)); } if (sqd) { mutex_unlock(&ctx->uring_lock); mutex_unlock(&sqd->lock); io_put_sq_data(sqd); mutex_lock(&ctx->uring_lock); } if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; /* that's it for SQPOLL, only the SQPOLL task creates requests */ if (sqd) return 0; /* now propagate the restriction to all registered users */ list_for_each_entry(node, &ctx->tctx_list, ctx_node) { tctx = node->task->io_uring; if (WARN_ON_ONCE(!tctx->io_wq)) continue; for (i = 0; i < ARRAY_SIZE(new_count); i++) new_count[i] = ctx->iowq_limits[i]; /* ignore errors, it always returns zero anyway */ (void)io_wq_max_workers(tctx->io_wq, new_count); } return 0; err: if (sqd) { mutex_unlock(&ctx->uring_lock); mutex_unlock(&sqd->lock); io_put_sq_data(sqd); mutex_lock(&ctx->uring_lock); } return ret; } static int io_register_clock(struct io_ring_ctx *ctx, struct io_uring_clock_register __user *arg) { struct io_uring_clock_register reg; if (copy_from_user(&reg, arg, sizeof(reg))) return -EFAULT; if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv))) return -EINVAL; switch (reg.clockid) { case CLOCK_MONOTONIC: ctx->clock_offset = 0; break; case CLOCK_BOOTTIME: ctx->clock_offset = TK_OFFS_BOOT; break; default: return -EINVAL; } ctx->clockid = reg.clockid; return 0; } /* * State to maintain until we can swap. Both new and old state, used for * either mapping or freeing. */ struct io_ring_ctx_rings { struct io_rings *rings; struct io_uring_sqe *sq_sqes; struct io_mapped_region sq_region; struct io_mapped_region ring_region; }; static void io_register_free_rings(struct io_ring_ctx *ctx, struct io_uring_params *p, struct io_ring_ctx_rings *r) { io_free_region(ctx, &r->sq_region); io_free_region(ctx, &r->ring_region); } #define swap_old(ctx, o, n, field) \ do { \ (o).field = (ctx)->field; \ (ctx)->field = (n).field; \ } while (0) #define RESIZE_FLAGS (IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP) #define COPY_FLAGS (IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \ IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP | \ IORING_SETUP_CQE_MIXED) static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_region_desc rd; struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL; size_t size, sq_array_offset; unsigned i, tail, old_head; struct io_uring_params p; int ret; /* limited to DEFER_TASKRUN for now */ if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) return -EINVAL; if (copy_from_user(&p, arg, sizeof(p))) return -EFAULT; if (p.flags & ~RESIZE_FLAGS) return -EINVAL; /* properties that are always inherited */ p.flags |= (ctx->flags & COPY_FLAGS); ret = io_uring_fill_params(p.sq_entries, &p); if (unlikely(ret)) return ret; size = rings_size(p.flags, p.sq_entries, p.cq_entries, &sq_array_offset); if (size == SIZE_MAX) return -EOVERFLOW; memset(&rd, 0, sizeof(rd)); rd.size = PAGE_ALIGN(size); if (p.flags & IORING_SETUP_NO_MMAP) { rd.user_addr = p.cq_off.user_addr; rd.flags |= IORING_MEM_REGION_TYPE_USER; } ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING); if (ret) { io_register_free_rings(ctx, &p, &n); return ret; } n.rings = io_region_get_ptr(&n.ring_region); /* * At this point n.rings is shared with userspace, just like o.rings * is as well. While we don't expect userspace to modify it while * a resize is in progress, and it's most likely that userspace will * shoot itself in the foot if it does, we can't always assume good * intent... Use read/write once helpers from here on to indicate the * shared nature of it. */ WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1); WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1); WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries); WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries); if (copy_to_user(arg, &p, sizeof(p))) { io_register_free_rings(ctx, &p, &n); return -EFAULT; } if (p.flags & IORING_SETUP_SQE128) size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries); else size = array_size(sizeof(struct io_uring_sqe), p.sq_entries); if (size == SIZE_MAX) { io_register_free_rings(ctx, &p, &n); return -EOVERFLOW; } memset(&rd, 0, sizeof(rd)); rd.size = PAGE_ALIGN(size); if (p.flags & IORING_SETUP_NO_MMAP) { rd.user_addr = p.sq_off.user_addr; rd.flags |= IORING_MEM_REGION_TYPE_USER; } ret = io_create_region_mmap_safe(ctx, &n.sq_region, &rd, IORING_OFF_SQES); if (ret) { io_register_free_rings(ctx, &p, &n); return ret; } n.sq_sqes = io_region_get_ptr(&n.sq_region); /* * If using SQPOLL, park the thread */ if (ctx->sq_data) { mutex_unlock(&ctx->uring_lock); io_sq_thread_park(ctx->sq_data); mutex_lock(&ctx->uring_lock); } /* * We'll do the swap. Grab the ctx->mmap_lock, which will exclude * any new mmap's on the ring fd. Clear out existing mappings to prevent * mmap from seeing them, as we'll unmap them. Any attempt to mmap * existing rings beyond this point will fail. Not that it could proceed * at this point anyway, as the io_uring mmap side needs go grab the * ctx->mmap_lock as well. Likewise, hold the completion lock over the * duration of the actual swap. */ mutex_lock(&ctx->mmap_lock); spin_lock(&ctx->completion_lock); o.rings = ctx->rings; ctx->rings = NULL; o.sq_sqes = ctx->sq_sqes; ctx->sq_sqes = NULL; /* * Now copy SQ and CQ entries, if any. If either of the destination * rings can't hold what is already there, then fail the operation. */ tail = READ_ONCE(o.rings->sq.tail); old_head = READ_ONCE(o.rings->sq.head); if (tail - old_head > p.sq_entries) goto overflow; for (i = old_head; i < tail; i++) { unsigned src_head = i & (ctx->sq_entries - 1); unsigned dst_head = i & (p.sq_entries - 1); n.sq_sqes[dst_head] = o.sq_sqes[src_head]; } WRITE_ONCE(n.rings->sq.head, old_head); WRITE_ONCE(n.rings->sq.tail, tail); tail = READ_ONCE(o.rings->cq.tail); old_head = READ_ONCE(o.rings->cq.head); if (tail - old_head > p.cq_entries) { overflow: /* restore old rings, and return -EOVERFLOW via cleanup path */ ctx->rings = o.rings; ctx->sq_sqes = o.sq_sqes; to_free = &n; ret = -EOVERFLOW; goto out; } for (i = old_head; i < tail; i++) { unsigned src_head = i & (ctx->cq_entries - 1); unsigned dst_head = i & (p.cq_entries - 1); n.rings->cqes[dst_head] = o.rings->cqes[src_head]; } WRITE_ONCE(n.rings->cq.head, old_head); WRITE_ONCE(n.rings->cq.tail, tail); /* invalidate cached cqe refill */ ctx->cqe_cached = ctx->cqe_sentinel = NULL; WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped)); atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags)); WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags)); WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow)); /* all done, store old pointers and assign new ones */ if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset); ctx->sq_entries = p.sq_entries; ctx->cq_entries = p.cq_entries; ctx->rings = n.rings; ctx->sq_sqes = n.sq_sqes; swap_old(ctx, o, n, ring_region); swap_old(ctx, o, n, sq_region); to_free = &o; ret = 0; out: spin_unlock(&ctx->completion_lock); mutex_unlock(&ctx->mmap_lock); io_register_free_rings(ctx, &p, to_free); if (ctx->sq_data) io_sq_thread_unpark(ctx->sq_data); return ret; } static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg) { struct io_uring_mem_region_reg __user *reg_uptr = uarg; struct io_uring_mem_region_reg reg; struct io_uring_region_desc __user *rd_uptr; struct io_uring_region_desc rd; int ret; if (io_region_is_set(&ctx->param_region)) return -EBUSY; if (copy_from_user(&reg, reg_uptr, sizeof(reg))) return -EFAULT; rd_uptr = u64_to_user_ptr(reg.region_uptr); if (copy_from_user(&rd, rd_uptr, sizeof(rd))) return -EFAULT; if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv))) return -EINVAL; if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG) return -EINVAL; /* * This ensures there are no waiters. Waiters are unlocked and it's * hard to synchronise with them, especially if we need to initialise * the region. */ if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) && !(ctx->flags & IORING_SETUP_R_DISABLED)) return -EINVAL; ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd, IORING_MAP_OFF_PARAM_REGION); if (ret) return ret; if (copy_to_user(rd_uptr, &rd, sizeof(rd))) { guard(mutex)(&ctx->mmap_lock); io_free_region(ctx, &ctx->param_region); return -EFAULT; } if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) { ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region); ctx->cq_wait_size = rd.size; } return 0; } static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) __releases(ctx->uring_lock) __acquires(ctx->uring_lock) { int ret; /* * We don't quiesce the refs for register anymore and so it can't be * dying as we're holding a file ref here. */ if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs))) return -ENXIO; if (ctx->submitter_task && ctx->submitter_task != current) return -EEXIST; if (ctx->restricted) { opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; } switch (opcode) { case IORING_REGISTER_BUFFERS: ret = -EFAULT; if (!arg) break; ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); break; case IORING_UNREGISTER_BUFFERS: ret = -EINVAL; if (arg || nr_args) break; ret = io_sqe_buffers_unregister(ctx); break; case IORING_REGISTER_FILES: ret = -EFAULT; if (!arg) break; ret = io_sqe_files_register(ctx, arg, nr_args, NULL); break; case IORING_UNREGISTER_FILES: ret = -EINVAL; if (arg || nr_args) break; ret = io_sqe_files_unregister(ctx); break; case IORING_REGISTER_FILES_UPDATE: ret = io_register_files_update(ctx, arg, nr_args); break; case IORING_REGISTER_EVENTFD: ret = -EINVAL; if (nr_args != 1) break; ret = io_eventfd_register(ctx, arg, 0); break; case IORING_REGISTER_EVENTFD_ASYNC: ret = -EINVAL; if (nr_args != 1) break; ret = io_eventfd_register(ctx, arg, 1); break; case IORING_UNREGISTER_EVENTFD: ret = -EINVAL; if (arg || nr_args) break; ret = io_eventfd_unregister(ctx); break; case IORING_REGISTER_PROBE: ret = -EINVAL; if (!arg || nr_args > 256) break; ret = io_probe(ctx, arg, nr_args); break; case IORING_REGISTER_PERSONALITY: ret = -EINVAL; if (arg || nr_args) break; ret = io_register_personality(ctx); break; case IORING_UNREGISTER_PERSONALITY: ret = -EINVAL; if (arg) break; ret = io_unregister_personality(ctx, nr_args); break; case IORING_REGISTER_ENABLE_RINGS: ret = -EINVAL; if (arg || nr_args) break; ret = io_register_enable_rings(ctx); break; case IORING_REGISTER_RESTRICTIONS: ret = io_register_restrictions(ctx, arg, nr_args); break; case IORING_REGISTER_FILES2: ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); break; case IORING_REGISTER_FILES_UPDATE2: ret = io_register_rsrc_update(ctx, arg, nr_args, IORING_RSRC_FILE); break; case IORING_REGISTER_BUFFERS2: ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); break; case IORING_REGISTER_BUFFERS_UPDATE: ret = io_register_rsrc_update(ctx, arg, nr_args, IORING_RSRC_BUFFER); break; case IORING_REGISTER_IOWQ_AFF: ret = -EINVAL; if (!arg || !nr_args) break; ret = io_register_iowq_aff(ctx, arg, nr_args); break; case IORING_UNREGISTER_IOWQ_AFF: ret = -EINVAL; if (arg || nr_args) break; ret = io_unregister_iowq_aff(ctx); break; case IORING_REGISTER_IOWQ_MAX_WORKERS: ret = -EINVAL; if (!arg || nr_args != 2) break; ret = io_register_iowq_max_workers(ctx, arg); break; case IORING_REGISTER_RING_FDS: ret = io_ringfd_register(ctx, arg, nr_args); break; case IORING_UNREGISTER_RING_FDS: ret = io_ringfd_unregister(ctx, arg, nr_args); break; case IORING_REGISTER_PBUF_RING: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_pbuf_ring(ctx, arg); break; case IORING_UNREGISTER_PBUF_RING: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_unregister_pbuf_ring(ctx, arg); break; case IORING_REGISTER_SYNC_CANCEL: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_sync_cancel(ctx, arg); break; case IORING_REGISTER_FILE_ALLOC_RANGE: ret = -EINVAL; if (!arg || nr_args) break; ret = io_register_file_alloc_range(ctx, arg); break; case IORING_REGISTER_PBUF_STATUS: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_pbuf_status(ctx, arg); break; case IORING_REGISTER_NAPI: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_napi(ctx, arg); break; case IORING_UNREGISTER_NAPI: ret = -EINVAL; if (nr_args != 1) break; ret = io_unregister_napi(ctx, arg); break; case IORING_REGISTER_CLOCK: ret = -EINVAL; if (!arg || nr_args) break; ret = io_register_clock(ctx, arg); break; case IORING_REGISTER_CLONE_BUFFERS: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_clone_buffers(ctx, arg); break; case IORING_REGISTER_ZCRX_IFQ: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_zcrx_ifq(ctx, arg); break; case IORING_REGISTER_RESIZE_RINGS: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_resize_rings(ctx, arg); break; case IORING_REGISTER_MEM_REGION: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_mem_region(ctx, arg); break; case IORING_REGISTER_QUERY: ret = io_query(ctx, arg, nr_args); break; default: ret = -EINVAL; break; } return ret; } /* * Given an 'fd' value, return the ctx associated with if. If 'registered' is * true, then the registered index is used. Otherwise, the normal fd table. * Caller must call fput() on the returned file, unless it's an ERR_PTR. */ struct file *io_uring_register_get_file(unsigned int fd, bool registered) { struct file *file; if (registered) { /* * Ring fd has been registered via IORING_REGISTER_RING_FDS, we * need only dereference our task private array to find it. */ struct io_uring_task *tctx = current->io_uring; if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) return ERR_PTR(-EINVAL); fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); file = tctx->registered_rings[fd]; if (file) get_file(file); } else { file = fget(fd); } if (unlikely(!file)) return ERR_PTR(-EBADF); if (io_is_uring_fops(file)) return file; fput(file); return ERR_PTR(-EOPNOTSUPP); } static int io_uring_register_send_msg_ring(void __user *arg, unsigned int nr_args) { struct io_uring_sqe sqe; if (!arg || nr_args != 1) return -EINVAL; if (copy_from_user(&sqe, arg, sizeof(sqe))) return -EFAULT; /* no flags supported */ if (sqe.flags) return -EINVAL; if (sqe.opcode != IORING_OP_MSG_RING) return -EINVAL; return io_uring_sync_msg_ring(&sqe); } /* * "blind" registration opcodes are ones where there's no ring given, and * hence the source fd must be -1. */ static int io_uring_register_blind(unsigned int opcode, void __user *arg, unsigned int nr_args) { switch (opcode) { case IORING_REGISTER_SEND_MSG_RING: return io_uring_register_send_msg_ring(arg, nr_args); case IORING_REGISTER_QUERY: return io_query(NULL, arg, nr_args); } return -EINVAL; } SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, void __user *, arg, unsigned int, nr_args) { struct io_ring_ctx *ctx; long ret = -EBADF; struct file *file; bool use_registered_ring; use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING); opcode &= ~IORING_REGISTER_USE_REGISTERED_RING; if (opcode >= IORING_REGISTER_LAST) return -EINVAL; if (fd == -1) return io_uring_register_blind(opcode, arg, nr_args); file = io_uring_register_get_file(fd, use_registered_ring); if (IS_ERR(file)) return PTR_ERR(file); ctx = file->private_data; mutex_lock(&ctx->uring_lock); ret = __io_uring_register(ctx, opcode, arg, nr_args); trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr, ctx->buf_table.nr, ret); mutex_unlock(&ctx->uring_lock); fput(file); return ret; }
32 32 5 33 7 33 16 16 16 15 16 16 26 26 27 27 17 16 6 6 6 13 17 9 9 4 17 17 2 2 17 17 6 4 17 17 17 2 6 6 6 6 6 6 6 25 25 2 2 1 1 1 1 8 8 7 3 8 1 8 8 8 1 8 3 1 16 1 1 109 55 16 55 110 91 61 24 62 28 61 18 60 37 110 2 2 61 32 61 63 1 1 1 1 22 76 77 149 150 152 151 150 152 146 27 152 152 91 90 91 61 33 61 110 1 1 33 33 31 26 26 26 26 26 26 1 1 33 33 50 52 120 120 119 14 1 1 92 91 92 90 16 16 61 56 91 17 6 6 1 26 26 1 17 7 8 25 2 2 20 5 17 5 63 1 1 1 22 26 76 77 77 24 23 24 150 138 2 152 47 152 118 149 149 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * * This file is part of the SCTP kernel implementation * * These functions work with the state functions in sctp_sm_statefuns.c * to implement that state operations. These functions implement the * steps which require modifying existing data structures. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@austin.ibm.com> * Hui Huang <hui.huang@nokia.com> * Dajiang Zhang <dajiang.zhang@nokia.com> * Daisy Chang <daisyc@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/skbuff.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/ip.h> #include <linux/gfp.h> #include <net/sock.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> static int sctp_cmd_interpreter(enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, enum sctp_disposition status, struct sctp_cmd_seq *commands, gfp_t gfp); static int sctp_side_effects(enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association **asoc, void *event_arg, enum sctp_disposition status, struct sctp_cmd_seq *commands, gfp_t gfp); /******************************************************************** * Helper functions ********************************************************************/ /* A helper function for delayed processing of INET ECN CE bit. */ static void sctp_do_ecn_ce_work(struct sctp_association *asoc, __u32 lowest_tsn) { /* Save the TSN away for comparison when we receive CWR */ asoc->last_ecne_tsn = lowest_tsn; asoc->need_ecne = 1; } /* Helper function for delayed processing of SCTP ECNE chunk. */ /* RFC 2960 Appendix A * * RFC 2481 details a specific bit for a sender to send in * the header of its next outbound TCP segment to indicate to * its peer that it has reduced its congestion window. This * is termed the CWR bit. For SCTP the same indication is made * by including the CWR chunk. This chunk contains one data * element, i.e. the TSN number that was sent in the ECNE chunk. * This element represents the lowest TSN number in the datagram * that was originally marked with the CE bit. */ static struct sctp_chunk *sctp_do_ecn_ecne_work(struct sctp_association *asoc, __u32 lowest_tsn, struct sctp_chunk *chunk) { struct sctp_chunk *repl; /* Our previously transmitted packet ran into some congestion * so we should take action by reducing cwnd and ssthresh * and then ACK our peer that we we've done so by * sending a CWR. */ /* First, try to determine if we want to actually lower * our cwnd variables. Only lower them if the ECNE looks more * recent than the last response. */ if (TSN_lt(asoc->last_cwr_tsn, lowest_tsn)) { struct sctp_transport *transport; /* Find which transport's congestion variables * need to be adjusted. */ transport = sctp_assoc_lookup_tsn(asoc, lowest_tsn); /* Update the congestion variables. */ if (transport) sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_ECNE); asoc->last_cwr_tsn = lowest_tsn; } /* Always try to quiet the other end. In case of lost CWR, * resend last_cwr_tsn. */ repl = sctp_make_cwr(asoc, asoc->last_cwr_tsn, chunk); /* If we run out of memory, it will look like a lost CWR. We'll * get back in sync eventually. */ return repl; } /* Helper function to do delayed processing of ECN CWR chunk. */ static void sctp_do_ecn_cwr_work(struct sctp_association *asoc, __u32 lowest_tsn) { /* Turn off ECNE getting auto-prepended to every outgoing * packet */ asoc->need_ecne = 0; } /* Generate SACK if necessary. We call this at the end of a packet. */ static int sctp_gen_sack(struct sctp_association *asoc, int force, struct sctp_cmd_seq *commands) { struct sctp_transport *trans = asoc->peer.last_data_from; __u32 ctsn, max_tsn_seen; struct sctp_chunk *sack; int error = 0; if (force || (!trans && (asoc->param_flags & SPP_SACKDELAY_DISABLE)) || (trans && (trans->param_flags & SPP_SACKDELAY_DISABLE))) asoc->peer.sack_needed = 1; ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); /* From 12.2 Parameters necessary per association (i.e. the TCB): * * Ack State : This flag indicates if the next received packet * : is to be responded to with a SACK. ... * : When DATA chunks are out of order, SACK's * : are not delayed (see Section 6). * * [This is actually not mentioned in Section 6, but we * implement it here anyway. --piggy] */ if (max_tsn_seen != ctsn) asoc->peer.sack_needed = 1; /* From 6.2 Acknowledgement on Reception of DATA Chunks: * * Section 4.2 of [RFC2581] SHOULD be followed. Specifically, * an acknowledgement SHOULD be generated for at least every * second packet (not every second DATA chunk) received, and * SHOULD be generated within 200 ms of the arrival of any * unacknowledged DATA chunk. ... */ if (!asoc->peer.sack_needed) { asoc->peer.sack_cnt++; /* Set the SACK delay timeout based on the * SACK delay for the last transport * data was received from, or the default * for the association. */ if (trans) { /* We will need a SACK for the next packet. */ if (asoc->peer.sack_cnt >= trans->sackfreq - 1) asoc->peer.sack_needed = 1; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = trans->sackdelay; } else { /* We will need a SACK for the next packet. */ if (asoc->peer.sack_cnt >= asoc->sackfreq - 1) asoc->peer.sack_needed = 1; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; } /* Restart the SACK timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { __u32 old_a_rwnd = asoc->a_rwnd; asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) { asoc->a_rwnd = old_a_rwnd; goto nomem; } asoc->peer.sack_needed = 0; asoc->peer.sack_cnt = 0; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(sack)); /* Stop the SACK timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } return error; nomem: error = -ENOMEM; return error; } /* When the T3-RTX timer expires, it calls this function to create the * relevant state machine event. */ void sctp_generate_t3_rtx_event(struct timer_list *t) { struct sctp_transport *transport = timer_container_of(transport, t, T3_rtx_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); int error; /* Check whether a task is in the sock. */ bh_lock_sock(sk); if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->T3_rtx_timer, jiffies + (HZ/20))) sctp_transport_hold(transport); goto out_unlock; } /* Run through the state machine. */ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); if (error) sk->sk_err = -error; out_unlock: bh_unlock_sock(sk); sctp_transport_put(transport); } /* This is a sa interface for producing timeout events. It works * for timeouts which use the association as their parameter. */ static void sctp_generate_timeout_event(struct sctp_association *asoc, enum sctp_event_timeout timeout_type) { struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); int error = 0; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); /* Try again later. */ if (!mod_timer(&asoc->timers[timeout_type], jiffies + (HZ/20))) sctp_association_hold(asoc); goto out_unlock; } /* Is this association really dead and just waiting around for * the timer to let go of the reference? */ if (asoc->base.dead) goto out_unlock; /* Run through the state machine. */ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(timeout_type), asoc->state, asoc->ep, asoc, (void *)timeout_type, GFP_ATOMIC); if (error) sk->sk_err = -error; out_unlock: bh_unlock_sock(sk); sctp_association_put(asoc); } static void sctp_generate_t1_cookie_event(struct timer_list *t) { struct sctp_association *asoc = timer_container_of(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]); sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE); } static void sctp_generate_t1_init_event(struct timer_list *t) { struct sctp_association *asoc = timer_container_of(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]); sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT); } static void sctp_generate_t2_shutdown_event(struct timer_list *t) { struct sctp_association *asoc = timer_container_of(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]); sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN); } static void sctp_generate_t4_rto_event(struct timer_list *t) { struct sctp_association *asoc = timer_container_of(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]); sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO); } static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t) { struct sctp_association *asoc = timer_container_of(asoc, t, timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]); sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD); } /* sctp_generate_t5_shutdown_guard_event() */ static void sctp_generate_autoclose_event(struct timer_list *t) { struct sctp_association *asoc = timer_container_of(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]); sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE); } /* Generate a heart beat event. If the sock is busy, reschedule. Make * sure that the transport is still valid. */ void sctp_generate_heartbeat_event(struct timer_list *t) { struct sctp_transport *transport = timer_container_of(transport, t, hb_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); u32 elapsed, timeout; int error = 0; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->hb_timer, jiffies + (HZ/20))) sctp_transport_hold(transport); goto out_unlock; } /* Check if we should still send the heartbeat or reschedule */ elapsed = jiffies - transport->last_time_sent; timeout = sctp_transport_timeout(transport); if (elapsed < timeout) { elapsed = timeout - elapsed; if (!mod_timer(&transport->hb_timer, jiffies + elapsed)) sctp_transport_hold(transport); goto out_unlock; } error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); if (error) sk->sk_err = -error; out_unlock: bh_unlock_sock(sk); sctp_transport_put(transport); } /* Handle the timeout of the ICMP protocol unreachable timer. Trigger * the correct state machine transition that will close the association. */ void sctp_generate_proto_unreach_event(struct timer_list *t) { struct sctp_transport *transport = timer_container_of(transport, t, proto_unreach_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, jiffies + (HZ/20))) sctp_transport_hold(transport); goto out_unlock; } /* Is this structure just waiting around for us to actually * get destroyed? */ if (asoc->base.dead) goto out_unlock; sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: bh_unlock_sock(sk); sctp_transport_put(transport); } /* Handle the timeout of the RE-CONFIG timer. */ void sctp_generate_reconf_event(struct timer_list *t) { struct sctp_transport *transport = timer_container_of(transport, t, reconf_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); int error = 0; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->reconf_timer, jiffies + (HZ / 20))) sctp_transport_hold(transport); goto out_unlock; } /* This happens when the response arrives after the timer is triggered. */ if (!asoc->strreset_chunk) goto out_unlock; error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); if (error) sk->sk_err = -error; out_unlock: bh_unlock_sock(sk); sctp_transport_put(transport); } /* Handle the timeout of the probe timer. */ void sctp_generate_probe_event(struct timer_list *t) { struct sctp_transport *transport = timer_container_of(transport, t, probe_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); int error = 0; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->probe_timer, jiffies + (HZ / 20))) sctp_transport_hold(transport); goto out_unlock; } error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_PROBE), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); if (error) sk->sk_err = -error; out_unlock: bh_unlock_sock(sk); sctp_transport_put(transport); } /* Inject a SACK Timeout event into the state machine. */ static void sctp_generate_sack_event(struct timer_list *t) { struct sctp_association *asoc = timer_container_of(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]); sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK); } sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { [SCTP_EVENT_TIMEOUT_NONE] = NULL, [SCTP_EVENT_TIMEOUT_T1_COOKIE] = sctp_generate_t1_cookie_event, [SCTP_EVENT_TIMEOUT_T1_INIT] = sctp_generate_t1_init_event, [SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = sctp_generate_t2_shutdown_event, [SCTP_EVENT_TIMEOUT_T3_RTX] = NULL, [SCTP_EVENT_TIMEOUT_T4_RTO] = sctp_generate_t4_rto_event, [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] = sctp_generate_t5_shutdown_guard_event, [SCTP_EVENT_TIMEOUT_HEARTBEAT] = NULL, [SCTP_EVENT_TIMEOUT_RECONF] = NULL, [SCTP_EVENT_TIMEOUT_SACK] = sctp_generate_sack_event, [SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sctp_generate_autoclose_event, }; /* RFC 2960 8.2 Path Failure Detection * * When its peer endpoint is multi-homed, an endpoint should keep a * error counter for each of the destination transport addresses of the * peer endpoint. * * Each time the T3-rtx timer expires on any address, or when a * HEARTBEAT sent to an idle address is not acknowledged within a RTO, * the error counter of that destination address will be incremented. * When the value in the error counter exceeds the protocol parameter * 'Path.Max.Retrans' of that destination address, the endpoint should * mark the destination transport address as inactive, and a * notification SHOULD be sent to the upper layer. * */ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, struct sctp_association *asoc, struct sctp_transport *transport, int is_hb) { /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ /* We are here due to a timer expiration. If the timer was * not a HEARTBEAT, then normal error tracking is done. * If the timer was a heartbeat, we only increment error counts * when we already have an outstanding HEARTBEAT that has not * been acknowledged. * Additionally, some tranport states inhibit error increments. */ if (!is_hb) { asoc->overall_error_count++; if (transport->state != SCTP_INACTIVE) transport->error_count++; } else if (transport->hb_sent) { if (transport->state != SCTP_UNCONFIRMED) asoc->overall_error_count++; if (transport->state != SCTP_INACTIVE) transport->error_count++; } /* If the transport error count is greater than the pf_retrans * threshold, and less than pathmaxrtx, and if the current state * is SCTP_ACTIVE, then mark this transport as Partially Failed, * see SCTP Quick Failover Draft, section 5.1 */ if (asoc->base.net->sctp.pf_enable && transport->state == SCTP_ACTIVE && transport->error_count < transport->pathmaxrxt && transport->error_count > transport->pf_retrans) { sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_PF, 0); /* Update the hb timer to resend a heartbeat every rto */ sctp_transport_reset_hb_timer(transport); } if (transport->state != SCTP_INACTIVE && (transport->error_count > transport->pathmaxrxt)) { pr_debug("%s: association:%p transport addr:%pISpc failed\n", __func__, asoc, &transport->ipaddr.sa); sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_DOWN, SCTP_FAILED_THRESHOLD); } if (transport->error_count > transport->ps_retrans && asoc->peer.primary_path == transport && asoc->peer.active_path != transport) sctp_assoc_set_primary(asoc, asoc->peer.active_path); /* E2) For the destination address for which the timer * expires, set RTO <- RTO * 2 ("back off the timer"). The * maximum value discussed in rule C7 above (RTO.max) may be * used to provide an upper bound to this doubling operation. * * Special Case: the first HB doesn't trigger exponential backoff. * The first unacknowledged HB triggers it. We do this with a flag * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { transport->rto = min((transport->rto * 2), transport->asoc->rto_max); sctp_max_rto(asoc, transport); } } /* Worker routine to handle INIT command failure. */ static void sctp_cmd_init_failed(struct sctp_cmd_seq *commands, struct sctp_association *asoc, unsigned int error) { struct sctp_ulpevent *event; event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_CANT_STR_ASSOC, (__u16)error, 0, 0, NULL, GFP_ATOMIC); if (event) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(event)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); /* SEND_FAILED sent later when cleaning up the association. */ asoc->outqueue.error = error; sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); } /* Worker routine to handle SCTP_CMD_ASSOC_FAILED. */ static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands, struct sctp_association *asoc, enum sctp_event_type event_type, union sctp_subtype subtype, struct sctp_chunk *chunk, unsigned int error) { struct sctp_ulpevent *event; struct sctp_chunk *abort; /* Cancel any partial delivery in progress. */ asoc->stream.si->abort_pd(&asoc->ulpq, GFP_ATOMIC); if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT) event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST, (__u16)error, 0, 0, chunk, GFP_ATOMIC); else event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST, (__u16)error, 0, 0, NULL, GFP_ATOMIC); if (event) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(event)); if (asoc->overall_error_count >= asoc->max_retrans) { abort = sctp_make_violation_max_retrans(asoc, chunk); if (abort) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); } sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); /* SEND_FAILED sent later when cleaning up the association. */ asoc->outqueue.error = error; sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); } /* Process an init chunk (may be real INIT/INIT-ACK or an embedded INIT * inside the cookie. In reality, this is only used for INIT-ACK processing * since all other cases use "temporary" associations and can do all * their work in statefuns directly. */ static int sctp_cmd_process_init(struct sctp_cmd_seq *commands, struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_init_chunk *peer_init, gfp_t gfp) { int error; /* We only process the init as a sideeffect in a single * case. This is when we process the INIT-ACK. If we * fail during INIT processing (due to malloc problems), * just return the error and stop processing the stack. */ if (!sctp_process_init(asoc, chunk, sctp_source(chunk), peer_init, gfp)) error = -ENOMEM; else error = 0; return error; } /* Helper function to break out starting up of heartbeat timers. */ static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sctp_transport *t; /* Start a heartbeat timer for each transport on the association. * hold a reference on the transport to make sure none of * the needed data structures go away. */ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) sctp_transport_reset_hb_timer(t); } static void sctp_cmd_hb_timers_stop(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sctp_transport *t; /* Stop all heartbeat timers. */ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (timer_delete(&t->hb_timer)) sctp_transport_put(t); } } /* Helper function to stop any pending T3-RTX timers */ static void sctp_cmd_t3_rtx_timers_stop(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sctp_transport *t; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (timer_delete(&t->T3_rtx_timer)) sctp_transport_put(t); } } /* Helper function to handle the reception of an HEARTBEAT ACK. */ static void sctp_cmd_transport_on(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_transport *t, struct sctp_chunk *chunk) { struct sctp_sender_hb_info *hbinfo; int was_unconfirmed = 0; /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the * HEARTBEAT should clear the error counter of the destination * transport address to which the HEARTBEAT was sent. */ t->error_count = 0; /* * Although RFC4960 specifies that the overall error count must * be cleared when a HEARTBEAT ACK is received, we make an * exception while in SHUTDOWN PENDING. If the peer keeps its * window shut forever, we may never be able to transmit our * outstanding data and rely on the retransmission limit be reached * to shutdown the association. */ if (t->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) t->asoc->overall_error_count = 0; /* Clear the hb_sent flag to signal that we had a good * acknowledgement. */ t->hb_sent = 0; /* Mark the destination transport address as active if it is not so * marked. */ if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED)) { was_unconfirmed = 1; sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); } if (t->state == SCTP_PF) sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); /* HB-ACK was received for a the proper HB. Consider this * forward progress. */ if (t->dst) sctp_transport_dst_confirm(t); /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. * If the transport's rto_pending variable has been cleared, * it was most likely due to a retransmit. However, we want * to re-enable it to properly update the rto. */ if (t->rto_pending == 0) t->rto_pending = 1; hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data; sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); /* Update the heartbeat timer. */ sctp_transport_reset_hb_timer(t); if (was_unconfirmed && asoc->peer.transport_count == 1) sctp_transport_immediate_rtx(t); } /* Helper function to process the process SACK command. */ static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { int err = 0; if (sctp_outq_sack(&asoc->outqueue, chunk)) { /* There are no more TSNs awaiting SACK. */ err = sctp_do_sm(asoc->base.net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN), asoc->state, asoc->ep, asoc, NULL, GFP_ATOMIC); } return err; } /* Helper function to set the timeout value for T2-SHUTDOWN timer and to set * the transport for a shutdown chunk. */ static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { struct sctp_transport *t; if (chunk->transport) t = chunk->transport; else { t = sctp_assoc_choose_alter_transport(asoc, asoc->shutdown_last_sent_to); chunk->transport = t; } asoc->shutdown_last_sent_to = t; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; } /* Helper function to change the state of an association. */ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, enum sctp_state state) { struct sock *sk = asoc->base.sk; asoc->state = state; pr_debug("%s: asoc:%p[%s]\n", __func__, asoc, sctp_state_tbl[state]); if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has * successfully completed a connect() call. */ if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) inet_sk_set_state(sk, SCTP_SS_ESTABLISHED); /* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */ if (sctp_state(asoc, SHUTDOWN_RECEIVED) && sctp_sstate(sk, ESTABLISHED)) { inet_sk_set_state(sk, SCTP_SS_CLOSING); sk->sk_shutdown |= RCV_SHUTDOWN; } } if (sctp_state(asoc, COOKIE_WAIT)) { /* Reset init timeouts since they may have been * increased due to timer expirations. */ asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = asoc->rto_initial; asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = asoc->rto_initial; } if (sctp_state(asoc, ESTABLISHED)) { kfree(asoc->peer.cookie); asoc->peer.cookie = NULL; } if (sctp_state(asoc, ESTABLISHED) || sctp_state(asoc, CLOSED) || sctp_state(asoc, SHUTDOWN_RECEIVED)) { /* Wake up any processes waiting in the asoc's wait queue in * sctp_wait_for_connect() or sctp_wait_for_sndbuf(). */ if (waitqueue_active(&asoc->wait)) wake_up_interruptible(&asoc->wait); /* Wake up any processes waiting in the sk's sleep queue of * a TCP-style or UDP-style peeled-off socket in * sctp_wait_for_accept() or sctp_wait_for_packet(). * For a UDP-style socket, the waiters are woken up by the * notifications. */ if (!sctp_style(sk, UDP)) sk->sk_state_change(sk); } if (sctp_state(asoc, SHUTDOWN_PENDING) && !sctp_outq_is_empty(&asoc->outqueue)) sctp_outq_uncork(&asoc->outqueue, GFP_ATOMIC); } /* Helper function to delete an association. */ static void sctp_cmd_delete_tcb(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sock *sk = asoc->base.sk; /* If it is a non-temporary association belonging to a TCP-style * listening socket that is not closed, do not free it so that accept() * can pick it up later. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING) && (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; sctp_association_free(asoc); } /* * ADDIP Section 4.1 ASCONF Chunk Procedures * A4) Start a T-4 RTO timer, using the RTO value of the selected * destination address (we use active path instead of primary path just * because primary path may be inactive. */ static void sctp_cmd_setup_t4(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { struct sctp_transport *t; t = sctp_assoc_choose_alter_transport(asoc, chunk->transport); asoc->timeouts[SCTP_EVENT_TIMEOUT_T4_RTO] = t->rto; chunk->transport = t; } /* Process an incoming Operation Error Chunk. */ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { struct sctp_errhdr *err_hdr; struct sctp_ulpevent *ev; while (chunk->chunk_end > chunk->skb->data) { err_hdr = (struct sctp_errhdr *)(chunk->skb->data); ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0, GFP_ATOMIC); if (!ev) return; asoc->stream.si->enqueue_event(&asoc->ulpq, ev); switch (err_hdr->cause) { case SCTP_ERROR_UNKNOWN_CHUNK: { struct sctp_chunkhdr *unk_chunk_hdr; unk_chunk_hdr = (struct sctp_chunkhdr *)(err_hdr + 1); switch (unk_chunk_hdr->type) { /* ADDIP 4.1 A9) If the peer responds to an ASCONF with * an ERROR chunk reporting that it did not recognized * the ASCONF chunk type, the sender of the ASCONF MUST * NOT send any further ASCONF chunks and MUST stop its * T-4 timer. */ case SCTP_CID_ASCONF: if (asoc->peer.asconf_capable == 0) break; asoc->peer.asconf_capable = 0; sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); break; default: break; } break; } default: break; } } } /* Helper function to remove the association non-primary peer * transports. */ static void sctp_cmd_del_non_primary(struct sctp_association *asoc) { struct sctp_transport *t; struct list_head *temp; struct list_head *pos; list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); if (!sctp_cmp_addr_exact(&t->ipaddr, &asoc->peer.primary_addr)) { sctp_assoc_rm_peer(asoc, t); } } } /* Helper function to set sk_err on a 1-1 style socket. */ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error) { struct sock *sk = asoc->base.sk; if (!sctp_style(sk, UDP)) sk->sk_err = error; } /* Helper function to generate an association change event */ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands, struct sctp_association *asoc, u8 state) { struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_assoc_change(asoc, 0, state, 0, asoc->c.sinit_num_ostreams, asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands, struct sctp_association *asoc) { struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } /* Helper function to generate an adaptation indication event */ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands, struct sctp_association *asoc) { struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, enum sctp_event_timeout timer, char *name) { struct sctp_transport *t; t = asoc->init_last_sent_to; asoc->init_err_counter++; if (t->init_sent_count > (asoc->init_cycle + 1)) { asoc->timeouts[timer] *= 2; if (asoc->timeouts[timer] > asoc->max_init_timeo) { asoc->timeouts[timer] = asoc->max_init_timeo; } asoc->init_cycle++; pr_debug("%s: T1[%s] timeout adjustment init_err_counter:%d" " cycle:%d timeout:%ld\n", __func__, name, asoc->init_err_counter, asoc->init_cycle, asoc->timeouts[timer]); } } /* Send the whole message, chunk by chunk, to the outqueue. * This way the whole message is queued up and bundling if * encouraged for small fragments. */ static void sctp_cmd_send_msg(struct sctp_association *asoc, struct sctp_datamsg *msg, gfp_t gfp) { struct sctp_chunk *chunk; list_for_each_entry(chunk, &msg->chunks, frag_list) sctp_outq_tail(&asoc->outqueue, chunk, gfp); asoc->outqueue.sched->enqueue(&asoc->outqueue, msg); } /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. */ #define debug_pre_sfn() \ pr_debug("%s[pre-fn]: ep:%p, %s, %s, asoc:%p[%s], %s\n", __func__, \ ep, sctp_evttype_tbl[event_type], (*debug_fn)(subtype), \ asoc, sctp_state_tbl[state], state_fn->name) #define debug_post_sfn() \ pr_debug("%s[post-fn]: asoc:%p, status:%s\n", __func__, asoc, \ sctp_status_tbl[status]) #define debug_post_sfx() \ pr_debug("%s[post-sfx]: error:%d, asoc:%p[%s]\n", __func__, error, \ asoc, sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ sctp_assoc2id(asoc))) ? asoc->state : SCTP_STATE_CLOSED]) /* * This is the master state machine processing function. * * If you want to understand all of lksctp, this is a * good place to start. */ int sctp_do_sm(struct net *net, enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, gfp_t gfp) { typedef const char *(printfn_t)(union sctp_subtype); static printfn_t *table[] = { NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname, }; printfn_t *debug_fn __attribute__ ((unused)) = table[event_type]; const struct sctp_sm_table_entry *state_fn; struct sctp_cmd_seq commands; enum sctp_disposition status; int error = 0; /* Look up the state function, run it, and then process the * side effects. These three steps are the heart of lksctp. */ state_fn = sctp_sm_lookup_event(net, event_type, state, subtype); sctp_init_cmd_seq(&commands); debug_pre_sfn(); status = state_fn->fn(net, ep, asoc, subtype, event_arg, &commands); debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, ep, &asoc, event_arg, status, &commands, gfp); debug_post_sfx(); return error; } /***************************************************************** * This the master state function side effect processing function. *****************************************************************/ static int sctp_side_effects(enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association **asoc, void *event_arg, enum sctp_disposition status, struct sctp_cmd_seq *commands, gfp_t gfp) { int error; /* FIXME - Most of the dispositions left today would be categorized * as "exceptional" dispositions. For those dispositions, it * may not be proper to run through any of the commands at all. * For example, the command interpreter might be run only with * disposition SCTP_DISPOSITION_CONSUME. */ if (0 != (error = sctp_cmd_interpreter(event_type, subtype, state, ep, *asoc, event_arg, status, commands, gfp))) goto bail; switch (status) { case SCTP_DISPOSITION_DISCARD: pr_debug("%s: ignored sctp protocol event - state:%d, " "event_type:%d, event_id:%d\n", __func__, state, event_type, subtype.chunk); break; case SCTP_DISPOSITION_NOMEM: /* We ran out of memory, so we need to discard this * packet. */ /* BUG--we should now recover some memory, probably by * reneging... */ error = -ENOMEM; break; case SCTP_DISPOSITION_DELETE_TCB: case SCTP_DISPOSITION_ABORT: /* This should now be a command. */ *asoc = NULL; break; case SCTP_DISPOSITION_CONSUME: /* * We should no longer have much work to do here as the * real work has been done as explicit commands above. */ break; case SCTP_DISPOSITION_VIOLATION: net_err_ratelimited("protocol violation state %d chunkid %d\n", state, subtype.chunk); break; case SCTP_DISPOSITION_NOT_IMPL: pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n", state, event_type, subtype.chunk); break; case SCTP_DISPOSITION_BUG: pr_err("bug in state %d, event_type %d, event_id %d\n", state, event_type, subtype.chunk); BUG(); break; default: pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); error = status; if (error >= 0) error = -EINVAL; WARN_ON_ONCE(1); break; } bail: return error; } /******************************************************************** * 2nd Level Abstractions ********************************************************************/ /* This is the side-effect interpreter. */ static int sctp_cmd_interpreter(enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, enum sctp_disposition status, struct sctp_cmd_seq *commands, gfp_t gfp) { struct sctp_sock *sp = sctp_sk(ep->base.sk); struct sctp_chunk *chunk = NULL, *new_obj; struct sctp_packet *packet; struct sctp_sackhdr sackh; struct timer_list *timer; struct sctp_transport *t; unsigned long timeout; struct sctp_cmd *cmd; int local_cork = 0; int error = 0; int force; if (SCTP_EVENT_T_TIMEOUT != event_type) chunk = event_arg; /* Note: This whole file is a huge candidate for rework. * For example, each command could either have its own handler, so * the loop would look like: * while (cmds) * cmd->handle(x, y, z) * --jgrimm */ while (NULL != (cmd = sctp_next_cmd(commands))) { switch (cmd->verb) { case SCTP_CMD_NOP: /* Do nothing. */ break; case SCTP_CMD_NEW_ASOC: /* Register a new association. */ if (local_cork) { sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; } /* Register with the endpoint. */ asoc = cmd->obj.asoc; BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); break; case SCTP_CMD_PURGE_OUTQUEUE: sctp_outq_teardown(&asoc->outqueue); break; case SCTP_CMD_DELETE_TCB: if (local_cork) { sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; } /* Delete the current association. */ sctp_cmd_delete_tcb(commands, asoc); asoc = NULL; break; case SCTP_CMD_NEW_STATE: /* Enter a new state. */ sctp_cmd_new_state(commands, asoc, cmd->obj.state); break; case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ error = sctp_tsnmap_mark(&asoc->peer.tsn_map, cmd->obj.u32, NULL); break; case SCTP_CMD_REPORT_FWDTSN: asoc->stream.si->report_ftsn(&asoc->ulpq, cmd->obj.u32); break; case SCTP_CMD_PROCESS_FWDTSN: asoc->stream.si->handle_ftsn(&asoc->ulpq, cmd->obj.chunk); break; case SCTP_CMD_GEN_SACK: /* Generate a Selective ACK. * The argument tells us whether to just count * the packet and MAYBE generate a SACK, or * force a SACK out. */ force = cmd->obj.i32; error = sctp_gen_sack(asoc, force, commands); break; case SCTP_CMD_PROCESS_SACK: /* Process an inbound SACK. */ error = sctp_cmd_process_sack(commands, asoc, cmd->obj.chunk); break; case SCTP_CMD_GEN_INIT_ACK: /* Generate an INIT ACK chunk. */ new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC, 0); if (!new_obj) { error = -ENOMEM; break; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); break; case SCTP_CMD_PEER_INIT: /* Process a unified INIT from the peer. * Note: Only used during INIT-ACK processing. If * there is an error just return to the outter * layer which will bail. */ error = sctp_cmd_process_init(commands, asoc, chunk, cmd->obj.init, gfp); break; case SCTP_CMD_GEN_COOKIE_ECHO: /* Generate a COOKIE ECHO chunk. */ new_obj = sctp_make_cookie_echo(asoc, chunk); if (!new_obj) { if (cmd->obj.chunk) sctp_chunk_free(cmd->obj.chunk); error = -ENOMEM; break; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); /* If there is an ERROR chunk to be sent along with * the COOKIE_ECHO, send it, too. */ if (cmd->obj.chunk) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(cmd->obj.chunk)); if (new_obj->transport) { new_obj->transport->init_sent_count++; asoc->init_last_sent_to = new_obj->transport; } /* FIXME - Eventually come up with a cleaner way to * enabling COOKIE-ECHO + DATA bundling during * multihoming stale cookie scenarios, the following * command plays with asoc->peer.retran_path to * avoid the problem of sending the COOKIE-ECHO and * DATA in different paths, which could result * in the association being ABORTed if the DATA chunk * is processed first by the server. Checking the * init error counter simply causes this command * to be executed only during failed attempts of * association establishment. */ if ((asoc->peer.retran_path != asoc->peer.primary_path) && (asoc->init_err_counter > 0)) { sctp_add_cmd_sf(commands, SCTP_CMD_FORCE_PRIM_RETRAN, SCTP_NULL()); } break; case SCTP_CMD_GEN_SHUTDOWN: /* Generate SHUTDOWN when in SHUTDOWN_SENT state. * Reset error counts. */ asoc->overall_error_count = 0; /* Generate a SHUTDOWN chunk. */ new_obj = sctp_make_shutdown(asoc, chunk); if (!new_obj) { error = -ENOMEM; break; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); break; case SCTP_CMD_CHUNK_ULP: /* Send a chunk to the sockets layer. */ pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n", __func__, cmd->obj.chunk, &asoc->ulpq); asoc->stream.si->ulpevent_data(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_EVENT_ULP: /* Send a notification to the sockets layer. */ pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n", __func__, cmd->obj.ulpevent, &asoc->ulpq); asoc->stream.si->enqueue_event(&asoc->ulpq, cmd->obj.ulpevent); break; case SCTP_CMD_REPLY: /* If an caller has not already corked, do cork. */ if (!asoc->outqueue.cork) { sctp_outq_cork(&asoc->outqueue); local_cork = 1; } /* Send a chunk to our peer. */ sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, gfp); break; case SCTP_CMD_SEND_PKT: /* Send a full packet to our peer. */ packet = cmd->obj.packet; sctp_packet_transmit(packet, gfp); sctp_ootb_pkt_free(packet); break; case SCTP_CMD_T1_RETRAN: /* Mark a transport for retransmission. */ sctp_retransmit(&asoc->outqueue, cmd->obj.transport, SCTP_RTXR_T1_RTX); break; case SCTP_CMD_RETRAN: /* Mark a transport for retransmission. */ sctp_retransmit(&asoc->outqueue, cmd->obj.transport, SCTP_RTXR_T3_RTX); break; case SCTP_CMD_ECN_CE: /* Do delayed CE processing. */ sctp_do_ecn_ce_work(asoc, cmd->obj.u32); break; case SCTP_CMD_ECN_ECNE: /* Do delayed ECNE processing. */ new_obj = sctp_do_ecn_ecne_work(asoc, cmd->obj.u32, chunk); if (new_obj) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); break; case SCTP_CMD_ECN_CWR: /* Do delayed CWR processing. */ sctp_do_ecn_cwr_work(asoc, cmd->obj.u32); break; case SCTP_CMD_SETUP_T2: sctp_cmd_setup_t2(commands, asoc, cmd->obj.chunk); break; case SCTP_CMD_TIMER_START_ONCE: timer = &asoc->timers[cmd->obj.to]; if (timer_pending(timer)) break; fallthrough; case SCTP_CMD_TIMER_START: timer = &asoc->timers[cmd->obj.to]; timeout = asoc->timeouts[cmd->obj.to]; BUG_ON(!timeout); /* * SCTP has a hard time with timer starts. Because we process * timer starts as side effects, it can be hard to tell if we * have already started a timer or not, which leads to BUG * halts when we call add_timer. So here, instead of just starting * a timer, if the timer is already started, and just mod * the timer with the shorter of the two expiration times */ if (!timer_pending(timer)) sctp_association_hold(asoc); timer_reduce(timer, jiffies + timeout); break; case SCTP_CMD_TIMER_RESTART: timer = &asoc->timers[cmd->obj.to]; timeout = asoc->timeouts[cmd->obj.to]; if (!mod_timer(timer, jiffies + timeout)) sctp_association_hold(asoc); break; case SCTP_CMD_TIMER_STOP: timer = &asoc->timers[cmd->obj.to]; if (timer_delete(timer)) sctp_association_put(asoc); break; case SCTP_CMD_INIT_CHOOSE_TRANSPORT: chunk = cmd->obj.chunk; t = sctp_assoc_choose_alter_transport(asoc, asoc->init_last_sent_to); asoc->init_last_sent_to = t; chunk->transport = t; t->init_sent_count++; /* Set the new transport as primary */ sctp_assoc_set_primary(asoc, t); break; case SCTP_CMD_INIT_RESTART: /* Do the needed accounting and updates * associated with restarting an initialization * timer. Only multiply the timeout by two if * all transports have been tried at the current * timeout. */ sctp_cmd_t1_timer_update(asoc, SCTP_EVENT_TIMEOUT_T1_INIT, "INIT"); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); break; case SCTP_CMD_COOKIEECHO_RESTART: /* Do the needed accounting and updates * associated with restarting an initialization * timer. Only multiply the timeout by two if * all transports have been tried at the current * timeout. */ sctp_cmd_t1_timer_update(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE, "COOKIE"); /* If we've sent any data bundled with * COOKIE-ECHO we need to resend. */ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { sctp_retransmit_mark(&asoc->outqueue, t, SCTP_RTXR_T1_RTX); } sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); break; case SCTP_CMD_INIT_FAILED: sctp_cmd_init_failed(commands, asoc, cmd->obj.u16); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, subtype, chunk, cmd->obj.u16); break; case SCTP_CMD_INIT_COUNTER_INC: asoc->init_err_counter++; break; case SCTP_CMD_INIT_COUNTER_RESET: asoc->init_err_counter = 0; asoc->init_cycle = 0; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { t->init_sent_count = 0; } break; case SCTP_CMD_REPORT_DUP: sctp_tsnmap_mark_dup(&asoc->peer.tsn_map, cmd->obj.u32); break; case SCTP_CMD_REPORT_BAD_TAG: pr_debug("%s: vtag mismatch!\n", __func__); break; case SCTP_CMD_STRIKE: /* Mark one strike against a transport. */ sctp_do_8_2_transport_strike(commands, asoc, cmd->obj.transport, 0); break; case SCTP_CMD_TRANSPORT_IDLE: t = cmd->obj.transport; sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE); break; case SCTP_CMD_TRANSPORT_HB_SENT: t = cmd->obj.transport; sctp_do_8_2_transport_strike(commands, asoc, t, 1); t->hb_sent = 1; break; case SCTP_CMD_TRANSPORT_ON: t = cmd->obj.transport; sctp_cmd_transport_on(commands, asoc, t, chunk); break; case SCTP_CMD_HB_TIMERS_START: sctp_cmd_hb_timers_start(commands, asoc); break; case SCTP_CMD_HB_TIMER_UPDATE: t = cmd->obj.transport; sctp_transport_reset_hb_timer(t); break; case SCTP_CMD_HB_TIMERS_STOP: sctp_cmd_hb_timers_stop(commands, asoc); break; case SCTP_CMD_PROBE_TIMER_UPDATE: t = cmd->obj.transport; sctp_transport_reset_probe_timer(t); break; case SCTP_CMD_REPORT_ERROR: error = cmd->obj.error; break; case SCTP_CMD_PROCESS_CTSN: /* Dummy up a SACK for processing. */ sackh.cum_tsn_ack = cmd->obj.be32; sackh.a_rwnd = htonl(asoc->peer.rwnd + asoc->outqueue.outstanding_bytes); sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; chunk->subh.sack_hdr = &sackh; sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_CHUNK(chunk)); break; case SCTP_CMD_DISCARD_PACKET: /* We need to discard the whole packet. * Uncork the queue since there might be * responses pending */ chunk->pdiscard = 1; if (asoc) { sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; } break; case SCTP_CMD_RTO_PENDING: t = cmd->obj.transport; t->rto_pending = 1; break; case SCTP_CMD_PART_DELIVER: asoc->stream.si->start_pd(&asoc->ulpq, GFP_ATOMIC); break; case SCTP_CMD_RENEGE: asoc->stream.si->renege_events(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_SETUP_T4: sctp_cmd_setup_t4(commands, asoc, cmd->obj.chunk); break; case SCTP_CMD_PROCESS_OPERR: sctp_cmd_process_operr(commands, asoc, chunk); break; case SCTP_CMD_CLEAR_INIT_TAG: asoc->peer.i.init_tag = 0; break; case SCTP_CMD_DEL_NON_PRIMARY: sctp_cmd_del_non_primary(asoc); break; case SCTP_CMD_T3_RTX_TIMERS_STOP: sctp_cmd_t3_rtx_timers_stop(commands, asoc); break; case SCTP_CMD_FORCE_PRIM_RETRAN: t = asoc->peer.retran_path; asoc->peer.retran_path = asoc->peer.primary_path; sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; asoc->peer.retran_path = t; break; case SCTP_CMD_SET_SK_ERR: sctp_cmd_set_sk_err(asoc, cmd->obj.error); break; case SCTP_CMD_ASSOC_CHANGE: sctp_cmd_assoc_change(commands, asoc, cmd->obj.u8); break; case SCTP_CMD_ADAPTATION_IND: sctp_cmd_adaptation_ind(commands, asoc); break; case SCTP_CMD_PEER_NO_AUTH: sctp_cmd_peer_no_auth(commands, asoc); break; case SCTP_CMD_ASSOC_SHKEY: error = sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); break; case SCTP_CMD_UPDATE_INITTAG: asoc->peer.i.init_tag = cmd->obj.u32; break; case SCTP_CMD_SEND_MSG: if (!asoc->outqueue.cork) { sctp_outq_cork(&asoc->outqueue); local_cork = 1; } sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; case SCTP_CMD_SET_ASOC: if (asoc && local_cork) { sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; } asoc = cmd->obj.asoc; break; default: pr_warn("Impossible command: %u\n", cmd->verb); break; } if (error) { cmd = sctp_next_cmd(commands); while (cmd) { if (cmd->verb == SCTP_CMD_REPLY) sctp_chunk_free(cmd->obj.chunk); cmd = sctp_next_cmd(commands); } break; } } /* If this is in response to a received chunk, wait until * we are done with the packet to open the queue so that we don't * send multiple packets in response to a single request. */ if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { if (chunk->end_of_packet || chunk->singleton) sctp_outq_uncork(&asoc->outqueue, gfp); } else if (local_cork) sctp_outq_uncork(&asoc->outqueue, gfp); if (sp->data_ready_signalled) sp->data_ready_signalled = 0; return error; }
43 44 43 44 1 1 1 43 42 27 27 26 27 43 41 43 42 41 44 43 43 43 44 44 44 44 2 2 42 42 2 38 24 22 1 22 20 2 38 1 1 1 1 1 1 35 39 37 37 38 35 31 10 9 10 39 39 39 37 33 34 36 35 35 13 15 1 1 36 34 35 35 34 36 35 34 10 9 35 36 21 21 20 20 22 22 20 20 22 21 35 38 37 36 35 35 7 37 10 39 9 39 22 8 7 9 9 9 8 8 10 9 10 23 34 34 37 35 38 34 39 37 36 35 1 1 2 37 37 36 8 37 8 8 6 2 2 2 2 2 6 6 6 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 39 38 1 2 2 2 1 1 1 1 1 1 36 4 4 4 4 4 37 36 36 35 26 4 4 4 4 4 4 36 34 36 8 7 1 2 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 2 2 35 33 33 33 33 33 5 35 6 1 1 1 6 5 6 4 4 4 4 29 28 28 4 4 27 1 1 26 12 29 6 12 7 12 39 39 39 38 36 39 39 36 39 38 36 37 37 35 6 6 29 29 39 37 36 39 39 39 36 43 36 35 11 44 42 42 19 4 11 44 12 42 41 42 38 39 37 19 15 8 17 3 3 3 6 6 6 6 2 6 6 1 1 1 1 1 1 1 27 4 4 4 4 4 4 35 44 42 37 35 44 43 39 38 39 41 42 41 38 42 43 39 39 4 38 43 44 38 29 43 3 4 4 1 1 1 1 1 42 43 40 43 42 40 7 44 43 43 4 4 44 43 20 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/jiffies.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/rcupdate.h> #include <linux/export.h> #include <linux/kcov.h> #include <linux/bitops.h> #include <kunit/visibility.h> #include <net/mac80211.h> #include <net/ieee80211_radiotap.h> #include <linux/unaligned.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "led.h" #include "mesh.h" #include "wep.h" #include "wpa.h" #include "tkip.h" #include "wme.h" #include "rate.h" /* * monitor mode reception * * This function cleans up the SKB, i.e. it removes all the stuff * only useful for monitoring. */ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, unsigned int present_fcs_len, unsigned int rtap_space) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr; unsigned int hdrlen; __le16 fc; if (present_fcs_len) __pskb_trim(skb, skb->len - present_fcs_len); pskb_pull(skb, rtap_space); /* After pulling radiotap header, clear all flags that indicate * info in skb->data. */ status->flag &= ~(RX_FLAG_RADIOTAP_TLV_AT_END | RX_FLAG_RADIOTAP_LSIG | RX_FLAG_RADIOTAP_HE_MU | RX_FLAG_RADIOTAP_HE); hdr = (void *)skb->data; fc = hdr->frame_control; /* * Remove the HT-Control field (if present) on management * frames after we've sent the frame to monitoring. We * (currently) don't need it, and don't properly parse * frames with it present, due to the assumption of a * fixed management header length. */ if (likely(!ieee80211_is_mgmt(fc) || !ieee80211_has_order(fc))) return skb; hdrlen = ieee80211_hdrlen(fc); hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_ORDER); if (!pskb_may_pull(skb, hdrlen)) { dev_kfree_skb(skb); return NULL; } memmove(skb->data + IEEE80211_HT_CTL_LEN, skb->data, hdrlen - IEEE80211_HT_CTL_LEN); pskb_pull(skb, IEEE80211_HT_CTL_LEN); return skb; } static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len, unsigned int rtap_space) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr; hdr = (void *)(skb->data + rtap_space); if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC | RX_FLAG_ONLY_MONITOR | RX_FLAG_NO_PSDU)) return true; if (unlikely(skb->len < 16 + present_fcs_len + rtap_space)) return true; if (ieee80211_is_ctl(hdr->frame_control) && !ieee80211_is_pspoll(hdr->frame_control) && !ieee80211_is_back_req(hdr->frame_control)) return true; return false; } static int ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, struct ieee80211_rx_status *status, struct sk_buff *skb) { int len; /* always present fields */ len = sizeof(struct ieee80211_radiotap_header) + 8; /* allocate extra bitmaps */ if (status->chains) len += 4 * hweight8(status->chains); if (ieee80211_have_rx_timestamp(status)) { len = ALIGN(len, 8); len += 8; } if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) len += 1; /* antenna field, if we don't have per-chain info */ if (!status->chains) len += 1; /* padding for RX_FLAGS if necessary */ len = ALIGN(len, 2); if (status->encoding == RX_ENC_HT) /* HT info */ len += 3; if (status->flag & RX_FLAG_AMPDU_DETAILS) { len = ALIGN(len, 4); len += 8; } if (status->encoding == RX_ENC_VHT) { len = ALIGN(len, 2); len += 12; } if (local->hw.radiotap_timestamp.units_pos >= 0) { len = ALIGN(len, 8); len += 12; } if (status->encoding == RX_ENC_HE && status->flag & RX_FLAG_RADIOTAP_HE) { len = ALIGN(len, 2); len += 12; BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he) != 12); } if (status->encoding == RX_ENC_HE && status->flag & RX_FLAG_RADIOTAP_HE_MU) { len = ALIGN(len, 2); len += 12; BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) != 12); } if (status->flag & RX_FLAG_NO_PSDU) len += 1; if (status->flag & RX_FLAG_RADIOTAP_LSIG) { len = ALIGN(len, 2); len += 4; BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_lsig) != 4); } if (status->chains) { /* antenna and antenna signal fields */ len += 2 * hweight8(status->chains); } if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { int tlv_offset = 0; /* * The position to look at depends on the existence (or non- * existence) of other elements, so take that into account... */ if (status->flag & RX_FLAG_RADIOTAP_HE) tlv_offset += sizeof(struct ieee80211_radiotap_he); if (status->flag & RX_FLAG_RADIOTAP_HE_MU) tlv_offset += sizeof(struct ieee80211_radiotap_he_mu); if (status->flag & RX_FLAG_RADIOTAP_LSIG) tlv_offset += sizeof(struct ieee80211_radiotap_lsig); /* ensure 4 byte alignment for TLV */ len = ALIGN(len, 4); /* TLVs until the mac header */ len += skb_mac_header(skb) - &skb->data[tlv_offset]; } return len; } static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, int link_id, struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); if (link_id >= 0) { status->link_valid = 1; status->link_id = link_id; } else { status->link_valid = 0; } skb_queue_tail(&sdata->skb_queue, skb); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); if (sta) { struct link_sta_info *link_sta_info; if (link_id >= 0) { link_sta_info = rcu_dereference(sta->link[link_id]); if (!link_sta_info) return; } else { link_sta_info = &sta->deflink; } link_sta_info->rx_stats.packets++; } } static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, int link_id, struct sta_info *sta, struct sk_buff *skb) { skb->protocol = 0; __ieee80211_queue_skb_to_iface(sdata, link_id, sta, skb); } static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int rtap_space) { struct { struct ieee80211_hdr_3addr hdr; u8 category; u8 action_code; } __packed __aligned(2) action; if (!sdata) return; BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1); if (skb->len < rtap_space + sizeof(action) + VHT_MUMIMO_GROUPS_DATA_LEN) return; if (!is_valid_ether_addr(sdata->u.mntr.mu_follow_addr)) return; skb_copy_bits(skb, rtap_space, &action, sizeof(action)); if (!ieee80211_is_action(action.hdr.frame_control)) return; if (action.category != WLAN_CATEGORY_VHT) return; if (action.action_code != WLAN_VHT_ACTION_GROUPID_MGMT) return; if (!ether_addr_equal(action.hdr.addr1, sdata->u.mntr.mu_follow_addr)) return; skb = skb_copy(skb, GFP_ATOMIC); if (!skb) return; ieee80211_queue_skb_to_iface(sdata, -1, NULL, skb); } /* * ieee80211_add_rx_radiotap_header - add radiotap header * * add a radiotap header containing all the fields which the hardware provided. */ static void ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_rate *rate, int rtap_len, bool has_fcs) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_header *rthdr; unsigned char *pos; __le32 *it_present; u32 it_present_val; u16 rx_flags = 0; u16 channel_flags = 0; u32 tlvs_len = 0; int mpdulen, chain; unsigned long chains = status->chains; struct ieee80211_radiotap_he he = {}; struct ieee80211_radiotap_he_mu he_mu = {}; struct ieee80211_radiotap_lsig lsig = {}; if (status->flag & RX_FLAG_RADIOTAP_HE) { he = *(struct ieee80211_radiotap_he *)skb->data; skb_pull(skb, sizeof(he)); WARN_ON_ONCE(status->encoding != RX_ENC_HE); } if (status->flag & RX_FLAG_RADIOTAP_HE_MU) { he_mu = *(struct ieee80211_radiotap_he_mu *)skb->data; skb_pull(skb, sizeof(he_mu)); } if (status->flag & RX_FLAG_RADIOTAP_LSIG) { lsig = *(struct ieee80211_radiotap_lsig *)skb->data; skb_pull(skb, sizeof(lsig)); } if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { /* data is pointer at tlv all other info was pulled off */ tlvs_len = skb_mac_header(skb) - skb->data; } mpdulen = skb->len; if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))) mpdulen += FCS_LEN; rthdr = skb_push(skb, rtap_len - tlvs_len); memset(rthdr, 0, rtap_len - tlvs_len); it_present = &rthdr->it_present; /* radiotap header, set always present flags */ rthdr->it_len = cpu_to_le16(rtap_len); it_present_val = BIT(IEEE80211_RADIOTAP_FLAGS) | BIT(IEEE80211_RADIOTAP_CHANNEL) | BIT(IEEE80211_RADIOTAP_RX_FLAGS); if (!status->chains) it_present_val |= BIT(IEEE80211_RADIOTAP_ANTENNA); for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) { it_present_val |= BIT(IEEE80211_RADIOTAP_EXT) | BIT(IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE); put_unaligned_le32(it_present_val, it_present); it_present++; it_present_val = BIT(IEEE80211_RADIOTAP_ANTENNA) | BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL); } if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) it_present_val |= BIT(IEEE80211_RADIOTAP_TLV); put_unaligned_le32(it_present_val, it_present); /* This references through an offset into it_optional[] rather * than via it_present otherwise later uses of pos will cause * the compiler to think we have walked past the end of the * struct member. */ pos = (void *)&rthdr->it_optional[it_present + 1 - rthdr->it_optional]; /* the order of the following fields is important */ /* IEEE80211_RADIOTAP_TSFT */ if (ieee80211_have_rx_timestamp(status)) { /* padding */ while ((pos - (u8 *)rthdr) & 7) *pos++ = 0; put_unaligned_le64( ieee80211_calculate_rx_timestamp(local, status, mpdulen, 0), pos); rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TSFT)); pos += 8; } /* IEEE80211_RADIOTAP_FLAGS */ if (has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) *pos |= IEEE80211_RADIOTAP_F_FCS; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) *pos |= IEEE80211_RADIOTAP_F_BADFCS; if (status->enc_flags & RX_ENC_FLAG_SHORTPRE) *pos |= IEEE80211_RADIOTAP_F_SHORTPRE; pos++; /* IEEE80211_RADIOTAP_RATE */ if (!rate || status->encoding != RX_ENC_LEGACY) { /* * Without rate information don't add it. If we have, * MCS information is a separate field in radiotap, * added below. The byte here is needed as padding * for the channel though, so initialise it to 0. */ *pos = 0; } else { int shift = 0; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE)); if (status->bw == RATE_INFO_BW_10) shift = 1; else if (status->bw == RATE_INFO_BW_5) shift = 2; *pos = DIV_ROUND_UP(rate->bitrate, 5 * (1 << shift)); } pos++; /* IEEE80211_RADIOTAP_CHANNEL */ /* TODO: frequency offset in KHz */ put_unaligned_le16(status->freq, pos); pos += 2; if (status->bw == RATE_INFO_BW_10) channel_flags |= IEEE80211_CHAN_HALF; else if (status->bw == RATE_INFO_BW_5) channel_flags |= IEEE80211_CHAN_QUARTER; if (status->band == NL80211_BAND_5GHZ || status->band == NL80211_BAND_6GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; else if (status->encoding != RX_ENC_LEGACY) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; else if (rate && rate->flags & IEEE80211_RATE_ERP_G) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; else if (rate) channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ; else channel_flags |= IEEE80211_CHAN_2GHZ; put_unaligned_le16(channel_flags, pos); pos += 2; /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ if (ieee80211_hw_check(&local->hw, SIGNAL_DBM) && !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { *pos = status->signal; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL)); pos++; } /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ if (!status->chains) { /* IEEE80211_RADIOTAP_ANTENNA */ *pos = status->antenna; pos++; } /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ /* IEEE80211_RADIOTAP_RX_FLAGS */ /* ensure 2 byte alignment for the 2 byte field as required */ if ((pos - (u8 *)rthdr) & 1) *pos++ = 0; if (status->flag & RX_FLAG_FAILED_PLCP_CRC) rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; put_unaligned_le16(rx_flags, pos); pos += 2; if (status->encoding == RX_ENC_HT) { unsigned int stbc; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS)); *pos = local->hw.radiotap_mcs_details; if (status->enc_flags & RX_ENC_FLAG_HT_GF) *pos |= IEEE80211_RADIOTAP_MCS_HAVE_FMT; if (status->enc_flags & RX_ENC_FLAG_LDPC) *pos |= IEEE80211_RADIOTAP_MCS_HAVE_FEC; pos++; *pos = 0; if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) *pos |= IEEE80211_RADIOTAP_MCS_SGI; if (status->bw == RATE_INFO_BW_40) *pos |= IEEE80211_RADIOTAP_MCS_BW_40; if (status->enc_flags & RX_ENC_FLAG_HT_GF) *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF; if (status->enc_flags & RX_ENC_FLAG_LDPC) *pos |= IEEE80211_RADIOTAP_MCS_FEC_LDPC; stbc = (status->enc_flags & RX_ENC_FLAG_STBC_MASK) >> RX_ENC_FLAG_STBC_SHIFT; *pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT; pos++; *pos++ = status->rate_idx; } if (status->flag & RX_FLAG_AMPDU_DETAILS) { u16 flags = 0; /* ensure 4 byte alignment */ while ((pos - (u8 *)rthdr) & 3) pos++; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_AMPDU_STATUS)); put_unaligned_le32(status->ampdu_reference, pos); pos += 4; if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN) flags |= IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN; if (status->flag & RX_FLAG_AMPDU_IS_LAST) flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST; if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR) flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR; if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN) flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN; if (status->flag & RX_FLAG_AMPDU_EOF_BIT) flags |= IEEE80211_RADIOTAP_AMPDU_EOF; put_unaligned_le16(flags, pos); pos += 2; *pos++ = 0; *pos++ = 0; } if (status->encoding == RX_ENC_VHT) { u16 known = local->hw.radiotap_vht_details; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT)); put_unaligned_le16(known, pos); pos += 2; /* flags */ if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI; /* in VHT, STBC is binary */ if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_STBC; if (status->enc_flags & RX_ENC_FLAG_BF) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED; pos++; /* bandwidth */ switch (status->bw) { case RATE_INFO_BW_80: *pos++ = 4; break; case RATE_INFO_BW_160: *pos++ = 11; break; case RATE_INFO_BW_40: *pos++ = 1; break; default: *pos++ = 0; } /* MCS/NSS */ *pos = (status->rate_idx << 4) | status->nss; pos += 4; /* coding field */ if (status->enc_flags & RX_ENC_FLAG_LDPC) *pos |= IEEE80211_RADIOTAP_CODING_LDPC_USER0; pos++; /* group ID */ pos++; /* partial_aid */ pos += 2; } if (local->hw.radiotap_timestamp.units_pos >= 0) { u16 accuracy = 0; u8 flags; u64 ts; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TIMESTAMP)); /* ensure 8 byte alignment */ while ((pos - (u8 *)rthdr) & 7) pos++; if (status->flag & RX_FLAG_MACTIME_IS_RTAP_TS64) { flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT; ts = status->mactime; } else { flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT; ts = status->device_timestamp; } put_unaligned_le64(ts, pos); pos += sizeof(u64); if (local->hw.radiotap_timestamp.accuracy >= 0) { accuracy = local->hw.radiotap_timestamp.accuracy; flags |= IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY; } put_unaligned_le16(accuracy, pos); pos += sizeof(u16); *pos++ = local->hw.radiotap_timestamp.units_pos; *pos++ = flags; } if (status->encoding == RX_ENC_HE && status->flag & RX_FLAG_RADIOTAP_HE) { #define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f) if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) { he.data6 |= HE_PREP(DATA6_NSTS, FIELD_GET(RX_ENC_FLAG_STBC_MASK, status->enc_flags)); he.data3 |= HE_PREP(DATA3_STBC, 1); } else { he.data6 |= HE_PREP(DATA6_NSTS, status->nss); } #define CHECK_GI(s) \ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \ (int)NL80211_RATE_INFO_HE_GI_##s) CHECK_GI(0_8); CHECK_GI(1_6); CHECK_GI(3_2); he.data3 |= HE_PREP(DATA3_DATA_MCS, status->rate_idx); he.data3 |= HE_PREP(DATA3_DATA_DCM, status->he_dcm); he.data3 |= HE_PREP(DATA3_CODING, !!(status->enc_flags & RX_ENC_FLAG_LDPC)); he.data5 |= HE_PREP(DATA5_GI, status->he_gi); switch (status->bw) { case RATE_INFO_BW_20: he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ); break; case RATE_INFO_BW_40: he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ); break; case RATE_INFO_BW_80: he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ); break; case RATE_INFO_BW_160: he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ); break; case RATE_INFO_BW_HE_RU: #define CHECK_RU_ALLOC(s) \ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \ NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4) CHECK_RU_ALLOC(26); CHECK_RU_ALLOC(52); CHECK_RU_ALLOC(106); CHECK_RU_ALLOC(242); CHECK_RU_ALLOC(484); CHECK_RU_ALLOC(996); CHECK_RU_ALLOC(2x996); he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, status->he_ru + 4); break; default: WARN_ONCE(1, "Invalid SU BW %d\n", status->bw); } /* ensure 2 byte alignment */ while ((pos - (u8 *)rthdr) & 1) pos++; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE)); memcpy(pos, &he, sizeof(he)); pos += sizeof(he); } if (status->encoding == RX_ENC_HE && status->flag & RX_FLAG_RADIOTAP_HE_MU) { /* ensure 2 byte alignment */ while ((pos - (u8 *)rthdr) & 1) pos++; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE_MU)); memcpy(pos, &he_mu, sizeof(he_mu)); pos += sizeof(he_mu); } if (status->flag & RX_FLAG_NO_PSDU) { rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_ZERO_LEN_PSDU)); *pos++ = status->zero_length_psdu_type; } if (status->flag & RX_FLAG_RADIOTAP_LSIG) { /* ensure 2 byte alignment */ while ((pos - (u8 *)rthdr) & 1) pos++; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_LSIG)); memcpy(pos, &lsig, sizeof(lsig)); pos += sizeof(lsig); } for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) { *pos++ = status->chain_signal[chain]; *pos++ = chain; } } static struct sk_buff * ieee80211_make_monitor_skb(struct ieee80211_local *local, struct sk_buff **origskb, struct ieee80211_rate *rate, int rtap_space, bool use_origskb) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(*origskb); int rt_hdrlen, needed_headroom; struct sk_buff *skb; /* room for the radiotap header based on driver features */ rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, *origskb); needed_headroom = rt_hdrlen - rtap_space; if (use_origskb) { /* only need to expand headroom if necessary */ skb = *origskb; *origskb = NULL; /* * This shouldn't trigger often because most devices have an * RX header they pull before we get here, and that should * be big enough for our radiotap information. We should * probably export the length to drivers so that we can have * them allocate enough headroom to start with. */ if (skb_headroom(skb) < needed_headroom && pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) { dev_kfree_skb(skb); return NULL; } } else { /* * Need to make a copy and possibly remove radiotap header * and FCS from the original. */ skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD, 0, GFP_ATOMIC); if (!skb) return NULL; } /* prepend radiotap information */ ieee80211_add_rx_radiotap_header(local, skb, rate, rt_hdrlen, true); skb_reset_mac_header(skb); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->pkt_type = PACKET_OTHERHOST; skb->protocol = htons(ETH_P_802_2); return skb; } /* * This function copies a received frame to all monitor interfaces and * returns a cleaned-up SKB that no longer includes the FCS nor the * radiotap header the driver might have added. */ static struct sk_buff * ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct ieee80211_rate *rate) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(origskb); struct ieee80211_sub_if_data *sdata, *prev_sdata = NULL; struct sk_buff *skb, *monskb = NULL; int present_fcs_len = 0; unsigned int rtap_space = 0; struct ieee80211_sub_if_data *monitor_sdata = rcu_dereference(local->monitor_sdata); bool only_monitor = false; unsigned int min_head_len; if (WARN_ON_ONCE(status->flag & RX_FLAG_RADIOTAP_TLV_AT_END && !skb_mac_header_was_set(origskb))) { /* with this skb no way to know where frame payload starts */ dev_kfree_skb(origskb); return NULL; } if (status->flag & RX_FLAG_RADIOTAP_HE) rtap_space += sizeof(struct ieee80211_radiotap_he); if (status->flag & RX_FLAG_RADIOTAP_HE_MU) rtap_space += sizeof(struct ieee80211_radiotap_he_mu); if (status->flag & RX_FLAG_RADIOTAP_LSIG) rtap_space += sizeof(struct ieee80211_radiotap_lsig); if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) rtap_space += skb_mac_header(origskb) - &origskb->data[rtap_space]; min_head_len = rtap_space; /* * First, we may need to make a copy of the skb because * (1) we need to modify it for radiotap (if not present), and * (2) the other RX handlers will modify the skb we got. * * We don't need to, of course, if we aren't going to return * the SKB because it has a bad FCS/PLCP checksum. */ if (!(status->flag & RX_FLAG_NO_PSDU)) { if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) { if (unlikely(origskb->len <= FCS_LEN + rtap_space)) { /* driver bug */ WARN_ON(1); dev_kfree_skb(origskb); return NULL; } present_fcs_len = FCS_LEN; } /* also consider the hdr->frame_control */ min_head_len += 2; } /* ensure that the expected data elements are in skb head */ if (!pskb_may_pull(origskb, min_head_len)) { dev_kfree_skb(origskb); return NULL; } only_monitor = should_drop_frame(origskb, present_fcs_len, rtap_space); if (!local->monitors || (status->flag & RX_FLAG_SKIP_MONITOR)) { if (only_monitor) { dev_kfree_skb(origskb); return NULL; } return ieee80211_clean_skb(origskb, present_fcs_len, rtap_space); } ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_space); list_for_each_entry_rcu(sdata, &local->mon_list, u.mntr.list) { struct cfg80211_chan_def *chandef; chandef = &sdata->vif.bss_conf.chanreq.oper; if (chandef->chan && chandef->chan->center_freq != status->freq) continue; if (!prev_sdata) { prev_sdata = sdata; continue; } if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) ieee80211_handle_mu_mimo_mon(sdata, origskb, rtap_space); if (!monskb) monskb = ieee80211_make_monitor_skb(local, &origskb, rate, rtap_space, false); if (!monskb) continue; skb = skb_clone(monskb, GFP_ATOMIC); if (!skb) continue; skb->dev = prev_sdata->dev; dev_sw_netstats_rx_add(skb->dev, skb->len); netif_receive_skb(skb); prev_sdata = sdata; } if (prev_sdata) { if (monskb) skb = monskb; else skb = ieee80211_make_monitor_skb(local, &origskb, rate, rtap_space, only_monitor); if (skb) { skb->dev = prev_sdata->dev; dev_sw_netstats_rx_add(skb->dev, skb->len); netif_receive_skb(skb); } } if (!origskb) return NULL; return ieee80211_clean_skb(origskb, present_fcs_len, rtap_space); } static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); int tid, seqno_idx, security_idx; /* does the frame have a qos control field? */ if (ieee80211_is_data_qos(hdr->frame_control)) { u8 *qc = ieee80211_get_qos_ctl(hdr); /* frame has qos control */ tid = *qc & IEEE80211_QOS_CTL_TID_MASK; if (*qc & IEEE80211_QOS_CTL_A_MSDU_PRESENT) status->rx_flags |= IEEE80211_RX_AMSDU; seqno_idx = tid; security_idx = tid; } else { /* * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): * * Sequence numbers for management frames, QoS data * frames with a broadcast/multicast address in the * Address 1 field, and all non-QoS data frames sent * by QoS STAs are assigned using an additional single * modulo-4096 counter, [...] * * We also use that counter for non-QoS STAs. */ seqno_idx = IEEE80211_NUM_TIDS; security_idx = 0; if (ieee80211_is_mgmt(hdr->frame_control)) security_idx = IEEE80211_NUM_TIDS; tid = 0; } rx->seqno_idx = seqno_idx; rx->security_idx = security_idx; /* Set skb->priority to 1d tag if highest order bit of TID is not set. * For now, set skb->priority to 0 for other cases. */ rx->skb->priority = (tid > 7) ? 0 : tid; } /** * DOC: Packet alignment * * Drivers always need to pass packets that are aligned to two-byte boundaries * to the stack. * * Additionally, they should, if possible, align the payload data in a way that * guarantees that the contained IP header is aligned to a four-byte * boundary. In the case of regular frames, this simply means aligning the * payload to a four-byte boundary (because either the IP header is directly * contained, or IV/RFC1042 headers that have a length divisible by four are * in front of it). If the payload data is not properly aligned and the * architecture doesn't support efficient unaligned operations, mac80211 * will align the data. * * With A-MSDU frames, however, the payload data address must yield two modulo * four because there are 14-byte 802.3 headers within the A-MSDU frames that * push the IP header further back to a multiple of four again. Thankfully, the * specs were sane enough this time around to require padding each A-MSDU * subframe to a length that is a multiple of four. * * Padding like Atheros hardware adds which is between the 802.11 header and * the payload is not supported; the driver is required to move the 802.11 * header to be directly in front of the payload in that case. */ static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG WARN_ON_ONCE((unsigned long)rx->skb->data & 1); #endif } /* rx handlers */ static int ieee80211_is_unicast_robust_mgmt_frame(struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; if (is_multicast_ether_addr(hdr->addr1)) return 0; return ieee80211_is_robust_mgmt_frame(skb); } static int ieee80211_is_multicast_robust_mgmt_frame(struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; if (!is_multicast_ether_addr(hdr->addr1)) return 0; return ieee80211_is_robust_mgmt_frame(skb); } /* Get the BIP key index from MMIE; return -1 if this is not a BIP frame */ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) { struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data; struct ieee80211_mmie *mmie; struct ieee80211_mmie_16 *mmie16; if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da)) return -1; if (!ieee80211_is_robust_mgmt_frame(skb) && !ieee80211_is_beacon(hdr->frame_control)) return -1; /* not a robust management frame */ mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); if (mmie->element_id == WLAN_EID_MMIE && mmie->length == sizeof(*mmie) - 2) return le16_to_cpu(mmie->key_id); mmie16 = (struct ieee80211_mmie_16 *) (skb->data + skb->len - sizeof(*mmie16)); if (skb->len >= 24 + sizeof(*mmie16) && mmie16->element_id == WLAN_EID_MMIE && mmie16->length == sizeof(*mmie16) - 2) return le16_to_cpu(mmie16->key_id); return -1; } static int ieee80211_get_keyid(struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc = hdr->frame_control; int hdrlen = ieee80211_hdrlen(fc); u8 keyid; /* WEP, TKIP, CCMP and GCMP */ if (unlikely(skb->len < hdrlen + IEEE80211_WEP_IV_LEN)) return -EINVAL; skb_copy_bits(skb, hdrlen + 3, &keyid, 1); keyid >>= 6; return keyid; } static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; char *dev_addr = rx->sdata->vif.addr; if (ieee80211_is_data(hdr->frame_control)) { if (is_multicast_ether_addr(hdr->addr1)) { if (ieee80211_has_tods(hdr->frame_control) || !ieee80211_has_fromds(hdr->frame_control)) return RX_DROP; if (ether_addr_equal(hdr->addr3, dev_addr)) return RX_DROP; } else { if (!ieee80211_has_a4(hdr->frame_control)) return RX_DROP; if (ether_addr_equal(hdr->addr4, dev_addr)) return RX_DROP; } } /* If there is not an established peer link and this is not a peer link * establisment frame, beacon or probe, drop the frame. */ if (!rx->sta || sta_plink_state(rx->sta) != NL80211_PLINK_ESTAB) { struct ieee80211_mgmt *mgmt; if (!ieee80211_is_mgmt(hdr->frame_control)) return RX_DROP; if (ieee80211_is_action(hdr->frame_control)) { u8 category; /* make sure category field is present */ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE) return RX_DROP; mgmt = (struct ieee80211_mgmt *)hdr; category = mgmt->u.action.category; if (category != WLAN_CATEGORY_MESH_ACTION && category != WLAN_CATEGORY_SELF_PROTECTED) return RX_DROP; return RX_CONTINUE; } if (ieee80211_is_probe_req(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control) || ieee80211_is_beacon(hdr->frame_control) || ieee80211_is_auth(hdr->frame_control)) return RX_CONTINUE; return RX_DROP; } return RX_CONTINUE; } static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx, int index) { struct sk_buff_head *frames = &tid_agg_rx->reorder_buf[index]; struct sk_buff *tail = skb_peek_tail(frames); struct ieee80211_rx_status *status; if (tid_agg_rx->reorder_buf_filtered && tid_agg_rx->reorder_buf_filtered & BIT_ULL(index)) return true; if (!tail) return false; status = IEEE80211_SKB_RXCB(tail); if (status->flag & RX_FLAG_AMSDU_MORE) return false; return true; } static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, int index, struct sk_buff_head *frames) { struct sk_buff_head *skb_list = &tid_agg_rx->reorder_buf[index]; struct sk_buff *skb; struct ieee80211_rx_status *status; lockdep_assert_held(&tid_agg_rx->reorder_lock); if (skb_queue_empty(skb_list)) goto no_frame; if (!ieee80211_rx_reorder_ready(tid_agg_rx, index)) { __skb_queue_purge(skb_list); goto no_frame; } /* release frames from the reorder ring buffer */ tid_agg_rx->stored_mpdu_num--; while ((skb = __skb_dequeue(skb_list))) { status = IEEE80211_SKB_RXCB(skb); status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE; __skb_queue_tail(frames, skb); } no_frame: if (tid_agg_rx->reorder_buf_filtered) tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index); tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); } static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, u16 head_seq_num, struct sk_buff_head *frames) { int index; lockdep_assert_held(&tid_agg_rx->reorder_lock); while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) { index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); } } /* * Timeout (in jiffies) for skb's that are waiting in the RX reorder buffer. If * the skb was added to the buffer longer than this time ago, the earlier * frames that have not yet been received are assumed to be lost and the skb * can be released for processing. This may also release other skb's from the * reorder buffer if there are no additional gaps between the frames. * * Callers must hold tid_agg_rx->reorder_lock. */ #define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff_head *frames) { int index, i, j; lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; if (!ieee80211_rx_reorder_ready(tid_agg_rx, index) && tid_agg_rx->stored_mpdu_num) { /* * No buffers ready to be released, but check whether any * frames in the reorder buffer have timed out. */ int skipped = 1; for (j = (index + 1) % tid_agg_rx->buf_size; j != index; j = (j + 1) % tid_agg_rx->buf_size) { if (!ieee80211_rx_reorder_ready(tid_agg_rx, j)) { skipped++; continue; } if (skipped && !time_after(jiffies, tid_agg_rx->reorder_time[j] + HT_RX_REORDER_BUF_TIMEOUT)) goto set_release_timer; /* don't leave incomplete A-MSDUs around */ for (i = (index + 1) % tid_agg_rx->buf_size; i != j; i = (i + 1) % tid_agg_rx->buf_size) __skb_queue_purge(&tid_agg_rx->reorder_buf[i]); ht_dbg_ratelimited(sdata, "release an RX reorder frame due to timeout on earlier frames\n"); ieee80211_release_reorder_frame(sdata, tid_agg_rx, j, frames); /* * Increment the head seq# also for the skipped slots. */ tid_agg_rx->head_seq_num = (tid_agg_rx->head_seq_num + skipped) & IEEE80211_SN_MASK; skipped = 0; } } else while (ieee80211_rx_reorder_ready(tid_agg_rx, index)) { ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; } if (tid_agg_rx->stored_mpdu_num) { j = index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { if (ieee80211_rx_reorder_ready(tid_agg_rx, j)) break; } set_release_timer: if (!tid_agg_rx->removed) mod_timer(&tid_agg_rx->reorder_timer, tid_agg_rx->reorder_time[j] + 1 + HT_RX_REORDER_BUF_TIMEOUT); } else { timer_delete(&tid_agg_rx->reorder_timer); } } /* * As this function belongs to the RX path it must be under * rcu_read_lock protection. It returns false if the frame * can be processed immediately, true if it was consumed. */ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, struct sk_buff_head *frames) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u16 mpdu_seq_num = ieee80211_get_sn(hdr); u16 head_seq_num, buf_size; int index; bool ret = true; spin_lock(&tid_agg_rx->reorder_lock); /* * Offloaded BA sessions have no known starting sequence number so pick * one from first Rxed frame for this tid after BA was started. */ if (unlikely(tid_agg_rx->auto_seq)) { tid_agg_rx->auto_seq = false; tid_agg_rx->ssn = mpdu_seq_num; tid_agg_rx->head_seq_num = mpdu_seq_num; } buf_size = tid_agg_rx->buf_size; head_seq_num = tid_agg_rx->head_seq_num; /* * If the current MPDU's SN is smaller than the SSN, it shouldn't * be reordered. */ if (unlikely(!tid_agg_rx->started)) { if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { ret = false; goto out; } tid_agg_rx->started = true; } /* frame with out of date sequence number */ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); goto out; } /* * If frame the sequence number exceeds our buffering window * size release some previous frames to make room for this one. */ if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) { head_seq_num = ieee80211_sn_inc( ieee80211_sn_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ ieee80211_release_reorder_frames(sdata, tid_agg_rx, head_seq_num, frames); } /* Now the new frame is always in the range of the reordering buffer */ index = mpdu_seq_num % tid_agg_rx->buf_size; /* check if we already stored this frame */ if (ieee80211_rx_reorder_ready(tid_agg_rx, index)) { dev_kfree_skb(skb); goto out; } /* * If the current MPDU is in the right order and nothing else * is stored we can process it directly, no need to buffer it. * If it is first but there's something stored, we may be able * to release frames after this one. */ if (mpdu_seq_num == tid_agg_rx->head_seq_num && tid_agg_rx->stored_mpdu_num == 0) { if (!(status->flag & RX_FLAG_AMSDU_MORE)) tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); ret = false; goto out; } /* put the frame in the reordering buffer */ __skb_queue_tail(&tid_agg_rx->reorder_buf[index], skb); if (!(status->flag & RX_FLAG_AMSDU_MORE)) { tid_agg_rx->reorder_time[index] = jiffies; tid_agg_rx->stored_mpdu_num++; ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames); } out: spin_unlock(&tid_agg_rx->reorder_lock); return ret; } /* * Reorder MPDUs from A-MPDUs, keeping them on a buffer. Returns * true if the MPDU was buffered, false if it should be processed. */ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct sta_info *sta = rx->sta; struct tid_ampdu_rx *tid_agg_rx; u16 sc; u8 tid, ack_policy; if (!ieee80211_is_data_qos(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) goto dont_reorder; /* * filter the QoS data rx stream according to * STA/TID and check if this STA/TID is on aggregation */ if (!sta) goto dont_reorder; ack_policy = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_ACK_POLICY_MASK; tid = ieee80211_get_tid(hdr); tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) { if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && !test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) && !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_REQUIRE_SETUP); goto dont_reorder; } /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) goto dont_reorder; /* not part of a BA session */ if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ /* reset session timer */ if (tid_agg_rx->timeout) tid_agg_rx->last_rx = jiffies; /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { ieee80211_queue_skb_to_iface(rx->sdata, rx->link_id, NULL, skb); return; } /* * No locking needed -- we will only ever process one * RX packet at a time, and thus own tid_agg_rx. All * other code manipulating it needs to (and does) make * sure that we cannot get to it any more before doing * anything with it. */ if (ieee80211_sta_manage_reorder_buf(rx->sdata, tid_agg_rx, skb, frames)) return; dont_reorder: __skb_queue_tail(frames, skb); } static ieee80211_rx_result debug_noinline ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); if (status->flag & RX_FLAG_DUP_VALIDATED) return RX_CONTINUE; /* * Drop duplicate 802.11 retransmissions * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") */ if (rx->skb->len < 24) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || ieee80211_is_any_nullfunc(hdr->frame_control)) return RX_CONTINUE; if (!rx->sta) return RX_CONTINUE; if (unlikely(is_multicast_ether_addr(hdr->addr1))) { struct ieee80211_sub_if_data *sdata = rx->sdata; u16 sn = ieee80211_get_sn(hdr); if (!ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; if (!ieee80211_vif_is_mld(&sdata->vif) || sdata->vif.type != NL80211_IFTYPE_STATION) return RX_CONTINUE; if (sdata->u.mgd.mcast_seq_last != IEEE80211_SN_MODULO && ieee80211_sn_less_eq(sn, sdata->u.mgd.mcast_seq_last)) return RX_DROP_U_DUP; sdata->u.mgd.mcast_seq_last = sn; return RX_CONTINUE; } if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); rx->link_sta->rx_stats.num_duplicates++; return RX_DROP_U_DUP; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; } return RX_CONTINUE; } static ieee80211_rx_result debug_noinline ieee80211_rx_h_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; /* Drop disallowed frame classes based on STA auth/assoc state; * IEEE 802.11, Chap 5.5. * * mac80211 filters only based on association state, i.e. it drops * Class 3 frames from not associated stations. hostapd sends * deauth/disassoc frames when needed. In addition, hostapd is * responsible for filtering on both auth and assoc states. */ if (ieee80211_vif_is_mesh(&rx->sdata->vif)) return ieee80211_rx_mesh_check(rx); if (unlikely((ieee80211_is_data(hdr->frame_control) || ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && rx->sdata->vif.type != NL80211_IFTYPE_OCB && (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC)))) { /* * accept port control frames from the AP even when it's not * yet marked ASSOC to prevent a race where we don't set the * assoc bit quickly enough before it sends the first frame */ if (rx->sta && rx->sdata->vif.type == NL80211_IFTYPE_STATION && ieee80211_is_data_present(hdr->frame_control)) { unsigned int hdrlen; __be16 ethertype; hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->skb->len < hdrlen + 8) return RX_DROP; skb_copy_bits(rx->skb, hdrlen + 6, &ethertype, 2); if (ethertype == rx->sdata->control_port_protocol) return RX_CONTINUE; } if (rx->sdata->vif.type == NL80211_IFTYPE_AP && cfg80211_rx_spurious_frame(rx->sdata->dev, hdr->addr2, rx->link_id, GFP_ATOMIC)) return RX_DROP_U_SPURIOUS; return RX_DROP; } return RX_CONTINUE; } static ieee80211_rx_result debug_noinline ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx) { struct ieee80211_local *local; struct ieee80211_hdr *hdr; struct sk_buff *skb; local = rx->local; skb = rx->skb; hdr = (struct ieee80211_hdr *) skb->data; if (!local->pspolling) return RX_CONTINUE; if (!ieee80211_has_fromds(hdr->frame_control)) /* this is not from AP */ return RX_CONTINUE; if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; if (!ieee80211_has_moredata(hdr->frame_control)) { /* AP has no more frames buffered for us */ local->pspolling = false; return RX_CONTINUE; } /* more data bit is set, let's request a new frame from the AP */ ieee80211_send_pspoll(local, rx->sdata); return RX_CONTINUE; } static void sta_ps_start(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ps_data *ps; int tid; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; else return; atomic_inc(&ps->num_sta_ps); set_sta_flag(sta, WLAN_STA_PS_STA); if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", sta->sta.addr, sta->sta.aid); ieee80211_clear_fast_xmit(sta); for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi = to_txq_info(txq); spin_lock(&local->active_txq_lock[txq->ac]); if (!list_empty(&txqi->schedule_order)) list_del_init(&txqi->schedule_order); spin_unlock(&local->active_txq_lock[txq->ac]); if (txq_has_queue(txq)) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); } } static void sta_ps_end(struct sta_info *sta) { ps_dbg(sta->sdata, "STA %pM aid %d exits power save mode\n", sta->sta.addr, sta->sta.aid); if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { /* * Clear the flag only if the other one is still set * so that the TX path won't start TX'ing new frames * directly ... In the case that the driver flag isn't * set ieee80211_sta_ps_deliver_wakeup() will clear it. */ clear_sta_flag(sta, WLAN_STA_PS_STA); ps_dbg(sta->sdata, "STA %pM aid %d driver-ps-blocked\n", sta->sta.addr, sta->sta.aid); return; } set_sta_flag(sta, WLAN_STA_PS_DELIVER); clear_sta_flag(sta, WLAN_STA_PS_STA); ieee80211_sta_ps_deliver_wakeup(sta); } int ieee80211_sta_ps_transition(struct ieee80211_sta *pubsta, bool start) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); bool in_ps; WARN_ON(!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS)); /* Don't let the same PS state be set twice */ in_ps = test_sta_flag(sta, WLAN_STA_PS_STA); if ((start && in_ps) || (!start && !in_ps)) return -EINVAL; if (start) sta_ps_start(sta); else sta_ps_end(sta); return 0; } EXPORT_SYMBOL(ieee80211_sta_ps_transition); void ieee80211_sta_pspoll(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); if (test_sta_flag(sta, WLAN_STA_SP)) return; if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) ieee80211_sta_ps_deliver_poll_response(sta); else set_sta_flag(sta, WLAN_STA_PSPOLL); } EXPORT_SYMBOL(ieee80211_sta_pspoll); void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); int ac = ieee80211_ac_from_tid(tid); /* * If this AC is not trigger-enabled do nothing unless the * driver is calling us after it already checked. * * NB: This could/should check a separate bitmap of trigger- * enabled queues, but for now we only implement uAPSD w/o * TSPEC changes to the ACs, so they're always the same. */ if (!(sta->sta.uapsd_queues & ieee80211_ac_to_qos_mask[ac]) && tid != IEEE80211_NUM_TIDS) return; /* if we are in a service period, do nothing */ if (test_sta_flag(sta, WLAN_STA_SP)) return; if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) ieee80211_sta_ps_deliver_uapsd(sta); else set_sta_flag(sta, WLAN_STA_UAPSD); } EXPORT_SYMBOL(ieee80211_sta_uapsd_trigger); static ieee80211_rx_result debug_noinline ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_hdr *hdr = (void *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); if (!rx->sta) return RX_CONTINUE; if (sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_AP_VLAN) return RX_CONTINUE; /* * The device handles station powersave, so don't do anything about * uAPSD and PS-Poll frames (the latter shouldn't even come up from * it to mac80211 since they're handled.) */ if (ieee80211_hw_check(&sdata->local->hw, AP_LINK_PS)) return RX_CONTINUE; /* * Don't do anything if the station isn't already asleep. In * the uAPSD case, the station will probably be marked asleep, * in the PS-Poll case the station must be confused ... */ if (!test_sta_flag(rx->sta, WLAN_STA_PS_STA)) return RX_CONTINUE; if (unlikely(ieee80211_is_pspoll(hdr->frame_control))) { ieee80211_sta_pspoll(&rx->sta->sta); /* Free PS Poll skb here instead of returning RX_DROP that would * count as an dropped frame. */ dev_kfree_skb(rx->skb); return RX_QUEUED; } else if (!ieee80211_has_morefrags(hdr->frame_control) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && ieee80211_has_pm(hdr->frame_control) && (ieee80211_is_data_qos(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control))) { u8 tid = ieee80211_get_tid(hdr); ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid); } return RX_CONTINUE; } static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; struct link_sta_info *link_sta = rx->link_sta; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int i; if (!sta || !link_sta) return RX_CONTINUE; /* * Update last_rx only for IBSS packets which are for the current * BSSID and for station already AUTHORIZED to avoid keeping the * current IBSS network alive in cases where other STAs start * using different BSSID. This will also give the station another * chance to restart the authentication/authorization in case * something went wrong the first time. */ if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, NL80211_IFTYPE_ADHOC); if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { link_sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data_present(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) link_sta->rx_stats.last_rate = sta_stats_encode_rate(status); } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { link_sta->rx_stats.last_rx = jiffies; } else if (!ieee80211_is_s1g_beacon(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. */ link_sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data_present(hdr->frame_control)) link_sta->rx_stats.last_rate = sta_stats_encode_rate(status); } link_sta->rx_stats.fragments++; u64_stats_update_begin(&link_sta->rx_stats.syncp); link_sta->rx_stats.bytes += rx->skb->len; u64_stats_update_end(&link_sta->rx_stats.syncp); if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { link_sta->rx_stats.last_signal = status->signal; ewma_signal_add(&link_sta->rx_stats_avg.signal, -status->signal); } if (status->chains) { link_sta->rx_stats.chains = status->chains; for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { int signal = status->chain_signal[i]; if (!(status->chains & BIT(i))) continue; link_sta->rx_stats.chain_signal_last[i] = signal; ewma_signal_add(&link_sta->rx_stats_avg.chain_signal[i], -signal); } } if (ieee80211_is_s1g_beacon(hdr->frame_control)) return RX_CONTINUE; /* * Change STA power saving mode only at the end of a frame * exchange sequence, and only for a data or management * frame as specified in IEEE 802.11-2016 11.2.3.2 */ if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) && !ieee80211_has_morefrags(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1) && (ieee80211_is_mgmt(hdr->frame_control) || ieee80211_is_data(hdr->frame_control)) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (!ieee80211_has_pm(hdr->frame_control)) sta_ps_end(sta); } else { if (ieee80211_has_pm(hdr->frame_control)) sta_ps_start(sta); } } /* mesh power save support */ if (ieee80211_vif_is_mesh(&rx->sdata->vif)) ieee80211_mps_rx_h_sta_process(sta, hdr); /* * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. */ if (ieee80211_is_any_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); /* * If we receive a 4-addr nullfunc frame from a STA * that was not moved to a 4-addr STA vlan yet send * the event to userspace and for older hostapd drop * the frame to the monitor interface. */ if (ieee80211_has_a4(hdr->frame_control) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || (rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !rx->sdata->u.vlan.sta))) { if (!test_and_set_sta_flag(sta, WLAN_STA_4ADDR_EVENT)) cfg80211_rx_unexpected_4addr_frame( rx->sdata->dev, sta->sta.addr, rx->link_id, GFP_ATOMIC); return RX_DROP_U_UNEXPECTED_4ADDR_FRAME; } /* * Update counter and free packet here to avoid * counting this as a dropped packed. */ link_sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } return RX_CONTINUE; } /* ieee80211_rx_h_sta_process */ static struct ieee80211_key * ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx) { struct ieee80211_key *key = NULL; int idx2; /* Make sure key gets set if either BIGTK key index is set so that * ieee80211_drop_unencrypted_mgmt() can properly drop both unprotected * Beacon frames and Beacon frames that claim to use another BIGTK key * index (i.e., a key that we do not have). */ if (idx < 0) { idx = NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS; idx2 = idx + 1; } else { if (idx == NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) idx2 = idx + 1; else idx2 = idx - 1; } if (rx->link_sta) key = rcu_dereference(rx->link_sta->gtk[idx]); if (!key) key = rcu_dereference(rx->link->gtk[idx]); if (!key && rx->link_sta) key = rcu_dereference(rx->link_sta->gtk[idx2]); if (!key) key = rcu_dereference(rx->link->gtk[idx2]); return key; } static ieee80211_rx_result debug_noinline ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int keyidx; ieee80211_rx_result result = RX_DROP_U_DECRYPT_FAIL; struct ieee80211_key *sta_ptk = NULL; struct ieee80211_key *ptk_idx = NULL; int mmie_keyidx = -1; __le16 fc; if (ieee80211_is_ext(hdr->frame_control)) return RX_CONTINUE; /* * Key selection 101 * * There are five types of keys: * - GTK (group keys) * - IGTK (group keys for management frames) * - BIGTK (group keys for Beacon frames) * - PTK (pairwise keys) * - STK (station-to-station pairwise keys) * * When selecting a key, we have to distinguish between multicast * (including broadcast) and unicast frames, the latter can only * use PTKs and STKs while the former always use GTKs, IGTKs, and * BIGTKs. Unless, of course, actual WEP keys ("pre-RSNA") are used, * then unicast frames can also use key indices like GTKs. Hence, if we * don't have a PTK/STK we check the key index for a WEP key. * * Note that in a regular BSS, multicast frames are sent by the * AP only, associated stations unicast the frame to the AP first * which then multicasts it on their behalf. * * There is also a slight problem in IBSS mode: GTKs are negotiated * with each station, that is something we don't currently handle. * The spec seems to expect that one negotiates the same key with * every station but there's no such requirement; VLANs could be * possible. */ /* start without a key */ rx->key = NULL; fc = hdr->frame_control; if (rx->sta) { int keyid = rx->sta->ptk_idx; sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); if (ieee80211_has_protected(fc) && !(status->flag & RX_FLAG_IV_STRIPPED)) { keyid = ieee80211_get_keyid(rx->skb); if (unlikely(keyid < 0)) return RX_DROP_U_NO_KEY_ID;