/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM xdp #if !defined(_TRACE_XDP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_XDP_H #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/tracepoint.h> #include <linux/bpf.h> #include <net/xdp.h> #define __XDP_ACT_MAP(FN) \ FN(ABORTED) \ FN(DROP) \ FN(PASS) \ FN(TX) \ FN(REDIRECT) #define __XDP_ACT_TP_FN(x) \ TRACE_DEFINE_ENUM(XDP_##x); #define __XDP_ACT_SYM_FN(x) \ { XDP_##x, #x }, #define __XDP_ACT_SYM_TAB \ __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, NULL } __XDP_ACT_MAP(__XDP_ACT_TP_FN) TRACE_EVENT(xdp_exception, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, u32 act), TP_ARGS(dev, xdp, act), TP_STRUCT__entry( __field(int, prog_id) __field(u32, act) __field(int, ifindex) ), TP_fast_assign( __entry->prog_id = xdp->aux->id; __entry->act = act; __entry->ifindex = dev->ifindex; ), TP_printk("prog_id=%d action=%s ifindex=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex) ); TRACE_EVENT(xdp_bulk_tx, TP_PROTO(const struct net_device *dev, int sent, int drops, int err), TP_ARGS(dev, sent, drops, err), TP_STRUCT__entry( __field(int, ifindex) __field(u32, act) __field(int, drops) __field(int, sent) __field(int, err) ), TP_fast_assign( __entry->ifindex = dev->ifindex; __entry->act = XDP_TX; __entry->drops = drops; __entry->sent = sent; __entry->err = err; ), TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d", __entry->ifindex, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->sent, __entry->drops, __entry->err) ); #ifndef __DEVMAP_OBJ_TYPE #define __DEVMAP_OBJ_TYPE struct _bpf_dtab_netdev { struct net_device *dev; }; #endif /* __DEVMAP_OBJ_TYPE */ DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index),
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index), TP_STRUCT__entry( __field(int, prog_id) __field(u32, act) __field(int, ifindex) __field(int, err) __field(int, to_ifindex) __field(u32, map_id) __field(int, map_index) ), TP_fast_assign( u32 ifindex = 0, map_index = index; if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) { /* Just leave to_ifindex to 0 if do broadcast redirect, * as tgt will be NULL. */ if (tgt) ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex; } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { ifindex = index; map_index = 0; } __entry->prog_id = xdp->aux->id; __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->err = err; __entry->to_ifindex = ifindex; __entry->map_id = map_id; __entry->map_index = map_index; ), TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" " map_id=%d map_index=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex, __entry->to_ifindex, __entry->err, __entry->map_id, __entry->map_index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); #define _trace_xdp_redirect(dev, xdp, to) \ trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) #define _trace_xdp_redirect_err(dev, xdp, to, err) \ trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) #define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \ trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index) #define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \ trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index) /* not used anymore, but kept around so as not to break old programs */ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); TRACE_EVENT(xdp_cpumap_kthread, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats), TP_ARGS(map_id, processed, drops, sched, xdp_stats), TP_STRUCT__entry( __field(int, map_id) __field(u32, act) __field(int, cpu) __field(unsigned int, drops) __field(unsigned int, processed) __field(int, sched) __field(unsigned int, xdp_pass) __field(unsigned int, xdp_drop) __field(unsigned int, xdp_redirect) ), TP_fast_assign( __entry->map_id = map_id; __entry->act = XDP_REDIRECT; __entry->cpu = smp_processor_id(); __entry->drops = drops; __entry->processed = processed; __entry->sched = sched; __entry->xdp_pass = xdp_stats->pass; __entry->xdp_drop = xdp_stats->drop; __entry->xdp_redirect = xdp_stats->redirect; ), TP_printk("kthread" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" " sched=%d" " 
xdp_pass=%u xdp_drop=%u xdp_redirect=%u", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, __entry->sched, __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect) ); TRACE_EVENT(xdp_cpumap_enqueue, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, int to_cpu), TP_ARGS(map_id, processed, drops, to_cpu), TP_STRUCT__entry( __field(int, map_id) __field(u32, act) __field(int, cpu) __field(unsigned int, drops) __field(unsigned int, processed) __field(int, to_cpu) ), TP_fast_assign( __entry->map_id = map_id; __entry->act = XDP_REDIRECT; __entry->cpu = smp_processor_id(); __entry->drops = drops; __entry->processed = processed; __entry->to_cpu = to_cpu; ), TP_printk("enqueue" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" " to_cpu=%d", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, __entry->to_cpu) ); TRACE_EVENT(xdp_devmap_xmit, TP_PROTO(const struct net_device *from_dev, const struct net_device *to_dev, int sent, int drops, int err), TP_ARGS(from_dev, to_dev, sent, drops, err), TP_STRUCT__entry( __field(int, from_ifindex) __field(u32, act) __field(int, to_ifindex) __field(int, drops) __field(int, sent) __field(int, err) ), TP_fast_assign( __entry->from_ifindex = from_dev->ifindex; __entry->act = XDP_REDIRECT; __entry->to_ifindex = to_dev->ifindex; __entry->drops = drops; __entry->sent = sent; __entry->err = err; ), TP_printk("ndo_xdp_xmit" " from_ifindex=%d to_ifindex=%d action=%s" " sent=%d drops=%d" " err=%d", __entry->from_ifindex, __entry->to_ifindex, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->sent, __entry->drops, __entry->err) ); /* Expect users already include <net/xdp.h>, but not xdp_priv.h */ #include <net/xdp_priv.h> #define __MEM_TYPE_MAP(FN) \ FN(PAGE_SHARED) \ FN(PAGE_ORDER0) \ FN(PAGE_POOL) \ FN(XSK_BUFF_POOL) #define __MEM_TYPE_TP_FN(x) \ TRACE_DEFINE_ENUM(MEM_TYPE_##x); #define __MEM_TYPE_SYM_FN(x) \ { MEM_TYPE_##x, #x }, #define __MEM_TYPE_SYM_TAB \ __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } __MEM_TYPE_MAP(__MEM_TYPE_TP_FN) TRACE_EVENT(mem_disconnect, TP_PROTO(const struct xdp_mem_allocator *xa), TP_ARGS(xa), TP_STRUCT__entry( __field(const struct xdp_mem_allocator *, xa) __field(u32, mem_id) __field(u32, mem_type) __field(const void *, allocator) ), TP_fast_assign( __entry->xa = xa; __entry->mem_id = xa->mem.id; __entry->mem_type = xa->mem.type; __entry->allocator = xa->allocator; ), TP_printk("mem_id=%d mem_type=%s allocator=%p", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->allocator ) ); TRACE_EVENT(mem_connect, TP_PROTO(const struct xdp_mem_allocator *xa, const struct xdp_rxq_info *rxq), TP_ARGS(xa, rxq), TP_STRUCT__entry( __field(const struct xdp_mem_allocator *, xa) __field(u32, mem_id) __field(u32, mem_type) __field(const void *, allocator) __field(const struct xdp_rxq_info *, rxq) __field(int, ifindex) ), TP_fast_assign( __entry->xa = xa; __entry->mem_id = xa->mem.id; __entry->mem_type = xa->mem.type; __entry->allocator = xa->allocator; __entry->rxq = rxq; __entry->ifindex = rxq->dev->ifindex; ), TP_printk("mem_id=%d mem_type=%s allocator=%p" " ifindex=%d", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->allocator, __entry->ifindex ) ); TRACE_EVENT(mem_return_failed, TP_PROTO(const struct xdp_mem_info *mem, const struct page *page), TP_ARGS(mem, page), TP_STRUCT__entry( __field(const struct page *, page) __field(u32, 
mem_id) __field(u32, mem_type) ), TP_fast_assign( __entry->page = page; __entry->mem_id = mem->id; __entry->mem_type = mem->type; ), TP_printk("mem_id=%d mem_type=%s page=%p", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->page ) ); TRACE_EVENT(bpf_xdp_link_attach_failed, TP_PROTO(const char *msg), TP_ARGS(msg), TP_STRUCT__entry( __string(msg, msg) ), TP_fast_assign( __assign_str(msg); ), TP_printk("errmsg=%s", __get_str(msg)) ); #endif /* _TRACE_XDP_H */ #include <trace/define_trace.h>
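/*
 * Illustrative stand-alone sketch (not part of this header): the
 * __XDP_ACT_MAP()/__XDP_ACT_SYM_FN() pair above is an X-macro that expands a
 * single action list into both the TRACE_DEFINE_ENUM() calls and the
 * { value, "name" } table consumed by __print_symbolic().  The user-space
 * program below rebuilds the same kind of table to show how the expansion
 * works; it only assumes the UAPI <linux/bpf.h> definition of enum xdp_action,
 * and the ACT_* names are local to this example.
 */
#include <stdio.h>
#include <linux/bpf.h>		/* enum xdp_action: XDP_ABORTED .. XDP_REDIRECT */

#define ACT_MAP(FN)	\
	FN(ABORTED)	\
	FN(DROP)	\
	FN(PASS)	\
	FN(TX)		\
	FN(REDIRECT)

#define ACT_SYM_FN(x)	{ XDP_##x, #x },

static const struct { int val; const char *name; } act_sym_tab[] = {
	ACT_MAP(ACT_SYM_FN)
	{ -1, NULL }
};

static const char *act_name(int act)
{
	for (int i = 0; act_sym_tab[i].name; i++)
		if (act_sym_tab[i].val == act)
			return act_sym_tab[i].name;
	return "UNKNOWN";
}

int main(void)
{
	/* Prints "XDP_REDIRECT is reported as action=REDIRECT". */
	printf("XDP_REDIRECT is reported as action=%s\n", act_name(XDP_REDIRECT));
	return 0;
}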
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_VXLAN_H #define __NET_VXLAN_H 1 #include <linux/if_vlan.h> #include <linux/rhashtable-types.h> #include <net/udp_tunnel.h> #include <net/dst_metadata.h> #include <net/rtnetlink.h> #include <net/switchdev.h> #include <net/nexthop.h> #define IANA_VXLAN_UDP_PORT 4789 #define IANA_VXLAN_GPE_UDP_PORT 4790 /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R| Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * I = VXLAN Network Identifier (VNI) present. */ struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; }; /* VXLAN header flags.
*/ #define VXLAN_HF_VNI cpu_to_be32(BIT(27)) #define VXLAN_N_VID (1u << 24) #define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_VNI_MASK cpu_to_be32(VXLAN_VID_MASK << 8) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) #define FDB_HASH_BITS 8 #define FDB_HASH_SIZE (1<<FDB_HASH_BITS) /* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R|R|R|C| Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) |O| Csum start | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * C = Remote checksum offload bit. When set indicates that the * remote checksum offload data is present. * * O = Offset bit. Indicates the checksum offset relative to * checksum start. * * Csum start = Checksum start divided by two. * * http://tools.ietf.org/html/draft-herbert-vxlan-rco */ /* VXLAN-RCO header flags. */ #define VXLAN_HF_RCO cpu_to_be32(BIT(21)) /* Remote checksum offload header option */ #define VXLAN_RCO_MASK cpu_to_be32(0x7f) /* Last byte of vni field */ #define VXLAN_RCO_UDP cpu_to_be32(0x80) /* Indicate UDP RCO (TCP when not set *) */ #define VXLAN_RCO_SHIFT 1 /* Left shift of start */ #define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) #define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT) /* * VXLAN Group Based Policy Extension (VXLAN_F_GBP): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * G = Group Policy ID present. * * D = Don't Learn bit. When set, this bit indicates that the egress * VTEP MUST NOT learn the source address of the encapsulated frame. * * A = Indicates that the group policy has already been applied to * this packet. Policies MUST NOT be applied by devices when the * A bit is set. * * https://tools.ietf.org/html/draft-smith-vxlan-group-policy */ struct vxlanhdr_gbp { u8 vx_flags; #ifdef __LITTLE_ENDIAN_BITFIELD u8 reserved_flags1:3, policy_applied:1, reserved_flags2:2, dont_learn:1, reserved_flags3:1; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags1:1, dont_learn:1, reserved_flags2:2, policy_applied:1, reserved_flags3:3; #else #error "Please fix <asm/byteorder.h>" #endif __be16 policy_id; __be32 vx_vni; }; /* VXLAN-GBP header flags. */ #define VXLAN_HF_GBP cpu_to_be32(BIT(31)) #define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF)) /* skb->mark mapping * * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ #define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) #define VXLAN_GBP_MASK (VXLAN_GBP_DONT_LEARN | VXLAN_GBP_POLICY_APPLIED | \ VXLAN_GBP_ID_MASK) /* * VXLAN Generic Protocol Extension (VXLAN_F_GPE): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Ver = Version. 
Indicates VXLAN GPE protocol version. * * P = Next Protocol Bit. The P bit is set to indicate that the * Next Protocol field is present. * * O = OAM Flag Bit. The O bit is set to indicate that the packet * is an OAM packet. * * Next Protocol = This 8 bit field indicates the protocol header * immediately following the VXLAN GPE header. * * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 */ struct vxlanhdr_gpe { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 oam_flag:1, reserved_flags1:1, np_applied:1, instance_applied:1, version:2, reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, instance_applied:1, np_applied:1, reserved_flags1:1, oam_flag:1; #endif u8 reserved_flags3; u8 reserved_flags4; u8 next_protocol; __be32 vx_vni; }; /* VXLAN-GPE header flags. */ #define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28)) #define VXLAN_HF_NP cpu_to_be32(BIT(26)) #define VXLAN_HF_OAM cpu_to_be32(BIT(24)) #define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ cpu_to_be32(0xff)) struct vxlan_metadata { u32 gbp; }; /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; struct socket *sock; struct hlist_head vni_list[VNI_HASH_SIZE]; refcount_t refcnt; u32 flags; }; union vxlan_addr { struct sockaddr_in sin; struct sockaddr_in6 sin6; struct sockaddr sa; }; struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; struct net_device *remote_dev; struct list_head list; struct rcu_head rcu; struct dst_cache dst_cache; }; struct vxlan_config { union vxlan_addr remote_ip; union vxlan_addr saddr; __be32 vni; int remote_ifindex; int mtu; __be16 dst_port; u16 port_min; u16 port_max; u8 tos; u8 ttl; __be32 label; enum ifla_vxlan_label_policy label_policy; u32 flags; unsigned long age_interval; unsigned int addrmax; bool no_share; enum ifla_vxlan_df df; struct vxlanhdr reserved_bits; }; enum { VXLAN_VNI_STATS_RX, VXLAN_VNI_STATS_RX_DROPS, VXLAN_VNI_STATS_RX_ERRORS, VXLAN_VNI_STATS_TX, VXLAN_VNI_STATS_TX_DROPS, VXLAN_VNI_STATS_TX_ERRORS, }; struct vxlan_vni_stats { u64 rx_packets; u64 rx_bytes; u64 rx_drops; u64 rx_errors; u64 tx_packets; u64 tx_bytes; u64 tx_drops; u64 tx_errors; }; struct vxlan_vni_stats_pcpu { struct vxlan_vni_stats stats; struct u64_stats_sync syncp; }; struct vxlan_dev_node { struct hlist_node hlist; struct vxlan_dev *vxlan; }; struct vxlan_vni_node { struct rhash_head vnode; struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ #if IS_ENABLED(CONFIG_IPV6) struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ #endif struct list_head vlist; __be32 vni; union vxlan_addr remote_ip; /* default remote ip for this vni */ struct vxlan_vni_stats_pcpu __percpu *stats; struct rcu_head rcu; }; struct vxlan_vni_group { struct rhashtable vni_hash; struct list_head vni_list; u32 num_vnis; }; /* Pseudo network device */ struct vxlan_dev { struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ #if IS_ENABLED(CONFIG_IPV6) struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ #endif struct list_head next; /* vxlan's per namespace list */ struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) struct vxlan_sock __rcu *vn6_sock; /* listening socket for IPv6 */ #endif struct net_device *dev; struct net *net; /* netns for packet i/o */ struct vxlan_rdst default_dst; /* default destination */ struct timer_list age_timer; spinlock_t hash_lock[FDB_HASH_SIZE]; unsigned int addrcnt; struct 
gro_cells gro_cells; struct vxlan_config cfg; struct vxlan_vni_group __rcu *vnigrp; struct hlist_head fdb_head[FDB_HASH_SIZE]; struct rhashtable mdb_tbl; struct hlist_head mdb_list; unsigned int mdb_seq; }; #define VXLAN_F_LEARN 0x01 #define VXLAN_F_PROXY 0x02 #define VXLAN_F_RSC 0x04 #define VXLAN_F_L2MISS 0x08 #define VXLAN_F_L3MISS 0x10 #define VXLAN_F_IPV6 0x20 #define VXLAN_F_UDP_ZERO_CSUM_TX 0x40 #define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80 #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 #define VXLAN_F_TTL_INHERIT 0x10000 #define VXLAN_F_VNIFILTER 0x20000 #define VXLAN_F_MDB 0x40000 #define VXLAN_F_LOCALBYPASS 0x80000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable */ #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ VXLAN_F_GPE | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ VXLAN_F_COLLECT_METADATA | \ VXLAN_F_VNIFILTER) /* Flags that can be set together with VXLAN_F_GPE. */ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ VXLAN_F_IPV6 | \ VXLAN_F_IPV6_LINKLOCAL | \ VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_COLLECT_METADATA | \ VXLAN_F_VNIFILTER | \ VXLAN_F_LOCALBYPASS) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) { u8 l4_hdr = 0; if (!skb->encapsulation) return features; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): l4_hdr = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: return features; } if ((l4_hdr == IPPROTO_UDP) && (skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)) || (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto)))) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } static inline int vxlan_headroom(u32 flags) { /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */ /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */ return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr)) + sizeof(struct udphdr) + sizeof(struct vxlanhdr) + (flags & VXLAN_F_GPE ? 0 : ETH_HLEN); } static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) { return (struct vxlanhdr *)(udp_hdr(skb) + 1); } static inline __be32 vxlan_vni(__be32 vni_field) { #if defined(__BIG_ENDIAN) return (__force __be32)((__force u32)vni_field >> 8); #else return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8); #endif } static inline __be32 vxlan_vni_field(__be32 vni) { #if defined(__BIG_ENDIAN) return (__force __be32)((__force u32)vni << 8); #else return (__force __be32)((__force u32)vni >> 8); #endif } static inline size_t vxlan_rco_start(__be32 vni_field) { return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; } static inline size_t vxlan_rco_offset(__be32 vni_field) { return (vni_field & VXLAN_RCO_UDP) ? 
offsetof(struct udphdr, check) : offsetof(struct tcphdr, check); } static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) { __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT); if (offset == offsetof(struct udphdr, check)) vni_field |= VXLAN_RCO_UDP; return vni_field; } static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { return vs->sock->sk->sk_family; } #if IS_ENABLED(CONFIG_IPV6) static inline bool vxlan_addr_any(const union vxlan_addr *ipa) { if (ipa->sa.sa_family == AF_INET6) return ipv6_addr_any(&ipa->sin6.sin6_addr); else return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); } static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) { if (ipa->sa.sa_family == AF_INET6) return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); else return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #else /* !IS_ENABLED(CONFIG_IPV6) */ static inline bool vxlan_addr_any(const union vxlan_addr *ipa) { return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); } static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) { return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #endif /* IS_ENABLED(CONFIG_IPV6) */ static inline bool netif_is_vxlan(const struct net_device *dev) { return dev->rtnl_link_ops && !strcmp(dev->rtnl_link_ops->kind, "vxlan"); } struct switchdev_notifier_vxlan_fdb_info { struct switchdev_notifier_info info; /* must be first */ union vxlan_addr remote_ip; __be16 remote_port; __be32 remote_vni; u32 remote_ifindex; u8 eth_addr[ETH_ALEN]; __be32 vni; bool offloaded; bool added_by_user; }; #if IS_ENABLED(CONFIG_VXLAN) int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, struct switchdev_notifier_vxlan_fdb_info *fdb_info); int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, struct notifier_block *nb, struct netlink_ext_ack *extack); void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni); #else static inline int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, struct switchdev_notifier_vxlan_fdb_info *fdb_info) { return -ENOENT; } static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, struct notifier_block *nb, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni) { } #endif static inline void vxlan_flag_attr_error(int attrtype, struct netlink_ext_ack *extack) { #define VXLAN_FLAG(flg) \ case IFLA_VXLAN_##flg: \ NL_SET_ERR_MSG_MOD(extack, \ "cannot change " #flg " flag"); \ break switch (attrtype) { VXLAN_FLAG(TTL_INHERIT); VXLAN_FLAG(LEARNING); VXLAN_FLAG(PROXY); VXLAN_FLAG(RSC); VXLAN_FLAG(L2MISS); VXLAN_FLAG(L3MISS); VXLAN_FLAG(COLLECT_METADATA); VXLAN_FLAG(UDP_ZERO_CSUM6_TX); VXLAN_FLAG(UDP_ZERO_CSUM6_RX); VXLAN_FLAG(REMCSUM_TX); VXLAN_FLAG(REMCSUM_RX); VXLAN_FLAG(GBP); VXLAN_FLAG(GPE); VXLAN_FLAG(REMCSUM_NOPARTIAL); default: NL_SET_ERR_MSG_MOD(extack, \ "cannot change flag"); break; } #undef VXLAN_FLAG } static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh, u32 hash, struct vxlan_rdst *rdst) { struct fib_nh_common *nhc; nhc = nexthop_path_fdb_result(nh, hash >> 1); if (unlikely(!nhc)) return false; switch (nhc->nhc_gw_family) { case AF_INET: rdst->remote_ip.sin.sin_addr.s_addr = nhc->nhc_gw.ipv4; rdst->remote_ip.sa.sa_family = AF_INET; break; case AF_INET6: rdst->remote_ip.sin6.sin6_addr = nhc->nhc_gw.ipv6; rdst->remote_ip.sa.sa_family = AF_INET6; break; } return true; } static inline void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, const 
struct vxlan_metadata *md) { struct vxlanhdr_gbp *gbp; if (!md->gbp) return; gbp = (struct vxlanhdr_gbp *)vxh; vxh->vx_flags |= VXLAN_HF_GBP; if (md->gbp & VXLAN_GBP_DONT_LEARN) gbp->dont_learn = 1; if (md->gbp & VXLAN_GBP_POLICY_APPLIED) gbp->policy_applied = 1; gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); } #endif
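/*
 * Illustrative user-space sketch (not part of this header): it shows how a
 * 24-bit VNI sits in the 8-byte VXLAN header defined above, i.e. the I flag
 * (VXLAN_HF_VNI, bit 27) in vx_flags and the VNI in the upper 24 bits of
 * vx_vni, as per RFC 7348.  struct vxlanhdr_wire and the helper names are
 * local to this example and simply mirror struct vxlanhdr.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

struct vxlanhdr_wire {
	uint32_t vx_flags;
	uint32_t vx_vni;
};

static void vxlan_build_hdr(struct vxlanhdr_wire *vxh, uint32_t vni)
{
	vxh->vx_flags = htonl(1u << 27);	/* I bit: VNI present */
	vxh->vx_vni = htonl(vni << 8);		/* VNI above the reserved low byte */
}

static uint32_t vxlan_parse_vni(const struct vxlanhdr_wire *vxh)
{
	return ntohl(vxh->vx_vni) >> 8;		/* drop the reserved low byte */
}

int main(void)
{
	struct vxlanhdr_wire vxh;

	vxlan_build_hdr(&vxh, 42);
	printf("vx_vni field (host order): 0x%08x, parsed VNI: %u\n",
	       ntohl(vxh.vx_vni), vxlan_parse_vni(&vxh));
	return 0;
}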
// SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/kconfig.h> #include <linux/list.h> #include <linux/security.h> #include <linux/umh.h> #include <linux/sysctl.h> #include <linux/module.h> #include "fallback.h" #include "firmware.h" /* * firmware fallback mechanism */ /* * use small loading timeout for caching devices' firmware because all these * firmware images have been loaded successfully at least once, and the system is * ready for completing firmware loading now. The maximum size of firmware in * current distributions is about 2M bytes, so 10 secs should be enough. */ void fw_fallback_set_cache_timeout(void) { fw_fallback_config.old_timeout = __firmware_loading_timeout(); __fw_fallback_set_timeout(10); } /* Restores the timeout to the value last configured during normal operation */ void fw_fallback_set_default_timeout(void) { __fw_fallback_set_timeout(fw_fallback_config.old_timeout); } static long firmware_loading_timeout(void) { return __firmware_loading_timeout() > 0 ? __firmware_loading_timeout() * HZ : MAX_JIFFY_OFFSET; } static inline int fw_sysfs_wait_timeout(struct fw_priv *fw_priv, long timeout) { return __fw_state_wait_common(fw_priv, timeout); } static LIST_HEAD(pending_fw_head); void kill_pending_fw_fallback_reqs(bool kill_all) { struct fw_priv *fw_priv; struct fw_priv *next; mutex_lock(&fw_lock); list_for_each_entry_safe(fw_priv, next, &pending_fw_head, pending_list) { if (kill_all || !fw_priv->need_uevent) __fw_load_abort(fw_priv); } if (kill_all) fw_load_abort_all = true; mutex_unlock(&fw_lock); } /** * fw_load_sysfs_fallback() - load a firmware via the sysfs fallback mechanism * @fw_sysfs: firmware sysfs information for the firmware to load * @timeout: timeout to wait for the load * * In charge of constructing a sysfs fallback interface for firmware loading.
**/ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) { int retval = 0; struct device *f_dev = &fw_sysfs->dev; struct fw_priv *fw_priv = fw_sysfs->fw_priv; /* fall back on userspace loading */ if (!fw_priv->data) fw_priv->is_paged_buf = true; dev_set_uevent_suppress(f_dev, true); retval = device_add(f_dev); if (retval) { dev_err(f_dev, "%s: device_register failed\n", __func__); goto err_put_dev; } mutex_lock(&fw_lock); if (fw_load_abort_all || fw_state_is_aborted(fw_priv)) { mutex_unlock(&fw_lock); retval = -EINTR; goto out; } list_add(&fw_priv->pending_list, &pending_fw_head); mutex_unlock(&fw_lock); if (fw_priv->opt_flags & FW_OPT_UEVENT) { fw_priv->need_uevent = true; dev_set_uevent_suppress(f_dev, false); dev_dbg(f_dev, "firmware: requesting %s\n", fw_priv->fw_name); kobject_uevent(&fw_sysfs->dev.kobj, KOBJ_ADD); } else { timeout = MAX_JIFFY_OFFSET; } retval = fw_sysfs_wait_timeout(fw_priv, timeout); if (retval < 0 && retval != -ENOENT) { mutex_lock(&fw_lock); fw_load_abort(fw_sysfs); mutex_unlock(&fw_lock); } if (fw_state_is_aborted(fw_priv)) { if (retval == -ERESTARTSYS) retval = -EINTR; } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; out: device_del(f_dev); err_put_dev: put_device(f_dev); return retval; } static int fw_load_from_user_helper(struct firmware *firmware, const char *name, struct device *device, u32 opt_flags) { struct fw_sysfs *fw_sysfs; long timeout; int ret; timeout = firmware_loading_timeout(); if (opt_flags & FW_OPT_NOWAIT) { timeout = usermodehelper_read_lock_wait(timeout); if (!timeout) { dev_dbg(device, "firmware: %s loading timed out\n", name); return -EBUSY; } } else { ret = usermodehelper_read_trylock(); if (WARN_ON(ret)) { dev_err(device, "firmware: %s will not be loaded\n", name); return ret; } } fw_sysfs = fw_create_instance(firmware, name, device, opt_flags); if (IS_ERR(fw_sysfs)) { ret = PTR_ERR(fw_sysfs); goto out_unlock; } fw_sysfs->fw_priv = firmware->priv; ret = fw_load_sysfs_fallback(fw_sysfs, timeout); if (!ret) ret = assign_fw(firmware, device); out_unlock: usermodehelper_read_unlock(); return ret; } static bool fw_force_sysfs_fallback(u32 opt_flags) { if (fw_fallback_config.force_sysfs_fallback) return true; if (!(opt_flags & FW_OPT_USERHELPER)) return false; return true; } static bool fw_run_sysfs_fallback(u32 opt_flags) { int ret; if (fw_fallback_config.ignore_sysfs_fallback) { pr_info_once("Ignoring firmware sysfs fallback due to sysctl knob\n"); return false; } if ((opt_flags & FW_OPT_NOFALLBACK_SYSFS)) return false; /* Also permit LSMs and IMA to fail firmware sysfs fallback */ ret = security_kernel_load_data(LOADING_FIRMWARE, true); if (ret < 0) return false; return fw_force_sysfs_fallback(opt_flags); } /** * firmware_fallback_sysfs() - use the fallback mechanism to find firmware * @fw: pointer to firmware image * @name: name of firmware file to look for * @device: device for which firmware is being loaded * @opt_flags: options to control firmware loading behaviour, as defined by * &enum fw_opt * @ret: return value from direct lookup which triggered the fallback mechanism * * This function is called if direct lookup for the firmware failed, it enables * a fallback mechanism through userspace by exposing a sysfs loading * interface. Userspace is in charge of loading the firmware through the sysfs * loading interface. This sysfs fallback mechanism may be disabled completely * on a system by setting the proc sysctl value ignore_sysfs_fallback to true. 
* If this is false, we check whether the internal API caller set the * @FW_OPT_NOFALLBACK_SYSFS flag; if so, that also disables the fallback * mechanism. A system may want to enforce the sysfs fallback mechanism at all * times; it can do this by setting ignore_sysfs_fallback to false and * force_sysfs_fallback to true. * Enabling force_sysfs_fallback is functionally equivalent to building a kernel * with CONFIG_FW_LOADER_USER_HELPER_FALLBACK. **/ int firmware_fallback_sysfs(struct firmware *fw, const char *name, struct device *device, u32 opt_flags, int ret) { if (!fw_run_sysfs_fallback(opt_flags)) return ret; if (!(opt_flags & FW_OPT_NO_WARN)) dev_warn(device, "Falling back to sysfs fallback for: %s\n", name); else dev_dbg(device, "Falling back to sysfs fallback for: %s\n", name); return fw_load_from_user_helper(fw, name, device, opt_flags); }
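/*
 * Illustrative user-space counterpart to the fallback above (a sketch, not
 * kernel code): once fw_load_sysfs_fallback() registers the device and emits
 * the uevent, a helper is expected to write "1" to the "loading" attribute,
 * stream the image into "data", and finish with "0" ("-1" aborts).  The
 * program and function names here are examples; the real sysfs directory
 * comes from the firmware uevent ($DEVPATH under /sys).
 */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

static int load_firmware(const char *sysfs_dir, const char *fw_file)
{
	char loading[256], data[256], buf[4096];
	FILE *src, *dst;
	size_t n;

	snprintf(loading, sizeof(loading), "%s/loading", sysfs_dir);
	snprintf(data, sizeof(data), "%s/data", sysfs_dir);

	src = fopen(fw_file, "rb");
	if (!src)
		return -1;

	write_str(loading, "1");			/* begin the transfer */
	dst = fopen(data, "wb");
	if (dst) {
		while ((n = fread(buf, 1, sizeof(buf), src)) > 0)
			fwrite(buf, 1, n, dst);
		fclose(dst);
	}
	write_str(loading, dst ? "0" : "-1");		/* commit or abort */
	fclose(src);
	return dst ? 0 : -1;
}

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <sysfs firmware dir> <image file>\n", argv[0]);
		return 1;
	}
	return load_firmware(argv[1], argv[2]) ? 1 : 0;
}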
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2010 Daniel Mack <daniel@caiaq.de> * * This file holds USB constants and structures defined * by the USB Device Class Definition for Audio Devices in version 2.0. * Comments below reference relevant sections of the documents contained * in http://www.usb.org/developers/devclass_docs/Audio2.0_final.zip */ #ifndef __LINUX_USB_AUDIO_V2_H #define __LINUX_USB_AUDIO_V2_H #include <linux/types.h> /* v1.0 and v2.0 of this standard have many things in common. For the rest * of the definitions, please refer to audio.h */ /* * bmControl field decoders * * From the USB Audio spec v2.0: * * bmaControls() is a (ch+1)-element array of 4-byte bitmaps, * each containing a set of bit pairs. If a Control is present, * it must be Host readable. If a certain Control is not * present then the bit pair must be set to 0b00. * If a Control is present but read-only, the bit pair must be * set to 0b01. If a Control is also Host programmable, the bit * pair must be set to 0b11. The value 0b10 is not allowed. * */ static inline bool uac_v2v3_control_is_readable(u32 bmControls, u8 control) { return (bmControls >> ((control - 1) * 2)) & 0x1; } static inline bool uac_v2v3_control_is_writeable(u32 bmControls, u8 control) { return (bmControls >> ((control - 1) * 2)) & 0x2; } /* 4.7.2 Class-Specific AC Interface Descriptor */ struct uac2_ac_header_descriptor { __u8 bLength; /* 9 */ __u8 bDescriptorType; /* USB_DT_CS_INTERFACE */ __u8 bDescriptorSubtype; /* UAC_MS_HEADER */ __le16 bcdADC; /* 0x0200 */ __u8 bCategory; __le16 wTotalLength; /* includes Unit and Terminal desc.
*/ __u8 bmControls; } __packed; /* 2.3.1.6 Type I Format Type Descriptor (Frmts20 final.pdf)*/ struct uac2_format_type_i_descriptor { __u8 bLength; /* in bytes: 6 */ __u8 bDescriptorType; /* USB_DT_CS_INTERFACE */ __u8 bDescriptorSubtype; /* FORMAT_TYPE */ __u8 bFormatType; /* FORMAT_TYPE_1 */ __u8 bSubslotSize; /* {1,2,3,4} */ __u8 bBitResolution; } __packed; /* 4.7.2.1 Clock Source Descriptor */ struct uac_clock_source_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bClockID; __u8 bmAttributes; __u8 bmControls; __u8 bAssocTerminal; __u8 iClockSource; } __attribute__((packed)); /* bmAttribute fields */ #define UAC_CLOCK_SOURCE_TYPE_EXT 0x0 #define UAC_CLOCK_SOURCE_TYPE_INT_FIXED 0x1 #define UAC_CLOCK_SOURCE_TYPE_INT_VAR 0x2 #define UAC_CLOCK_SOURCE_TYPE_INT_PROG 0x3 #define UAC_CLOCK_SOURCE_SYNCED_TO_SOF (1 << 2) /* 4.7.2.2 Clock Selector Descriptor */ struct uac_clock_selector_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bClockID; __u8 bNrInPins; __u8 baCSourceID[]; /* bmControls and iClockSelector omitted */ } __attribute__((packed)); /* 4.7.2.3 Clock Multiplier Descriptor */ struct uac_clock_multiplier_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bClockID; __u8 bCSourceID; __u8 bmControls; __u8 iClockMultiplier; } __attribute__((packed)); /* 4.7.2.4 Input terminal descriptor */ struct uac2_input_terminal_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bTerminalID; __le16 wTerminalType; __u8 bAssocTerminal; __u8 bCSourceID; __u8 bNrChannels; __le32 bmChannelConfig; __u8 iChannelNames; __le16 bmControls; __u8 iTerminal; } __attribute__((packed)); /* 4.7.2.5 Output terminal descriptor */ struct uac2_output_terminal_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bTerminalID; __le16 wTerminalType; __u8 bAssocTerminal; __u8 bSourceID; __u8 bCSourceID; __le16 bmControls; __u8 iTerminal; } __attribute__((packed)); /* 4.7.2.8 Feature Unit Descriptor */ struct uac2_feature_unit_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bUnitID; __u8 bSourceID; /* bmaControls is actually u32, * but u8 is needed for the hybrid parser */ __u8 bmaControls[]; /* variable length */ } __attribute__((packed)); #define UAC2_DT_FEATURE_UNIT_SIZE(ch) (6 + ((ch) + 1) * 4) /* As above, but more useful for defining your own descriptors: */ #define DECLARE_UAC2_FEATURE_UNIT_DESCRIPTOR(ch) \ struct uac2_feature_unit_descriptor_##ch { \ __u8 bLength; \ __u8 bDescriptorType; \ __u8 bDescriptorSubtype; \ __u8 bUnitID; \ __u8 bSourceID; \ __le32 bmaControls[ch + 1]; \ __u8 iFeature; \ } __packed /* 4.7.2.10 Effect Unit Descriptor */ struct uac2_effect_unit_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bUnitID; __le16 wEffectType; __u8 bSourceID; __u8 bmaControls[]; /* variable length */ } __attribute__((packed)); /* 4.9.2 Class-Specific AS Interface Descriptor */ struct uac2_as_header_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bTerminalLink; __u8 bmControls; __u8 bFormatType; __le32 bmFormats; __u8 bNrChannels; __le32 bmChannelConfig; __u8 iChannelNames; } __attribute__((packed)); #define UAC2_FORMAT_TYPE_I_RAW_DATA (1 << 31) /* 4.10.1.2 Class-Specific AS Isochronous Audio Data Endpoint Descriptor */ struct uac2_iso_endpoint_descriptor { __u8 bLength; /* in bytes: 8 */ __u8 bDescriptorType; /* USB_DT_CS_ENDPOINT */ __u8 bDescriptorSubtype; /* EP_GENERAL */ __u8 
bmAttributes; __u8 bmControls; __u8 bLockDelayUnits; __le16 wLockDelay; } __attribute__((packed)); #define UAC2_CONTROL_PITCH (3 << 0) #define UAC2_CONTROL_DATA_OVERRUN (3 << 2) #define UAC2_CONTROL_DATA_UNDERRUN (3 << 4) /* 5.2.5.4.2 Connector Control Parameter Block */ struct uac2_connectors_ctl_blk { __u8 bNrChannels; __le32 bmChannelConfig; __u8 iChannelNames; } __attribute__((packed)); /* 6.1 Interrupt Data Message */ #define UAC2_INTERRUPT_DATA_MSG_VENDOR (1 << 0) #define UAC2_INTERRUPT_DATA_MSG_EP (1 << 1) struct uac2_interrupt_data_msg { __u8 bInfo; __u8 bAttribute; __le16 wValue; __le16 wIndex; } __attribute__((packed)); /* A.7 Audio Function Category Codes */ #define UAC2_FUNCTION_SUBCLASS_UNDEFINED 0x00 #define UAC2_FUNCTION_DESKTOP_SPEAKER 0x01 #define UAC2_FUNCTION_HOME_THEATER 0x02 #define UAC2_FUNCTION_MICROPHONE 0x03 #define UAC2_FUNCTION_HEADSET 0x04 #define UAC2_FUNCTION_TELEPHONE 0x05 #define UAC2_FUNCTION_CONVERTER 0x06 #define UAC2_FUNCTION_SOUND_RECORDER 0x07 #define UAC2_FUNCTION_IO_BOX 0x08 #define UAC2_FUNCTION_MUSICAL_INSTRUMENT 0x09 #define UAC2_FUNCTION_PRO_AUDIO 0x0a #define UAC2_FUNCTION_AUDIO_VIDEO 0x0b #define UAC2_FUNCTION_CONTROL_PANEL 0x0c #define UAC2_FUNCTION_OTHER 0xff /* A.9 Audio Class-Specific AC Interface Descriptor Subtypes */ /* see audio.h for the rest, which is identical to v1 */ #define UAC2_EFFECT_UNIT 0x07 #define UAC2_PROCESSING_UNIT_V2 0x08 #define UAC2_EXTENSION_UNIT_V2 0x09 #define UAC2_CLOCK_SOURCE 0x0a #define UAC2_CLOCK_SELECTOR 0x0b #define UAC2_CLOCK_MULTIPLIER 0x0c #define UAC2_SAMPLE_RATE_CONVERTER 0x0d /* A.10 Audio Class-Specific AS Interface Descriptor Subtypes */ /* see audio.h for the rest, which is identical to v1 */ #define UAC2_ENCODER 0x03 #define UAC2_DECODER 0x04 /* A.11 Effect Unit Effect Types */ #define UAC2_EFFECT_UNDEFINED 0x00 #define UAC2_EFFECT_PARAM_EQ 0x01 #define UAC2_EFFECT_REVERB 0x02 #define UAC2_EFFECT_MOD_DELAY 0x03 #define UAC2_EFFECT_DYN_RANGE_COMP 0x04 /* A.12 Processing Unit Process Types */ #define UAC2_PROCESS_UNDEFINED 0x00 #define UAC2_PROCESS_UP_DOWNMIX 0x01 #define UAC2_PROCESS_DOLBY_PROLOCIC 0x02 #define UAC2_PROCESS_STEREO_EXTENDER 0x03 /* A.14 Audio Class-Specific Request Codes */ #define UAC2_CS_CUR 0x01 #define UAC2_CS_RANGE 0x02 #define UAC2_CS_MEM 0x03 /* A.15 Encoder Type Codes */ #define UAC2_ENCODER_UNDEFINED 0x00 #define UAC2_ENCODER_OTHER 0x01 #define UAC2_ENCODER_MPEG 0x02 #define UAC2_ENCODER_AC3 0x03 #define UAC2_ENCODER_WMA 0x04 #define UAC2_ENCODER_DTS 0x05 /* A.16 Decoder Type Codes */ #define UAC2_DECODER_UNDEFINED 0x00 #define UAC2_DECODER_OTHER 0x01 #define UAC2_DECODER_MPEG 0x02 #define UAC2_DECODER_AC3 0x03 #define UAC2_DECODER_WMA 0x04 #define UAC2_DECODER_DTS 0x05 /* A.17.1 Clock Source Control Selectors */ #define UAC2_CS_UNDEFINED 0x00 #define UAC2_CS_CONTROL_SAM_FREQ 0x01 #define UAC2_CS_CONTROL_CLOCK_VALID 0x02 /* A.17.2 Clock Selector Control Selectors */ #define UAC2_CX_UNDEFINED 0x00 #define UAC2_CX_CLOCK_SELECTOR 0x01 /* A.17.3 Clock Multiplier Control Selectors */ #define UAC2_CM_UNDEFINED 0x00 #define UAC2_CM_NUMERATOR 0x01 #define UAC2_CM_DENOMINTATOR 0x02 /* A.17.4 Terminal Control Selectors */ #define UAC2_TE_UNDEFINED 0x00 #define UAC2_TE_COPY_PROTECT 0x01 #define UAC2_TE_CONNECTOR 0x02 #define UAC2_TE_OVERLOAD 0x03 #define UAC2_TE_CLUSTER 0x04 #define UAC2_TE_UNDERFLOW 0x05 #define UAC2_TE_OVERFLOW 0x06 #define UAC2_TE_LATENCY 0x07 /* A.17.5 Mixer Control Selectors */ #define UAC2_MU_UNDEFINED 0x00 #define UAC2_MU_MIXER 0x01 #define UAC2_MU_CLUSTER 0x02 
#define UAC2_MU_UNDERFLOW 0x03 #define UAC2_MU_OVERFLOW 0x04 #define UAC2_MU_LATENCY 0x05 /* A.17.6 Selector Control Selectors */ #define UAC2_SU_UNDEFINED 0x00 #define UAC2_SU_SELECTOR 0x01 #define UAC2_SU_LATENCY 0x02 /* A.17.7 Feature Unit Control Selectors */ /* see audio.h for the rest, which is identical to v1 */ #define UAC2_FU_INPUT_GAIN 0x0b #define UAC2_FU_INPUT_GAIN_PAD 0x0c #define UAC2_FU_PHASE_INVERTER 0x0d #define UAC2_FU_UNDERFLOW 0x0e #define UAC2_FU_OVERFLOW 0x0f #define UAC2_FU_LATENCY 0x10 /* A.17.8.1 Parametric Equalizer Section Effect Unit Control Selectors */ #define UAC2_PE_UNDEFINED 0x00 #define UAC2_PE_ENABLE 0x01 #define UAC2_PE_CENTERFREQ 0x02 #define UAC2_PE_QFACTOR 0x03 #define UAC2_PE_GAIN 0x04 #define UAC2_PE_UNDERFLOW 0x05 #define UAC2_PE_OVERFLOW 0x06 #define UAC2_PE_LATENCY 0x07 /* A.17.8.2 Reverberation Effect Unit Control Selectors */ #define UAC2_RV_UNDEFINED 0x00 #define UAC2_RV_ENABLE 0x01 #define UAC2_RV_TYPE 0x02 #define UAC2_RV_LEVEL 0x03 #define UAC2_RV_TIME 0x04 #define UAC2_RV_FEEDBACK 0x05 #define UAC2_RV_PREDELAY 0x06 #define UAC2_RV_DENSITY 0x07 #define UAC2_RV_HIFREQ_ROLLOFF 0x08 #define UAC2_RV_UNDERFLOW 0x09 #define UAC2_RV_OVERFLOW 0x0a #define UAC2_RV_LATENCY 0x0b /* A.17.8.3 Modulation Delay Effect Control Selectors */ #define UAC2_MD_UNDEFINED 0x00 #define UAC2_MD_ENABLE 0x01 #define UAC2_MD_BALANCE 0x02 #define UAC2_MD_RATE 0x03 #define UAC2_MD_DEPTH 0x04 #define UAC2_MD_TIME 0x05 #define UAC2_MD_FEEDBACK 0x06 #define UAC2_MD_UNDERFLOW 0x07 #define UAC2_MD_OVERFLOW 0x08 #define UAC2_MD_LATENCY 0x09 /* A.17.8.4 Dynamic Range Compressor Effect Unit Control Selectors */ #define UAC2_DR_UNDEFINED 0x00 #define UAC2_DR_ENABLE 0x01 #define UAC2_DR_COMPRESSION_RATE 0x02 #define UAC2_DR_MAXAMPL 0x03 #define UAC2_DR_THRESHOLD 0x04 #define UAC2_DR_ATTACK_TIME 0x05 #define UAC2_DR_RELEASE_TIME 0x06 #define UAC2_DR_UNDEFLOW 0x07 #define UAC2_DR_OVERFLOW 0x08 #define UAC2_DR_LATENCY 0x09 /* A.17.9.1 Up/Down-mix Processing Unit Control Selectors */ #define UAC2_UD_UNDEFINED 0x00 #define UAC2_UD_ENABLE 0x01 #define UAC2_UD_MODE_SELECT 0x02 #define UAC2_UD_CLUSTER 0x03 #define UAC2_UD_UNDERFLOW 0x04 #define UAC2_UD_OVERFLOW 0x05 #define UAC2_UD_LATENCY 0x06 /* A.17.9.2 Dolby Prologic[tm] Processing Unit Control Selectors */ #define UAC2_DP_UNDEFINED 0x00 #define UAC2_DP_ENABLE 0x01 #define UAC2_DP_MODE_SELECT 0x02 #define UAC2_DP_CLUSTER 0x03 #define UAC2_DP_UNDERFFLOW 0x04 #define UAC2_DP_OVERFLOW 0x05 #define UAC2_DP_LATENCY 0x06 /* A.17.9.3 Stereo Expander Processing Unit Control Selectors */ #define UAC2_ST_EXT_UNDEFINED 0x00 #define UAC2_ST_EXT_ENABLE 0x01 #define UAC2_ST_EXT_WIDTH 0x02 #define UAC2_ST_EXT_UNDEFLOW 0x03 #define UAC2_ST_EXT_OVERFLOW 0x04 #define UAC2_ST_EXT_LATENCY 0x05 /* A.17.10 Extension Unit Control Selectors */ #define UAC2_XU_UNDEFINED 0x00 #define UAC2_XU_ENABLE 0x01 #define UAC2_XU_CLUSTER 0x02 #define UAC2_XU_UNDERFLOW 0x03 #define UAC2_XU_OVERFLOW 0x04 #define UAC2_XU_LATENCY 0x05 /* A.17.11 AudioStreaming Interface Control Selectors */ #define UAC2_AS_UNDEFINED 0x00 #define UAC2_AS_ACT_ALT_SETTING 0x01 #define UAC2_AS_VAL_ALT_SETTINGS 0x02 #define UAC2_AS_AUDIO_DATA_FORMAT 0x03 /* A.17.12 Encoder Control Selectors */ #define UAC2_EN_UNDEFINED 0x00 #define UAC2_EN_BIT_RATE 0x01 #define UAC2_EN_QUALITY 0x02 #define UAC2_EN_VBR 0x03 #define UAC2_EN_TYPE 0x04 #define UAC2_EN_UNDERFLOW 0x05 #define UAC2_EN_OVERFLOW 0x06 #define UAC2_EN_ENCODER_ERROR 0x07 #define UAC2_EN_PARAM1 0x08 #define UAC2_EN_PARAM2 0x09 #define 
UAC2_EN_PARAM3 0x0a #define UAC2_EN_PARAM4 0x0b #define UAC2_EN_PARAM5 0x0c #define UAC2_EN_PARAM6 0x0d #define UAC2_EN_PARAM7 0x0e #define UAC2_EN_PARAM8 0x0f /* A.17.13.1 MPEG Decoder Control Selectors */ #define UAC2_MPEG_UNDEFINED 0x00 #define UAC2_MPEG_DUAL_CHANNEL 0x01 #define UAC2_MPEG_SECOND_STEREO 0x02 #define UAC2_MPEG_MULTILINGUAL 0x03 #define UAC2_MPEG_DYN_RANGE 0x04 #define UAC2_MPEG_SCALING 0x05 #define UAC2_MPEG_HILO_SCALING 0x06 #define UAC2_MPEG_UNDERFLOW 0x07 #define UAC2_MPEG_OVERFLOW 0x08 #define UAC2_MPEG_DECODER_ERROR 0x09 /* A17.13.2 AC3 Decoder Control Selectors */ #define UAC2_AC3_UNDEFINED 0x00 #define UAC2_AC3_MODE 0x01 #define UAC2_AC3_DYN_RANGE 0x02 #define UAC2_AC3_SCALING 0x03 #define UAC2_AC3_HILO_SCALING 0x04 #define UAC2_AC3_UNDERFLOW 0x05 #define UAC2_AC3_OVERFLOW 0x06 #define UAC2_AC3_DECODER_ERROR 0x07 /* A17.13.3 WMA Decoder Control Selectors */ #define UAC2_WMA_UNDEFINED 0x00 #define UAC2_WMA_UNDERFLOW 0x01 #define UAC2_WMA_OVERFLOW 0x02 #define UAC2_WMA_DECODER_ERROR 0x03 /* A17.13.4 DTS Decoder Control Selectors */ #define UAC2_DTS_UNDEFINED 0x00 #define UAC2_DTS_UNDERFLOW 0x01 #define UAC2_DTS_OVERFLOW 0x02 #define UAC2_DTS_DECODER_ERROR 0x03 /* A17.14 Endpoint Control Selectors */ #define UAC2_EP_CS_UNDEFINED 0x00 #define UAC2_EP_CS_PITCH 0x01 #define UAC2_EP_CS_DATA_OVERRUN 0x02 #define UAC2_EP_CS_DATA_UNDERRUN 0x03 #endif /* __LINUX_USB_AUDIO_V2_H */
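/*
 * Illustrative stand-alone sketch (not part of this header): decoding a UAC2
 * bmaControls bitmap with the bit-pair rules described at the top of the file
 * (0b00 absent, 0b01 read-only, 0b11 read/write), mirroring
 * uac_v2v3_control_is_readable()/uac_v2v3_control_is_writeable().  The control
 * numbers used here (1 and 2) are just example slots, not spec-defined
 * selectors.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool control_is_readable(uint32_t bmControls, uint8_t control)
{
	return (bmControls >> ((control - 1) * 2)) & 0x1;
}

static bool control_is_writeable(uint32_t bmControls, uint8_t control)
{
	return (bmControls >> ((control - 1) * 2)) & 0x2;
}

int main(void)
{
	/* control 1 read-only (0b01), control 2 read/write (0b11) */
	uint32_t bmaControls = 0x1 | (0x3 << 2);

	for (uint8_t ctl = 1; ctl <= 2; ctl++)
		printf("control %d: readable=%d writeable=%d\n", ctl,
		       control_is_readable(bmaControls, ctl),
		       control_is_writeable(bmaControls, ctl));
	return 0;
}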
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/core/gen_stats.c * * Authors: Thomas Graf <tgraf@suug.ch> * Jamal Hadi Salim * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * See Documentation/networking/gen_stats.rst */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/socket.h> #include <linux/rtnetlink.h> #include <linux/gen_stats.h> #include <net/netlink.h> #include <net/gen_stats.h> #include <net/sch_generic.h> static inline int gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) { if (nla_put_64bit(d->skb, type, size, buf, padattr)) goto nla_put_failure; return 0; nla_put_failure: if (d->lock) spin_unlock_bh(d->lock); kfree(d->xstats); d->xstats = NULL; d->xstats_len = 0; return -1; } /** * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode * @skb: socket buffer to put statistics TLVs into * @type: TLV type for top level statistic TLV * @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV * @xstats_type: TLV type for backward compatibility xstats TLV * @lock: statistics lock * @d: dumping handle * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use as a container for all * other statistic TLVs. * * The dumping handle is marked to be in backward compatibility mode telling * all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats.
* * Returns 0 on success or -1 if the room in the socket buffer was not sufficient. */ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, int xstats_type, spinlock_t *lock, struct gnet_dump *d, int padattr) __acquires(lock) { memset(d, 0, sizeof(*d)); if (type) d->tail = (struct nlattr *)skb_tail_pointer(skb); d->skb = skb; d->compat_tc_stats = tc_stats_type; d->compat_xstats = xstats_type; d->padattr = padattr; if (lock) { d->lock = lock; spin_lock_bh(lock); } if (d->tail) { int ret = gnet_stats_copy(d, type, NULL, 0, padattr); /* The initial attribute added in gnet_stats_copy() may be * preceded by a padding attribute, in which case d->tail will * end up pointing at the padding instead of the real attribute. * Fix this so gnet_stats_finish_copy() adjusts the length of * the right attribute. */ if (ret == 0 && d->tail->nla_type == padattr) d->tail = (struct nlattr *)((char *)d->tail + NLA_ALIGN(d->tail->nla_len)); return ret; } return 0; } EXPORT_SYMBOL(gnet_stats_start_copy_compat); /** * gnet_stats_start_copy - start dumping procedure in compatibility mode * @skb: socket buffer to put statistics TLVs into * @type: TLV type for top level statistic TLV * @lock: statistics lock * @d: dumping handle * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all * other statistic TLVS. * * Returns 0 on success or -1 if the room in the socket buffer was not sufficient. */ int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr) { return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d, padattr); } EXPORT_SYMBOL(gnet_stats_start_copy); /* Must not be inlined, due to u64_stats seqcount_t lockdep key */ void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b) { u64_stats_set(&b->bytes, 0); u64_stats_set(&b->packets, 0); u64_stats_init(&b->syncp); } EXPORT_SYMBOL(gnet_stats_basic_sync_init); static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu) { u64 t_bytes = 0, t_packets = 0; int i; for_each_possible_cpu(i) { struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); unsigned int start; u64 bytes, packets; do { start = u64_stats_fetch_begin(&bcpu->syncp); bytes = u64_stats_read(&bcpu->bytes); packets = u64_stats_read(&bcpu->packets); } while (u64_stats_fetch_retry(&bcpu->syncp, start)); t_bytes += bytes; t_packets += packets; } _bstats_update(bstats, t_bytes, t_packets); } void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu, struct gnet_stats_basic_sync *b, bool running) { unsigned int start; u64 bytes = 0; u64 packets = 0; WARN_ON_ONCE((cpu || running) && in_hardirq()); if (cpu) { gnet_stats_add_basic_cpu(bstats, cpu); return; } do { if (running) start = u64_stats_fetch_begin(&b->syncp); bytes = u64_stats_read(&b->bytes); packets = u64_stats_read(&b->packets); } while (running && u64_stats_fetch_retry(&b->syncp, start)); _bstats_update(bstats, bytes, packets); } EXPORT_SYMBOL(gnet_stats_add_basic); static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, struct gnet_stats_basic_sync __percpu *cpu, struct gnet_stats_basic_sync *b, bool running) { unsigned int start; if (cpu) { u64 t_bytes = 0, t_packets = 0; int i; for_each_possible_cpu(i) { struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); unsigned int start; u64 bytes, packets; do { start = 
u64_stats_fetch_begin(&bcpu->syncp); bytes = u64_stats_read(&bcpu->bytes); packets = u64_stats_read(&bcpu->packets); } while (u64_stats_fetch_retry(&bcpu->syncp, start)); t_bytes += bytes; t_packets += packets; } *ret_bytes = t_bytes; *ret_packets = t_packets; return; } do { if (running) start = u64_stats_fetch_begin(&b->syncp); *ret_bytes = u64_stats_read(&b->bytes); *ret_packets = u64_stats_read(&b->packets); } while (running && u64_stats_fetch_retry(&b->syncp, start)); } static int ___gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, struct gnet_stats_basic_sync *b, int type, bool running) { u64 bstats_bytes, bstats_packets; gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running); if (d->compat_tc_stats && type == TCA_STATS_BASIC) { d->tc_stats.bytes = bstats_bytes; d->tc_stats.packets = bstats_packets; } if (d->tail) { struct gnet_stats_basic sb; int res; memset(&sb, 0, sizeof(sb)); sb.bytes = bstats_bytes; sb.packets = bstats_packets; res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD); if (res < 0 || sb.packets == bstats_packets) return res; /* emit 64bit stats only if needed */ return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets, sizeof(bstats_packets), TCA_STATS_PAD); } return 0; } /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics * @running: true if @b represents a running qdisc, thus @b's * internal values might change during basic reads. * Only used if @cpu is NULL * * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). * * Returns 0 on success or -1 with the statistic lock released * if the room in the socket buffer was not sufficient. */ int gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, struct gnet_stats_basic_sync *b, bool running) { return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running); } EXPORT_SYMBOL(gnet_stats_copy_basic); /** * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics * @running: true if @b represents a running qdisc, thus @b's * internal values might change during basic reads. * Only used if @cpu is NULL * * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). * * Returns 0 on success or -1 with the statistic lock released * if the room in the socket buffer was not sufficient. */ int gnet_stats_copy_basic_hw(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, struct gnet_stats_basic_sync *b, bool running) { return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running); } EXPORT_SYMBOL(gnet_stats_copy_basic_hw); /** * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV * @d: dumping handle * @rate_est: rate estimator * * Appends the rate estimator statistics to the top level TLV created by * gnet_stats_start_copy(). * * Returns 0 on success or -1 with the statistic lock released * if the room in the socket buffer was not sufficient. 
*/ int gnet_stats_copy_rate_est(struct gnet_dump *d, struct net_rate_estimator __rcu **rate_est) { struct gnet_stats_rate_est64 sample; struct gnet_stats_rate_est est; int res; if (!gen_estimator_read(rate_est, &sample)) return 0; est.bps = min_t(u64, UINT_MAX, sample.bps); /* we have some time before reaching 2^32 packets per second */ est.pps = sample.pps; if (d->compat_tc_stats) { d->tc_stats.bps = est.bps; d->tc_stats.pps = est.pps; } if (d->tail) { res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est), TCA_STATS_PAD); if (res < 0 || est.bps == sample.bps) return res; /* emit 64bit stats only if needed */ return gnet_stats_copy(d, TCA_STATS_RATE_EST64, &sample, sizeof(sample), TCA_STATS_PAD); } return 0; } EXPORT_SYMBOL(gnet_stats_copy_rate_est); static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, const struct gnet_stats_queue __percpu *q) { int i; for_each_possible_cpu(i) { const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); qstats->qlen += qcpu->qlen; qstats->backlog += qcpu->backlog; qstats->drops += qcpu->drops; qstats->requeues += qcpu->requeues; qstats->overlimits += qcpu->overlimits; } } void gnet_stats_add_queue(struct gnet_stats_queue *qstats, const struct gnet_stats_queue __percpu *cpu, const struct gnet_stats_queue *q) { if (cpu) { gnet_stats_add_queue_cpu(qstats, cpu); } else { qstats->qlen += q->qlen; qstats->backlog += q->backlog; qstats->drops += q->drops; qstats->requeues += q->requeues; qstats->overlimits += q->overlimits; } } EXPORT_SYMBOL(gnet_stats_add_queue); /** * gnet_stats_copy_queue - copy queue statistics into statistics TLV * @d: dumping handle * @cpu_q: per cpu queue statistics * @q: queue statistics * @qlen: queue length statistics * * Appends the queue statistics to the top level TLV created by * gnet_stats_start_copy(). Using per cpu queue statistics if * they are available. * * Returns 0 on success or -1 with the statistic lock released * if the room in the socket buffer was not sufficient. */ int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen) { struct gnet_stats_queue qstats = {0}; gnet_stats_add_queue(&qstats, cpu_q, q); qstats.qlen = qlen; if (d->compat_tc_stats) { d->tc_stats.drops = qstats.drops; d->tc_stats.qlen = qstats.qlen; d->tc_stats.backlog = qstats.backlog; d->tc_stats.overlimits = qstats.overlimits; } if (d->tail) return gnet_stats_copy(d, TCA_STATS_QUEUE, &qstats, sizeof(qstats), TCA_STATS_PAD); return 0; } EXPORT_SYMBOL(gnet_stats_copy_queue); /** * gnet_stats_copy_app - copy application specific statistics into statistics TLV * @d: dumping handle * @st: application specific statistics data * @len: length of data * * Appends the application specific statistics to the top level TLV created by * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping * handle is in backward compatibility mode. * * Returns 0 on success or -1 with the statistic lock released * if the room in the socket buffer was not sufficient. 
*/ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) { if (d->compat_xstats) { d->xstats = kmemdup(st, len, GFP_ATOMIC); if (!d->xstats) goto err_out; d->xstats_len = len; } if (d->tail) return gnet_stats_copy(d, TCA_STATS_APP, st, len, TCA_STATS_PAD); return 0; err_out: if (d->lock) spin_unlock_bh(d->lock); d->xstats_len = 0; return -1; } EXPORT_SYMBOL(gnet_stats_copy_app); /** * gnet_stats_finish_copy - finish dumping procedure * @d: dumping handle * * Corrects the length of the top level TLV to include all TLVs added * by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs * if gnet_stats_start_copy_compat() was used and releases the statistics * lock. * * Returns 0 on success or -1 with the statistic lock released * if the room in the socket buffer was not sufficient. */ int gnet_stats_finish_copy(struct gnet_dump *d) { if (d->tail) d->tail->nla_len = skb_tail_pointer(d->skb) - (u8 *)d->tail; if (d->compat_tc_stats) if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, sizeof(d->tc_stats), d->padattr) < 0) return -1; if (d->compat_xstats && d->xstats) { if (gnet_stats_copy(d, d->compat_xstats, d->xstats, d->xstats_len, d->padattr) < 0) return -1; } if (d->lock) spin_unlock_bh(d->lock); kfree(d->xstats); d->xstats = NULL; d->xstats_len = 0; return 0; } EXPORT_SYMBOL(gnet_stats_finish_copy);
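/*
 * Illustrative sketch, not part of net/core/gen_stats.c: how a queueing
 * discipline's ->dump_stats() callback might use the helpers above once the
 * traffic-control core has set up the struct gnet_dump for it. The qdisc
 * private statistics structure and its layout are hypothetical; only the
 * gnet_stats_copy_*() calls are taken from this file.
 */
struct example_qdisc_xstats {		/* hypothetical qdisc-specific counters */
	__u32 marked;
	__u32 early_drops;
};

static int example_qdisc_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct example_qdisc_xstats *xst = qdisc_priv(sch);	/* hypothetical layout */

	/* Byte/packet counters; no per-CPU variant here, qdisc may be running */
	if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0)
		return -1;

	/* Queue counters plus the current queue length */
	if (gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
		return -1;

	/* Optional qdisc-specific statistics blob, also kept for compat xstats */
	return gnet_stats_copy_app(d, xst, sizeof(*xst));
}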
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/compat.h>
#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <net/netfilter/nf_log.h>
#include "../../netfilter/xt_repldata.h"

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");

void *ipt_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ipt, IPT);
}
EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);

/* Returns whether matches rule or not.
*/ /* Performance critical - called for every packet */ static inline bool ip_packet_match(const struct iphdr *ip, const char *indev, const char *outdev, const struct ipt_ip *ipinfo, int isfrag) { unsigned long ret; if (NF_INVF(ipinfo, IPT_INV_SRCIP, (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || NF_INVF(ipinfo, IPT_INV_DSTIP, (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) return false; ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) return false; /* Check specific protocol */ if (ipinfo->proto && NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) return false; /* If we have a fragment rule but the packet is not a fragment * then we return zero */ if (NF_INVF(ipinfo, IPT_INV_FRAG, (ipinfo->flags & IPT_F_FRAG) && !isfrag)) return false; return true; } static bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) return false; if (ip->invflags & ~IPT_INV_MASK) return false; return true; } static unsigned int ipt_error(struct sk_buff *skb, const struct xt_action_param *par) { net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } /* Performance critical */ static inline struct ipt_entry * get_entry(const void *base, unsigned int offset) { return (struct ipt_entry *)(base + offset); } /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; return e->target_offset == sizeof(struct ipt_entry) && memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; } /* for const-correctness */ static inline const struct xt_entry_target * ipt_get_target_c(const struct ipt_entry *e) { return ipt_get_target((struct ipt_entry *)e); } #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", [NF_INET_FORWARD] = "FORWARD", [NF_INET_LOCAL_OUT] = "OUTPUT", [NF_INET_POST_ROUTING] = "POSTROUTING", }; enum nf_ip_trace_comments { NF_IP_TRACE_COMMENT_RULE, NF_IP_TRACE_COMMENT_RETURN, NF_IP_TRACE_COMMENT_POLICY, }; static const char *const comments[] = { [NF_IP_TRACE_COMMENT_RULE] = "rule", [NF_IP_TRACE_COMMENT_RETURN] = "return", [NF_IP_TRACE_COMMENT_POLICY] = "policy", }; static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { .level = 4, .logflags = NF_LOG_DEFAULT_MASK, }, }, }; /* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { const struct xt_standard_target *t = (void *)ipt_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ *chainname = t->target.data; (*rulenum) = 0; } else if (s == e) { (*rulenum)++; if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? 
comments[NF_IP_TRACE_COMMENT_POLICY] : comments[NF_IP_TRACE_COMMENT_RETURN]; } return 1; } else (*rulenum)++; return 0; } static void trace_packet(struct net *net, const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, const struct xt_table_info *private, const struct ipt_entry *e) { const struct ipt_entry *root; const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) if (get_chainname_rulenum(iter, e, hookname, &chainname, &comment, &rulenum) != 0) break; nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } #endif static inline struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) { return (void *)entry + entry->next_offset; } /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; const void *table_base; struct ipt_entry *e, **jumpstack; unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ stackidx = 0; ip = ip_hdr(skb); indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; acpar.state = state; WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; /* Switch to alternate jumpstack if we're being invoked via TEE. * TEE issues XT_CONTINUE verdict on original skb so we must not * clobber the jumpstack. * * For recursion via REJECT or SYNPROXY the stack will be clobbered * but it is no problem since absolute verdict is issued by these. 
*/ if (static_key_false(&xt_tee_enabled)) jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; struct xt_counters *counter; WARN_ON(!e); if (!ip_packet_match(ip, indev, outdev, &e->ip, acpar.fragoff)) { no_match: e = ipt_next_entry(e); continue; } xt_ematch_foreach(ematch, e) { acpar.match = ematch->u.kernel.match; acpar.matchinfo = ematch->data; if (!acpar.match->match(skb, &acpar)) goto no_match; } counter = xt_get_this_cpu_counter(&e->counters); ADD_COUNTER(*counter, skb->len, 1); t = ipt_get_target_c(e); WARN_ON(!t->u.kernel.target); #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(state->net, skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; v = ((struct xt_standard_target *)t)->verdict; if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { verdict = (unsigned int)(-v) - 1; break; } if (stackidx == 0) { e = get_entry(table_base, private->underflow[hook]); } else { e = jumpstack[--stackidx]; e = ipt_next_entry(e); } continue; } if (table_base + v != ipt_next_entry(e) && !(e->ip.flags & IPT_F_GOTO)) { if (unlikely(stackidx >= private->stacksize)) { verdict = NF_DROP; break; } jumpstack[stackidx++] = e; } e = get_entry(table_base, v); continue; } acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == XT_CONTINUE) { /* Target might have changed stuff. */ ip = ip_hdr(skb); e = ipt_next_entry(e); } else { /* Verdict */ break; } } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); if (acpar.hotdrop) return NF_DROP; else return verdict; } /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0, unsigned int *offsets) { unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ipt_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; /* Set initial back pointer. */ e->counters.pcnt = pos; for (;;) { const struct xt_standard_target *t = (void *)ipt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && t->verdict < 0) || visited) { unsigned int oldpos, size; /* Return: backtrack through the last big jump. */ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; /* We're at the start. */ if (pos == oldpos) goto next; e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; pos += size; } else { int newpos = t->verdict; if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } } next: ; } return 1; } static void cleanup_match(struct xt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV4; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; par->match = m->u.kernel.match; par->matchinfo = m->data; return xt_check_match(par, m->u.match_size - sizeof(*m), ip->proto, ip->invflags & IPT_INV_PROTO); } static int find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; ret = check_match(m, par); if (ret) goto err; return 0; err: module_put(m->u.kernel.match->me); return ret; } static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, .targinfo = t->data, .hook_mask = e->comefrom, .family = NFPROTO_IPV4, }; return xt_check_target(&par, t->u.target_size - sizeof(*t), e->ip.proto, e->ip.invflags & IPT_INV_PROTO); } static int find_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; j = 0; memset(&mtpar, 0, sizeof(mtpar)); mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; xt_ematch_foreach(ematch, e) { ret = find_check_match(ematch, &mtpar); if (ret != 0) goto cleanup_matches; ++j; } t = ipt_get_target(e); target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; ret = check_target(e, net, name); if (ret) goto err; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; cleanup_match(ematch, net); } xt_percpu_counter_free(&e->counters); return ret; } static bool check_underflow(const struct ipt_entry *e) { const struct xt_entry_target *t; unsigned int verdict; if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct xt_standard_target *)t)->verdict; verdict = -verdict - 1; return verdict == NF_DROP || verdict == NF_ACCEPT; } static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks) { unsigned int h; int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) return -EINVAL; if 
(!ip_checkentry(&e->ip)) return -EINVAL; err = xt_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (err) return err; /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) return -EINVAL; newinfo->underflow[h] = underflows[h]; } } /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; return 0; } static void cleanup_entry(struct ipt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) cleanup_match(ematch, net); t = ipt_get_target(e); par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV4; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ipt_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ipt_entry *iter; unsigned int *offsets; unsigned int i; int ret = 0; newinfo->size = repl->size; newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } offsets = xt_alloc_entry_offsets(newinfo->number); if (!offsets) return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, entry0 + repl->size, repl->hook_entry, repl->underflow, repl->valid_hooks); if (ret != 0) goto out_free; if (i < repl->num_entries) offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ipt_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } ret = -EINVAL; if (i != repl->num_entries) goto out_free; ret = xt_check_table_hooks(newinfo, repl->valid_hooks); if (ret) goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; goto out_free; } kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; ++i; } if (ret != 0) { xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; cleanup_entry(iter, net); } return ret; } return ret; out_free: kvfree(offsets); return ret; } static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ipt_entry *iter; unsigned int cpu; unsigned int i; for_each_possible_cpu(cpu) { seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); bcnt = tmp->bcnt; pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ cond_resched(); } } } static void get_old_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ipt_entry *iter; unsigned int cpu, i; for_each_possible_cpu(cpu) { i = 0; 
xt_entry_foreach(iter, t->entries, t->size) { const struct xt_counters *tmp; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); ++i; /* macro does multi eval of i */ } cond_resched(); } } static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); get_counters(private, counters); return counters; } static int copy_entries_to_user(unsigned int total_size, const struct xt_table *table, void __user *userptr) { unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); loc_cpu_entry = private->entries; /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; const struct xt_entry_match *m; const struct xt_entry_target *t; e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; } if (copy_to_user(userptr + off + offsetof(struct ipt_entry, counters), &counters[num], sizeof(counters[num])) != 0) { ret = -EFAULT; goto free_counters; } for (i = sizeof(struct ipt_entry); i < e->target_offset; i += m->u.match_size) { m = (void *)e + i; if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ipt_get_target_c(e); if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } } free_counters: vfree(counters); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; if (v > 0) v += xt_compat_calc_jump(AF_INET, v); memcpy(dst, &v, sizeof(v)); } static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; if (cv > 0) cv -= xt_compat_calc_jump(AF_INET, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; } static int compat_calc_entry(const struct ipt_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { const struct xt_entry_match *ematch; const struct xt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - base; xt_ematch_foreach(ematch, e) off += xt_compat_match_offset(ematch->u.kernel.match); t = ipt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET, entry_offset, off); if (ret) return ret; for (i = 0; i < NF_INET_NUMHOOKS; i++) { if (info->hook_entry[i] && (e < (struct ipt_entry *)(base + info->hook_entry[i]))) newinfo->hook_entry[i] -= off; if (info->underflow[i] && (e < (struct ipt_entry *)(base + info->underflow[i]))) newinfo->underflow[i] -= off; } return 0; } static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ipt_entry *iter; const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; ret = xt_compat_init_offsets(AF_INET, info->number); if (ret) return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) return ret; } return 0; } #endif static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; int ret; if (*len != sizeof(struct ipt_getinfo)) return -EINVAL; if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_lock(AF_INET); #endif t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct xt_table_info tmp; if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; } #endif memset(&info, 0, sizeof(info)); info.valid_hooks = t->valid_hooks; memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); memcpy(info.underflow, private->underflow, sizeof(info.underflow)); info.num_entries = private->number; info.size = private->size; strscpy(info.name, name); if (copy_to_user(user, &info, *len) != 0) ret = -EFAULT; else ret = 0; xt_table_unlock(t); module_put(t->me); } else ret = PTR_ERR(t); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_unlock(AF_INET); #endif return ret; } static int get_entries(struct net *net, struct ipt_get_entries __user *uptr, const int *len) { int ret; struct ipt_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct ipt_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else ret = -EAGAIN; module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); return ret; } static int __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct 
xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { int ret; struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; struct ipt_entry *iter; counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; } t = xt_request_find_table_lock(net, AF_INET, name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } /* You lied! */ if (valid_hooks != t->valid_hooks) { ret = -EINVAL; goto put_module; } oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; /* Update module usage count based on number of rules */ if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); if ((oldinfo->number > oldinfo->initial_entries) && (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); xt_table_unlock(t); get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) { /* Silent error, can't fail, new table is already in place */ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); } vfree(counters); return 0; put_module: module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: vfree(counters); out: return ret; } static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ipt_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ipt_entry *iter; unsigned int addend; paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free; } local_bh_disable(); private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: vfree(paddc); return ret; 
} #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct compat_ipt_replace { char name[XT_TABLE_MAXNAMELEN]; u32 valid_hooks; u32 num_entries; u32 size; u32 hook_entry[NF_INET_NUMHOOKS]; u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ struct compat_ipt_entry entries[]; }; static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, unsigned int i) { struct xt_entry_target *t; struct compat_ipt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; const struct xt_entry_match *ematch; int ret = 0; origsize = *size; ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) return -EFAULT; *dstptr += sizeof(struct compat_ipt_entry); *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); xt_ematch_foreach(ematch, e) { ret = xt_compat_match_to_user(ematch, dstptr, size); if (ret != 0) return ret; } target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) return ret; next_offset = e->next_offset - (origsize - *size); if (put_user(target_offset, &ce->target_offset) != 0 || put_user(next_offset, &ce->next_offset) != 0) return -EFAULT; return 0; } static int compat_find_calc_match(struct xt_entry_match *m, const struct ipt_ip *ip, int *size) { struct xt_match *match; match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; } static void compat_release_entry(struct compat_ipt_entry *e) { struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) module_put(ematch->u.kernel.match->me); t = compat_ipt_get_target(e); module_put(t->u.kernel.target->me); } static int check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; int ret, off; if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct compat_ipt_entry) + sizeof(struct compat_xt_entry_target)) return -EINVAL; if (!ip_checkentry(&e->ip)) return -EINVAL; ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, &e->ip, &off); if (ret != 0) goto release_matches; ++j; } t = compat_ipt_get_target(e); target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; off += xt_compat_target_offset(target); *size += off; ret = xt_compat_add_offset(AF_INET, entry_offset, off); if (ret) goto out; return 0; out: module_put(t->u.kernel.target->me); release_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; module_put(ematch->u.kernel.match->me); } return ret; } static void 
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ipt_entry *de; unsigned int origsize; int h; struct xt_entry_match *ematch; origsize = *size; de = *dstptr; memcpy(de, e, sizeof(struct ipt_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); xt_ematch_foreach(ematch, e) xt_compat_match_from_user(ematch, dstptr, size); de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } } static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, void **pentry0, const struct compat_ipt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; struct ipt_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; size = compatr->size; info->number = compatr->num_entries; j = 0; xt_compat_lock(AF_INET); ret = xt_compat_init_offsets(AF_INET, compatr->num_entries); if (ret) goto out_unlock; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; if (j != compatr->num_entries) goto out_unlock; ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; memset(newinfo->entries, 0, size); newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); /* all module references in entry0 are now gone. * entry1/newinfo contains a 64bit ruleset that looks exactly as * generated by 64bit userspace. * * Call standard translate_table() to validate all hook_entrys, * underflows, check for loops, etc. 
*/ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); memcpy(&repl, compatr, sizeof(*compatr)); for (i = 0; i < NF_INET_NUMHOOKS; i++) { repl.hook_entry[i] = newinfo->hook_entry[i]; repl.underflow[i] = newinfo->underflow[i]; } repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); return 0; free_newinfo: xt_free_table_info(newinfo); return ret; out_unlock: xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; } static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ipt_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } struct compat_ipt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; struct compat_ipt_entry entrytable[]; }; static int compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; unsigned int i = 0; struct ipt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); pos = userptr; size = total_size; xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) break; } vfree(counters); return ret; } static int compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len) { int ret; struct compat_ipt_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct compat_ipt_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); else if (!ret) ret = -EAGAIN; xt_compat_flush_offsets(AF_INET); module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); xt_compat_unlock(AF_INET); return ret; } #endif static int do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int 
ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IPT_SO_SET_REPLACE: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_do_replace(sock_net(sk), arg, len); else #endif ret = do_replace(sock_net(sk), arg, len); break; case IPT_SO_SET_ADD_COUNTERS: ret = do_add_counters(sock_net(sk), arg, len); break; default: ret = -EINVAL; } return ret; } static int do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IPT_SO_GET_INFO: ret = get_info(sock_net(sk), user, len); break; case IPT_SO_GET_ENTRIES: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_get_entries(sock_net(sk), user, len); else #endif ret = get_entries(sock_net(sk), user, len); break; case IPT_SO_GET_REVISION_MATCH: case IPT_SO_GET_REVISION_TARGET: { struct xt_get_revision rev; int target; if (*len != sizeof(rev)) { ret = -EINVAL; break; } if (copy_from_user(&rev, user, sizeof(rev)) != 0) { ret = -EFAULT; break; } rev.name[sizeof(rev.name)-1] = 0; if (cmd == IPT_SO_GET_REVISION_TARGET) target = 1; else target = 0; try_then_request_module(xt_find_revision(AF_INET, rev.name, rev.revision, target, &ret), "ipt_%s", rev.name); break; } default: ret = -EINVAL; } return ret; } static void __ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; struct ipt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); } int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *template_ops) { struct nf_hook_ops *ops; unsigned int num_ops; int ret, i; struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) { xt_free_table_info(newinfo); return ret; } new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ipt_entry *iter; xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); xt_free_table_info(newinfo); return PTR_ERR(new_table); } /* No template? No need to do anything. This is used by 'nat' table, it registers * with the nat core instead of the netfilter core. 
*/ if (!template_ops) return 0; num_ops = hweight32(table->valid_hooks); if (num_ops == 0) { ret = -EINVAL; goto out_free; } ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); if (!ops) { ret = -ENOMEM; goto out_free; } for (i = 0; i < num_ops; i++) ops[i].priv = new_table; new_table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); if (ret != 0) goto out_free; return ret; out_free: __ipt_unregister_table(net, new_table); return ret; } void ipt_unregister_table_pre_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); if (table) nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ipt_unregister_table_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); if (table) __ipt_unregister_table(net, table); } static struct xt_target ipt_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NFPROTO_IPV4, #ifdef CONFIG_NETFILTER_XTABLES_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, .compat_to_user = compat_standard_to_user, #endif }, { .name = XT_ERROR_TARGET, .target = ipt_error, .targetsize = XT_FUNCTION_MAXNAMELEN, .family = NFPROTO_IPV4, }, }; static struct nf_sockopt_ops ipt_sockopts = { .pf = PF_INET, .set_optmin = IPT_BASE_CTL, .set_optmax = IPT_SO_SET_MAX+1, .set = do_ipt_set_ctl, .get_optmin = IPT_BASE_CTL, .get_optmax = IPT_SO_GET_MAX+1, .get = do_ipt_get_ctl, .owner = THIS_MODULE, }; static int __net_init ip_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV4); } static void __net_exit ip_tables_net_exit(struct net *net) { xt_proto_fini(net, NFPROTO_IPV4); } static struct pernet_operations ip_tables_net_ops = { .init = ip_tables_net_init, .exit = ip_tables_net_exit, }; static int __init ip_tables_init(void) { int ret; ret = register_pernet_subsys(&ip_tables_net_ops); if (ret < 0) goto err1; /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); if (ret < 0) goto err2; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); if (ret < 0) goto err4; return 0; err4: xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); err2: unregister_pernet_subsys(&ip_tables_net_ops); err1: return ret; } static void __exit ip_tables_fini(void) { nf_unregister_sockopt(&ipt_sockopts); xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); unregister_pernet_subsys(&ip_tables_net_ops); } EXPORT_SYMBOL(ipt_register_table); EXPORT_SYMBOL(ipt_unregister_table_pre_exit); EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); module_exit(ip_tables_fini);
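/*
 * Illustrative sketch, not part of net/ipv4/netfilter/ip_tables.c: the
 * registration pattern a table module (e.g. a filter-style table) follows
 * with the helpers exported above. "example_table", "example_hook_ops" and
 * the pernet init/exit function names are hypothetical; only
 * ipt_alloc_initial_table(), ipt_register_table(),
 * ipt_unregister_table_pre_exit() and ipt_unregister_table_exit() are the
 * functions defined in this file.
 */
static const struct xt_table example_table = {
	.name		= "example",
	.valid_hooks	= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT),
	.me		= THIS_MODULE,
	.af		= NFPROTO_IPV4,
	.priority	= NF_IP_PRI_FILTER,
};

/* nf_hook_ops template invoking ipt_do_table(), assumed to have been built
 * elsewhere at module init time (for instance via the x_tables core helper
 * xt_hook_ops_alloc(&example_table, ipt_do_table)).
 */
static struct nf_hook_ops *example_hook_ops __read_mostly;

static int __net_init example_table_init(struct net *net)
{
	struct ipt_replace *repl;
	int err;

	/* Initial ruleset: one unconditional policy rule per valid hook */
	repl = ipt_alloc_initial_table(&example_table);
	if (!repl)
		return -ENOMEM;
	err = ipt_register_table(net, &example_table, repl, example_hook_ops);
	kfree(repl);
	return err;
}

static void __net_exit example_table_pre_exit(struct net *net)
{
	ipt_unregister_table_pre_exit(net, "example");
}

static void __net_exit example_table_exit(struct net *net)
{
	ipt_unregister_table_exit(net, "example");
}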
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Media device
 *
 * Copyright (C) 2010 Nokia Corporation
 *
 * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 *           Sakari Ailus <sakari.ailus@iki.fi>
 */

#ifndef _MEDIA_DEVICE_H
#define _MEDIA_DEVICE_H

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/platform_device.h>

#include <media/media-devnode.h>
#include <media/media-entity.h>

struct ida;
struct media_device;

/**
 * struct media_entity_notify - Media Entity Notify
 *
 * @list: List head
 * @notify_data: Input data to invoke the callback
 * @notify: Callback function pointer
 *
 * Drivers may register a callback to take action when new entities get
 * registered with the media device. This handler is intended for creating
 * links between existing entities and should not create entities and register
 * them.
 */
struct media_entity_notify {
	struct list_head list;
	void *notify_data;
	void (*notify)(struct media_entity *entity, void *notify_data);
};

/**
 * struct media_device_ops - Media device operations
 * @link_notify: Link state change notification callback. This callback is
 *               called with the graph_mutex held.
 * @req_alloc: Allocate a request. Set this if you need to allocate a struct
 *             larger than struct media_request. @req_alloc and @req_free must
 *             either both be set or both be NULL.
 * @req_free: Free a request. Set this if @req_alloc was set as well, leave
 *            to NULL otherwise.
 * @req_validate: Validate a request, but do not queue yet. The req_queue_mutex
 *                lock is held when this op is called.
* @req_queue: Queue a validated request, cannot fail. If something goes * wrong when queueing this request then it should be marked * as such internally in the driver and any related buffers * must eventually return to vb2 with state VB2_BUF_STATE_ERROR. * The req_queue_mutex lock is held when this op is called. * It is important that vb2 buffer objects are queued last after * all other object types are queued: queueing a buffer kickstarts * the request processing, so all other objects related to the * request (and thus the buffer) must be available to the driver. * And once a buffer is queued, then the driver can complete * or delete objects from the request before req_queue exits. */ struct media_device_ops { int (*link_notify)(struct media_link *link, u32 flags, unsigned int notification); struct media_request *(*req_alloc)(struct media_device *mdev); void (*req_free)(struct media_request *req); int (*req_validate)(struct media_request *req); void (*req_queue)(struct media_request *req); }; /** * struct media_device - Media device * @dev: Parent device * @devnode: Media device node * @driver_name: Optional device driver name. If not set, calls to * %MEDIA_IOC_DEVICE_INFO will return ``dev->driver->name``. * This is needed for USB drivers for example, as otherwise * they'll all appear as if the driver name was "usb". * @model: Device model name * @serial: Device serial number (optional) * @bus_info: Unique and stable device location identifier * @hw_revision: Hardware device revision * @topology_version: Monotonic counter for storing the version of the graph * topology. Should be incremented each time the topology changes. * @id: Unique ID used on the last registered graph object * @entity_internal_idx: Unique internal entity ID used by the graph traversal * algorithms * @entity_internal_idx_max: Allocated internal entity indices * @entities: List of registered entities * @interfaces: List of registered interfaces * @pads: List of registered pads * @links: List of registered links * @entity_notify: List of registered entity_notify callbacks * @graph_mutex: Protects access to struct media_device data * @pm_count_walk: Graph walk for power state walk. Access serialised using * graph_mutex. * * @source_priv: Driver Private data for enable/disable source handlers * @enable_source: Enable Source Handler function pointer * @disable_source: Disable Source Handler function pointer * * @ops: Operation handler callbacks * @req_queue_mutex: Serialise the MEDIA_REQUEST_IOC_QUEUE ioctl w.r.t. * other operations that stop or start streaming. * @request_id: Used to generate unique request IDs * * This structure represents an abstract high-level media device. It allows easy * access to entities and provides basic media device-level support. The * structure can be allocated directly or embedded in a larger structure. * * The parent @dev is a physical device. It must be set before registering the * media device. * * @model is a descriptive model name exported through sysfs. It doesn't have to * be unique. * * @enable_source is a handler to find source entity for the * sink entity and activate the link between them if source * entity is free. Drivers should call this handler before * accessing the source. * * @disable_source is a handler to find source entity for the * sink entity and deactivate the link between them. Drivers * should call this handler to release the source. 
* * Use-case: find tuner entity connected to the decoder * entity and check if it is available, and activate the * link between them from @enable_source and deactivate * from @disable_source. * * .. note:: * * Bridge driver is expected to implement and set the * handler when &media_device is registered or when * bridge driver finds the media_device during probe. * Bridge driver sets source_priv with information * necessary to run @enable_source and @disable_source handlers. * Callers should hold graph_mutex to access and call @enable_source * and @disable_source handlers. */ struct media_device { /* dev->driver_data points to this struct. */ struct device *dev; struct media_devnode *devnode; char model[32]; char driver_name[32]; char serial[40]; char bus_info[32]; u32 hw_revision; u64 topology_version; u32 id; struct ida entity_internal_idx; int entity_internal_idx_max; struct list_head entities; struct list_head interfaces; struct list_head pads; struct list_head links; /* notify callback list invoked when a new entity is registered */ struct list_head entity_notify; /* Serializes graph operations. */ struct mutex graph_mutex; struct media_graph pm_count_walk; void *source_priv; int (*enable_source)(struct media_entity *entity, struct media_pipeline *pipe); void (*disable_source)(struct media_entity *entity); const struct media_device_ops *ops; struct mutex req_queue_mutex; atomic_t request_id; }; /* We don't need to include usb.h here */ struct usb_device; #ifdef CONFIG_MEDIA_CONTROLLER /* Supported link_notify @notification values. */ #define MEDIA_DEV_NOTIFY_PRE_LINK_CH 0 #define MEDIA_DEV_NOTIFY_POST_LINK_CH 1 /** * media_device_init() - Initializes a media device element * * @mdev: pointer to struct &media_device * * This function initializes the media device prior to its registration. * The media device initialization and registration is split in two functions * to avoid race conditions and make the media device available to user-space * before the media graph has been completed. * * So drivers need to first initialize the media device, register any entity * within the media device, create pad to pad links and then finally register * the media device by calling media_device_register() as a final step. * * The caller is responsible for initializing the media device before * registration. The following fields must be set: * * - dev must point to the parent device * - model must be filled with the device model name * * The bus_info field is set by media_device_init() for PCI and platform devices * if the field begins with '\0'. */ void media_device_init(struct media_device *mdev); /** * media_device_cleanup() - Cleanups a media device element * * @mdev: pointer to struct &media_device * * This function that will destroy the graph_mutex that is * initialized in media_device_init(). */ void media_device_cleanup(struct media_device *mdev); /** * __media_device_register() - Registers a media device element * * @mdev: pointer to struct &media_device * @owner: should be filled with %THIS_MODULE * * Users, should, instead, call the media_device_register() macro. * * The caller is responsible for initializing the &media_device structure * before registration. The following fields of &media_device must be set: * * - &media_device.model must be filled with the device model name as a * NUL-terminated UTF-8 string. The device/model revision must not be * stored in this field. 
* * The following fields are optional: * * - &media_device.serial is a unique serial number stored as a * NUL-terminated ASCII string. The field is big enough to store a GUID * in text form. If the hardware doesn't provide a unique serial number * this field must be left empty. * * - &media_device.bus_info represents the location of the device in the * system as a NUL-terminated ASCII string. For PCI/PCIe devices * &media_device.bus_info must be set to "PCI:" (or "PCIe:") followed by * the value of pci_name(). For USB devices,the usb_make_path() function * must be used. This field is used by applications to distinguish between * otherwise identical devices that don't provide a serial number. * * - &media_device.hw_revision is the hardware device revision in a * driver-specific format. When possible the revision should be formatted * with the KERNEL_VERSION() macro. * * .. note:: * * #) Upon successful registration a character device named media[0-9]+ is created. The device major and minor numbers are dynamic. The model name is exported as a sysfs attribute. * * #) Unregistering a media device that hasn't been registered is **NOT** safe. * * Return: returns zero on success or a negative error code. */ int __must_check __media_device_register(struct media_device *mdev, struct module *owner); /** * media_device_register() - Registers a media device element * * @mdev: pointer to struct &media_device * * This macro calls __media_device_register() passing %THIS_MODULE as * the __media_device_register() second argument (**owner**). */ #define media_device_register(mdev) __media_device_register(mdev, THIS_MODULE) /** * media_device_unregister() - Unregisters a media device element * * @mdev: pointer to struct &media_device * * It is safe to call this function on an unregistered (but initialised) * media device. */ void media_device_unregister(struct media_device *mdev); /** * media_device_register_entity() - registers a media entity inside a * previously registered media device. * * @mdev: pointer to struct &media_device * @entity: pointer to struct &media_entity to be registered * * Entities are identified by a unique positive integer ID. The media * controller framework will such ID automatically. IDs are not guaranteed * to be contiguous, and the ID number can change on newer Kernel versions. * So, neither the driver nor userspace should hardcode ID numbers to refer * to the entities, but, instead, use the framework to find the ID, when * needed. * * The media_entity name, type and flags fields should be initialized before * calling media_device_register_entity(). Entities embedded in higher-level * standard structures can have some of those fields set by the higher-level * framework. * * If the device has pads, media_entity_pads_init() should be called before * this function. Otherwise, the &media_entity.pad and &media_entity.num_pads * should be zeroed before calling this function. * * Entities have flags that describe the entity capabilities and state: * * %MEDIA_ENT_FL_DEFAULT * indicates the default entity for a given type. * This can be used to report the default audio and video devices or the * default camera sensor. * * .. note:: * * Drivers should set the entity function before calling this function. * Please notice that the values %MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN and * %MEDIA_ENT_F_UNKNOWN should not be used by the drivers. 
*/ int __must_check media_device_register_entity(struct media_device *mdev, struct media_entity *entity); /** * media_device_unregister_entity() - unregisters a media entity. * * @entity: pointer to struct &media_entity to be unregistered * * All links associated with the entity and all PADs are automatically * unregistered from the media_device when this function is called. * * Unregistering an entity will not change the IDs of the other entities and * the previoully used ID will never be reused for a newly registered entities. * * When a media device is unregistered, all its entities are unregistered * automatically. No manual entities unregistration is then required. * * .. note:: * * The media_entity instance itself must be freed explicitly by * the driver if required. */ void media_device_unregister_entity(struct media_entity *entity); /** * media_device_register_entity_notify() - Registers a media entity_notify * callback * * @mdev: The media device * @nptr: The media_entity_notify * * .. note:: * * When a new entity is registered, all the registered * media_entity_notify callbacks are invoked. */ void media_device_register_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr); /** * media_device_unregister_entity_notify() - Unregister a media entity notify * callback * * @mdev: The media device * @nptr: The media_entity_notify * */ void media_device_unregister_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr); /* Iterate over all entities. */ #define media_device_for_each_entity(entity, mdev) \ list_for_each_entry(entity, &(mdev)->entities, graph_obj.list) /* Iterate over all interfaces. */ #define media_device_for_each_intf(intf, mdev) \ list_for_each_entry(intf, &(mdev)->interfaces, graph_obj.list) /* Iterate over all pads. */ #define media_device_for_each_pad(pad, mdev) \ list_for_each_entry(pad, &(mdev)->pads, graph_obj.list) /* Iterate over all links. */ #define media_device_for_each_link(link, mdev) \ list_for_each_entry(link, &(mdev)->links, graph_obj.list) /** * media_device_pci_init() - create and initialize a * struct &media_device from a PCI device. * * @mdev: pointer to struct &media_device * @pci_dev: pointer to struct pci_dev * @name: media device name. If %NULL, the routine will use the default * name for the pci device, given by pci_name() macro. */ void media_device_pci_init(struct media_device *mdev, struct pci_dev *pci_dev, const char *name); /** * __media_device_usb_init() - create and initialize a * struct &media_device from a PCI device. * * @mdev: pointer to struct &media_device * @udev: pointer to struct usb_device * @board_name: media device name. If %NULL, the routine will use the usb * product name, if available. * @driver_name: name of the driver. if %NULL, the routine will use the name * given by ``udev->dev->driver->name``, with is usually the wrong * thing to do. * * .. note:: * * It is better to call media_device_usb_init() instead, as * such macro fills driver_name with %KBUILD_MODNAME. 
*/ void __media_device_usb_init(struct media_device *mdev, struct usb_device *udev, const char *board_name, const char *driver_name); #else static inline void media_device_init(struct media_device *mdev) { } static inline int media_device_register(struct media_device *mdev) { return 0; } static inline void media_device_unregister(struct media_device *mdev) { } static inline void media_device_cleanup(struct media_device *mdev) { } static inline int media_device_register_entity(struct media_device *mdev, struct media_entity *entity) { return 0; } static inline void media_device_unregister_entity(struct media_entity *entity) { } static inline void media_device_register_entity_notify( struct media_device *mdev, struct media_entity_notify *nptr) { } static inline void media_device_unregister_entity_notify( struct media_device *mdev, struct media_entity_notify *nptr) { } static inline void media_device_pci_init(struct media_device *mdev, struct pci_dev *pci_dev, char *name) { } static inline void __media_device_usb_init(struct media_device *mdev, struct usb_device *udev, char *board_name, char *driver_name) { } #endif /* CONFIG_MEDIA_CONTROLLER */ /** * media_device_usb_init() - create and initialize a * struct &media_device from a PCI device. * * @mdev: pointer to struct &media_device * @udev: pointer to struct usb_device * @name: media device name. If %NULL, the routine will use the usb * product name, if available. * * This macro calls media_device_usb_init() passing the * media_device_usb_init() **driver_name** parameter filled with * %KBUILD_MODNAME. */ #define media_device_usb_init(mdev, udev, name) \ __media_device_usb_init(mdev, udev, name, KBUILD_MODNAME) /** * media_set_bus_info() - Set bus_info field * * @bus_info: Variable where to write the bus info (char array) * @bus_info_size: Length of the bus_info * @dev: Related struct device * * Sets bus information based on &dev. This is currently done for PCI and * platform devices. dev is required to be non-NULL for this to happen. * * This function is not meant to be called from drivers. */ static inline void media_set_bus_info(char *bus_info, size_t bus_info_size, struct device *dev) { if (!dev) strscpy(bus_info, "no bus info", bus_info_size); else if (dev_is_platform(dev)) snprintf(bus_info, bus_info_size, "platform:%s", dev_name(dev)); else if (dev_is_pci(dev)) snprintf(bus_info, bus_info_size, "PCI:%s", dev_name(dev)); } #endif
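/*
 * Illustrative sketch (not part of media-device.h): the init -> populate ->
 * register -> unregister -> cleanup sequence that the media_device_init()
 * comments above describe, written out for a hypothetical PCI driver. The
 * names my_driver/my_probe/my_remove, the two-pad entity and the chosen
 * entity function are invented for the example; only calls documented in
 * this header plus media_entity_pads_init() are used.
 */
#include <media/media-device.h>
#include <media/media-entity.h>

struct my_driver {
	struct media_device mdev;
	struct media_entity entity;
	struct media_pad pads[2];
};

static int my_probe(struct pci_dev *pci_dev)
{
	struct my_driver *drv;
	int ret;

	drv = devm_kzalloc(&pci_dev->dev, sizeof(*drv), GFP_KERNEL);
	if (!drv)
		return -ENOMEM;

	/* Step 1: initialise; for PCI devices this also fills dev, model,
	 * bus_info and hw_revision from the PCI device. */
	media_device_pci_init(&drv->mdev, pci_dev, NULL);

	/* Step 2: build the graph (entities, pads, links) while the device
	 * node is not yet visible to user space. */
	drv->entity.name = "my-entity";
	drv->entity.function = MEDIA_ENT_F_PROC_VIDEO_SCALER;
	drv->pads[0].flags = MEDIA_PAD_FL_SINK;
	drv->pads[1].flags = MEDIA_PAD_FL_SOURCE;
	ret = media_entity_pads_init(&drv->entity, 2, drv->pads);
	if (ret)
		goto err_cleanup;
	ret = media_device_register_entity(&drv->mdev, &drv->entity);
	if (ret)
		goto err_cleanup;

	/* Step 3: only now expose the media device node to user space. */
	ret = media_device_register(&drv->mdev);
	if (ret)
		goto err_unreg_entity;
	return 0;

err_unreg_entity:
	media_device_unregister_entity(&drv->entity);
err_cleanup:
	media_device_cleanup(&drv->mdev);
	return ret;
}

static void my_remove(struct my_driver *drv)
{
	/* Unregistering the media device also unregisters its entities;
	 * this is safe even if registration never succeeded. */
	media_device_unregister(&drv->mdev);
	media_device_cleanup(&drv->mdev);
}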
// SPDX-License-Identifier: GPL-2.0
/*
 * security/tomoyo/util.c
 *
 * Copyright (C) 2005-2011 NTT DATA CORPORATION
 */

#include <linux/slab.h>
#include <linux/rculist.h>

#include "common.h"

/* Lock for protecting policy. */
DEFINE_MUTEX(tomoyo_policy_lock);

/* Has /sbin/init started? */
bool tomoyo_policy_loaded;

/*
 * Mapping table from "enum tomoyo_mac_index" to
 * "enum tomoyo_mac_category_index".
 */
const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX] = {
	/* CONFIG::file group */
	[TOMOYO_MAC_FILE_EXECUTE]    = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_OPEN]       = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_CREATE]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_UNLINK]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_GETATTR]    = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_MKDIR]      = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_RMDIR]      = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_MKFIFO]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_MKSOCK]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_TRUNCATE]   = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_SYMLINK]    = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_MKBLOCK]    = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_MKCHAR]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_LINK]       = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_RENAME]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_CHMOD]      = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_CHOWN]      = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_CHGRP]      = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_IOCTL]      = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_CHROOT]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_MOUNT]      = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_UMOUNT]     = TOMOYO_MAC_CATEGORY_FILE,
	[TOMOYO_MAC_FILE_PIVOT_ROOT] = TOMOYO_MAC_CATEGORY_FILE,
	/* CONFIG::network group */
	[TOMOYO_MAC_NETWORK_INET_STREAM_BIND]    = TOMOYO_MAC_CATEGORY_NETWORK,
	[TOMOYO_MAC_NETWORK_INET_STREAM_LISTEN]  = TOMOYO_MAC_CATEGORY_NETWORK,
	[TOMOYO_MAC_NETWORK_INET_STREAM_CONNECT] = TOMOYO_MAC_CATEGORY_NETWORK,
	[TOMOYO_MAC_NETWORK_INET_DGRAM_BIND]     = TOMOYO_MAC_CATEGORY_NETWORK,
	[TOMOYO_MAC_NETWORK_INET_DGRAM_SEND]     = TOMOYO_MAC_CATEGORY_NETWORK,
	[TOMOYO_MAC_NETWORK_INET_RAW_BIND]       = TOMOYO_MAC_CATEGORY_NETWORK,
	[TOMOYO_MAC_NETWORK_INET_RAW_SEND]       = TOMOYO_MAC_CATEGORY_NETWORK,
	[TOMOYO_MAC_NETWORK_UNIX_STREAM_BIND]    = TOMOYO_MAC_CATEGORY_NETWORK,
[TOMOYO_MAC_NETWORK_UNIX_STREAM_LISTEN] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_STREAM_CONNECT] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_DGRAM_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_DGRAM_SEND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_LISTEN] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_CONNECT] = TOMOYO_MAC_CATEGORY_NETWORK, /* CONFIG::misc group */ [TOMOYO_MAC_ENVIRON] = TOMOYO_MAC_CATEGORY_MISC, }; /** * tomoyo_convert_time - Convert time_t to YYYY/MM/DD hh/mm/ss. * * @time64: Seconds since 1970/01/01 00:00:00. * @stamp: Pointer to "struct tomoyo_time". * * Returns nothing. */ void tomoyo_convert_time(time64_t time64, struct tomoyo_time *stamp) { struct tm tm; time64_to_tm(time64, 0, &tm); stamp->sec = tm.tm_sec; stamp->min = tm.tm_min; stamp->hour = tm.tm_hour; stamp->day = tm.tm_mday; stamp->month = tm.tm_mon + 1; stamp->year = tm.tm_year + 1900; } /** * tomoyo_permstr - Find permission keywords. * * @string: String representation for permissions in foo/bar/buz format. * @keyword: Keyword to find from @string/ * * Returns true if @keyword was found in @string, false otherwise. * * This function assumes that strncmp(w1, w2, strlen(w1)) != 0 if w1 != w2. */ bool tomoyo_permstr(const char *string, const char *keyword) { const char *cp = strstr(string, keyword); if (cp) return cp == string || *(cp - 1) == '/'; return false; } /** * tomoyo_read_token - Read a word from a line. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns a word on success, "" otherwise. * * To allow the caller to skip NULL check, this function returns "" rather than * NULL if there is no more words to read. */ char *tomoyo_read_token(struct tomoyo_acl_param *param) { char *pos = param->data; char *del = strchr(pos, ' '); if (del) *del++ = '\0'; else del = pos + strlen(pos); param->data = del; return pos; } static bool tomoyo_correct_path2(const char *filename, const size_t len); /** * tomoyo_get_domainname - Read a domainname from a line. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns a domainname on success, NULL otherwise. */ const struct tomoyo_path_info *tomoyo_get_domainname (struct tomoyo_acl_param *param) { char *start = param->data; char *pos = start; while (*pos) { if (*pos++ != ' ' || tomoyo_correct_path2(pos, strchrnul(pos, ' ') - pos)) continue; *(pos - 1) = '\0'; break; } param->data = pos; if (tomoyo_correct_domain(start)) return tomoyo_get_name(start); return NULL; } /** * tomoyo_parse_ulong - Parse an "unsigned long" value. * * @result: Pointer to "unsigned long". * @str: Pointer to string to parse. * * Returns one of values in "enum tomoyo_value_type". * * The @src is updated to point the first character after the value * on success. */ u8 tomoyo_parse_ulong(unsigned long *result, char **str) { const char *cp = *str; char *ep; int base = 10; if (*cp == '0') { char c = *(cp + 1); if (c == 'x' || c == 'X') { base = 16; cp += 2; } else if (c >= '0' && c <= '7') { base = 8; cp++; } } *result = simple_strtoul(cp, &ep, base); if (cp == ep) return TOMOYO_VALUE_TYPE_INVALID; *str = ep; switch (base) { case 16: return TOMOYO_VALUE_TYPE_HEXADECIMAL; case 8: return TOMOYO_VALUE_TYPE_OCTAL; default: return TOMOYO_VALUE_TYPE_DECIMAL; } } /** * tomoyo_print_ulong - Print an "unsigned long" value. * * @buffer: Pointer to buffer. * @buffer_len: Size of @buffer. * @value: An "unsigned long" value. * @type: Type of @value. 
* * Returns nothing. */ void tomoyo_print_ulong(char *buffer, const int buffer_len, const unsigned long value, const u8 type) { if (type == TOMOYO_VALUE_TYPE_DECIMAL) snprintf(buffer, buffer_len, "%lu", value); else if (type == TOMOYO_VALUE_TYPE_OCTAL) snprintf(buffer, buffer_len, "0%lo", value); else if (type == TOMOYO_VALUE_TYPE_HEXADECIMAL) snprintf(buffer, buffer_len, "0x%lX", value); else snprintf(buffer, buffer_len, "type(%u)", type); } /** * tomoyo_parse_name_union - Parse a tomoyo_name_union. * * @param: Pointer to "struct tomoyo_acl_param". * @ptr: Pointer to "struct tomoyo_name_union". * * Returns true on success, false otherwise. */ bool tomoyo_parse_name_union(struct tomoyo_acl_param *param, struct tomoyo_name_union *ptr) { char *filename; if (param->data[0] == '@') { param->data++; ptr->group = tomoyo_get_group(param, TOMOYO_PATH_GROUP); return ptr->group != NULL; } filename = tomoyo_read_token(param); if (!tomoyo_correct_word(filename)) return false; ptr->filename = tomoyo_get_name(filename); return ptr->filename != NULL; } /** * tomoyo_parse_number_union - Parse a tomoyo_number_union. * * @param: Pointer to "struct tomoyo_acl_param". * @ptr: Pointer to "struct tomoyo_number_union". * * Returns true on success, false otherwise. */ bool tomoyo_parse_number_union(struct tomoyo_acl_param *param, struct tomoyo_number_union *ptr) { char *data; u8 type; unsigned long v; memset(ptr, 0, sizeof(*ptr)); if (param->data[0] == '@') { param->data++; ptr->group = tomoyo_get_group(param, TOMOYO_NUMBER_GROUP); return ptr->group != NULL; } data = tomoyo_read_token(param); type = tomoyo_parse_ulong(&v, &data); if (type == TOMOYO_VALUE_TYPE_INVALID) return false; ptr->values[0] = v; ptr->value_type[0] = type; if (!*data) { ptr->values[1] = v; ptr->value_type[1] = type; return true; } if (*data++ != '-') return false; type = tomoyo_parse_ulong(&v, &data); if (type == TOMOYO_VALUE_TYPE_INVALID || *data || ptr->values[0] > v) return false; ptr->values[1] = v; ptr->value_type[1] = type; return true; } /** * tomoyo_byte_range - Check whether the string is a \ooo style octal value. * * @str: Pointer to the string. * * Returns true if @str is a \ooo style octal value, false otherwise. * * TOMOYO uses \ooo style representation for 0x01 - 0x20 and 0x7F - 0xFF. * This function verifies that \ooo is in valid range. */ static inline bool tomoyo_byte_range(const char *str) { return *str >= '0' && *str++ <= '3' && *str >= '0' && *str++ <= '7' && *str >= '0' && *str <= '7'; } /** * tomoyo_alphabet_char - Check whether the character is an alphabet. * * @c: The character to check. * * Returns true if @c is an alphabet character, false otherwise. */ static inline bool tomoyo_alphabet_char(const char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } /** * tomoyo_make_byte - Make byte value from three octal characters. * * @c1: The first character. * @c2: The second character. * @c3: The third character. * * Returns byte value. */ static inline u8 tomoyo_make_byte(const u8 c1, const u8 c2, const u8 c3) { return ((c1 - '0') << 6) + ((c2 - '0') << 3) + (c3 - '0'); } /** * tomoyo_valid - Check whether the character is a valid char. * * @c: The character to check. * * Returns true if @c is a valid character, false otherwise. */ static inline bool tomoyo_valid(const unsigned char c) { return c > ' ' && c < 127; } /** * tomoyo_invalid - Check whether the character is an invalid char. * * @c: The character to check. * * Returns true if @c is an invalid character, false otherwise. 
*/ static inline bool tomoyo_invalid(const unsigned char c) { return c && (c <= ' ' || c >= 127); } /** * tomoyo_str_starts - Check whether the given string starts with the given keyword. * * @src: Pointer to pointer to the string. * @find: Pointer to the keyword. * * Returns true if @src starts with @find, false otherwise. * * The @src is updated to point the first character after the @find * if @src starts with @find. */ bool tomoyo_str_starts(char **src, const char *find) { const int len = strlen(find); char *tmp = *src; if (strncmp(tmp, find, len)) return false; tmp += len; *src = tmp; return true; } /** * tomoyo_normalize_line - Format string. * * @buffer: The line to normalize. * * Leading and trailing whitespaces are removed. * Multiple whitespaces are packed into single space. * * Returns nothing. */ void tomoyo_normalize_line(unsigned char *buffer) { unsigned char *sp = buffer; unsigned char *dp = buffer; bool first = true; while (tomoyo_invalid(*sp)) sp++; while (*sp) { if (!first) *dp++ = ' '; first = false; while (tomoyo_valid(*sp)) *dp++ = *sp++; while (tomoyo_invalid(*sp)) sp++; } *dp = '\0'; } /** * tomoyo_correct_word2 - Validate a string. * * @string: The string to check. Maybe non-'\0'-terminated. * @len: Length of @string. * * Check whether the given string follows the naming rules. * Returns true if @string follows the naming rules, false otherwise. */ static bool tomoyo_correct_word2(const char *string, size_t len) { u8 recursion = 20; const char *const start = string; bool in_repetition = false; if (!len) goto out; while (len--) { unsigned char c = *string++; if (c == '\\') { if (!len--) goto out; c = *string++; if (c >= '0' && c <= '3') { unsigned char d; unsigned char e; if (!len-- || !len--) goto out; d = *string++; e = *string++; if (d < '0' || d > '7' || e < '0' || e > '7') goto out; c = tomoyo_make_byte(c, d, e); if (c <= ' ' || c >= 127) continue; goto out; } switch (c) { case '\\': /* "\\" */ case '+': /* "\+" */ case '?': /* "\?" */ case 'x': /* "\x" */ case 'a': /* "\a" */ case '-': /* "\-" */ continue; } if (!recursion--) goto out; switch (c) { case '*': /* "\*" */ case '@': /* "\@" */ case '$': /* "\$" */ case 'X': /* "\X" */ case 'A': /* "\A" */ continue; case '{': /* "/\{" */ if (string - 3 < start || *(string - 3) != '/') goto out; in_repetition = true; continue; case '}': /* "\}/" */ if (*string != '/') goto out; if (!in_repetition) goto out; in_repetition = false; continue; } goto out; } else if (in_repetition && c == '/') { goto out; } else if (c <= ' ' || c >= 127) { goto out; } } if (in_repetition) goto out; return true; out: return false; } /** * tomoyo_correct_word - Validate a string. * * @string: The string to check. * * Check whether the given string follows the naming rules. * Returns true if @string follows the naming rules, false otherwise. */ bool tomoyo_correct_word(const char *string) { return tomoyo_correct_word2(string, strlen(string)); } /** * tomoyo_correct_path2 - Check whether the given pathname follows the naming rules. * * @filename: The pathname to check. * @len: Length of @filename. * * Returns true if @filename follows the naming rules, false otherwise. */ static bool tomoyo_correct_path2(const char *filename, const size_t len) { const char *cp1 = memchr(filename, '/', len); const char *cp2 = memchr(filename, '.', len); return cp1 && (!cp2 || (cp1 < cp2)) && tomoyo_correct_word2(filename, len); } /** * tomoyo_correct_path - Validate a pathname. * * @filename: The pathname to check. 
* * Check whether the given pathname follows the naming rules. * Returns true if @filename follows the naming rules, false otherwise. */ bool tomoyo_correct_path(const char *filename) { return tomoyo_correct_path2(filename, strlen(filename)); } /** * tomoyo_correct_domain - Check whether the given domainname follows the naming rules. * * @domainname: The domainname to check. * * Returns true if @domainname follows the naming rules, false otherwise. */ bool tomoyo_correct_domain(const unsigned char *domainname) { if (!domainname || !tomoyo_domain_def(domainname)) return false; domainname = strchr(domainname, ' '); if (!domainname++) return true; while (1) { const unsigned char *cp = strchr(domainname, ' '); if (!cp) break; if (!tomoyo_correct_path2(domainname, cp - domainname)) return false; domainname = cp + 1; } return tomoyo_correct_path(domainname); } /** * tomoyo_domain_def - Check whether the given token can be a domainname. * * @buffer: The token to check. * * Returns true if @buffer possibly be a domainname, false otherwise. */ bool tomoyo_domain_def(const unsigned char *buffer) { const unsigned char *cp; int len; if (*buffer != '<') return false; cp = strchr(buffer, ' '); if (!cp) len = strlen(buffer); else len = cp - buffer; if (buffer[len - 1] != '>' || !tomoyo_correct_word2(buffer + 1, len - 2)) return false; return true; } /** * tomoyo_find_domain - Find a domain by the given name. * * @domainname: The domainname to find. * * Returns pointer to "struct tomoyo_domain_info" if found, NULL otherwise. * * Caller holds tomoyo_read_lock(). */ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname) { struct tomoyo_domain_info *domain; struct tomoyo_path_info name; name.name = domainname; tomoyo_fill_path_info(&name); list_for_each_entry_rcu(domain, &tomoyo_domain_list, list, srcu_read_lock_held(&tomoyo_ss)) { if (!domain->is_deleted && !tomoyo_pathcmp(&name, domain->domainname)) return domain; } return NULL; } /** * tomoyo_const_part_length - Evaluate the initial length without a pattern in a token. * * @filename: The string to evaluate. * * Returns the initial length without a pattern in @filename. */ static int tomoyo_const_part_length(const char *filename) { char c; int len = 0; if (!filename) return 0; while ((c = *filename++) != '\0') { if (c != '\\') { len++; continue; } c = *filename++; switch (c) { case '\\': /* "\\" */ len += 2; continue; case '0': /* "\ooo" */ case '1': case '2': case '3': c = *filename++; if (c < '0' || c > '7') break; c = *filename++; if (c < '0' || c > '7') break; len += 4; continue; } break; } return len; } /** * tomoyo_fill_path_info - Fill in "struct tomoyo_path_info" members. * * @ptr: Pointer to "struct tomoyo_path_info" to fill in. * * The caller sets "struct tomoyo_path_info"->name. */ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr) { const char *name = ptr->name; const int len = strlen(name); ptr->const_len = tomoyo_const_part_length(name); ptr->is_dir = len && (name[len - 1] == '/'); ptr->is_patterned = (ptr->const_len < len); ptr->hash = full_name_hash(NULL, name, len); } /** * tomoyo_file_matches_pattern2 - Pattern matching without '/' character and "\-" pattern. * * @filename: The start of string to check. * @filename_end: The end of string to check. * @pattern: The start of pattern to compare. * @pattern_end: The end of pattern to compare. * * Returns true if @filename matches @pattern, false otherwise. 
*/ static bool tomoyo_file_matches_pattern2(const char *filename, const char *filename_end, const char *pattern, const char *pattern_end) { while (filename < filename_end && pattern < pattern_end) { char c; int i; int j; if (*pattern != '\\') { if (*filename++ != *pattern++) return false; continue; } c = *filename; pattern++; switch (*pattern) { case '?': if (c == '/') { return false; } else if (c == '\\') { if (filename[1] == '\\') filename++; else if (tomoyo_byte_range(filename + 1)) filename += 3; else return false; } break; case '\\': if (c != '\\') return false; if (*++filename != '\\') return false; break; case '+': if (!isdigit(c)) return false; break; case 'x': if (!isxdigit(c)) return false; break; case 'a': if (!tomoyo_alphabet_char(c)) return false; break; case '0': case '1': case '2': case '3': if (c == '\\' && tomoyo_byte_range(filename + 1) && strncmp(filename + 1, pattern, 3) == 0) { filename += 3; pattern += 2; break; } return false; /* Not matched. */ case '*': case '@': for (i = 0; i <= filename_end - filename; i++) { if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; c = filename[i]; if (c == '.' && *pattern == '@') break; if (c != '\\') continue; if (filename[i + 1] == '\\') i++; else if (tomoyo_byte_range(filename + i + 1)) i += 3; else break; /* Bad pattern. */ } return false; /* Not matched. */ default: j = 0; c = *pattern; if (c == '$') { while (isdigit(filename[j])) j++; } else if (c == 'X') { while (isxdigit(filename[j])) j++; } else if (c == 'A') { while (tomoyo_alphabet_char(filename[j])) j++; } for (i = 1; i <= j; i++) { if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; } return false; /* Not matched or bad pattern. */ } filename++; pattern++; } while (*pattern == '\\' && (*(pattern + 1) == '*' || *(pattern + 1) == '@')) pattern += 2; return filename == filename_end && pattern == pattern_end; } /** * tomoyo_file_matches_pattern - Pattern matching without '/' character. * * @filename: The start of string to check. * @filename_end: The end of string to check. * @pattern: The start of pattern to compare. * @pattern_end: The end of pattern to compare. * * Returns true if @filename matches @pattern, false otherwise. */ static bool tomoyo_file_matches_pattern(const char *filename, const char *filename_end, const char *pattern, const char *pattern_end) { const char *pattern_start = pattern; bool first = true; bool result; while (pattern < pattern_end - 1) { /* Split at "\-" pattern. */ if (*pattern++ != '\\' || *pattern++ != '-') continue; result = tomoyo_file_matches_pattern2(filename, filename_end, pattern_start, pattern - 2); if (first) result = !result; if (result) return false; first = false; pattern_start = pattern; } result = tomoyo_file_matches_pattern2(filename, filename_end, pattern_start, pattern_end); return first ? result : !result; } /** * tomoyo_path_matches_pattern2 - Do pathname pattern matching. * * @f: The start of string to check. * @p: The start of pattern to compare. * * Returns true if @f matches @p, false otherwise. 
*/ static bool tomoyo_path_matches_pattern2(const char *f, const char *p) { const char *f_delimiter; const char *p_delimiter; while (*f && *p) { f_delimiter = strchr(f, '/'); if (!f_delimiter) f_delimiter = f + strlen(f); p_delimiter = strchr(p, '/'); if (!p_delimiter) p_delimiter = p + strlen(p); if (*p == '\\' && *(p + 1) == '{') goto recursive; if (!tomoyo_file_matches_pattern(f, f_delimiter, p, p_delimiter)) return false; f = f_delimiter; if (*f) f++; p = p_delimiter; if (*p) p++; } /* Ignore trailing "\*" and "\@" in @pattern. */ while (*p == '\\' && (*(p + 1) == '*' || *(p + 1) == '@')) p += 2; return !*f && !*p; recursive: /* * The "\{" pattern is permitted only after '/' character. * This guarantees that below "*(p - 1)" is safe. * Also, the "\}" pattern is permitted only before '/' character * so that "\{" + "\}" pair will not break the "\-" operator. */ if (*(p - 1) != '/' || p_delimiter <= p + 3 || *p_delimiter != '/' || *(p_delimiter - 1) != '}' || *(p_delimiter - 2) != '\\') return false; /* Bad pattern. */ do { /* Compare current component with pattern. */ if (!tomoyo_file_matches_pattern(f, f_delimiter, p + 2, p_delimiter - 2)) break; /* Proceed to next component. */ f = f_delimiter; if (!*f) break; f++; /* Continue comparison. */ if (tomoyo_path_matches_pattern2(f, p_delimiter + 1)) return true; f_delimiter = strchr(f, '/'); } while (f_delimiter); return false; /* Not matched. */ } /** * tomoyo_path_matches_pattern - Check whether the given filename matches the given pattern. * * @filename: The filename to check. * @pattern: The pattern to compare. * * Returns true if matches, false otherwise. * * The following patterns are available. * \\ \ itself. * \ooo Octal representation of a byte. * \* Zero or more repetitions of characters other than '/'. * \@ Zero or more repetitions of characters other than '/' or '.'. * \? 1 byte character other than '/'. * \$ One or more repetitions of decimal digits. * \+ 1 decimal digit. * \X One or more repetitions of hexadecimal digits. * \x 1 hexadecimal digit. * \A One or more repetitions of alphabet characters. * \a 1 alphabet character. * * \- Subtraction operator. * * /\{dir\}/ '/' + 'One or more repetitions of dir/' (e.g. /dir/ /dir/dir/ * /dir/dir/dir/ ). */ bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, const struct tomoyo_path_info *pattern) { const char *f = filename->name; const char *p = pattern->name; const int len = pattern->const_len; /* If @pattern doesn't contain pattern, I can use strcmp(). */ if (!pattern->is_patterned) return !tomoyo_pathcmp(filename, pattern); /* Don't compare directory and non-directory. */ if (filename->is_dir != pattern->is_dir) return false; /* Compare the initial length without patterns. */ if (strncmp(f, p, len)) return false; f += len; p += len; return tomoyo_path_matches_pattern2(f, p); } /** * tomoyo_get_exe - Get tomoyo_realpath() of current process. * * Returns the tomoyo_realpath() of current process on success, NULL otherwise. * * This function uses kzalloc(), so the caller must call kfree() * if this function didn't return NULL. */ const char *tomoyo_get_exe(void) { struct file *exe_file; const char *cp; struct mm_struct *mm = current->mm; if (!mm) return NULL; exe_file = get_mm_exe_file(mm); if (!exe_file) return NULL; cp = tomoyo_realpath_from_path(&exe_file->f_path); fput(exe_file); return cp; } /** * tomoyo_get_mode - Get MAC mode. * * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number. * @index: Index number of functionality. 
* * Returns mode. */ int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, const u8 index) { u8 mode; struct tomoyo_profile *p; if (!tomoyo_policy_loaded) return TOMOYO_CONFIG_DISABLED; p = tomoyo_profile(ns, profile); mode = p->config[index]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) mode = p->config[tomoyo_index2category[index] + TOMOYO_MAX_MAC_INDEX]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) mode = p->default_config; return mode & 3; } /** * tomoyo_init_request_info - Initialize "struct tomoyo_request_info" members. * * @r: Pointer to "struct tomoyo_request_info" to initialize. * @domain: Pointer to "struct tomoyo_domain_info". NULL for tomoyo_domain(). * @index: Index number of functionality. * * Returns mode. */ int tomoyo_init_request_info(struct tomoyo_request_info *r, struct tomoyo_domain_info *domain, const u8 index) { u8 profile; memset(r, 0, sizeof(*r)); if (!domain) domain = tomoyo_domain(); r->domain = domain; profile = domain->profile; r->profile = profile; r->type = index; r->mode = tomoyo_get_mode(domain->ns, profile, index); return r->mode; } /** * tomoyo_domain_quota_is_ok - Check for domain's quota. * * @r: Pointer to "struct tomoyo_request_info". * * Returns true if the domain is not exceeded quota, false otherwise. * * Caller holds tomoyo_read_lock(). */ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) { unsigned int count = 0; struct tomoyo_domain_info *domain = r->domain; struct tomoyo_acl_info *ptr; if (r->mode != TOMOYO_CONFIG_LEARNING) return false; if (!domain) return true; if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED])) return false; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; if (ptr->is_deleted) continue; /* * Reading perm bitmap might race with tomoyo_merge_*() because * caller does not hold tomoyo_policy_lock mutex. But exceeding * max_learning_entry parameter by a few entries does not harm. */ switch (ptr->type) { case TOMOYO_TYPE_PATH_ACL: perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm); break; case TOMOYO_TYPE_PATH2_ACL: perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm); break; case TOMOYO_TYPE_PATH_NUMBER_ACL: perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head) ->perm); break; case TOMOYO_TYPE_MKDEV_ACL: perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); break; case TOMOYO_TYPE_INET_ACL: perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm); break; case TOMOYO_TYPE_UNIX_ACL: perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm); break; case TOMOYO_TYPE_MANUAL_TASK_ACL: perm = 0; break; default: perm = 1; } count += hweight16(perm); } if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true); /* r->granted = false; */ tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", domain->domainname->name); #endif return false; }
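/*
 * Illustrative sketch (not part of util.c): exercising the pattern grammar
 * documented above for tomoyo_path_matches_pattern(). The strings and the
 * helper name are invented; the caller is assumed to live inside the TOMOYO
 * code so that the internal struct tomoyo_path_info helpers are visible.
 */
#include "common.h"

static bool my_example_pattern_check(void)
{
	struct tomoyo_path_info filename;
	struct tomoyo_path_info pattern;

	filename.name = "/var/log/messages.1";
	/* "\*" matches zero or more non-'/' characters, so this pattern
	 * covers any single pathname component directly under /var/log/. */
	pattern.name = "/var/log/\\*";
	/* Fill const_len/is_patterned/is_dir/hash before matching. */
	tomoyo_fill_path_info(&filename);
	tomoyo_fill_path_info(&pattern);

	/* const_len of the pattern is 9 ("/var/log/"); that prefix is compared
	 * with strncmp() and the trailing "\*" absorbs "messages.1". */
	return tomoyo_path_matches_pattern(&filename, &pattern);	/* true */
}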
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <linux/usb/audio.h>
#include <linux/usb/audio-v2.h> #include <linux/usb/audio-v3.h> #include <sound/core.h> #include <sound/pcm.h> #include "usbaudio.h" #include "card.h" #include "quirks.h" #include "helper.h" #include "clock.h" #include "format.h" /* * parse the audio format type I descriptor * and returns the corresponding pcm format * * @dev: usb device * @fp: audioformat record * @format: the format tag (wFormatTag) * @fmt: the format type descriptor (v1/v2) or AudioStreaming descriptor (v3) */ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, struct audioformat *fp, u64 format, void *_fmt) { int sample_width, sample_bytes; u64 pcm_formats = 0; switch (fp->protocol) { case UAC_VERSION_1: default: { struct uac_format_type_i_discrete_descriptor *fmt = _fmt; if (format >= 64) { usb_audio_info(chip, "%u:%d: invalid format type 0x%llx is detected, processed as PCM\n", fp->iface, fp->altsetting, format); format = UAC_FORMAT_TYPE_I_PCM; } sample_width = fmt->bBitResolution; sample_bytes = fmt->bSubframeSize; format = 1ULL << format; break; } case UAC_VERSION_2: { struct uac_format_type_i_ext_descriptor *fmt = _fmt; sample_width = fmt->bBitResolution; sample_bytes = fmt->bSubslotSize; if (format & UAC2_FORMAT_TYPE_I_RAW_DATA) { pcm_formats |= SNDRV_PCM_FMTBIT_SPECIAL; /* flag potentially raw DSD capable altsettings */ fp->dsd_raw = true; /* clear special format bit to avoid "unsupported format" msg below */ format &= ~UAC2_FORMAT_TYPE_I_RAW_DATA; } format <<= 1; break; } case UAC_VERSION_3: { struct uac3_as_header_descriptor *as = _fmt; sample_width = as->bBitResolution; sample_bytes = as->bSubslotSize; if (format & UAC3_FORMAT_TYPE_I_RAW_DATA) { pcm_formats |= SNDRV_PCM_FMTBIT_SPECIAL; /* clear special format bit to avoid "unsupported format" msg below */ format &= ~UAC3_FORMAT_TYPE_I_RAW_DATA; } format <<= 1; break; } } fp->fmt_bits = sample_width; if ((pcm_formats == 0) && (format == 0 || format == BIT(UAC_FORMAT_TYPE_I_UNDEFINED))) { /* some devices don't define this correctly... */ usb_audio_info(chip, "%u:%d : format type 0 is detected, processed as PCM\n", fp->iface, fp->altsetting); format = BIT(UAC_FORMAT_TYPE_I_PCM); } if (format & BIT(UAC_FORMAT_TYPE_I_PCM)) { if (((chip->usb_id == USB_ID(0x0582, 0x0016)) || /* Edirol SD-90 */ (chip->usb_id == USB_ID(0x0582, 0x000c))) && /* Roland SC-D70 */ sample_width == 24 && sample_bytes == 2) sample_bytes = 3; else if (sample_width > sample_bytes * 8) { usb_audio_info(chip, "%u:%d : sample bitwidth %d in over sample bytes %d\n", fp->iface, fp->altsetting, sample_width, sample_bytes); } /* check the format byte size */ switch (sample_bytes) { case 1: pcm_formats |= SNDRV_PCM_FMTBIT_S8; break; case 2: if (snd_usb_is_big_endian_format(chip, fp)) pcm_formats |= SNDRV_PCM_FMTBIT_S16_BE; /* grrr, big endian!! */ else pcm_formats |= SNDRV_PCM_FMTBIT_S16_LE; break; case 3: if (snd_usb_is_big_endian_format(chip, fp)) pcm_formats |= SNDRV_PCM_FMTBIT_S24_3BE; /* grrr, big endian!! */ else pcm_formats |= SNDRV_PCM_FMTBIT_S24_3LE; break; case 4: pcm_formats |= SNDRV_PCM_FMTBIT_S32_LE; break; default: usb_audio_info(chip, "%u:%d : unsupported sample bitwidth %d in %d bytes\n", fp->iface, fp->altsetting, sample_width, sample_bytes); break; } } if (format & BIT(UAC_FORMAT_TYPE_I_PCM8)) { /* Dallas DS4201 workaround: it advertises U8 format, but really supports S8. 
*/ if (chip->usb_id == USB_ID(0x04fa, 0x4201)) pcm_formats |= SNDRV_PCM_FMTBIT_S8; else pcm_formats |= SNDRV_PCM_FMTBIT_U8; } if (format & BIT(UAC_FORMAT_TYPE_I_IEEE_FLOAT)) pcm_formats |= SNDRV_PCM_FMTBIT_FLOAT_LE; if (format & BIT(UAC_FORMAT_TYPE_I_ALAW)) pcm_formats |= SNDRV_PCM_FMTBIT_A_LAW; if (format & BIT(UAC_FORMAT_TYPE_I_MULAW)) pcm_formats |= SNDRV_PCM_FMTBIT_MU_LAW; if (format & ~0x3f) { usb_audio_info(chip, "%u:%d : unsupported format bits %#llx\n", fp->iface, fp->altsetting, format); } pcm_formats |= snd_usb_interface_dsd_format_quirks(chip, fp, sample_bytes); return pcm_formats; } static int set_fixed_rate(struct audioformat *fp, int rate, int rate_bits) { kfree(fp->rate_table); fp->rate_table = kmalloc(sizeof(int), GFP_KERNEL); if (!fp->rate_table) return -ENOMEM; fp->nr_rates = 1; fp->rate_min = rate; fp->rate_max = rate; fp->rates = rate_bits; fp->rate_table[0] = rate; return 0; } /* set up rate_min, rate_max and rates from the rate table */ static void set_rate_table_min_max(struct audioformat *fp) { unsigned int rate; int i; fp->rate_min = INT_MAX; fp->rate_max = 0; fp->rates = 0; for (i = 0; i < fp->nr_rates; i++) { rate = fp->rate_table[i]; fp->rate_min = min(fp->rate_min, rate); fp->rate_max = max(fp->rate_max, rate); fp->rates |= snd_pcm_rate_to_rate_bit(rate); } } /* * parse the format descriptor and stores the possible sample rates * on the audioformat table (audio class v1). * * @dev: usb device * @fp: audioformat record * @fmt: the format descriptor * @offset: the start offset of descriptor pointing the rate type * (7 for type I and II, 8 for type II) */ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audioformat *fp, unsigned char *fmt, int offset) { int nr_rates = fmt[offset]; if (fmt[0] < offset + 1 + 3 * (nr_rates ? 
nr_rates : 2)) { usb_audio_err(chip, "%u:%d : invalid UAC_FORMAT_TYPE desc\n", fp->iface, fp->altsetting); return -EINVAL; } if (nr_rates) { /* * build the rate table and bitmap flags */ int r, idx; fp->rate_table = kmalloc_array(nr_rates, sizeof(int), GFP_KERNEL); if (fp->rate_table == NULL) return -ENOMEM; fp->nr_rates = 0; for (r = 0, idx = offset + 1; r < nr_rates; r++, idx += 3) { unsigned int rate = combine_triple(&fmt[idx]); if (!rate) continue; /* C-Media CM6501 mislabels its 96 kHz altsetting */ /* Terratec Aureon 7.1 USB C-Media 6206, too */ /* Ozone Z90 USB C-Media, too */ if (rate == 48000 && nr_rates == 1 && (chip->usb_id == USB_ID(0x0d8c, 0x0201) || chip->usb_id == USB_ID(0x0d8c, 0x0102) || chip->usb_id == USB_ID(0x0d8c, 0x0078) || chip->usb_id == USB_ID(0x0ccd, 0x00b1)) && fp->altsetting == 5 && fp->maxpacksize == 392) rate = 96000; /* Creative VF0420/VF0470 Live Cams report 16 kHz instead of 8kHz */ if (rate == 16000 && (chip->usb_id == USB_ID(0x041e, 0x4064) || chip->usb_id == USB_ID(0x041e, 0x4068))) rate = 8000; fp->rate_table[fp->nr_rates++] = rate; } if (!fp->nr_rates) { usb_audio_info(chip, "%u:%d: All rates were zero\n", fp->iface, fp->altsetting); return -EINVAL; } set_rate_table_min_max(fp); } else { /* continuous rates */ fp->rates = SNDRV_PCM_RATE_CONTINUOUS; fp->rate_min = combine_triple(&fmt[offset + 1]); fp->rate_max = combine_triple(&fmt[offset + 4]); } /* Jabra Evolve 65 headset */ if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { /* only 48kHz for playback while keeping 16kHz for capture */ if (fp->nr_rates != 1) return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); } return 0; } /* * Presonus Studio 1810c supports a limited set of sampling * rates per altsetting but reports the full set each time. * If we don't filter out the unsupported rates and attempt * to configure the card, it will hang refusing to do any * further audio I/O until a hard reset is performed. * * The list of supported rates per altsetting (set of available * I/O channels) is described in the owner's manual, section 2.2. */ static bool s1810c_valid_sample_rate(struct audioformat *fp, unsigned int rate) { switch (fp->altsetting) { case 1: /* All ADAT ports available */ return rate <= 48000; case 2: /* Half of ADAT ports available */ return (rate == 88200 || rate == 96000); case 3: /* Analog I/O only (no S/PDIF nor ADAT) */ return rate >= 176400; default: return false; } return false; } /* * Many Focusrite devices supports a limited set of sampling rates per * altsetting. Maximum rate is exposed in the last 4 bytes of Format Type * descriptor which has a non-standard bLength = 10. */ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, struct audioformat *fp, unsigned int rate) { struct usb_interface *iface; struct usb_host_interface *alts; unsigned char *fmt; unsigned int max_rate; iface = usb_ifnum_to_if(chip->dev, fp->iface); if (!iface) return true; alts = &iface->altsetting[fp->altset_idx]; fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE); if (!fmt) return true; if (fmt[0] == 10) { /* bLength */ max_rate = combine_quad(&fmt[6]); /* Validate max rate */ if (max_rate != 48000 && max_rate != 96000 && max_rate != 192000 && max_rate != 384000) { usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", fp->iface, fp->altsetting, max_rate); return true; } return rate <= max_rate; } return true; } /* * Helper function to walk the array of sample rate triplets reported by * the device. 
The problem is that we need to parse whole array first to * get to know how many sample rates we have to expect. * Then fp->rate_table can be allocated and filled. */ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip, struct audioformat *fp, int nr_triplets, const unsigned char *data) { int i, nr_rates = 0; for (i = 0; i < nr_triplets; i++) { int min = combine_quad(&data[2 + 12 * i]); int max = combine_quad(&data[6 + 12 * i]); int res = combine_quad(&data[10 + 12 * i]); unsigned int rate; if ((max < 0) || (min < 0) || (res < 0) || (max < min)) continue; /* * for ranges with res == 1, we announce a continuous sample * rate range, and this function should return 0 for no further * parsing. */ if (res == 1) { fp->rate_min = min; fp->rate_max = max; fp->rates = SNDRV_PCM_RATE_CONTINUOUS; return 0; } for (rate = min; rate <= max; rate += res) { /* Filter out invalid rates on Presonus Studio 1810c */ if (chip->usb_id == USB_ID(0x194f, 0x010c) && !s1810c_valid_sample_rate(fp, rate)) goto skip_rate; /* Filter out invalid rates on Presonus Studio 1824c */ if (chip->usb_id == USB_ID(0x194f, 0x010d) && !s1810c_valid_sample_rate(fp, rate)) goto skip_rate; /* Filter out invalid rates on Focusrite devices */ if (USB_ID_VENDOR(chip->usb_id) == 0x1235 && !focusrite_valid_sample_rate(chip, fp, rate)) goto skip_rate; if (fp->rate_table) fp->rate_table[nr_rates] = rate; nr_rates++; if (nr_rates >= MAX_NR_RATES) { usb_audio_err(chip, "invalid uac2 rates\n"); break; } skip_rate: /* avoid endless loop */ if (res == 0) break; } } return nr_rates; } /* Line6 Helix series and the Rode Rodecaster Pro don't support the * UAC2_CS_RANGE usb function call. Return a static table of known * clock rates. */ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip, struct audioformat *fp) { switch (chip->usb_id) { case USB_ID(0x0e41, 0x4241): /* Line6 Helix */ case USB_ID(0x0e41, 0x4242): /* Line6 Helix Rack */ case USB_ID(0x0e41, 0x4244): /* Line6 Helix LT */ case USB_ID(0x0e41, 0x4246): /* Line6 HX-Stomp */ case USB_ID(0x0e41, 0x4253): /* Line6 HX-Stomp XL */ case USB_ID(0x0e41, 0x4247): /* Line6 Pod Go */ case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */ case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */ case USB_ID(0x0e41, 0x424a): /* Line6 Helix LT >= fw 2.82 */ case USB_ID(0x0e41, 0x424b): /* Line6 Pod Go */ case USB_ID(0x19f7, 0x0011): /* Rode Rodecaster Pro */ return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); } return -ENODEV; } /* check whether the given altsetting is supported for the already set rate */ static bool check_valid_altsetting_v2v3(struct snd_usb_audio *chip, int iface, int altsetting) { struct usb_device *dev = chip->dev; __le64 raw_data = 0; u64 data; int err; /* we assume 64bit is enough for any altsettings */ if (snd_BUG_ON(altsetting >= 64 - 8)) return false; err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, UAC2_AS_VAL_ALT_SETTINGS << 8, iface, &raw_data, sizeof(raw_data)); if (err < 0) return false; data = le64_to_cpu(raw_data); /* first byte contains the bitmap size */ if ((data & 0xff) * 8 < altsetting) return false; if (data & (1ULL << (altsetting + 8))) return true; return false; } /* * Validate each sample rate with the altsetting * Rebuild the rate table if only partial values are valid */ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, struct audioformat *fp, int clock) { struct usb_device *dev = chip->dev; struct usb_host_interface *alts; 
unsigned int *table; unsigned int nr_rates; int i, err; u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the * known devices. */ if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); if (!alts) return 0; if (fp->protocol == UAC_VERSION_3) { struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); bmControls = le32_to_cpu(as->bmControls); } else { struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); bmControls = as->bmControls; } if (!uac_v2v3_control_is_readable(bmControls, UAC2_AS_VAL_ALT_SETTINGS)) return 0; table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; /* clear the interface altsetting at first */ usb_set_interface(dev, fp->iface, 0); nr_rates = 0; for (i = 0; i < fp->nr_rates; i++) { err = snd_usb_set_sample_rate_v2v3(chip, fp, clock, fp->rate_table[i]); if (err < 0) continue; if (check_valid_altsetting_v2v3(chip, fp->iface, fp->altsetting)) table[nr_rates++] = fp->rate_table[i]; } if (!nr_rates) { usb_audio_dbg(chip, "No valid sample rate available for %d:%d, assuming a firmware bug\n", fp->iface, fp->altsetting); nr_rates = fp->nr_rates; /* continue as is */ } if (fp->nr_rates == nr_rates) { kfree(table); return 0; } kfree(fp->rate_table); fp->rate_table = table; fp->nr_rates = nr_rates; return 0; } /* * parse the format descriptor and stores the possible sample rates * on the audioformat table (audio class v2 and v3). */ static int parse_audio_format_rates_v2v3(struct snd_usb_audio *chip, struct audioformat *fp) { struct usb_device *dev = chip->dev; unsigned char tmp[2], *data; int nr_triplets, data_size, ret = 0, ret_l6; int clock = snd_usb_clock_find_source(chip, fp, false); struct usb_host_interface *ctrl_intf; ctrl_intf = snd_usb_find_ctrl_interface(chip, fp->iface); if (clock < 0) { dev_err(&dev->dev, "%s(): unable to find clock source (clock %d)\n", __func__, clock); goto err; } /* get the number of sample rates first by only fetching 2 bytes */ ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_RANGE, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, UAC2_CS_CONTROL_SAM_FREQ << 8, snd_usb_ctrl_intf(ctrl_intf) | (clock << 8), tmp, sizeof(tmp)); if (ret < 0) { /* line6 helix devices don't support UAC2_CS_CONTROL_SAM_FREQ call */ ret_l6 = line6_parse_audio_format_rates_quirk(chip, fp); if (ret_l6 == -ENODEV) { /* no line6 device found continue showing the error */ dev_err(&dev->dev, "%s(): unable to retrieve number of sample rates (clock %d)\n", __func__, clock); goto err; } if (ret_l6 == 0) { dev_info(&dev->dev, "%s(): unable to retrieve number of sample rates: set it to a predefined value (clock %d).\n", __func__, clock); return 0; } ret = ret_l6; goto err; } nr_triplets = (tmp[1] << 8) | tmp[0]; data_size = 2 + 12 * nr_triplets; data = kzalloc(data_size, GFP_KERNEL); if (!data) { ret = -ENOMEM; goto err; } /* now get the full information */ ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_RANGE, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, UAC2_CS_CONTROL_SAM_FREQ << 8, snd_usb_ctrl_intf(ctrl_intf) | (clock << 8), data, data_size); if (ret < 0) { dev_err(&dev->dev, "%s(): unable to retrieve sample rate range (clock %d)\n", __func__, clock); ret = -EINVAL; goto err_free; } 
/* Call the triplet parser, and make sure fp->rate_table is NULL. * We just use the return value to know how many sample rates we * will have to deal with. */ kfree(fp->rate_table); fp->rate_table = NULL; fp->nr_rates = parse_uac2_sample_rate_range(chip, fp, nr_triplets, data); if (fp->nr_rates == 0) { /* SNDRV_PCM_RATE_CONTINUOUS */ ret = 0; goto err_free; } fp->rate_table = kmalloc_array(fp->nr_rates, sizeof(int), GFP_KERNEL); if (!fp->rate_table) { ret = -ENOMEM; goto err_free; } /* Call the triplet parser again, but this time, fp->rate_table is * allocated, so the rates will be stored */ parse_uac2_sample_rate_range(chip, fp, nr_triplets, data); ret = validate_sample_rate_table_v2v3(chip, fp, clock); if (ret < 0) goto err_free; set_rate_table_min_max(fp); err_free: kfree(data); err: return ret; } /* * parse the format type I and III descriptors */ static int parse_audio_format_i(struct snd_usb_audio *chip, struct audioformat *fp, u64 format, void *_fmt) { snd_pcm_format_t pcm_format; unsigned int fmt_type; int ret; switch (fp->protocol) { default: case UAC_VERSION_1: case UAC_VERSION_2: { struct uac_format_type_i_continuous_descriptor *fmt = _fmt; fmt_type = fmt->bFormatType; break; } case UAC_VERSION_3: { /* fp->fmt_type is already set in this case */ fmt_type = fp->fmt_type; break; } } if (fmt_type == UAC_FORMAT_TYPE_III) { /* FIXME: the format type is really IECxxx * but we give normal PCM format to get the existing * apps working... */ switch (chip->usb_id) { case USB_ID(0x0763, 0x2003): /* M-Audio Audiophile USB */ if (chip->setup == 0x00 && fp->altsetting == 6) pcm_format = SNDRV_PCM_FORMAT_S16_BE; else pcm_format = SNDRV_PCM_FORMAT_S16_LE; break; default: pcm_format = SNDRV_PCM_FORMAT_S16_LE; } fp->formats = pcm_format_to_bits(pcm_format); } else { fp->formats = parse_audio_format_i_type(chip, fp, format, _fmt); if (!fp->formats) return -EINVAL; } /* gather possible sample rates */ /* audio class v1 reports possible sample rates as part of the * proprietary class specific descriptor. * audio class v2 uses class specific EP0 range requests for that. */ switch (fp->protocol) { default: case UAC_VERSION_1: { struct uac_format_type_i_continuous_descriptor *fmt = _fmt; fp->channels = fmt->bNrChannels; ret = parse_audio_format_rates_v1(chip, fp, (unsigned char *) fmt, 7); break; } case UAC_VERSION_2: case UAC_VERSION_3: { /* fp->channels is already set in this case */ ret = parse_audio_format_rates_v2v3(chip, fp); break; } } if (fp->channels < 1) { usb_audio_err(chip, "%u:%d : invalid channels %d\n", fp->iface, fp->altsetting, fp->channels); return -EINVAL; } return ret; } /* * parse the format type II descriptor */ static int parse_audio_format_ii(struct snd_usb_audio *chip, struct audioformat *fp, u64 format, void *_fmt) { int brate, framesize, ret; switch (format) { case UAC_FORMAT_TYPE_II_AC3: /* FIXME: there is no AC3 format defined yet */ // fp->formats = SNDRV_PCM_FMTBIT_AC3; fp->formats = SNDRV_PCM_FMTBIT_U8; /* temporary hack to receive byte streams */ break; case UAC_FORMAT_TYPE_II_MPEG: fp->formats = SNDRV_PCM_FMTBIT_MPEG; break; default: usb_audio_info(chip, "%u:%d : unknown format tag %#llx is detected. 
processed as MPEG.\n", fp->iface, fp->altsetting, format); fp->formats = SNDRV_PCM_FMTBIT_MPEG; break; } fp->channels = 1; switch (fp->protocol) { default: case UAC_VERSION_1: { struct uac_format_type_ii_discrete_descriptor *fmt = _fmt; brate = le16_to_cpu(fmt->wMaxBitRate); framesize = le16_to_cpu(fmt->wSamplesPerFrame); usb_audio_info(chip, "found format II with max.bitrate = %d, frame size=%d\n", brate, framesize); fp->frame_size = framesize; ret = parse_audio_format_rates_v1(chip, fp, _fmt, 8); /* fmt[8..] sample rates */ break; } case UAC_VERSION_2: { struct uac_format_type_ii_ext_descriptor *fmt = _fmt; brate = le16_to_cpu(fmt->wMaxBitRate); framesize = le16_to_cpu(fmt->wSamplesPerFrame); usb_audio_info(chip, "found format II with max.bitrate = %d, frame size=%d\n", brate, framesize); fp->frame_size = framesize; ret = parse_audio_format_rates_v2v3(chip, fp); break; } } return ret; } int snd_usb_parse_audio_format(struct snd_usb_audio *chip, struct audioformat *fp, u64 format, struct uac_format_type_i_continuous_descriptor *fmt, int stream) { int err; switch (fmt->bFormatType) { case UAC_FORMAT_TYPE_I: case UAC_FORMAT_TYPE_III: err = parse_audio_format_i(chip, fp, format, fmt); break; case UAC_FORMAT_TYPE_II: err = parse_audio_format_ii(chip, fp, format, fmt); break; default: usb_audio_info(chip, "%u:%d : format type %d is not supported yet\n", fp->iface, fp->altsetting, fmt->bFormatType); return -ENOTSUPP; } fp->fmt_type = fmt->bFormatType; if (err < 0) return err; #if 1 /* FIXME: temporary hack for extigy/audigy 2 nx/zs */ /* extigy apparently supports sample rates other than 48k * but not in ordinary way. so we enable only 48k atm. */ if (chip->usb_id == USB_ID(0x041e, 0x3000) || chip->usb_id == USB_ID(0x041e, 0x3020) || chip->usb_id == USB_ID(0x041e, 0x3061)) { if (fmt->bFormatType == UAC_FORMAT_TYPE_I && fp->rates != SNDRV_PCM_RATE_48000 && fp->rates != SNDRV_PCM_RATE_96000) return -ENOTSUPP; } #endif return 0; } int snd_usb_parse_audio_format_v3(struct snd_usb_audio *chip, struct audioformat *fp, struct uac3_as_header_descriptor *as, int stream) { u64 format = le64_to_cpu(as->bmFormats); int err; /* * Type I format bits are D0..D6 * This test works because type IV is not supported */ if (format & 0x7f) fp->fmt_type = UAC_FORMAT_TYPE_I; else fp->fmt_type = UAC_FORMAT_TYPE_III; err = parse_audio_format_i(chip, fp, format, as); if (err < 0) return err; return 0; }
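The rate parsing above (parse_audio_format_rates_v1) reads the trailing bytes of a UAC1 Type-I format descriptor: the byte at the given offset holds the number of discrete rates (bSamFreqType), followed either by that many 3-byte sample-rate values or, when it is zero, by a lower/upper bound pair. The standalone sketch below only illustrates that layout; it is not part of the driver, it assumes combine_triple() decodes a 3-byte little-endian value (as its use above implies), and the descriptor bytes and the dump_rates() helper are made up for illustration.

/*
 * Illustrative userspace sketch (not driver code): decode the sample-rate
 * section of a UAC1 Type-I format descriptor the same way
 * parse_audio_format_rates_v1() does.  combine_triple() is assumed to read
 * a 3-byte little-endian value; the descriptor bytes are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned int combine_triple(const uint8_t *p)
{
	return p[0] | (p[1] << 8) | (p[2] << 16);	/* 3-byte little endian */
}

static void dump_rates(const uint8_t *fmt, int offset)
{
	int nr_rates = fmt[offset];

	if (nr_rates) {
		/* discrete rates: nr_rates triplets follow */
		for (int r = 0, idx = offset + 1; r < nr_rates; r++, idx += 3)
			printf("discrete rate: %u Hz\n", combine_triple(&fmt[idx]));
	} else {
		/* continuous range: lower and upper bound triplets */
		printf("continuous range: %u..%u Hz\n",
		       combine_triple(&fmt[offset + 1]),
		       combine_triple(&fmt[offset + 4]));
	}
}

int main(void)
{
	/* hypothetical Type-I descriptor: 2 discrete rates, 44100 and 48000 Hz */
	static const uint8_t fmt[] = {
		0x0e, 0x24, 0x02, 0x01, 0x02, 0x02, 0x10,	/* bLength..bBitResolution */
		0x02,						/* bSamFreqType = 2 */
		0x44, 0xac, 0x00,				/* 44100 */
		0x80, 0xbb, 0x00,				/* 48000 */
	};

	dump_rates(fmt, 7);	/* offset 7 for Type I, as in the parser above */
	return 0;
}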
/* IPv6-specific defines for netfilter.
 * (C)1998 Rusty Russell -- This code is GPL.
 * (C)1999 David Jeffery
 *   this header was blatantly ripped from netfilter_ipv4.h
 *   it's amazing what adding a bunch of 6s can do =8^)
 */
#ifndef __LINUX_IP6_NETFILTER_H
#define __LINUX_IP6_NETFILTER_H

#include <uapi/linux/netfilter_ipv6.h>
#include <net/tcp.h>

/* Check for an extension */
static inline int nf_ip6_ext_hdr(u8 nexthdr)
{
	return (nexthdr == IPPROTO_HOPOPTS) ||
	       (nexthdr == IPPROTO_ROUTING) ||
	       (nexthdr == IPPROTO_FRAGMENT) ||
	       (nexthdr == IPPROTO_ESP) ||
	       (nexthdr == IPPROTO_AH) ||
	       (nexthdr == IPPROTO_NONE) ||
	       (nexthdr == IPPROTO_DSTOPTS);
}

/* Extra routing may needed on local out, as the QUEUE target never returns
 * control to the table.
 */
struct ip6_rt_info {
	struct in6_addr daddr;
	struct in6_addr saddr;
	u_int32_t mark;
};

struct nf_queue_entry;
struct nf_bridge_frag_data;

/*
 * Hook functions for ipv6 to allow xt_* modules to be built-in even
 * if IPv6 is a module.
 */
struct nf_ipv6_ops {
#if IS_MODULE(CONFIG_IPV6)
	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
			const struct net_device *dev, int strict);
	int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
	int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
			     const struct in6_addr *daddr, unsigned int srcprefs,
			     struct in6_addr *saddr);
	int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
		     bool strict);
	u32 (*cookie_init_sequence)(const struct ipv6hdr *iph,
				    const struct tcphdr *th, u16 *mssp);
	int (*cookie_v6_check)(const struct ipv6hdr *iph,
			       const struct tcphdr *th);
#endif
	void (*route_input)(struct sk_buff *skb);
	int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
			int (*output)(struct net *, struct sock *, struct sk_buff *));
	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
#if IS_MODULE(CONFIG_IPV6)
	int (*br_fragment)(struct net *net, struct sock *sk,
			   struct sk_buff *skb,
			   struct nf_bridge_frag_data *data,
			   int (*output)(struct net *, struct sock *sk,
					 const struct nf_bridge_frag_data *data,
					 struct sk_buff *));
#endif
};

#ifdef CONFIG_NETFILTER
#include <net/addrconf.h>

extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void)
{
	return rcu_dereference(nf_ipv6_ops);
}

static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
				   const struct net_device *dev, int strict)
{
#if IS_MODULE(CONFIG_IPV6)
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return 1;

	return v6_ops->chk_addr(net, addr, dev, strict);
#elif IS_BUILTIN(CONFIG_IPV6)
	return ipv6_chk_addr(net, addr, dev, strict);
#else
	return 1;
#endif
}

int __nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
		   bool strict);

static inline int
nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); if (v6ops) return v6ops->route(net, dst, fl, strict); return -EHOSTUNREACH; #endif #if IS_BUILTIN(CONFIG_IPV6) return __nf_ip6_route(net, dst, fl, strict); #else return -EHOSTUNREACH; #endif } #include <net/netfilter/ipv6/nf_defrag_ipv6.h> int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)); static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (!v6_ops) return 1; return v6_ops->br_fragment(net, sk, skb, data, output); #elif IS_BUILTIN(CONFIG_IPV6) return br_ip6_fragment(net, sk, skb, data, output); #else return 1; #endif } int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (!v6_ops) return -EHOSTUNREACH; return v6_ops->route_me_harder(net, sk, skb); #elif IS_BUILTIN(CONFIG_IPV6) return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; #endif } static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) return v6_ops->cookie_init_sequence(iph, th, mssp); #elif IS_BUILTIN(CONFIG_IPV6) return __cookie_v6_init_sequence(iph, th, mssp); #endif #endif return 0; } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) return v6_ops->cookie_v6_check(iph, th); #elif IS_BUILTIN(CONFIG_IPV6) return __cookie_v6_check(iph, th); #endif #endif return 0; } __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen); int ipv6_netfilter_init(void); void ipv6_netfilter_fini(void); #else /* CONFIG_NETFILTER */ static inline int ipv6_netfilter_init(void) { return 0; } static inline void ipv6_netfilter_fini(void) { return; } static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) { return NULL; } #endif /* CONFIG_NETFILTER */ #endif /*__LINUX_IP6_NETFILTER_H*/
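Most wrappers in this header follow one pattern: when IPv6 is built as a module, dispatch through nf_ipv6_ops (which may not be registered yet, hence the NULL check and a safe fallback return); when IPv6 is built in, call the underlying function directly; otherwise return the fallback. The self-contained sketch below models only that dispatch-with-fallback idea in plain C; demo_ipv6_ops, demo_chk_addr() and the return values are hypothetical stand-ins, not kernel API.

/*
 * Simplified, self-contained model (hypothetical names, not kernel code) of
 * the dispatch pattern above: callers go through an ops table that may not
 * be registered yet, so every wrapper needs a safe fallback.
 */
#include <stdio.h>
#include <stddef.h>

struct demo_ipv6_ops {
	int (*chk_addr)(int addr);	/* stand-in for the real hook */
};

static const struct demo_ipv6_ops *demo_ops;	/* NULL until the "module" registers */

static int demo_chk_addr_impl(int addr)
{
	return addr != 0;	/* pretend non-zero addresses are local */
}

/* wrapper with fallback, mirroring nf_ipv6_chk_addr(): default to 1 if no ops */
static int demo_chk_addr(int addr)
{
	if (!demo_ops)
		return 1;
	return demo_ops->chk_addr(addr);
}

int main(void)
{
	static const struct demo_ipv6_ops ops = { .chk_addr = demo_chk_addr_impl };

	printf("before register: %d\n", demo_chk_addr(0));	/* fallback: 1 */
	demo_ops = &ops;					/* "module" loaded */
	printf("after register:  %d\n", demo_chk_addr(0));	/* real hook: 0 */
	return 0;
}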
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/exec.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */
/*
 * #!-checking implemented by tytso.
 */
/*
 * Demand-loading implemented 01.12.91 - no need to read anything but
 * the header into memory. The inode of the executable is put into
 * "current->executable", and page faults do the actual loading. Clean.
* * Once more I can proudly say that linux stood up to being changed: it * was less than 2 hours work to get demand-loading completely implemented. * * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary * formats. */ #include <linux/kernel_read_file.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/mm.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/swap.h> #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/signal.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/task.h> #include <linux/pagemap.h> #include <linux/perf_event.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> #include <linux/personality.h> #include <linux/binfmts.h> #include <linux/utsname.h> #include <linux/pid_namespace.h> #include <linux/module.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> #include <linux/kmod.h> #include <linux/fsnotify.h> #include <linux/fs_struct.h> #include <linux/oom.h> #include <linux/compat.h> #include <linux/vmalloc.h> #include <linux/io_uring.h> #include <linux/syscall_user_dispatch.h> #include <linux/coredump.h> #include <linux/time_namespace.h> #include <linux/user_events.h> #include <linux/rseq.h> #include <linux/ksm.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/tlb.h> #include <trace/events/task.h> #include "internal.h" #include <trace/events/sched.h> static int bprm_creds_from_file(struct linux_binprm *bprm); int suid_dumpable = 0; static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); void __register_binfmt(struct linux_binfmt * fmt, int insert) { write_lock(&binfmt_lock); insert ? list_add(&fmt->lh, &formats) : list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); } EXPORT_SYMBOL(__register_binfmt); void unregister_binfmt(struct linux_binfmt * fmt) { write_lock(&binfmt_lock); list_del(&fmt->lh); write_unlock(&binfmt_lock); } EXPORT_SYMBOL(unregister_binfmt); static inline void put_binfmt(struct linux_binfmt * fmt) { module_put(fmt->module); } bool path_noexec(const struct path *path) { return (path->mnt->mnt_flags & MNT_NOEXEC) || (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); } #ifdef CONFIG_USELIB /* * Note that a shared library must be both readable and executable due to * security reasons. * * Also note that we take the address to load from the file itself. */ SYSCALL_DEFINE1(uselib, const char __user *, library) { struct linux_binfmt *fmt; struct file *file; struct filename *tmp = getname(library); int error = PTR_ERR(tmp); static const struct open_flags uselib_flags = { .open_flag = O_LARGEFILE | O_RDONLY, .acc_mode = MAY_READ | MAY_EXEC, .intent = LOOKUP_OPEN, .lookup_flags = LOOKUP_FOLLOW, }; if (IS_ERR(tmp)) goto out; file = do_filp_open(AT_FDCWD, tmp, &uselib_flags); putname(tmp); error = PTR_ERR(file); if (IS_ERR(file)) goto out; /* * Check do_open_execat() for an explanation. 
*/ error = -EACCES; if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || path_noexec(&file->f_path)) goto exit; error = -ENOEXEC; read_lock(&binfmt_lock); list_for_each_entry(fmt, &formats, lh) { if (!fmt->load_shlib) continue; if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); error = fmt->load_shlib(file); read_lock(&binfmt_lock); put_binfmt(fmt); if (error != -ENOEXEC) break; } read_unlock(&binfmt_lock); exit: fput(file); out: return error; } #endif /* #ifdef CONFIG_USELIB */ #ifdef CONFIG_MMU /* * The nascent bprm->mm is not visible until exec_mmap() but it can * use a lot of memory, account these pages in current->mm temporary * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we * change the counter back via acct_arg_size(0). */ static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { struct mm_struct *mm = current->mm; long diff = (long)(pages - bprm->vma_pages); if (!mm || !diff) return; bprm->vma_pages = pages; add_mm_counter(mm, MM_ANONPAGES, diff); } static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; struct vm_area_struct *vma = bprm->vma; struct mm_struct *mm = bprm->mm; int ret; /* * Avoid relying on expanding the stack down in GUP (which * does not work for STACK_GROWSUP anyway), and just do it * ahead of time. */ if (!mmap_read_lock_maybe_expand(mm, vma, pos, write)) return NULL; /* * We are doing an exec(). 'current' is the process * doing the exec and 'mm' is the new process's mm. */ ret = get_user_pages_remote(mm, pos, 1, write ? FOLL_WRITE : 0, &page, NULL); mmap_read_unlock(mm); if (ret <= 0) return NULL; if (write) acct_arg_size(bprm, vma_pages(vma)); return page; } static void put_arg_page(struct page *page) { put_page(page); } static void free_arg_pages(struct linux_binprm *bprm) { } static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, struct page *page) { flush_cache_page(bprm->vma, pos, page_to_pfn(page)); } static int __bprm_mm_init(struct linux_binprm *bprm) { int err; struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; vma_set_anonymous(vma); if (mmap_write_lock_killable(mm)) { err = -EINTR; goto err_free; } /* * Need to be called with mmap write lock * held, to avoid race with ksmd. */ err = ksm_execve(mm); if (err) goto err_ksm; /* * Place the stack at the largest stack address the architecture * supports. Later, we'll move this to an appropriate place. We don't * use STACK_TOP because that can depend on attributes which aren't * configured yet. 
*/ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); err = insert_vm_struct(mm, vma); if (err) goto err; mm->stack_vm = mm->total_vm = 1; mmap_write_unlock(mm); bprm->p = vma->vm_end - sizeof(void *); return 0; err: ksm_exit(mm); err_ksm: mmap_write_unlock(mm); err_free: bprm->vma = NULL; vm_area_free(vma); return err; } static bool valid_arg_len(struct linux_binprm *bprm, long len) { return len <= MAX_ARG_STRLEN; } #else static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { } static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; page = bprm->page[pos / PAGE_SIZE]; if (!page && write) { page = alloc_page(GFP_HIGHUSER|__GFP_ZERO); if (!page) return NULL; bprm->page[pos / PAGE_SIZE] = page; } return page; } static void put_arg_page(struct page *page) { } static void free_arg_page(struct linux_binprm *bprm, int i) { if (bprm->page[i]) { __free_page(bprm->page[i]); bprm->page[i] = NULL; } } static void free_arg_pages(struct linux_binprm *bprm) { int i; for (i = 0; i < MAX_ARG_PAGES; i++) free_arg_page(bprm, i); } static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, struct page *page) { } static int __bprm_mm_init(struct linux_binprm *bprm) { bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *); return 0; } static bool valid_arg_len(struct linux_binprm *bprm, long len) { return len <= bprm->p; } #endif /* CONFIG_MMU */ /* * Create a new mm_struct and populate it with a temporary stack * vm_area_struct. We don't have enough context at this point to set the stack * flags, permissions, and offset, so we use temporary values. We'll update * them later in setup_arg_pages(). */ static int bprm_mm_init(struct linux_binprm *bprm) { int err; struct mm_struct *mm = NULL; bprm->mm = mm = mm_alloc(); err = -ENOMEM; if (!mm) goto err; /* Save current stack limit for all calculations made during exec. */ task_lock(current->group_leader); bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK]; task_unlock(current->group_leader); err = __bprm_mm_init(bprm); if (err) goto err; return 0; err: if (mm) { bprm->mm = NULL; mmdrop(mm); } return err; } struct user_arg_ptr { #ifdef CONFIG_COMPAT bool is_compat; #endif union { const char __user *const __user *native; #ifdef CONFIG_COMPAT const compat_uptr_t __user *compat; #endif } ptr; }; static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) { const char __user *native; #ifdef CONFIG_COMPAT if (unlikely(argv.is_compat)) { compat_uptr_t compat; if (get_user(compat, argv.ptr.compat + nr)) return ERR_PTR(-EFAULT); return compat_ptr(compat); } #endif if (get_user(native, argv.ptr.native + nr)) return ERR_PTR(-EFAULT); return native; } /* * count() counts the number of strings in array ARGV. 
*/ static int count(struct user_arg_ptr argv, int max) { int i = 0; if (argv.ptr.native != NULL) { for (;;) { const char __user *p = get_user_arg_ptr(argv, i); if (!p) break; if (IS_ERR(p)) return -EFAULT; if (i >= max) return -E2BIG; ++i; if (fatal_signal_pending(current)) return -ERESTARTNOHAND; cond_resched(); } } return i; } static int count_strings_kernel(const char *const *argv) { int i; if (!argv) return 0; for (i = 0; argv[i]; ++i) { if (i >= MAX_ARG_STRINGS) return -E2BIG; if (fatal_signal_pending(current)) return -ERESTARTNOHAND; cond_resched(); } return i; } static inline int bprm_set_stack_limit(struct linux_binprm *bprm, unsigned long limit) { #ifdef CONFIG_MMU /* Avoid a pathological bprm->p. */ if (bprm->p < limit) return -E2BIG; bprm->argmin = bprm->p - limit; #endif return 0; } static inline bool bprm_hit_stack_limit(struct linux_binprm *bprm) { #ifdef CONFIG_MMU return bprm->p < bprm->argmin; #else return false; #endif } /* * Calculate bprm->argmin from: * - _STK_LIM * - ARG_MAX * - bprm->rlim_stack.rlim_cur * - bprm->argc * - bprm->envc * - bprm->p */ static int bprm_stack_limits(struct linux_binprm *bprm) { unsigned long limit, ptr_size; /* * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM * (whichever is smaller) for the argv+env strings. * This ensures that: * - the remaining binfmt code will not run out of stack space, * - the program will have a reasonable amount of stack left * to work from. */ limit = _STK_LIM / 4 * 3; limit = min(limit, bprm->rlim_stack.rlim_cur / 4); /* * We've historically supported up to 32 pages (ARG_MAX) * of argument strings even with small stacks */ limit = max_t(unsigned long, limit, ARG_MAX); /* Reject totally pathological counts. */ if (bprm->argc < 0 || bprm->envc < 0) return -E2BIG; /* * We must account for the size of all the argv and envp pointers to * the argv and envp strings, since they will also take up space in * the stack. They aren't stored until much later when we can't * signal to the parent that the child has run out of stack space. * Instead, calculate it here so it's possible to fail gracefully. * * In the case of argc = 0, make sure there is space for adding a * empty string (which will bump argc to 1), to ensure confused * userspace programs don't start processing from argv[1], thinking * argc can never be 0, to keep them from walking envp by accident. * See do_execveat_common(). */ if (check_add_overflow(max(bprm->argc, 1), bprm->envc, &ptr_size) || check_mul_overflow(ptr_size, sizeof(void *), &ptr_size)) return -E2BIG; if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; return bprm_set_stack_limit(bprm, limit); } /* * 'copy_strings()' copies argument/environment strings from the old * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ static int copy_strings(int argc, struct user_arg_ptr argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; char *kaddr = NULL; unsigned long kpos = 0; int ret; while (argc-- > 0) { const char __user *str; int len; unsigned long pos; ret = -EFAULT; str = get_user_arg_ptr(argv, argc); if (IS_ERR(str)) goto out; len = strnlen_user(str, MAX_ARG_STRLEN); if (!len) goto out; ret = -E2BIG; if (!valid_arg_len(bprm, len)) goto out; /* We're going to work our way backwards. 
*/ pos = bprm->p; str += len; bprm->p -= len; if (bprm_hit_stack_limit(bprm)) goto out; while (len > 0) { int offset, bytes_to_copy; if (fatal_signal_pending(current)) { ret = -ERESTARTNOHAND; goto out; } cond_resched(); offset = pos % PAGE_SIZE; if (offset == 0) offset = PAGE_SIZE; bytes_to_copy = offset; if (bytes_to_copy > len) bytes_to_copy = len; offset -= bytes_to_copy; pos -= bytes_to_copy; str -= bytes_to_copy; len -= bytes_to_copy; if (!kmapped_page || kpos != (pos & PAGE_MASK)) { struct page *page; page = get_arg_page(bprm, pos, 1); if (!page) { ret = -E2BIG; goto out; } if (kmapped_page) { flush_dcache_page(kmapped_page); kunmap_local(kaddr); put_arg_page(kmapped_page); } kmapped_page = page; kaddr = kmap_local_page(kmapped_page); kpos = pos & PAGE_MASK; flush_arg_page(bprm, kpos, kmapped_page); } if (copy_from_user(kaddr+offset, str, bytes_to_copy)) { ret = -EFAULT; goto out; } } } ret = 0; out: if (kmapped_page) { flush_dcache_page(kmapped_page); kunmap_local(kaddr); put_arg_page(kmapped_page); } return ret; } /* * Copy and argument/environment string from the kernel to the processes stack. */ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) { int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */; unsigned long pos = bprm->p; if (len == 0) return -EFAULT; if (!valid_arg_len(bprm, len)) return -E2BIG; /* We're going to work our way backwards. */ arg += len; bprm->p -= len; if (bprm_hit_stack_limit(bprm)) return -E2BIG; while (len > 0) { unsigned int bytes_to_copy = min_t(unsigned int, len, min_not_zero(offset_in_page(pos), PAGE_SIZE)); struct page *page; pos -= bytes_to_copy; arg -= bytes_to_copy; len -= bytes_to_copy; page = get_arg_page(bprm, pos, 1); if (!page) return -E2BIG; flush_arg_page(bprm, pos & PAGE_MASK, page); memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy); put_arg_page(page); } return 0; } EXPORT_SYMBOL(copy_string_kernel); static int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm) { while (argc-- > 0) { int ret = copy_string_kernel(argv[argc], bprm); if (ret < 0) return ret; if (fatal_signal_pending(current)) return -ERESTARTNOHAND; cond_resched(); } return 0; } #ifdef CONFIG_MMU /* * Finalizes the stack vm_area_struct. The flags and permissions are updated, * the stack is optionally relocated, and some extra space is added. */ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) { unsigned long ret; unsigned long stack_shift; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = bprm->vma; struct vm_area_struct *prev = NULL; unsigned long vm_flags; unsigned long stack_base; unsigned long stack_size; unsigned long stack_expand; unsigned long rlim_stack; struct mmu_gather tlb; struct vma_iterator vmi; #ifdef CONFIG_STACK_GROWSUP /* Limit stack size */ stack_base = bprm->rlim_stack.rlim_max; stack_base = calc_max_stack_size(stack_base); /* Add space for stack randomization. */ if (current->flags & PF_RANDOMIZE) stack_base += (STACK_RND_MASK << PAGE_SHIFT); /* Make sure we didn't let the argument array grow too large. 
*/ if (vma->vm_end - vma->vm_start > stack_base) return -ENOMEM; stack_base = PAGE_ALIGN(stack_top - stack_base); stack_shift = vma->vm_start - stack_base; mm->arg_start = bprm->p - stack_shift; bprm->p = vma->vm_end - stack_shift; #else stack_top = arch_align_stack(stack_top); stack_top = PAGE_ALIGN(stack_top); if (unlikely(stack_top < mmap_min_addr) || unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) return -ENOMEM; stack_shift = vma->vm_end - stack_top; bprm->p -= stack_shift; mm->arg_start = bprm->p; #endif bprm->exec -= stack_shift; if (mmap_write_lock_killable(mm)) return -EINTR; vm_flags = VM_STACK_FLAGS; /* * Adjust stack execute permissions; explicitly enable for * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone * (arch default) otherwise. */ if (unlikely(executable_stack == EXSTACK_ENABLE_X)) vm_flags |= VM_EXEC; else if (executable_stack == EXSTACK_DISABLE_X) vm_flags &= ~VM_EXEC; vm_flags |= mm->def_flags; vm_flags |= VM_STACK_INCOMPLETE_SETUP; vma_iter_init(&vmi, mm, vma->vm_start); tlb_gather_mmu(&tlb, mm); ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end, vm_flags); tlb_finish_mmu(&tlb); if (ret) goto out_unlock; BUG_ON(prev != vma); if (unlikely(vm_flags & VM_EXEC)) { pr_warn_once("process '%pD4' started with executable stack\n", bprm->file); } /* Move stack pages down in memory. */ if (stack_shift) { /* * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once * the binfmt code determines where the new stack should reside, we shift it to * its final location. */ ret = relocate_vma_down(vma, stack_shift); if (ret) goto out_unlock; } /* mprotect_fixup is overkill to remove the temporary stack flags */ vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP); stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ stack_size = vma->vm_end - vma->vm_start; /* * Align this down to a page boundary as expand_stack * will align it up. */ rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; stack_expand = min(rlim_stack, stack_size + stack_expand); #ifdef CONFIG_STACK_GROWSUP stack_base = vma->vm_start + stack_expand; #else stack_base = vma->vm_end - stack_expand; #endif current->mm->start_stack = bprm->p; ret = expand_stack_locked(vma, stack_base); if (ret) ret = -EFAULT; out_unlock: mmap_write_unlock(mm); return ret; } EXPORT_SYMBOL(setup_arg_pages); #else /* * Transfer the program arguments and environment from the holding pages * onto the stack. The provided stack pointer is adjusted accordingly. */ int transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *sp_location) { unsigned long index, stop, sp; int ret = 0; stop = bprm->p >> PAGE_SHIFT; sp = *sp_location; for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0; char *src = kmap_local_page(bprm->page[index]) + offset; sp -= PAGE_SIZE - offset; if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0) ret = -EFAULT; kunmap_local(src); if (ret) goto out; } bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: return ret; } EXPORT_SYMBOL(transfer_args_to_stack); #endif /* CONFIG_MMU */ /* * On success, caller must call do_close_execat() on the returned * struct file to close it. 
*/ static struct file *do_open_execat(int fd, struct filename *name, int flags) { int err; struct file *file __free(fput) = NULL; struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC, .intent = LOOKUP_OPEN, .lookup_flags = LOOKUP_FOLLOW, }; if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_EXECVE_CHECK)) != 0) return ERR_PTR(-EINVAL); if (flags & AT_SYMLINK_NOFOLLOW) open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW; if (flags & AT_EMPTY_PATH) open_exec_flags.lookup_flags |= LOOKUP_EMPTY; file = do_filp_open(fd, name, &open_exec_flags); if (IS_ERR(file)) return file; /* * In the past the regular type check was here. It moved to may_open() in * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is * an invariant that all non-regular files error out before we get here. */ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || path_noexec(&file->f_path)) return ERR_PTR(-EACCES); err = exe_file_deny_write_access(file); if (err) return ERR_PTR(err); return no_free_ptr(file); } /** * open_exec - Open a path name for execution * * @name: path name to open with the intent of executing it. * * Returns ERR_PTR on failure or allocated struct file on success. * * As this is a wrapper for the internal do_open_execat(), callers * must call exe_file_allow_write_access() before fput() on release. Also see * do_close_execat(). */ struct file *open_exec(const char *name) { struct filename *filename = getname_kernel(name); struct file *f = ERR_CAST(filename); if (!IS_ERR(filename)) { f = do_open_execat(AT_FDCWD, filename, 0); putname(filename); } return f; } EXPORT_SYMBOL(open_exec); #if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC) ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) { ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); if (res > 0) flush_icache_user_range(addr, addr + len); return res; } EXPORT_SYMBOL(read_code); #endif /* * Maps the mm_struct mm into the current task struct. * On success, this function returns with exec_update_lock * held for writing. */ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; int ret; /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; exec_mm_release(tsk, old_mm); ret = down_write_killable(&tsk->signal->exec_update_lock); if (ret) return ret; if (old_mm) { /* * If there is a pending fatal signal perhaps a signal * whose default action is to create a coredump get * out and die instead of going through with the exec. */ ret = mmap_read_lock_killable(old_mm); if (ret) { up_write(&tsk->signal->exec_update_lock); return ret; } } task_lock(tsk); membarrier_exec_mmap(mm); local_irq_disable(); active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; mm_init_cid(mm, tsk); /* * This prevents preemption while active_mm is being loaded and * it and mm are being updated, which could cause problems for * lazy tlb mm refcounting when these are updated by context * switches. Not all architectures can handle irqs off over * activate_mm yet. 
*/ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); activate_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); lru_gen_add_mm(mm); task_unlock(tsk); lru_gen_use_mm(mm); if (old_mm) { mmap_read_unlock(old_mm); BUG_ON(active_mm != old_mm); setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); mm_update_next_owner(old_mm); mmput(old_mm); return 0; } mmdrop_lazy_tlb(active_mm); return 0; } static int de_thread(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; struct sighand_struct *oldsighand = tsk->sighand; spinlock_t *lock = &oldsighand->siglock; if (thread_group_empty(tsk)) goto no_thread_group; /* * Kill all other threads in the thread group. */ spin_lock_irq(lock); if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) { /* * Another group action in progress, just * return so that the signal is processed. */ spin_unlock_irq(lock); return -EAGAIN; } sig->group_exec_task = tsk; sig->notify_count = zap_other_threads(tsk); if (!thread_group_leader(tsk)) sig->notify_count--; while (sig->notify_count) { __set_current_state(TASK_KILLABLE); spin_unlock_irq(lock); schedule(); if (__fatal_signal_pending(tsk)) goto killed; spin_lock_irq(lock); } spin_unlock_irq(lock); /* * At this point all other threads have exited, all we have to * do is to wait for the thread group leader to become inactive, * and to assume its PID: */ if (!thread_group_leader(tsk)) { struct task_struct *leader = tsk->group_leader; for (;;) { cgroup_threadgroup_change_begin(tsk); write_lock_irq(&tasklist_lock); /* * Do this under tasklist_lock to ensure that * exit_notify() can't miss ->group_exec_task */ sig->notify_count = -1; if (likely(leader->exit_state)) break; __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); cgroup_threadgroup_change_end(tsk); schedule(); if (__fatal_signal_pending(tsk)) goto killed; } /* * The only record we have of the real-time age of a * process, regardless of execs it's done, is start_time. * All the past CPU time is accumulated in signal_struct * from sister threads now dead. But in this non-leader * exec, nothing survives from the original leader thread, * whose birth marks the true age of this process now. * When we take on its identity by switching to its PID, we * also take its birthdate (always earlier than our own). */ tsk->start_time = leader->start_time; tsk->start_boottime = leader->start_boottime; BUG_ON(!same_thread_group(leader, tsk)); /* * An exec() starts a new thread group with the * TGID of the previous thread group. Rehash the * two threads with a switched PID, and release * the former thread group leader: */ /* Become a process group leader with the old leader's pid. * The old leader becomes a thread of the this thread group. */ exchange_tids(tsk, leader); transfer_pid(leader, tsk, PIDTYPE_TGID); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); list_replace_rcu(&leader->tasks, &tsk->tasks); list_replace_init(&leader->sibling, &tsk->sibling); tsk->group_leader = tsk; leader->group_leader = tsk; tsk->exit_signal = SIGCHLD; leader->exit_signal = -1; BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; /* * We are going to release_task()->ptrace_unlink() silently, * the tracer can sleep in do_wait(). EXIT_DEAD guarantees * the tracer won't block again waiting for this thread. 
*/ if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); cgroup_threadgroup_change_end(tsk); release_task(leader); } sig->group_exec_task = NULL; sig->notify_count = 0; no_thread_group: /* we have changed execution domain */ tsk->exit_signal = SIGCHLD; BUG_ON(!thread_group_leader(tsk)); return 0; killed: /* protects against exit_notify() and __exit_signal() */ read_lock(&tasklist_lock); sig->group_exec_task = NULL; sig->notify_count = 0; read_unlock(&tasklist_lock); return -EAGAIN; } /* * This function makes sure the current process has its own signal table, * so that flush_signal_handlers can later reset the handlers without * disturbing other processes. (Other processes might share the signal * table via the CLONE_SIGHAND option to clone().) */ static int unshare_sighand(struct task_struct *me) { struct sighand_struct *oldsighand = me->sighand; if (refcount_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; /* * This ->sighand is shared with the CLONE_SIGHAND * but not CLONE_THREAD task, switch to the new one. */ newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); if (!newsighand) return -ENOMEM; refcount_set(&newsighand->count, 1); write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); rcu_assign_pointer(me->sighand, newsighand); spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); __cleanup_sighand(oldsighand); } return 0; } /* * This is unlocked -- the string will always be NUL-terminated, but * may show overlapping contents if racing concurrent reads. */ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { size_t len = min(strlen(buf), sizeof(tsk->comm) - 1); trace_task_rename(tsk, buf); memcpy(tsk->comm, buf, len); memset(&tsk->comm[len], 0, sizeof(tsk->comm) - len); perf_event_comm(tsk, exec); } /* * Calling this is the point of no return. None of the failures will be * seen by userspace since either the process is already taking a fatal * signal (via de_thread() or coredump), or will have SEGV raised * (after exec_mmap()) by search_binary_handler (see below). */ int begin_new_exec(struct linux_binprm * bprm) { struct task_struct *me = current; int retval; /* Once we are committed compute the creds */ retval = bprm_creds_from_file(bprm); if (retval) return retval; /* * This tracepoint marks the point before flushing the old exec where * the current task is still unchanged, but errors are fatal (point of * no return). The later "sched_process_exec" tracepoint is called after * the current task has successfully switched to the new exec. */ trace_sched_prepare_exec(current, bprm); /* * Ensure all future errors are fatal. */ bprm->point_of_no_return = true; /* Make this the only thread in the thread group */ retval = de_thread(me); if (retval) goto out; /* see the comment in check_unsafe_exec() */ current->fs->in_exec = 0; /* * Cancel any io_uring activity across execve */ io_uring_task_cancel(); /* Ensure the files table is not shared. */ retval = unshare_files(); if (retval) goto out; /* * Must be called _before_ exec_mmap() as bprm->mm is * not visible until then. Doing it here also ensures * we don't race against replace_mm_exe_file(). 
*/ retval = set_mm_exe_file(bprm->mm, bprm->file); if (retval) goto out; /* If the binary is not readable then enforce mm->dumpable=0 */ would_dump(bprm, bprm->file); if (bprm->have_execfd) would_dump(bprm, bprm->executable); /* * Release all of the old mmap stuff */ acct_arg_size(bprm, 0); retval = exec_mmap(bprm->mm); if (retval) goto out; bprm->mm = NULL; retval = exec_task_namespaces(); if (retval) goto out_unlock; #ifdef CONFIG_POSIX_TIMERS spin_lock_irq(&me->sighand->siglock); posix_cpu_timers_exit(me); spin_unlock_irq(&me->sighand->siglock); exit_itimers(me); flush_itimer_signals(); #endif /* * Make the signal table private. */ retval = unshare_sighand(me); if (retval) goto out_unlock; me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); me->personality &= ~bprm->per_clear; clear_syscall_work_syscall_user_dispatch(me); /* * We have to apply CLOEXEC before we change whether the process is * dumpable (in setup_new_exec) to avoid a race with a process in userspace * trying to access the should-be-closed file descriptors of a process * undergoing exec(2). */ do_close_on_exec(me->files); if (bprm->secureexec) { /* Make sure parent cannot signal privileged process. */ me->pdeath_signal = 0; /* * For secureexec, reset the stack limit to sane default to * avoid bad behavior from the prior rlimits. This has to * happen before arch_pick_mmap_layout(), which examines * RLIMIT_STACK, but after the point of no return to avoid * needing to clean up the change on failure. */ if (bprm->rlim_stack.rlim_cur > _STK_LIM) bprm->rlim_stack.rlim_cur = _STK_LIM; } me->sas_ss_sp = me->sas_ss_size = 0; /* * Figure out dumpability. Note that this checking only of current * is wrong, but userspace depends on it. This should be testing * bprm->secureexec instead. */ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || !(uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))) set_dumpable(current->mm, suid_dumpable); else set_dumpable(current->mm, SUID_DUMP_USER); perf_event_exec(); /* * If the original filename was empty, alloc_bprm() made up a path * that will probably not be useful to admins running ps or similar. * Let's fix it up to be something reasonable. */ if (bprm->comm_from_dentry) { /* * Hold RCU lock to keep the name from being freed behind our back. * Use acquire semantics to make sure the terminating NUL from * __d_alloc() is seen. * * Note, we're deliberately sloppy here. We don't need to care about * detecting a concurrent rename and just want a terminated name. */ rcu_read_lock(); __set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name), true); rcu_read_unlock(); } else { __set_task_comm(me, kbasename(bprm->filename), true); } /* An exec changes our domain. We are no longer part of the thread group */ WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1); flush_signal_handlers(me, 0); retval = set_cred_ucounts(bprm->cred); if (retval < 0) goto out_unlock; /* * install the new credentials for this executable */ security_bprm_committing_creds(bprm); commit_creds(bprm->cred); bprm->cred = NULL; /* * Disable monitoring for regular users * when executing setuid binaries. Must * wait until new credentials are committed * by commit_creds() above */ if (get_dumpable(me->mm) != SUID_DUMP_USER) perf_event_exit_task(me); /* * cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's * credentials; any time after this it may be unlocked. 
*/ security_bprm_committed_creds(bprm); /* Pass the opened binary to the interpreter. */ if (bprm->have_execfd) { retval = get_unused_fd_flags(0); if (retval < 0) goto out_unlock; fd_install(retval, bprm->executable); bprm->executable = NULL; bprm->execfd = retval; } return 0; out_unlock: up_write(&me->signal->exec_update_lock); if (!bprm->cred) mutex_unlock(&me->signal->cred_guard_mutex); out: return retval; } EXPORT_SYMBOL(begin_new_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { struct inode *inode = file_inode(file); struct mnt_idmap *idmap = file_mnt_idmap(file); if (inode_permission(idmap, inode, MAY_READ) < 0) { struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; /* Ensure mm->user_ns contains the executable */ user_ns = old = bprm->mm->user_ns; while ((user_ns != &init_user_ns) && !privileged_wrt_inode_uidgid(user_ns, idmap, inode)) user_ns = user_ns->parent; if (old != user_ns) { bprm->mm->user_ns = get_user_ns(user_ns); put_user_ns(old); } } } EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { /* Setup things that can depend upon the personality */ struct task_struct *me = current; arch_pick_mmap_layout(me->mm, &bprm->rlim_stack); arch_setup_new_exec(); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on * some architectures like powerpc */ me->mm->task_size = TASK_SIZE; up_write(&me->signal->exec_update_lock); mutex_unlock(&me->signal->cred_guard_mutex); } EXPORT_SYMBOL(setup_new_exec); /* Runs immediately before start_thread() takes over. */ void finalize_exec(struct linux_binprm *bprm) { /* Store any stack rlimit changes before starting thread. */ task_lock(current->group_leader); current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack; task_unlock(current->group_leader); } EXPORT_SYMBOL(finalize_exec); /* * Prepare credentials and lock ->cred_guard_mutex. * setup_new_exec() commits the new creds and drops the lock. * Or, if exec fails before, free_bprm() should release ->cred * and unlock. */ static int prepare_bprm_creds(struct linux_binprm *bprm) { if (mutex_lock_interruptible(&current->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; mutex_unlock(&current->signal->cred_guard_mutex); return -ENOMEM; } /* Matches do_open_execat() */ static void do_close_execat(struct file *file) { if (!file) return; exe_file_allow_write_access(file); fput(file); } static void free_bprm(struct linux_binprm *bprm) { if (bprm->mm) { acct_arg_size(bprm, 0); mmput(bprm->mm); } free_arg_pages(bprm); if (bprm->cred) { /* in case exec fails before de_thread() succeeds */ current->fs->in_exec = 0; mutex_unlock(&current->signal->cred_guard_mutex); abort_creds(bprm->cred); } do_close_execat(bprm->file); if (bprm->executable) fput(bprm->executable); /* If a binfmt changed the interp, free it. 
*/ if (bprm->interp != bprm->filename) kfree(bprm->interp); kfree(bprm->fdpath); kfree(bprm); } static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags) { struct linux_binprm *bprm; struct file *file; int retval = -ENOMEM; file = do_open_execat(fd, filename, flags); if (IS_ERR(file)) return ERR_CAST(file); bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) { do_close_execat(file); return ERR_PTR(-ENOMEM); } bprm->file = file; if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; } else { if (filename->name[0] == '\0') { bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); bprm->comm_from_dentry = 1; } else { bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", fd, filename->name); } if (!bprm->fdpath) goto out_free; /* * Record that a name derived from an O_CLOEXEC fd will be * inaccessible after exec. This allows the code in exec to * choose to fail when the executable is not mmaped into the * interpreter and an open file descriptor is not passed to * the interpreter. This makes for a better user experience * than having the interpreter start and then immediately fail * when it finds the executable is inaccessible. */ if (get_close_on_exec(fd)) bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; bprm->filename = bprm->fdpath; } bprm->interp = bprm->filename; /* * At this point, security_file_open() has already been called (with * __FMODE_EXEC) and access control checks for AT_EXECVE_CHECK will * stop just after the security_bprm_creds_for_exec() call in * bprm_execve(). Indeed, the kernel should not try to parse the * content of the file with exec_binprm() nor change the calling * thread, which means that the following security functions will not * be called: * - security_bprm_check() * - security_bprm_creds_from_file() * - security_bprm_committing_creds() * - security_bprm_committed_creds() */ bprm->is_check = !!(flags & AT_EXECVE_CHECK); retval = bprm_mm_init(bprm); if (!retval) return bprm; out_free: free_bprm(bprm); return ERR_PTR(retval); } int bprm_change_interp(const char *interp, struct linux_binprm *bprm) { /* If a binfmt changed the interp, free it first. */ if (bprm->interp != bprm->filename) kfree(bprm->interp); bprm->interp = kstrdup(interp, GFP_KERNEL); if (!bprm->interp) return -ENOMEM; return 0; } EXPORT_SYMBOL(bprm_change_interp); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH or seccomp thread-sync */ static void check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; unsigned n_fs; if (p->ptrace) bprm->unsafe |= LSM_UNSAFE_PTRACE; /* * This isn't strictly necessary, but it makes it harder for LSMs to * mess up. */ if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; /* * If another task is sharing our fs, we cannot safely * suid exec because the differently privileged task * will be able to manipulate the current directory, etc. * It would be nice to force an unshare instead... * * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS) * from another sub-thread until de_thread() succeeds, this * state is protected by cred_guard_mutex we hold. 
*/ n_fs = 1; spin_lock(&p->fs->lock); rcu_read_lock(); for_other_threads(p, t) { if (t->fs == p->fs) n_fs++; } rcu_read_unlock(); /* "users" and "in_exec" locked for copy_fs() */ if (p->fs->users > n_fs) bprm->unsafe |= LSM_UNSAFE_SHARE; else p->fs->in_exec = 1; spin_unlock(&p->fs->lock); } static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) { /* Handle suid and sgid on files */ struct mnt_idmap *idmap; struct inode *inode = file_inode(file); unsigned int mode; vfsuid_t vfsuid; vfsgid_t vfsgid; int err; if (!mnt_may_suid(file->f_path.mnt)) return; if (task_no_new_privs(current)) return; mode = READ_ONCE(inode->i_mode); if (!(mode & (S_ISUID|S_ISGID))) return; idmap = file_mnt_idmap(file); /* Be careful if suid/sgid is set */ inode_lock(inode); /* Atomically reload and check mode/uid/gid now that lock held. */ mode = inode->i_mode; vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); err = inode_permission(idmap, inode, MAY_EXEC); inode_unlock(inode); /* Did the exec bit vanish out from under us? Give up. */ if (err) return; /* We ignore suid/sgid if there are no mappings for them in the ns */ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) || !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid)) return; if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->euid = vfsuid_into_kuid(vfsuid); } if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->egid = vfsgid_into_kgid(vfsgid); } } /* * Compute brpm->cred based upon the final binary. */ static int bprm_creds_from_file(struct linux_binprm *bprm) { /* Compute creds based on which file? */ struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file; bprm_fill_uid(bprm, file); return security_bprm_creds_from_file(bprm, file); } /* * Fill the binprm structure from the inode. * Read the first BINPRM_BUF_SIZE bytes * * This may be called multiple times for binary chains (scripts for example). */ static int prepare_binprm(struct linux_binprm *bprm) { loff_t pos = 0; memset(bprm->buf, 0, BINPRM_BUF_SIZE); return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos); } /* * Arguments are '\0' separated strings found at the location bprm->p * points to; chop off the first by relocating brpm->p to right after * the first '\0' encountered. 
*/ int remove_arg_zero(struct linux_binprm *bprm) { unsigned long offset; char *kaddr; struct page *page; if (!bprm->argc) return 0; do { offset = bprm->p & ~PAGE_MASK; page = get_arg_page(bprm, bprm->p, 0); if (!page) return -EFAULT; kaddr = kmap_local_page(page); for (; offset < PAGE_SIZE && kaddr[offset]; offset++, bprm->p++) ; kunmap_local(kaddr); put_arg_page(page); } while (offset == PAGE_SIZE); bprm->p++; bprm->argc--; return 0; } EXPORT_SYMBOL(remove_arg_zero); /* * cycle the list of binary formats handler, until one recognizes the image */ static int search_binary_handler(struct linux_binprm *bprm) { struct linux_binfmt *fmt; int retval; retval = prepare_binprm(bprm); if (retval < 0) return retval; retval = security_bprm_check(bprm); if (retval) return retval; read_lock(&binfmt_lock); list_for_each_entry(fmt, &formats, lh) { if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); retval = fmt->load_binary(bprm); read_lock(&binfmt_lock); put_binfmt(fmt); if (bprm->point_of_no_return || (retval != -ENOEXEC)) { read_unlock(&binfmt_lock); return retval; } } read_unlock(&binfmt_lock); return -ENOEXEC; } /* binfmt handlers will call back into begin_new_exec() on success. */ static int exec_binprm(struct linux_binprm *bprm) { pid_t old_pid, old_vpid; int ret, depth; /* Need to fetch pid before load_binary changes it */ old_pid = current->pid; rcu_read_lock(); old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); rcu_read_unlock(); /* This allows 4 levels of binfmt rewrites before failing hard. */ for (depth = 0;; depth++) { struct file *exec; if (depth > 5) return -ELOOP; ret = search_binary_handler(bprm); if (ret < 0) return ret; if (!bprm->interpreter) break; exec = bprm->file; bprm->file = bprm->interpreter; bprm->interpreter = NULL; exe_file_allow_write_access(exec); if (unlikely(bprm->have_execfd)) { if (bprm->executable) { fput(exec); return -ENOEXEC; } bprm->executable = exec; } else fput(exec); } audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); proc_exec_connector(current); return 0; } static int bprm_execve(struct linux_binprm *bprm) { int retval; retval = prepare_bprm_creds(bprm); if (retval) return retval; /* * Check for unsafe execution states before exec_binprm(), which * will call back into begin_new_exec(), into bprm_creds_from_file(), * where setuid-ness is evaluated. */ check_unsafe_exec(bprm); current->in_execve = 1; sched_mm_cid_before_execve(current); sched_exec(); /* Set the unchanging part of bprm->cred */ retval = security_bprm_creds_for_exec(bprm); if (retval || bprm->is_check) goto out; retval = exec_binprm(bprm); if (retval < 0) goto out; sched_mm_cid_after_execve(current); rseq_execve(current); /* execve succeeded */ current->in_execve = 0; user_events_execve(current); acct_update_integrals(current); task_numa_free(current, false); return retval; out: /* * If past the point of no return ensure the code never * returns to the userspace process. Use an existing fatal * signal if present otherwise terminate the process with * SIGSEGV. 
*/ if (bprm->point_of_no_return && !fatal_signal_pending(current)) force_fatal_sig(SIGSEGV); sched_mm_cid_after_execve(current); rseq_set_notify_resume(current); current->in_execve = 0; return retval; } static int do_execveat_common(int fd, struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp, int flags) { struct linux_binprm *bprm; int retval; if (IS_ERR(filename)) return PTR_ERR(filename); /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs * don't check setuid() return code. Here we additionally recheck * whether NPROC limit is still exceeded. */ if ((current->flags & PF_NPROC_EXCEEDED) && is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { retval = -EAGAIN; goto out_ret; } /* We're below the limit (still or again), so we don't want to make * further execve() calls fail. */ current->flags &= ~PF_NPROC_EXCEEDED; bprm = alloc_bprm(fd, filename, flags); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; } retval = count(argv, MAX_ARG_STRINGS); if (retval < 0) goto out_free; bprm->argc = retval; retval = count(envp, MAX_ARG_STRINGS); if (retval < 0) goto out_free; bprm->envc = retval; retval = bprm_stack_limits(bprm); if (retval < 0) goto out_free; retval = copy_string_kernel(bprm->filename, bprm); if (retval < 0) goto out_free; bprm->exec = bprm->p; retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out_free; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out_free; /* * When argv is empty, add an empty string ("") as argv[0] to * ensure confused userspace programs that start processing * from argv[1] won't end up walking envp. See also * bprm_stack_limits(). */ if (bprm->argc == 0) { retval = copy_string_kernel("", bprm); if (retval < 0) goto out_free; bprm->argc = 1; pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n", current->comm, bprm->filename); } retval = bprm_execve(bprm); out_free: free_bprm(bprm); out_ret: putname(filename); return retval; } int kernel_execve(const char *kernel_filename, const char *const *argv, const char *const *envp) { struct filename *filename; struct linux_binprm *bprm; int fd = AT_FDCWD; int retval; /* It is non-sense for kernel threads to call execve */ if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) return -EINVAL; filename = getname_kernel(kernel_filename); if (IS_ERR(filename)) return PTR_ERR(filename); bprm = alloc_bprm(fd, filename, 0); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; } retval = count_strings_kernel(argv); if (WARN_ON_ONCE(retval == 0)) retval = -EINVAL; if (retval < 0) goto out_free; bprm->argc = retval; retval = count_strings_kernel(envp); if (retval < 0) goto out_free; bprm->envc = retval; retval = bprm_stack_limits(bprm); if (retval < 0) goto out_free; retval = copy_string_kernel(bprm->filename, bprm); if (retval < 0) goto out_free; bprm->exec = bprm->p; retval = copy_strings_kernel(bprm->envc, envp, bprm); if (retval < 0) goto out_free; retval = copy_strings_kernel(bprm->argc, argv, bprm); if (retval < 0) goto out_free; retval = bprm_execve(bprm); out_free: free_bprm(bprm); out_ret: putname(filename); return retval; } static int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); } 
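/*
 * Illustrative sketch, not part of fs/exec.c: one way an in-kernel caller
 * can use kernel_execve() above, loosely modelled on how the kernel hands
 * the init task over to userspace. The helper name, the "/sbin/init" path
 * and the argv/envp contents are assumptions for the example only. Note
 * that kernel_execve() warns and returns -EINVAL for PF_KTHREAD callers,
 * so this is only meaningful from a task set up to become a user process.
 */
static const char *const example_argv[] = { "/sbin/init", NULL };
static const char *const example_envp[] = { "HOME=/", "TERM=linux", NULL };

static int example_exec_init(void)
{
        /* On success this returns 0 and the task enters user mode in the
         * new program on its next return to userspace; on failure a
         * negative errno is returned and the old image is untouched. */
        return kernel_execve("/sbin/init", example_argv, example_envp);
}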
static int do_execveat(int fd, struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp, int flags) { struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; return do_execveat_common(fd, filename, argv, envp, flags); } #ifdef CONFIG_COMPAT static int compat_do_execve(struct filename *filename, const compat_uptr_t __user *__argv, const compat_uptr_t __user *__envp) { struct user_arg_ptr argv = { .is_compat = true, .ptr.compat = __argv, }; struct user_arg_ptr envp = { .is_compat = true, .ptr.compat = __envp, }; return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); } static int compat_do_execveat(int fd, struct filename *filename, const compat_uptr_t __user *__argv, const compat_uptr_t __user *__envp, int flags) { struct user_arg_ptr argv = { .is_compat = true, .ptr.compat = __argv, }; struct user_arg_ptr envp = { .is_compat = true, .ptr.compat = __envp, }; return do_execveat_common(fd, filename, argv, envp, flags); } #endif void set_binfmt(struct linux_binfmt *new) { struct mm_struct *mm = current->mm; if (mm->binfmt) module_put(mm->binfmt->module); mm->binfmt = new; if (new) __module_get(new->module); } EXPORT_SYMBOL(set_binfmt); /* * set_dumpable stores three-value SUID_DUMP_* into mm->flags. */ void set_dumpable(struct mm_struct *mm, int value) { if (WARN_ON((unsigned)value > SUID_DUMP_ROOT)) return; set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value); } SYSCALL_DEFINE3(execve, const char __user *, filename, const char __user *const __user *, argv, const char __user *const __user *, envp) { return do_execve(getname(filename), argv, envp); } SYSCALL_DEFINE5(execveat, int, fd, const char __user *, filename, const char __user *const __user *, argv, const char __user *const __user *, envp, int, flags) { return do_execveat(fd, getname_uflags(filename, flags), argv, envp, flags); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, const compat_uptr_t __user *, argv, const compat_uptr_t __user *, envp) { return compat_do_execve(getname(filename), argv, envp); } COMPAT_SYSCALL_DEFINE5(execveat, int, fd, const char __user *, filename, const compat_uptr_t __user *, argv, const compat_uptr_t __user *, envp, int, flags) { return compat_do_execveat(fd, getname_uflags(filename, flags), argv, envp, flags); } #endif #ifdef CONFIG_SYSCTL static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!error) validate_coredump_safety(); return error; } static const struct ctl_table fs_exec_sysctls[] = { { .procname = "suid_dumpable", .data = &suid_dumpable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax_coredump, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, }; static int __init init_fs_exec_sysctls(void) { register_sysctl_init("fs", fs_exec_sysctls); return 0; } fs_initcall(init_fs_exec_sysctls); #endif /* CONFIG_SYSCTL */ #ifdef CONFIG_EXEC_KUNIT_TEST #include "tests/exec_kunit.c" #endif
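/*
 * Illustrative userspace sketch, not part of the kernel sources above:
 * exercising the execveat() syscall defined above with AT_EMPTY_PATH,
 * i.e. the fd-based (fexecve(3)-style) path that do_open_execat() and the
 * bprm->fdpath handling in alloc_bprm() implement. The "/bin/true" path
 * and the argv/envp contents are assumptions for the example only.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        char *argv[] = { "true", NULL };
        char *envp[] = { NULL };
        int fd = open("/bin/true", O_RDONLY | O_CLOEXEC);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* Empty pathname + AT_EMPTY_PATH: execute the object referred to
         * by fd itself, rather than a path interpreted relative to it. */
        syscall(SYS_execveat, fd, "", argv, envp, AT_EMPTY_PATH);
        perror("execveat");     /* reached only if the exec failed */
        return 1;
}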
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/in6.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/ip6_tunnel.h>
#include <net/ip6_checksum.h>

int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
                     struct socket **sockp)
{
        struct sockaddr_in6 udp6_addr = {};
        int err;
        struct socket *sock = NULL;

        err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock);
        if (err < 0)
                goto error;

        if (cfg->ipv6_v6only) {
                err = ip6_sock_set_v6only(sock->sk);
                if (err < 0)
                        goto error;
        }
        if (cfg->bind_ifindex) {
                err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true);
                if (err < 0)
                        goto error;
        }

        udp6_addr.sin6_family = AF_INET6;
        memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
               sizeof(udp6_addr.sin6_addr));
        udp6_addr.sin6_port = cfg->local_udp_port;
        err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
                          sizeof(udp6_addr));
        if (err < 0)
                goto error;

        if (cfg->peer_udp_port) {
                memset(&udp6_addr, 0, sizeof(udp6_addr));
                udp6_addr.sin6_family = AF_INET6;
                memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
                       sizeof(udp6_addr.sin6_addr));
                udp6_addr.sin6_port = cfg->peer_udp_port;
                err = kernel_connect(sock,
                                     (struct sockaddr *)&udp6_addr,
                                     sizeof(udp6_addr), 0);
        }
        if (err < 0)
                goto error;

        udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
        udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);

        *sockp = sock;
        return 0;

error:
        if (sock) {
                kernel_sock_shutdown(sock, SHUT_RDWR);
                sock_release(sock);
        }
        *sockp = NULL;
        return err;
}
EXPORT_SYMBOL_GPL(udp_sock_create6);

int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
                         struct sk_buff *skb,
                         struct net_device *dev,
                         const struct in6_addr *saddr,
                         const struct in6_addr *daddr,
                         __u8 prio, __u8 ttl, __be32 label,
                         __be16 src_port, __be16 dst_port, bool nocheck)
{
        struct udphdr *uh;
        struct ipv6hdr *ip6h;

        __skb_push(skb, sizeof(*uh));
        skb_reset_transport_header(skb);
        uh = udp_hdr(skb);

        uh->dest = dst_port;
        uh->source = src_port;
        uh->len = htons(skb->len);

        skb_dst_set(skb, dst);

        udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);

        __skb_push(skb, sizeof(*ip6h));
        skb_reset_network_header(skb);
        ip6h = ipv6_hdr(skb);
        ip6_flow_hdr(ip6h, prio, label);
        ip6h->payload_len = htons(skb->len);
        ip6h->nexthdr = IPPROTO_UDP;
        ip6h->hop_limit = ttl;
        ip6h->daddr = *daddr;
        ip6h->saddr = *saddr;

        ip6tunnel_xmit(sk, skb, dev);
        return 0;
}
EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);

/**
 * udp_tunnel6_dst_lookup - perform route lookup on UDP tunnel
 * @skb: Packet for which lookup is done
 * @dev: Tunnel device
 * @net: Network namespace of tunnel device
 * @sock: Socket which provides route info
 * @oif: Index of the output interface
 * @saddr: Memory to store the src ip address
 * @key: Tunnel information
 * @sport: UDP source port
 * @dport: UDP destination port
 * @dsfield: The traffic class field
 * @dst_cache: The dst cache to use for lookup
 * This function performs a route lookup on a UDP tunnel
 *
 * It returns a valid dst pointer and stores src address to be used in
 * tunnel in param saddr on success, else a pointer encoded error code.
 */
struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
                                         struct net_device *dev,
                                         struct net *net,
                                         struct socket *sock,
                                         int oif,
                                         struct in6_addr *saddr,
                                         const struct ip_tunnel_key *key,
                                         __be16 sport, __be16 dport,
                                         u8 dsfield,
                                         struct dst_cache *dst_cache)
{
        struct dst_entry *dst = NULL;
        struct flowi6 fl6;

#ifdef CONFIG_DST_CACHE
        if (dst_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = IPPROTO_UDP;
        fl6.flowi6_oif = oif;
        fl6.daddr = key->u.ipv6.dst;
        fl6.saddr = key->u.ipv6.src;
        fl6.fl6_sport = sport;
        fl6.fl6_dport = dport;
        fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (dst_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup);

MODULE_DESCRIPTION("IPv6 Foo over UDP tunnel driver");
MODULE_LICENSE("GPL");
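/*
 * Illustrative sketch, not part of this file: how a tunnel driver might
 * call udp_sock_create6() above. The helper name and the port number are
 * assumptions for the example; only udp_port_cfg fields that
 * udp_sock_create6() actually reads are filled in, and local_ip6 is left
 * all-zeroes so the socket binds to the wildcard address.
 */
static int example_open_tunnel_sock6(struct net *net, struct socket **sockp)
{
        struct udp_port_cfg cfg = {
                .local_udp_port         = htons(4789), /* example port */
                .ipv6_v6only            = 1,
                .use_udp6_tx_checksums  = 1,
                .use_udp6_rx_checksums  = 1,
        };

        /* On success *sockp is a bound kernel UDP socket with the IPv6
         * checksum behaviour configured; on failure it is set to NULL
         * and a negative errno is returned. */
        return udp_sock_create6(net, &cfg, sockp);
}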
// SPDX-License-Identifier: GPL-2.0-or-later /* * X.25 Packet Layer release 002 * * This is ALPHA test software. This code may break your machine, * randomly fail to work with new releases, misbehave and/or generally * screw up. It might even work. * * This code REQUIRES 2.1.15 or higher * * History * X.25 001 Jonathan Naylor Started coding. * X.25 002 Jonathan Naylor Centralised disconnect handling. * New timer architecture. * 2000-03-11 Henner Eisen MSG_EOR handling more POSIX compliant. * 2000-03-22 Daniela Squassoni Allowed disabling/enabling of * facilities negotiation and increased * the throughput upper limit. * 2000-08-27 Arnaldo C. Melo s/suser/capable/ + micro cleanups * 2000-09-04 Henner Eisen Set sock->state in x25_accept(). * Fixed x25_output() related skb leakage. * 2000-10-02 Henner Eisen Made x25_kick() single threaded per socket. * 2000-10-27 Henner Eisen MSG_DONTWAIT for fragment allocation. * 2000-11-14 Henner Eisen Closing datalink from NETDEV_GOING_DOWN * 2002-10-06 Arnaldo C.
Melo Get rid of cli/sti, move proc stuff to * x25_proc.c, using seq_file * 2005-04-02 Shaun Pereira Selective sub address matching * with call user data * 2005-04-15 Shaun Pereira Fast select with no restriction on * response */ #define pr_fmt(fmt) "X25: " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/notifier.h> #include <linux/init.h> #include <linux/compat.h> #include <linux/ctype.h> #include <net/x25.h> #include <net/compat.h> int sysctl_x25_restart_request_timeout = X25_DEFAULT_T20; int sysctl_x25_call_request_timeout = X25_DEFAULT_T21; int sysctl_x25_reset_request_timeout = X25_DEFAULT_T22; int sysctl_x25_clear_request_timeout = X25_DEFAULT_T23; int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; int sysctl_x25_forward = 0; HLIST_HEAD(x25_list); DEFINE_RWLOCK(x25_list_lock); static const struct proto_ops x25_proto_ops; static const struct x25_address null_x25_address = {" "}; #ifdef CONFIG_COMPAT struct compat_x25_subscrip_struct { char device[200-sizeof(compat_ulong_t)]; compat_ulong_t global_facil_mask; compat_uint_t extended; }; #endif int x25_parse_address_block(struct sk_buff *skb, struct x25_address *called_addr, struct x25_address *calling_addr) { unsigned char len; int needed; int rc; if (!pskb_may_pull(skb, 1)) { /* packet has no address block */ rc = 0; goto empty; } len = *skb->data; needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2; if (!pskb_may_pull(skb, needed)) { /* packet is too short to hold the addresses it claims to hold */ rc = -1; goto empty; } return x25_addr_ntoa(skb->data, called_addr, calling_addr); empty: *called_addr->x25_addr = 0; *calling_addr->x25_addr = 0; return rc; } int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr, struct x25_address *calling_addr) { unsigned int called_len, calling_len; char *called, *calling; unsigned int i; called_len = (*p >> 0) & 0x0F; calling_len = (*p >> 4) & 0x0F; called = called_addr->x25_addr; calling = calling_addr->x25_addr; p++; for (i = 0; i < (called_len + calling_len); i++) { if (i < called_len) { if (i % 2 != 0) { *called++ = ((*p >> 0) & 0x0F) + '0'; p++; } else { *called++ = ((*p >> 4) & 0x0F) + '0'; } } else { if (i % 2 != 0) { *calling++ = ((*p >> 0) & 0x0F) + '0'; p++; } else { *calling++ = ((*p >> 4) & 0x0F) + '0'; } } } *called = *calling = '\0'; return 1 + (called_len + calling_len + 1) / 2; } int x25_addr_aton(unsigned char *p, struct x25_address *called_addr, struct x25_address *calling_addr) { unsigned int called_len, calling_len; char *called, *calling; int i; called = called_addr->x25_addr; calling = calling_addr->x25_addr; called_len = strlen(called); calling_len = strlen(calling); *p++ = (calling_len << 4) | (called_len << 0); for (i = 0; i < (called_len + calling_len); i++) { if (i < called_len) { if (i % 2 != 0) { *p |= (*called++ - '0') << 0; p++; } else { *p = 0x00; *p |= (*called++ - '0') << 4; } } else { if (i % 2 != 0) { *p |= (*calling++ - '0') << 0; p++; } else { *p = 0x00; *p |= (*calling++ - '0') << 4; } } } return 1 + (called_len + calling_len + 1) / 2; } /* * Socket removal during an interrupt is now safe. 
*/ static void x25_remove_socket(struct sock *sk) { write_lock_bh(&x25_list_lock); sk_del_node_init(sk); write_unlock_bh(&x25_list_lock); } /* * Handle device status changes. */ static int x25_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct x25_neigh *nb; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type == ARPHRD_X25) { switch (event) { case NETDEV_REGISTER: case NETDEV_POST_TYPE_CHANGE: x25_link_device_up(dev); break; case NETDEV_DOWN: nb = x25_get_neigh(dev); if (nb) { x25_link_terminated(nb); x25_neigh_put(nb); } x25_route_device_down(dev); break; case NETDEV_PRE_TYPE_CHANGE: case NETDEV_UNREGISTER: x25_link_device_down(dev); break; case NETDEV_CHANGE: if (!netif_carrier_ok(dev)) { nb = x25_get_neigh(dev); if (nb) { x25_link_terminated(nb); x25_neigh_put(nb); } } break; } } return NOTIFY_DONE; } /* * Add a socket to the bound sockets list. */ static void x25_insert_socket(struct sock *sk) { write_lock_bh(&x25_list_lock); sk_add_node(sk, &x25_list); write_unlock_bh(&x25_list_lock); } /* * Find a socket that wants to accept the Call Request we just * received. Check the full list for an address/cud match. * If no cuds match return the next_best thing, an address match. * Note: if a listening socket has cud set it must only get calls * with matching cud. */ static struct sock *x25_find_listener(struct x25_address *addr, struct sk_buff *skb) { struct sock *s; struct sock *next_best; read_lock_bh(&x25_list_lock); next_best = NULL; sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || !strcmp(x25_sk(s)->source_addr.x25_addr, null_x25_address.x25_addr)) && s->sk_state == TCP_LISTEN) { /* * Found a listening socket, now check the incoming * call user data vs this sockets call user data */ if (x25_sk(s)->cudmatchlength > 0 && skb->len >= x25_sk(s)->cudmatchlength) { if((memcmp(x25_sk(s)->calluserdata.cuddata, skb->data, x25_sk(s)->cudmatchlength)) == 0) { sock_hold(s); goto found; } } else next_best = s; } if (next_best) { s = next_best; sock_hold(s); goto found; } s = NULL; found: read_unlock_bh(&x25_list_lock); return s; } /* * Find a connected X.25 socket given my LCI and neighbour. */ static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; sk_for_each(s, &x25_list) if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) { sock_hold(s); goto found; } s = NULL; found: return s; } struct sock *x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; read_lock_bh(&x25_list_lock); s = __x25_find_socket(lci, nb); read_unlock_bh(&x25_list_lock); return s; } /* * Find a unique LCI for a given device. */ static unsigned int x25_new_lci(struct x25_neigh *nb) { unsigned int lci = 1; struct sock *sk; while ((sk = x25_find_socket(lci, nb)) != NULL) { sock_put(sk); if (++lci == 4096) { lci = 0; break; } cond_resched(); } return lci; } /* * Deferred destroy. */ static void __x25_destroy_socket(struct sock *); /* * handler for deferred kills. */ static void x25_destroy_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); x25_destroy_socket_from_timer(sk); } /* * This is called from user mode and the timers. Thus it protects itself * against interrupting users but doesn't worry about being called during * work. Once it is removed from the queue no interrupt or bottom half * will touch it and we are (fairly 8-) ) safe. 
* Not static as it's used by the timer */ static void __x25_destroy_socket(struct sock *sk) { struct sk_buff *skb; x25_stop_heartbeat(sk); x25_stop_timer(sk); x25_remove_socket(sk); x25_clear_queues(sk); /* Flush the queues */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (skb->sk != sk) { /* A pending connection */ /* * Queue the unaccepted socket for death */ skb->sk->sk_state = TCP_LISTEN; sock_set_flag(skb->sk, SOCK_DEAD); x25_start_heartbeat(skb->sk); x25_sk(skb->sk)->state = X25_STATE_0; } kfree_skb(skb); } if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; sk->sk_timer.function = x25_destroy_timer; add_timer(&sk->sk_timer); } else { /* drop last reference so sock_put will free */ __sock_put(sk); } } void x25_destroy_socket_from_timer(struct sock *sk) { sock_hold(sk); bh_lock_sock(sk); __x25_destroy_socket(sk); bh_unlock_sock(sk); sock_put(sk); } /* * Handling for system calls applied via the various interfaces to a * X.25 socket object. */ static int x25_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { int opt; struct sock *sk = sock->sk; int rc = -ENOPROTOOPT; if (level != SOL_X25 || optname != X25_QBITINCL) goto out; rc = -EINVAL; if (optlen < sizeof(int)) goto out; rc = -EFAULT; if (copy_from_sockptr(&opt, optval, sizeof(int))) goto out; if (opt) set_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); else clear_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); rc = 0; out: return rc; } static int x25_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; int val, len, rc = -ENOPROTOOPT; if (level != SOL_X25 || optname != X25_QBITINCL) goto out; rc = -EFAULT; if (get_user(len, optlen)) goto out; rc = -EINVAL; if (len < 0) goto out; len = min_t(unsigned int, len, sizeof(int)); rc = -EFAULT; if (put_user(len, optlen)) goto out; val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags); rc = copy_to_user(optval, &val, len) ? 
-EFAULT : 0; out: return rc; } static int x25_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int rc = -EOPNOTSUPP; lock_sock(sk); if (sock->state != SS_UNCONNECTED) { rc = -EINVAL; release_sock(sk); return rc; } if (sk->sk_state != TCP_LISTEN) { memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; rc = 0; } release_sock(sk); return rc; } static struct proto x25_proto = { .name = "X25", .owner = THIS_MODULE, .obj_size = sizeof(struct x25_sock), }; static struct sock *x25_alloc_socket(struct net *net, int kern) { struct x25_sock *x25; struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern); if (!sk) goto out; sock_init_data(NULL, sk); x25 = x25_sk(sk); skb_queue_head_init(&x25->ack_queue); skb_queue_head_init(&x25->fragment_queue); skb_queue_head_init(&x25->interrupt_in_queue); skb_queue_head_init(&x25->interrupt_out_queue); out: return sk; } static int x25_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct x25_sock *x25; int rc = -EAFNOSUPPORT; if (!net_eq(net, &init_net)) goto out; rc = -ESOCKTNOSUPPORT; if (sock->type != SOCK_SEQPACKET) goto out; rc = -EINVAL; if (protocol) goto out; rc = -ENOMEM; if ((sk = x25_alloc_socket(net, kern)) == NULL) goto out; x25 = x25_sk(sk); sock_init_data(sock, sk); x25_init_timers(sk); sock->ops = &x25_proto_ops; sk->sk_protocol = protocol; sk->sk_backlog_rcv = x25_backlog_rcv; x25->t21 = sysctl_x25_call_request_timeout; x25->t22 = sysctl_x25_reset_request_timeout; x25->t23 = sysctl_x25_clear_request_timeout; x25->t2 = sysctl_x25_ack_holdback_timeout; x25->state = X25_STATE_0; x25->cudmatchlength = 0; set_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); /* normally no cud */ /* on call accept */ x25->facilities.winsize_in = X25_DEFAULT_WINDOW_SIZE; x25->facilities.winsize_out = X25_DEFAULT_WINDOW_SIZE; x25->facilities.pacsize_in = X25_DEFAULT_PACKET_SIZE; x25->facilities.pacsize_out = X25_DEFAULT_PACKET_SIZE; x25->facilities.throughput = 0; /* by default don't negotiate throughput */ x25->facilities.reverse = X25_DEFAULT_REVERSE; x25->dte_facilities.calling_len = 0; x25->dte_facilities.called_len = 0; memset(x25->dte_facilities.called_ae, '\0', sizeof(x25->dte_facilities.called_ae)); memset(x25->dte_facilities.calling_ae, '\0', sizeof(x25->dte_facilities.calling_ae)); rc = 0; out: return rc; } static struct sock *x25_make_new(struct sock *osk) { struct sock *sk = NULL; struct x25_sock *x25, *ox25; if (osk->sk_type != SOCK_SEQPACKET) goto out; if ((sk = x25_alloc_socket(sock_net(osk), 0)) == NULL) goto out; x25 = x25_sk(sk); sk->sk_type = osk->sk_type; sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; sk->sk_backlog_rcv = osk->sk_backlog_rcv; sock_copy_flags(sk, osk); ox25 = x25_sk(osk); x25->t21 = ox25->t21; x25->t22 = ox25->t22; x25->t23 = ox25->t23; x25->t2 = ox25->t2; x25->flags = ox25->flags; x25->facilities = ox25->facilities; x25->dte_facilities = ox25->dte_facilities; x25->cudmatchlength = ox25->cudmatchlength; clear_bit(X25_INTERRUPT_FLAG, &x25->flags); x25_init_timers(sk); out: return sk; } static int x25_release(struct socket *sock) { struct sock *sk = sock->sk; struct x25_sock *x25; if (!sk) return 0; x25 = x25_sk(sk); sock_hold(sk); lock_sock(sk); switch (x25->state) { case X25_STATE_0: case X25_STATE_2: x25_disconnect(sk, 0, 0, 0); __x25_destroy_socket(sk); goto out; case X25_STATE_1: case 
X25_STATE_3: case X25_STATE_4: x25_clear_queues(sk); x25_write_internal(sk, X25_CLEAR_REQUEST); x25_start_t23timer(sk); x25->state = X25_STATE_2; sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); sock_set_flag(sk, SOCK_DESTROY); break; case X25_STATE_5: x25_write_internal(sk, X25_CLEAR_REQUEST); x25_disconnect(sk, 0, 0, 0); __x25_destroy_socket(sk); goto out; } sock_orphan(sk); out: release_sock(sk); sock_put(sk); return 0; } static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; int len, i, rc = 0; if (addr_len != sizeof(struct sockaddr_x25) || addr->sx25_family != AF_X25 || strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) { rc = -EINVAL; goto out; } /* check for the null_x25_address */ if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) { len = strlen(addr->sx25_addr.x25_addr); for (i = 0; i < len; i++) { if (!isdigit(addr->sx25_addr.x25_addr[i])) { rc = -EINVAL; goto out; } } } lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) { x25_sk(sk)->source_addr = addr->sx25_addr; x25_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); } else { rc = -EINVAL; } release_sock(sk); net_dbg_ratelimited("x25_bind: socket is bound\n"); out: return rc; } static int x25_wait_for_connection_establishment(struct sock *sk) { DECLARE_WAITQUEUE(wait, current); int rc; add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = sock_error(sk); if (rc) { sk->sk_socket->state = SS_UNCONNECTED; break; } rc = -ENOTCONN; if (sk->sk_state == TCP_CLOSE) { sk->sk_socket->state = SS_UNCONNECTED; break; } rc = 0; if (sk->sk_state != TCP_ESTABLISHED) { release_sock(sk); schedule(); lock_sock(sk); } else break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return rc; } static int x25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; struct x25_route *rt; int rc = 0; lock_sock(sk); if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { sock->state = SS_CONNECTED; goto out; /* Connect completed during a ERESTARTSYS event */ } rc = -ECONNREFUSED; if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) { sock->state = SS_UNCONNECTED; goto out; } rc = -EISCONN; /* No reconnect on a seqpacket socket */ if (sk->sk_state == TCP_ESTABLISHED) goto out; rc = -EALREADY; /* Do nothing if call is already in progress */ if (sk->sk_state == TCP_SYN_SENT) goto out; sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; rc = -EINVAL; if (addr_len != sizeof(struct sockaddr_x25) || addr->sx25_family != AF_X25 || strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) goto out; rc = -ENETUNREACH; rt = x25_get_route(&addr->sx25_addr); if (!rt) goto out; x25->neighbour = x25_get_neigh(rt->dev); if (!x25->neighbour) goto out_put_route; x25_limit_facilities(&x25->facilities, x25->neighbour); x25->lci = x25_new_lci(x25->neighbour); if (!x25->lci) goto out_put_neigh; rc = -EINVAL; if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */ goto out_put_neigh; if (!strcmp(x25->source_addr.x25_addr, null_x25_address.x25_addr)) memset(&x25->source_addr, '\0', X25_ADDR_LEN); x25->dest_addr = addr->sx25_addr; /* Move to 
connecting socket, start sending Connect Requests */ sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; x25->state = X25_STATE_1; x25_write_internal(sk, X25_CALL_REQUEST); x25_start_heartbeat(sk); x25_start_t21timer(sk); /* Now the loop */ rc = -EINPROGRESS; if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) goto out; rc = x25_wait_for_connection_establishment(sk); if (rc) goto out_put_neigh; sock->state = SS_CONNECTED; rc = 0; out_put_neigh: if (rc && x25->neighbour) { read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); x25->neighbour = NULL; read_unlock_bh(&x25_list_lock); x25->state = X25_STATE_0; } out_put_route: x25_route_put(rt); out: release_sock(sk); return rc; } static int x25_wait_for_data(struct sock *sk, long timeout) { DECLARE_WAITQUEUE(wait, current); int rc = 0; add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); if (sk->sk_shutdown & RCV_SHUTDOWN) break; rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; rc = 0; if (skb_queue_empty(&sk->sk_receive_queue)) { release_sock(sk); timeout = schedule_timeout(timeout); lock_sock(sk); } else break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return rc; } static int x25_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sock *newsk; struct sk_buff *skb; int rc = -EINVAL; if (!sk) goto out; rc = -EOPNOTSUPP; if (sk->sk_type != SOCK_SEQPACKET) goto out; lock_sock(sk); rc = -EINVAL; if (sk->sk_state != TCP_LISTEN) goto out2; rc = x25_wait_for_data(sk, sk->sk_rcvtimeo); if (rc) goto out2; skb = skb_dequeue(&sk->sk_receive_queue); rc = -EINVAL; if (!skb->sk) goto out2; newsk = skb->sk; sock_graft(newsk, newsock); /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); sk_acceptq_removed(sk); newsock->state = SS_CONNECTED; rc = 0; out2: release_sock(sk); out: return rc; } static int x25_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr; struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); int rc = 0; if (peer) { if (sk->sk_state != TCP_ESTABLISHED) { rc = -ENOTCONN; goto out; } sx25->sx25_addr = x25->dest_addr; } else sx25->sx25_addr = x25->source_addr; sx25->sx25_family = AF_X25; rc = sizeof(*sx25); out: return rc; } int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, unsigned int lci) { struct sock *sk; struct sock *make; struct x25_sock *makex25; struct x25_address source_addr, dest_addr; struct x25_facilities facilities; struct x25_dte_facilities dte_facilities; int len, addr_len, rc; /* * Remove the LCI and frame type. */ skb_pull(skb, X25_STD_MIN_LEN); /* * Extract the X.25 addresses and convert them to ASCII strings, * and remove them. * * Address block is mandatory in call request packets */ addr_len = x25_parse_address_block(skb, &source_addr, &dest_addr); if (addr_len <= 0) goto out_clear_request; skb_pull(skb, addr_len); /* * Get the length of the facilities, skip past them for the moment * get the call user data because this is needed to determine * the correct listener * * Facilities length is mandatory in call request packets */ if (!pskb_may_pull(skb, 1)) goto out_clear_request; len = skb->data[0] + 1; if (!pskb_may_pull(skb, len)) goto out_clear_request; skb_pull(skb,len); /* * Ensure that the amount of call user data is valid. 
*/ if (skb->len > X25_MAX_CUD_LEN) goto out_clear_request; /* * Get all the call user data so it can be used in * x25_find_listener and skb_copy_from_linear_data up ahead. */ if (!pskb_may_pull(skb, skb->len)) goto out_clear_request; /* * Find a listener for the particular address/cud pair. */ sk = x25_find_listener(&source_addr,skb); skb_push(skb,len); if (sk != NULL && sk_acceptq_is_full(sk)) { goto out_sock_put; } /* * We dont have any listeners for this incoming call. * Try forwarding it. */ if (sk == NULL) { skb_push(skb, addr_len + X25_STD_MIN_LEN); if (sysctl_x25_forward && x25_forward_call(&dest_addr, nb, skb, lci) > 0) { /* Call was forwarded, dont process it any more */ kfree_skb(skb); rc = 1; goto out; } else { /* No listeners, can't forward, clear the call */ goto out_clear_request; } } /* * Try to reach a compromise on the requested facilities. */ len = x25_negotiate_facilities(skb, sk, &facilities, &dte_facilities); if (len == -1) goto out_sock_put; /* * current neighbour/link might impose additional limits * on certain facilities */ x25_limit_facilities(&facilities, nb); /* * Try to create a new socket. */ make = x25_make_new(sk); if (!make) goto out_sock_put; /* * Remove the facilities */ skb_pull(skb, len); skb->sk = make; make->sk_state = TCP_ESTABLISHED; makex25 = x25_sk(make); makex25->lci = lci; makex25->dest_addr = dest_addr; makex25->source_addr = source_addr; x25_neigh_hold(nb); makex25->neighbour = nb; makex25->facilities = facilities; makex25->dte_facilities= dte_facilities; makex25->vc_facil_mask = x25_sk(sk)->vc_facil_mask; /* ensure no reverse facil on accept */ makex25->vc_facil_mask &= ~X25_MASK_REVERSE; /* ensure no calling address extension on accept */ makex25->vc_facil_mask &= ~X25_MASK_CALLING_AE; makex25->cudmatchlength = x25_sk(sk)->cudmatchlength; /* Normally all calls are accepted immediately */ if (test_bit(X25_ACCPT_APPRV_FLAG, &makex25->flags)) { x25_write_internal(make, X25_CALL_ACCEPTED); makex25->state = X25_STATE_3; } else { makex25->state = X25_STATE_5; } /* * Incoming Call User Data. 
*/ skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len); makex25->calluserdata.cudlength = skb->len; sk_acceptq_added(sk); x25_insert_socket(make); skb_queue_head(&sk->sk_receive_queue, skb); x25_start_heartbeat(make); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk); rc = 1; sock_put(sk); out: return rc; out_sock_put: sock_put(sk); out_clear_request: rc = 0; x25_transmit_clear_request(nb, lci, 0x01); goto out; } static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); DECLARE_SOCKADDR(struct sockaddr_x25 *, usx25, msg->msg_name); struct sockaddr_x25 sx25; struct sk_buff *skb; unsigned char *asmptr; int noblock = msg->msg_flags & MSG_DONTWAIT; size_t size; int qbit = 0, rc = -EINVAL; lock_sock(sk); if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT)) goto out; /* we currently don't support segmented records at the user interface */ if (!(msg->msg_flags & (MSG_EOR|MSG_OOB))) goto out; rc = -EADDRNOTAVAIL; if (sock_flag(sk, SOCK_ZAPPED)) goto out; rc = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) { send_sig(SIGPIPE, current, 0); goto out; } rc = -ENETUNREACH; if (!x25->neighbour) goto out; if (usx25) { rc = -EINVAL; if (msg->msg_namelen < sizeof(sx25)) goto out; memcpy(&sx25, usx25, sizeof(sx25)); rc = -EISCONN; if (strcmp(x25->dest_addr.x25_addr, sx25.sx25_addr.x25_addr)) goto out; rc = -EINVAL; if (sx25.sx25_family != AF_X25) goto out; } else { /* * FIXME 1003.1g - if the socket is like this because * it has become closed (not started closed) we ought * to SIGPIPE, EPIPE; */ rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) goto out; sx25.sx25_family = AF_X25; sx25.sx25_addr = x25->dest_addr; } /* Sanity check the packet size */ if (len > 65535) { rc = -EMSGSIZE; goto out; } net_dbg_ratelimited("x25_sendmsg: sendto: Addresses built.\n"); /* Build a packet */ net_dbg_ratelimited("x25_sendmsg: sendto: building packet.\n"); if ((msg->msg_flags & MSG_OOB) && len > 32) len = 32; size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (!skb) goto out; X25_SKB_CB(skb)->flags = msg->msg_flags; skb_reserve(skb, X25_MAX_L2_LEN + X25_EXT_MIN_LEN); /* * Put the data on the end */ net_dbg_ratelimited("x25_sendmsg: Copying user data\n"); skb_reset_transport_header(skb); skb_put(skb, len); rc = memcpy_from_msg(skb_transport_header(skb), msg, len); if (rc) goto out_kfree_skb; /* * If the Q BIT Include socket option is in force, the first * byte of the user data is the logical value of the Q Bit. 
*/ if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { if (!pskb_may_pull(skb, 1)) goto out_kfree_skb; qbit = skb->data[0]; skb_pull(skb, 1); } /* * Push down the X.25 header */ net_dbg_ratelimited("x25_sendmsg: Building X.25 Header.\n"); if (msg->msg_flags & MSG_OOB) { if (x25->neighbour->extended) { asmptr = skb_push(skb, X25_STD_MIN_LEN); *asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_EXTSEQ; *asmptr++ = (x25->lci >> 0) & 0xFF; *asmptr++ = X25_INTERRUPT; } else { asmptr = skb_push(skb, X25_STD_MIN_LEN); *asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_STDSEQ; *asmptr++ = (x25->lci >> 0) & 0xFF; *asmptr++ = X25_INTERRUPT; } } else { if (x25->neighbour->extended) { /* Build an Extended X.25 header */ asmptr = skb_push(skb, X25_EXT_MIN_LEN); *asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_EXTSEQ; *asmptr++ = (x25->lci >> 0) & 0xFF; *asmptr++ = X25_DATA; *asmptr++ = X25_DATA; } else { /* Build an Standard X.25 header */ asmptr = skb_push(skb, X25_STD_MIN_LEN); *asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_STDSEQ; *asmptr++ = (x25->lci >> 0) & 0xFF; *asmptr++ = X25_DATA; } if (qbit) skb->data[0] |= X25_Q_BIT; } net_dbg_ratelimited("x25_sendmsg: Built header.\n"); net_dbg_ratelimited("x25_sendmsg: Transmitting buffer\n"); rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) goto out_kfree_skb; if (msg->msg_flags & MSG_OOB) skb_queue_tail(&x25->interrupt_out_queue, skb); else { rc = x25_output(sk, skb); len = rc; if (rc < 0) kfree_skb(skb); else if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) len++; } x25_kick(sk); rc = len; out: release_sock(sk); return rc; out_kfree_skb: kfree_skb(skb); goto out; } static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); DECLARE_SOCKADDR(struct sockaddr_x25 *, sx25, msg->msg_name); size_t copied; int qbit, header_len; struct sk_buff *skb; unsigned char *asmptr; int rc = -ENOTCONN; lock_sock(sk); if (x25->neighbour == NULL) goto out; header_len = x25->neighbour->extended ? X25_EXT_MIN_LEN : X25_STD_MIN_LEN; /* * This works for seqpacket too. The receiver has ordered the queue for * us! We do one quick check first though */ if (sk->sk_state != TCP_ESTABLISHED) goto out; if (flags & MSG_OOB) { rc = -EINVAL; if (sock_flag(sk, SOCK_URGINLINE) || !skb_peek(&x25->interrupt_in_queue)) goto out; skb = skb_dequeue(&x25->interrupt_in_queue); if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) goto out_free_dgram; skb_pull(skb, X25_STD_MIN_LEN); /* * No Q bit information on Interrupt data. 
*/ if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { asmptr = skb_push(skb, 1); *asmptr = 0x00; } msg->msg_flags |= MSG_OOB; } else { /* Now we can treat all alike */ release_sock(sk); skb = skb_recv_datagram(sk, flags, &rc); lock_sock(sk); if (!skb) goto out; if (!pskb_may_pull(skb, header_len)) goto out_free_dgram; qbit = (skb->data[0] & X25_Q_BIT) == X25_Q_BIT; skb_pull(skb, header_len); if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { asmptr = skb_push(skb, 1); *asmptr = qbit; } } skb_reset_transport_header(skb); copied = skb->len; if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; } /* Currently, each datagram always contains a complete record */ msg->msg_flags |= MSG_EOR; rc = skb_copy_datagram_msg(skb, 0, msg, copied); if (rc) goto out_free_dgram; if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; msg->msg_namelen = sizeof(*sx25); } x25_check_rbuf(sk); rc = copied; out_free_dgram: skb_free_datagram(sk, skb); out: release_sock(sk); return rc; } static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); void __user *argp = (void __user *)arg; int rc; switch (cmd) { case TIOCOUTQ: { int amount; amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; rc = put_user(amount, (unsigned int __user *)argp); break; } case TIOCINQ: { struct sk_buff *skb; int amount = 0; /* * These two are safe on a single CPU system as * only user tasks fiddle here */ lock_sock(sk); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; release_sock(sk); rc = put_user(amount, (unsigned int __user *)argp); break; } case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: case SIOCGIFMETRIC: case SIOCSIFMETRIC: rc = -EINVAL; break; case SIOCADDRT: case SIOCDELRT: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; rc = x25_route_ioctl(cmd, argp); break; case SIOCX25GSUBSCRIP: rc = x25_subscr_ioctl(cmd, argp); break; case SIOCX25SSUBSCRIP: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; rc = x25_subscr_ioctl(cmd, argp); break; case SIOCX25GFACILITIES: { lock_sock(sk); rc = copy_to_user(argp, &x25->facilities, sizeof(x25->facilities)) ? 
-EFAULT : 0; release_sock(sk); break; } case SIOCX25SFACILITIES: { struct x25_facilities facilities; rc = -EFAULT; if (copy_from_user(&facilities, argp, sizeof(facilities))) break; rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_LISTEN && sk->sk_state != TCP_CLOSE) goto out_fac_release; if (facilities.pacsize_in < X25_PS16 || facilities.pacsize_in > X25_PS4096) goto out_fac_release; if (facilities.pacsize_out < X25_PS16 || facilities.pacsize_out > X25_PS4096) goto out_fac_release; if (facilities.winsize_in < 1 || facilities.winsize_in > 127) goto out_fac_release; if (facilities.throughput) { int out = facilities.throughput & 0xf0; int in = facilities.throughput & 0x0f; if (!out) facilities.throughput |= X25_DEFAULT_THROUGHPUT << 4; else if (out < 0x30 || out > 0xD0) goto out_fac_release; if (!in) facilities.throughput |= X25_DEFAULT_THROUGHPUT; else if (in < 0x03 || in > 0x0D) goto out_fac_release; } if (facilities.reverse && (facilities.reverse & 0x81) != 0x81) goto out_fac_release; x25->facilities = facilities; rc = 0; out_fac_release: release_sock(sk); break; } case SIOCX25GDTEFACILITIES: { lock_sock(sk); rc = copy_to_user(argp, &x25->dte_facilities, sizeof(x25->dte_facilities)); release_sock(sk); if (rc) rc = -EFAULT; break; } case SIOCX25SDTEFACILITIES: { struct x25_dte_facilities dtefacs; rc = -EFAULT; if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) break; rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_LISTEN && sk->sk_state != TCP_CLOSE) goto out_dtefac_release; if (dtefacs.calling_len > X25_MAX_AE_LEN) goto out_dtefac_release; if (dtefacs.called_len > X25_MAX_AE_LEN) goto out_dtefac_release; x25->dte_facilities = dtefacs; rc = 0; out_dtefac_release: release_sock(sk); break; } case SIOCX25GCALLUSERDATA: { lock_sock(sk); rc = copy_to_user(argp, &x25->calluserdata, sizeof(x25->calluserdata)) ? -EFAULT : 0; release_sock(sk); break; } case SIOCX25SCALLUSERDATA: { struct x25_calluserdata calluserdata; rc = -EFAULT; if (copy_from_user(&calluserdata, argp, sizeof(calluserdata))) break; rc = -EINVAL; if (calluserdata.cudlength > X25_MAX_CUD_LEN) break; lock_sock(sk); x25->calluserdata = calluserdata; release_sock(sk); rc = 0; break; } case SIOCX25GCAUSEDIAG: { lock_sock(sk); rc = copy_to_user(argp, &x25->causediag, sizeof(x25->causediag)) ? 
-EFAULT : 0; release_sock(sk); break; } case SIOCX25SCAUSEDIAG: { struct x25_causediag causediag; rc = -EFAULT; if (copy_from_user(&causediag, argp, sizeof(causediag))) break; lock_sock(sk); x25->causediag = causediag; release_sock(sk); rc = 0; break; } case SIOCX25SCUDMATCHLEN: { struct x25_subaddr sub_addr; rc = -EINVAL; lock_sock(sk); if(sk->sk_state != TCP_CLOSE) goto out_cud_release; rc = -EFAULT; if (copy_from_user(&sub_addr, argp, sizeof(sub_addr))) goto out_cud_release; rc = -EINVAL; if (sub_addr.cudmatchlength > X25_MAX_CUD_LEN) goto out_cud_release; x25->cudmatchlength = sub_addr.cudmatchlength; rc = 0; out_cud_release: release_sock(sk); break; } case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; lock_sock(sk); if (sk->sk_state == TCP_CLOSE) { clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); rc = 0; } release_sock(sk); break; } case SIOCX25SENDCALLACCPT: { rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) goto out_sendcallaccpt_release; /* must call accptapprv above */ if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) goto out_sendcallaccpt_release; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; rc = 0; out_sendcallaccpt_release: release_sock(sk); break; } default: rc = -ENOIOCTLCMD; break; } return rc; } static const struct net_proto_family x25_family_ops = { .family = AF_X25, .create = x25_create, .owner = THIS_MODULE, }; #ifdef CONFIG_COMPAT static int compat_x25_subscr_ioctl(unsigned int cmd, struct compat_x25_subscrip_struct __user *x25_subscr32) { struct compat_x25_subscrip_struct x25_subscr; struct x25_neigh *nb; struct net_device *dev; int rc = -EINVAL; rc = -EFAULT; if (copy_from_user(&x25_subscr, x25_subscr32, sizeof(*x25_subscr32))) goto out; rc = -EINVAL; dev = x25_dev_get(x25_subscr.device); if (dev == NULL) goto out; nb = x25_get_neigh(dev); if (nb == NULL) goto out_dev_put; dev_put(dev); if (cmd == SIOCX25GSUBSCRIP) { read_lock_bh(&x25_neigh_list_lock); x25_subscr.extended = nb->extended; x25_subscr.global_facil_mask = nb->global_facil_mask; read_unlock_bh(&x25_neigh_list_lock); rc = copy_to_user(x25_subscr32, &x25_subscr, sizeof(*x25_subscr32)) ? 
-EFAULT : 0; } else { rc = -EINVAL; if (x25_subscr.extended == 0 || x25_subscr.extended == 1) { rc = 0; write_lock_bh(&x25_neigh_list_lock); nb->extended = x25_subscr.extended; nb->global_facil_mask = x25_subscr.global_facil_mask; write_unlock_bh(&x25_neigh_list_lock); } } x25_neigh_put(nb); out: return rc; out_dev_put: dev_put(dev); goto out; } static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); int rc = -ENOIOCTLCMD; switch(cmd) { case TIOCOUTQ: case TIOCINQ: rc = x25_ioctl(sock, cmd, (unsigned long)argp); break; case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: case SIOCGIFMETRIC: case SIOCSIFMETRIC: rc = -EINVAL; break; case SIOCADDRT: case SIOCDELRT: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; rc = x25_route_ioctl(cmd, argp); break; case SIOCX25GSUBSCRIP: rc = compat_x25_subscr_ioctl(cmd, argp); break; case SIOCX25SSUBSCRIP: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; rc = compat_x25_subscr_ioctl(cmd, argp); break; case SIOCX25GFACILITIES: case SIOCX25SFACILITIES: case SIOCX25GDTEFACILITIES: case SIOCX25SDTEFACILITIES: case SIOCX25GCALLUSERDATA: case SIOCX25SCALLUSERDATA: case SIOCX25GCAUSEDIAG: case SIOCX25SCAUSEDIAG: case SIOCX25SCUDMATCHLEN: case SIOCX25CALLACCPTAPPRV: case SIOCX25SENDCALLACCPT: rc = x25_ioctl(sock, cmd, (unsigned long)argp); break; default: rc = -ENOIOCTLCMD; break; } return rc; } #endif static const struct proto_ops x25_proto_ops = { .family = AF_X25, .owner = THIS_MODULE, .release = x25_release, .bind = x25_bind, .connect = x25_connect, .socketpair = sock_no_socketpair, .accept = x25_accept, .getname = x25_getname, .poll = datagram_poll, .ioctl = x25_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_x25_ioctl, #endif .gettstamp = sock_gettstamp, .listen = x25_listen, .shutdown = sock_no_shutdown, .setsockopt = x25_setsockopt, .getsockopt = x25_getsockopt, .sendmsg = x25_sendmsg, .recvmsg = x25_recvmsg, .mmap = sock_no_mmap, }; static struct packet_type x25_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_X25), .func = x25_lapb_receive_frame, }; static struct notifier_block x25_dev_notifier = { .notifier_call = x25_device_event, }; void x25_kill_by_neigh(struct x25_neigh *nb) { struct sock *s; write_lock_bh(&x25_list_lock); sk_for_each(s, &x25_list) { if (x25_sk(s)->neighbour == nb) { write_unlock_bh(&x25_list_lock); lock_sock(s); x25_disconnect(s, ENETUNREACH, 0, 0); release_sock(s); write_lock_bh(&x25_list_lock); } } write_unlock_bh(&x25_list_lock); /* Remove any related forwards */ x25_clear_forward_by_dev(nb->dev); } static int __init x25_init(void) { int rc; rc = proto_register(&x25_proto, 0); if (rc) goto out; rc = sock_register(&x25_family_ops); if (rc) goto out_proto; dev_add_pack(&x25_packet_type); rc = register_netdevice_notifier(&x25_dev_notifier); if (rc) goto out_sock; rc = x25_register_sysctl(); if (rc) goto out_dev; rc = x25_proc_init(); if (rc) goto out_sysctl; pr_info("Linux Version 0.2\n"); out: return rc; out_sysctl: x25_unregister_sysctl(); out_dev: unregister_netdevice_notifier(&x25_dev_notifier); out_sock: dev_remove_pack(&x25_packet_type); sock_unregister(AF_X25); out_proto: proto_unregister(&x25_proto); goto out; } module_init(x25_init); static void __exit x25_exit(void) { x25_proc_exit(); x25_link_free(); x25_route_free(); x25_unregister_sysctl(); unregister_netdevice_notifier(&x25_dev_notifier); dev_remove_pack(&x25_packet_type); 
sock_unregister(AF_X25); proto_unregister(&x25_proto); } module_exit(x25_exit); MODULE_AUTHOR("Jonathan Naylor <g4klx@g4klx.demon.co.uk>"); MODULE_DESCRIPTION("The X.25 Packet Layer network layer protocol"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_X25);
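/*
 * Userspace sketch (not part of af_x25.c): a minimal client that mirrors the
 * checks made by x25_bind() and x25_connect() above - AF_X25, SOCK_SEQPACKET,
 * an address length of exactly sizeof(struct sockaddr_x25), and a
 * NUL-terminated, digits-only X.121 address shorter than X25_ADDR_LEN.
 * The addresses are illustrative, and an X.25 link and route must already be
 * configured for connect() to succeed; bind() comes first because autobinding
 * is not supported.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/x25.h>

int main(void)
{
	struct sockaddr_x25 local = { .sx25_family = AF_X25 };
	struct sockaddr_x25 peer  = { .sx25_family = AF_X25 };
	int s = socket(AF_X25, SOCK_SEQPACKET, 0);

	if (s < 0) {
		perror("socket(AF_X25)");
		return 1;
	}

	strcpy(local.sx25_addr.x25_addr, "2345");     /* local X.121 address */
	strcpy(peer.sx25_addr.x25_addr, "23456789");  /* remote X.121 address */

	if (bind(s, (struct sockaddr *)&local, sizeof(local)) < 0 ||
	    connect(s, (struct sockaddr *)&peer, sizeof(peer)) < 0)
		perror("x25 bind/connect");

	close(s);
	return 0;
}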
132 27 8604 65 51 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Landlock - Credential hooks * * Copyright © 2019-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2019-2020 ANSSI * Copyright © 2021-2025 Microsoft Corporation */ #ifndef _SECURITY_LANDLOCK_CRED_H #define _SECURITY_LANDLOCK_CRED_H #include <linux/container_of.h> #include <linux/cred.h> #include <linux/init.h> #include <linux/rcupdate.h> #include "access.h" #include "limits.h" #include "ruleset.h" #include "setup.h" /** * struct landlock_cred_security - Credential security blob * * This structure is packed to minimize the size of struct * landlock_file_security. However, it is always aligned in the LSM cred blob, * see lsm_set_blob_size(). */ struct landlock_cred_security { /** * @domain: Immutable ruleset enforced on a task. */ struct landlock_ruleset *domain; #ifdef CONFIG_AUDIT /** * @domain_exec: Bitmask identifying the domain layers that were enforced by * the current task's executed file (i.e. no new execve(2) since * landlock_restrict_self(2)). */ u16 domain_exec; /** * @log_subdomains_off: Set if the domain descendants's log_status should be * set to %LANDLOCK_LOG_DISABLED. This is not a landlock_hierarchy * configuration because it applies to future descendant domains and it does * not require a current domain. */ u8 log_subdomains_off : 1; #endif /* CONFIG_AUDIT */ } __packed; #ifdef CONFIG_AUDIT /* Makes sure all layer executions can be stored. */ static_assert(BITS_PER_TYPE(typeof_member(struct landlock_cred_security, domain_exec)) >= LANDLOCK_MAX_NUM_LAYERS); #endif /* CONFIG_AUDIT */ static inline struct landlock_cred_security * landlock_cred(const struct cred *cred) { return cred->security + landlock_blob_sizes.lbs_cred; } static inline struct landlock_ruleset *landlock_get_current_domain(void) { return landlock_cred(current_cred())->domain; } /* * The call needs to come from an RCU read-side critical section. */ static inline const struct landlock_ruleset * landlock_get_task_domain(const struct task_struct *const task) { return landlock_cred(__task_cred(task))->domain; } static inline bool landlocked(const struct task_struct *const task) { bool has_dom; if (task == current) return !!landlock_get_current_domain(); rcu_read_lock(); has_dom = !!landlock_get_task_domain(task); rcu_read_unlock(); return has_dom; } /** * landlock_get_applicable_subject - Return the subject's Landlock credential * if its enforced domain applies to (i.e. * handles) at least one of the access rights * specified in @masks * * @cred: credential * @masks: access masks * @handle_layer: returned youngest layer handling a subset of @masks. Not set * if the function returns NULL. * * Returns: landlock_cred(@cred) if any access rights specified in @masks is * handled, or NULL otherwise. 
*/ static inline const struct landlock_cred_security * landlock_get_applicable_subject(const struct cred *const cred, const struct access_masks masks, size_t *const handle_layer) { const union access_masks_all masks_all = { .masks = masks, }; const struct landlock_ruleset *domain; ssize_t layer_level; if (!cred) return NULL; domain = landlock_cred(cred)->domain; if (!domain) return NULL; for (layer_level = domain->num_layers - 1; layer_level >= 0; layer_level--) { union access_masks_all layer = { .masks = domain->access_masks[layer_level], }; if (layer.all & masks_all.all) { if (handle_layer) *handle_layer = layer_level; return landlock_cred(cred); } } return NULL; } __init void landlock_add_cred_hooks(void); #endif /* _SECURITY_LANDLOCK_CRED_H */
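/*
 * Hook-side sketch (not part of cred.h): how a caller might ask whether the
 * current task's Landlock domain handles a given access right via
 * landlock_get_applicable_subject(). The .fs bit-field is assumed to come
 * from struct access_masks in access.h and LANDLOCK_ACCESS_FS_REFER from the
 * Landlock UAPI header; neither is shown here, so treat both as assumptions.
 */
static inline bool current_subject_handles_fs_refer(void)
{
	const struct access_masks masks = {
		.fs = LANDLOCK_ACCESS_FS_REFER,
	};
	size_t youngest_layer;

	/* Non-NULL means at least one enforced layer handles the mask. */
	return landlock_get_applicable_subject(current_cred(), masks,
					       &youngest_layer) != NULL;
}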
// SPDX-License-Identifier: GPL-2.0 /* *
Copyright 2007, Frank A Kingswood <frank@kingswood-consulting.co.uk> * Copyright 2007, Werner Cornelius <werner@cornelius-consult.de> * Copyright 2009, Boris Hajduk <boris@hajduk.org> * * ch341.c implements a serial port driver for the Winchiphead CH341. * * The CH341 device can be used to implement an RS232 asynchronous * serial port, an IEEE-1284 parallel printer port or a memory-like * interface. In all cases the CH341 supports an I2C interface as well. * This driver only supports the asynchronous serial interface. */ #include <linux/kernel.h> #include <linux/tty.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/serial.h> #include <linux/unaligned.h> #define DEFAULT_BAUD_RATE 9600 #define DEFAULT_TIMEOUT 1000 /* flags for IO-Bits */ #define CH341_BIT_RTS (1 << 6) #define CH341_BIT_DTR (1 << 5) /******************************/ /* interrupt pipe definitions */ /******************************/ /* always 4 interrupt bytes */ /* first irq byte normally 0x08 */ /* second irq byte base 0x7d + below */ /* third irq byte base 0x94 + below */ /* fourth irq byte normally 0xee */ /* second interrupt byte */ #define CH341_MULT_STAT 0x04 /* multiple status since last interrupt event */ /* status returned in third interrupt answer byte, inverted in data from irq */ #define CH341_BIT_CTS 0x01 #define CH341_BIT_DSR 0x02 #define CH341_BIT_RI 0x04 #define CH341_BIT_DCD 0x08 #define CH341_BITS_MODEM_STAT 0x0f /* all bits */ /* Break support - the information used to implement this was gleaned from * the Net/FreeBSD uchcom.c driver by Takanori Watanabe. Domo arigato. */ #define CH341_REQ_READ_VERSION 0x5F #define CH341_REQ_WRITE_REG 0x9A #define CH341_REQ_READ_REG 0x95 #define CH341_REQ_SERIAL_INIT 0xA1 #define CH341_REQ_MODEM_CTRL 0xA4 #define CH341_REG_BREAK 0x05 #define CH341_REG_PRESCALER 0x12 #define CH341_REG_DIVISOR 0x13 #define CH341_REG_LCR 0x18 #define CH341_REG_LCR2 0x25 #define CH341_REG_FLOW_CTL 0x27 #define CH341_NBREAK_BITS 0x01 #define CH341_LCR_ENABLE_RX 0x80 #define CH341_LCR_ENABLE_TX 0x40 #define CH341_LCR_MARK_SPACE 0x20 #define CH341_LCR_PAR_EVEN 0x10 #define CH341_LCR_ENABLE_PAR 0x08 #define CH341_LCR_STOP_BITS_2 0x04 #define CH341_LCR_CS8 0x03 #define CH341_LCR_CS7 0x02 #define CH341_LCR_CS6 0x01 #define CH341_LCR_CS5 0x00 #define CH341_FLOW_CTL_NONE 0x00 #define CH341_FLOW_CTL_RTSCTS 0x01 #define CH341_QUIRK_LIMITED_PRESCALER BIT(0) #define CH341_QUIRK_SIMULATE_BREAK BIT(1) static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1a86, 0x5523) }, { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, { USB_DEVICE(0x2184, 0x0057) }, { USB_DEVICE(0x4348, 0x5523) }, { USB_DEVICE(0x9986, 0x7523) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); struct ch341_private { spinlock_t lock; /* access lock */ unsigned baud_rate; /* set baud rate */ u8 mcr; u8 msr; u8 lcr; unsigned long quirks; u8 version; unsigned long break_end; }; static void ch341_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int ch341_control_out(struct usb_device *dev, u8 request, u16 value, u16 index) { int r; dev_dbg(&dev->dev, "%s - (%02x,%04x,%04x)\n", __func__, request, value, index); r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, index, NULL, 0, DEFAULT_TIMEOUT); if (r < 0) dev_err(&dev->dev, "failed to send control message: %d\n", r); return r; } static int ch341_control_in(struct usb_device *dev, u8 
request, u16 value, u16 index, char *buf, unsigned bufsize) { int r; dev_dbg(&dev->dev, "%s - (%02x,%04x,%04x,%u)\n", __func__, request, value, index, bufsize); r = usb_control_msg_recv(dev, 0, request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, buf, bufsize, DEFAULT_TIMEOUT, GFP_KERNEL); if (r) { dev_err(&dev->dev, "failed to receive control message: %d\n", r); return r; } return 0; } #define CH341_CLKRATE 48000000 #define CH341_CLK_DIV(ps, fact) (1 << (12 - 3 * (ps) - (fact))) #define CH341_MIN_RATE(ps) (CH341_CLKRATE / (CH341_CLK_DIV((ps), 1) * 512)) static const speed_t ch341_min_rates[] = { CH341_MIN_RATE(0), CH341_MIN_RATE(1), CH341_MIN_RATE(2), CH341_MIN_RATE(3), }; /* Supported range is 46 to 3000000 bps. */ #define CH341_MIN_BPS DIV_ROUND_UP(CH341_CLKRATE, CH341_CLK_DIV(0, 0) * 256) #define CH341_MAX_BPS (CH341_CLKRATE / (CH341_CLK_DIV(3, 0) * 2)) /* * The device line speed is given by the following equation: * * baudrate = 48000000 / (2^(12 - 3 * ps - fact) * div), where * * 0 <= ps <= 3, * 0 <= fact <= 1, * 2 <= div <= 256 if fact = 0, or * 9 <= div <= 256 if fact = 1 */ static int ch341_get_divisor(struct ch341_private *priv, speed_t speed) { unsigned int fact, div, clk_div; bool force_fact0 = false; int ps; /* * Clamp to supported range, this makes the (ps < 0) and (div < 2) * sanity checks below redundant. */ speed = clamp_val(speed, CH341_MIN_BPS, CH341_MAX_BPS); /* * Start with highest possible base clock (fact = 1) that will give a * divisor strictly less than 512. */ fact = 1; for (ps = 3; ps >= 0; ps--) { if (speed > ch341_min_rates[ps]) break; } if (ps < 0) return -EINVAL; /* Determine corresponding divisor, rounding down. */ clk_div = CH341_CLK_DIV(ps, fact); div = CH341_CLKRATE / (clk_div * speed); /* Some devices require a lower base clock if ps < 3. */ if (ps < 3 && (priv->quirks & CH341_QUIRK_LIMITED_PRESCALER)) force_fact0 = true; /* Halve base clock (fact = 0) if required. */ if (div < 9 || div > 255 || force_fact0) { div /= 2; clk_div *= 2; fact = 0; } if (div < 2) return -EINVAL; /* * Pick next divisor if resulting rate is closer to the requested one, * scale up to avoid rounding errors on low rates. */ if (16 * CH341_CLKRATE / (clk_div * div) - 16 * speed >= 16 * speed - 16 * CH341_CLKRATE / (clk_div * (div + 1))) div++; /* * Prefer lower base clock (fact = 0) if even divisor. * * Note that this makes the receiver more tolerant to errors. */ if (fact == 1 && div % 2 == 0) { div /= 2; fact = 0; } return (0x100 - div) << 8 | fact << 2 | ps; } static int ch341_set_baudrate_lcr(struct usb_device *dev, struct ch341_private *priv, speed_t baud_rate, u8 lcr) { int val; int r; if (!baud_rate) return -EINVAL; val = ch341_get_divisor(priv, baud_rate); if (val < 0) return -EINVAL; /* * CH341A buffers data until a full endpoint-size packet (32 bytes) * has been received unless bit 7 is set. * * At least one device with version 0x27 appears to have this bit * inverted. */ if (priv->version > 0x27) val |= BIT(7); r = ch341_control_out(dev, CH341_REQ_WRITE_REG, CH341_REG_DIVISOR << 8 | CH341_REG_PRESCALER, val); if (r) return r; /* * Chip versions before version 0x30 as read using * CH341_REQ_READ_VERSION used separate registers for line control * (stop bits, parity and word length). Version 0x30 and above use * CH341_REG_LCR only and CH341_REG_LCR2 is always set to zero. 
*/ if (priv->version < 0x30) return 0; r = ch341_control_out(dev, CH341_REQ_WRITE_REG, CH341_REG_LCR2 << 8 | CH341_REG_LCR, lcr); if (r) return r; return r; } static int ch341_set_handshake(struct usb_device *dev, u8 control) { return ch341_control_out(dev, CH341_REQ_MODEM_CTRL, ~control, 0); } static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) { const unsigned int size = 2; u8 buffer[2]; int r; unsigned long flags; r = ch341_control_in(dev, CH341_REQ_READ_REG, 0x0706, 0, buffer, size); if (r) return r; spin_lock_irqsave(&priv->lock, flags); priv->msr = (~(*buffer)) & CH341_BITS_MODEM_STAT; spin_unlock_irqrestore(&priv->lock, flags); return 0; } /* -------------------------------------------------------------------------- */ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) { const unsigned int size = 2; u8 buffer[2]; int r; /* expect two bytes 0x27 0x00 */ r = ch341_control_in(dev, CH341_REQ_READ_VERSION, 0, 0, buffer, size); if (r) return r; priv->version = buffer[0]; dev_dbg(&dev->dev, "Chip version: 0x%02x\n", priv->version); r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT, 0, 0); if (r < 0) return r; r = ch341_set_baudrate_lcr(dev, priv, priv->baud_rate, priv->lcr); if (r < 0) return r; r = ch341_set_handshake(dev, priv->mcr); if (r < 0) return r; return 0; } static int ch341_detect_quirks(struct usb_serial_port *port) { struct ch341_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; const unsigned int size = 2; unsigned long quirks = 0; u8 buffer[2]; int r; /* * A subset of CH34x devices does not support all features. The * prescaler is limited and there is no support for sending a RS232 * break condition. A read failure when trying to set up the latter is * used to detect these devices. */ r = usb_control_msg_recv(udev, 0, CH341_REQ_READ_REG, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, CH341_REG_BREAK, 0, &buffer, size, DEFAULT_TIMEOUT, GFP_KERNEL); if (r == -EPIPE) { dev_info(&port->dev, "break control not supported, using simulated break\n"); quirks = CH341_QUIRK_LIMITED_PRESCALER | CH341_QUIRK_SIMULATE_BREAK; r = 0; } else if (r) { dev_err(&port->dev, "failed to read break control: %d\n", r); } if (quirks) { dev_dbg(&port->dev, "enabling quirk flags: 0x%02lx\n", quirks); priv->quirks |= quirks; } return r; } static int ch341_port_probe(struct usb_serial_port *port) { struct ch341_private *priv; int r; priv = kzalloc(sizeof(struct ch341_private), GFP_KERNEL); if (!priv) return -ENOMEM; spin_lock_init(&priv->lock); priv->baud_rate = DEFAULT_BAUD_RATE; /* * Some CH340 devices appear unable to change the initial LCR * settings, so set a sane 8N1 default. 
*/ priv->lcr = CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX | CH341_LCR_CS8; r = ch341_configure(port->serial->dev, priv); if (r < 0) goto error; usb_set_serial_port_data(port, priv); r = ch341_detect_quirks(port); if (r < 0) goto error; return 0; error: kfree(priv); return r; } static void ch341_port_remove(struct usb_serial_port *port) { struct ch341_private *priv; priv = usb_get_serial_port_data(port); kfree(priv); } static int ch341_carrier_raised(struct usb_serial_port *port) { struct ch341_private *priv = usb_get_serial_port_data(port); if (priv->msr & CH341_BIT_DCD) return 1; return 0; } static void ch341_dtr_rts(struct usb_serial_port *port, int on) { struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; /* drop DTR and RTS */ spin_lock_irqsave(&priv->lock, flags); if (on) priv->mcr |= CH341_BIT_RTS | CH341_BIT_DTR; else priv->mcr &= ~(CH341_BIT_RTS | CH341_BIT_DTR); spin_unlock_irqrestore(&priv->lock, flags); ch341_set_handshake(port->serial->dev, priv->mcr); } static void ch341_close(struct usb_serial_port *port) { usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); } /* open this device, set default parameters */ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) { struct ch341_private *priv = usb_get_serial_port_data(port); int r; if (tty) ch341_set_termios(tty, port, NULL); dev_dbg(&port->dev, "%s - submitting interrupt urb\n", __func__); r = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (r) { dev_err(&port->dev, "%s - failed to submit interrupt urb: %d\n", __func__, r); return r; } r = ch341_get_status(port->serial->dev, priv); if (r < 0) { dev_err(&port->dev, "failed to read modem status: %d\n", r); goto err_kill_interrupt_urb; } r = usb_serial_generic_open(tty, port); if (r) goto err_kill_interrupt_urb; return 0; err_kill_interrupt_urb: usb_kill_urb(port->interrupt_in_urb); return r; } static void ch341_set_flow_control(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { u16 flow_ctl; int r; if (C_CRTSCTS(tty)) flow_ctl = CH341_FLOW_CTL_RTSCTS; else flow_ctl = CH341_FLOW_CTL_NONE; r = ch341_control_out(port->serial->dev, CH341_REQ_WRITE_REG, (CH341_REG_FLOW_CTL << 8) | CH341_REG_FLOW_CTL, (flow_ctl << 8) | flow_ctl); if (r < 0 && old_termios) { tty->termios.c_cflag &= ~CRTSCTS; tty->termios.c_cflag |= (old_termios->c_cflag & CRTSCTS); } } /* Old_termios contains the original termios settings and * tty->termios contains the new setting to be used. 
*/ static void ch341_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct ch341_private *priv = usb_get_serial_port_data(port); unsigned baud_rate; unsigned long flags; u8 lcr; int r; /* redundant changes may cause the chip to lose bytes */ if (old_termios && !tty_termios_hw_change(&tty->termios, old_termios)) return; baud_rate = tty_get_baud_rate(tty); lcr = CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX; switch (C_CSIZE(tty)) { case CS5: lcr |= CH341_LCR_CS5; break; case CS6: lcr |= CH341_LCR_CS6; break; case CS7: lcr |= CH341_LCR_CS7; break; case CS8: lcr |= CH341_LCR_CS8; break; } if (C_PARENB(tty)) { lcr |= CH341_LCR_ENABLE_PAR; if (C_PARODD(tty) == 0) lcr |= CH341_LCR_PAR_EVEN; if (C_CMSPAR(tty)) lcr |= CH341_LCR_MARK_SPACE; } if (C_CSTOPB(tty)) lcr |= CH341_LCR_STOP_BITS_2; if (baud_rate) { priv->baud_rate = baud_rate; r = ch341_set_baudrate_lcr(port->serial->dev, priv, priv->baud_rate, lcr); if (r < 0 && old_termios) { priv->baud_rate = tty_termios_baud_rate(old_termios); tty_termios_copy_hw(&tty->termios, old_termios); } else if (r == 0) { priv->lcr = lcr; } } spin_lock_irqsave(&priv->lock, flags); if (C_BAUD(tty) == B0) priv->mcr &= ~(CH341_BIT_DTR | CH341_BIT_RTS); else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) priv->mcr |= (CH341_BIT_DTR | CH341_BIT_RTS); spin_unlock_irqrestore(&priv->lock, flags); ch341_set_handshake(port->serial->dev, priv->mcr); ch341_set_flow_control(tty, port, old_termios); } /* * A subset of all CH34x devices don't support a real break condition and * reading CH341_REG_BREAK fails (see also ch341_detect_quirks). This function * simulates a break condition by lowering the baud rate to the minimum * supported by the hardware upon enabling the break condition and sending * a NUL byte. * * Incoming data is corrupted while the break condition is being simulated. * * Normally the duration of the break condition can be controlled individually * by userspace using TIOCSBRK and TIOCCBRK or by passing an argument to * TCSBRKP. Due to how the simulation is implemented the duration can't be * controlled. The duration is always about (1s / 46bd * 9bit) = 196ms. */ static int ch341_simulate_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long now, delay; int r, r2; if (break_state != 0) { dev_dbg(&port->dev, "enter break state requested\n"); r = ch341_set_baudrate_lcr(port->serial->dev, priv, CH341_MIN_BPS, CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX | CH341_LCR_CS8); if (r < 0) { dev_err(&port->dev, "failed to change baud rate to %u: %d\n", CH341_MIN_BPS, r); goto restore; } r = tty_put_char(tty, '\0'); if (r < 0) { dev_err(&port->dev, "failed to write NUL byte for simulated break condition: %d\n", r); goto restore; } /* * Compute expected transmission duration including safety * margin. The original baud rate is only restored after the * computed point in time. 
* * 11 bits = 1 start, 8 data, 1 stop, 1 margin */ priv->break_end = jiffies + (11 * HZ / CH341_MIN_BPS); return 0; } dev_dbg(&port->dev, "leave break state requested\n"); now = jiffies; if (time_before(now, priv->break_end)) { /* Wait until NUL byte is written */ delay = priv->break_end - now; dev_dbg(&port->dev, "wait %d ms while transmitting NUL byte at %u baud\n", jiffies_to_msecs(delay), CH341_MIN_BPS); schedule_timeout_interruptible(delay); } r = 0; restore: /* Restore original baud rate */ r2 = ch341_set_baudrate_lcr(port->serial->dev, priv, priv->baud_rate, priv->lcr); if (r2 < 0) { dev_err(&port->dev, "restoring original baud rate of %u failed: %d\n", priv->baud_rate, r2); return r2; } return r; } static int ch341_break_ctl(struct tty_struct *tty, int break_state) { const u16 ch341_break_reg = (CH341_REG_LCR << 8) | CH341_REG_BREAK; struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); u16 reg_contents; u8 break_reg[2]; int r; if (priv->quirks & CH341_QUIRK_SIMULATE_BREAK) return ch341_simulate_break(tty, break_state); r = ch341_control_in(port->serial->dev, CH341_REQ_READ_REG, ch341_break_reg, 0, break_reg, 2); if (r) { dev_err(&port->dev, "%s - USB control read error (%d)\n", __func__, r); if (r > 0) r = -EIO; return r; } dev_dbg(&port->dev, "%s - initial ch341 break register contents - reg1: %x, reg2: %x\n", __func__, break_reg[0], break_reg[1]); if (break_state != 0) { dev_dbg(&port->dev, "%s - Enter break state requested\n", __func__); break_reg[0] &= ~CH341_NBREAK_BITS; break_reg[1] &= ~CH341_LCR_ENABLE_TX; } else { dev_dbg(&port->dev, "%s - Leave break state requested\n", __func__); break_reg[0] |= CH341_NBREAK_BITS; break_reg[1] |= CH341_LCR_ENABLE_TX; } dev_dbg(&port->dev, "%s - New ch341 break register contents - reg1: %x, reg2: %x\n", __func__, break_reg[0], break_reg[1]); reg_contents = get_unaligned_le16(break_reg); r = ch341_control_out(port->serial->dev, CH341_REQ_WRITE_REG, ch341_break_reg, reg_contents); if (r < 0) { dev_err(&port->dev, "%s - USB control write error (%d)\n", __func__, r); return r; } return 0; } static int ch341_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; u8 control; spin_lock_irqsave(&priv->lock, flags); if (set & TIOCM_RTS) priv->mcr |= CH341_BIT_RTS; if (set & TIOCM_DTR) priv->mcr |= CH341_BIT_DTR; if (clear & TIOCM_RTS) priv->mcr &= ~CH341_BIT_RTS; if (clear & TIOCM_DTR) priv->mcr &= ~CH341_BIT_DTR; control = priv->mcr; spin_unlock_irqrestore(&priv->lock, flags); return ch341_set_handshake(port->serial->dev, control); } static void ch341_update_status(struct usb_serial_port *port, unsigned char *data, size_t len) { struct ch341_private *priv = usb_get_serial_port_data(port); struct tty_struct *tty; unsigned long flags; u8 status; u8 delta; if (len < 4) return; status = ~data[2] & CH341_BITS_MODEM_STAT; spin_lock_irqsave(&priv->lock, flags); delta = status ^ priv->msr; priv->msr = status; spin_unlock_irqrestore(&priv->lock, flags); if (data[1] & CH341_MULT_STAT) dev_dbg(&port->dev, "%s - multiple status change\n", __func__); if (!delta) return; if (delta & CH341_BIT_CTS) port->icount.cts++; if (delta & CH341_BIT_DSR) port->icount.dsr++; if (delta & CH341_BIT_RI) port->icount.rng++; if (delta & CH341_BIT_DCD) { port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) { usb_serial_handle_dcd_change(port, tty, status & 
CH341_BIT_DCD); tty_kref_put(tty); } } wake_up_interruptible(&port->port.delta_msr_wait); } static void ch341_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned int len = urb->actual_length; int status; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&urb->dev->dev, "%s - urb shutting down: %d\n", __func__, urb->status); return; default: dev_dbg(&urb->dev->dev, "%s - nonzero urb status: %d\n", __func__, urb->status); goto exit; } usb_serial_debug_data(&port->dev, __func__, len, data); ch341_update_status(port, data, len); exit: status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { dev_err(&urb->dev->dev, "%s - usb_submit_urb failed: %d\n", __func__, status); } } static int ch341_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; u8 mcr; u8 status; unsigned int result; spin_lock_irqsave(&priv->lock, flags); mcr = priv->mcr; status = priv->msr; spin_unlock_irqrestore(&priv->lock, flags); result = ((mcr & CH341_BIT_DTR) ? TIOCM_DTR : 0) | ((mcr & CH341_BIT_RTS) ? TIOCM_RTS : 0) | ((status & CH341_BIT_CTS) ? TIOCM_CTS : 0) | ((status & CH341_BIT_DSR) ? TIOCM_DSR : 0) | ((status & CH341_BIT_RI) ? TIOCM_RI : 0) | ((status & CH341_BIT_DCD) ? TIOCM_CD : 0); dev_dbg(&port->dev, "%s - result = %x\n", __func__, result); return result; } static int ch341_reset_resume(struct usb_serial *serial) { struct usb_serial_port *port = serial->port[0]; struct ch341_private *priv; int ret; priv = usb_get_serial_port_data(port); if (!priv) return 0; /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); if (tty_port_initialized(&port->port)) { ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); if (ret) { dev_err(&port->dev, "failed to submit interrupt urb: %d\n", ret); return ret; } ret = ch341_get_status(port->serial->dev, priv); if (ret < 0) { dev_err(&port->dev, "failed to read modem status: %d\n", ret); } } return usb_serial_generic_resume(serial); } static struct usb_serial_driver ch341_device = { .driver = { .name = "ch341-uart", }, .id_table = id_table, .num_ports = 1, .open = ch341_open, .dtr_rts = ch341_dtr_rts, .carrier_raised = ch341_carrier_raised, .close = ch341_close, .set_termios = ch341_set_termios, .break_ctl = ch341_break_ctl, .tiocmget = ch341_tiocmget, .tiocmset = ch341_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .read_int_callback = ch341_read_int_callback, .port_probe = ch341_port_probe, .port_remove = ch341_port_remove, .reset_resume = ch341_reset_resume, }; static struct usb_serial_driver * const serial_drivers[] = { &ch341_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_DESCRIPTION("Winchiphead CH341 USB Serial driver"); MODULE_LICENSE("GPL v2");
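/*
 * Standalone sketch (not part of ch341.c): evaluates the line-speed equation
 * documented above, baudrate = 48000000 / (2^(12 - 3 * ps - fact) * div),
 * for a given prescaler/fact/divisor triple. The ps=2, fact=0, div=78 values
 * are illustrative of what ch341_get_divisor() ends up selecting for a
 * requested 9600 bps (about 9615 bps on the wire, roughly 0.16% error).
 */
#include <stdio.h>

static double ch341_rate(int ps, int fact, int div)
{
	return 48000000.0 / ((double)(1 << (12 - 3 * ps - fact)) * div);
}

int main(void)
{
	printf("ps=2 fact=0 div=78 -> %.1f bps (requested 9600)\n",
	       ch341_rate(2, 0, 78));
	return 0;
}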
// SPDX-License-Identifier: GPL-2.0-only
/*
 * super.c
 *
 * PURPOSE
 *  Super block routines for the OSTA-UDF(tm) filesystem.
 *
 * DESCRIPTION
 *  OSTA-UDF(tm) = Optical Storage Technology Association
 *  Universal Disk Format.
 *
 *  This code is based on version 2.00 of the UDF specification,
 *  and revision 3 of the ECMA 167 standard [equivalent to ISO 13346].
 *    http://www.osta.org/
 *    https://www.ecma.ch/
 *    https://www.iso.org/
 *
 * COPYRIGHT
 *  (C) 1998 Dave Boynton
 *  (C) 1998-2004 Ben Fennema
 *  (C) 2000 Stelias Computing Inc
 *
 * HISTORY
 *
 *  09/24/98 dgb  changed to allow compiling outside of kernel, and
 *                added some debugging.
 *  10/01/98 dgb  updated to allow (some) possibility of compiling w/2.0.34
 *  10/16/98      attempting some multi-session support
 *  10/17/98      added freespace count for "df"
 *  11/11/98 gr   added novrs option
 *  11/26/98 dgb  added fileset,anchor mount options
 *  12/06/98 blf  really hosed things royally. vat/sparing support. sequenced
 *                vol descs. rewrote option handling based on isofs
 *  12/20/98      find the free space bitmap (if it exists)
 */

#include "udfdecl.h"

#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stat.h>
#include <linux/cdrom.h>
#include <linux/nls.h>
#include <linux/vfs.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/seq_file.h>
#include <linux/bitmap.h>
#include <linux/crc-itu-t.h>
#include <linux/log2.h>
#include <asm/byteorder.h>
#include <linux/iversion.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

#include "udf_sb.h"
#include "udf_i.h"

#include <linux/init.h>
#include <linux/uaccess.h>

enum {
	VDS_POS_PRIMARY_VOL_DESC,
	VDS_POS_UNALLOC_SPACE_DESC,
	VDS_POS_LOGICAL_VOL_DESC,
	VDS_POS_IMP_USE_VOL_DESC,
	VDS_POS_LENGTH
};

#define VSD_FIRST_SECTOR_OFFSET		32768
#define VSD_MAX_SECTOR_OFFSET		0x800000

/*
 * Maximum number of Terminating Descriptor / Logical Volume Integrity
 * Descriptor redirections. The chosen numbers are arbitrary - just that we
 * hopefully don't limit any real use of rewritten inode on write-once media
 * but avoid looping for too long on corrupted media.
 */
#define UDF_MAX_TD_NESTING 64
#define UDF_MAX_LVID_NESTING 1000

enum { UDF_MAX_LINKS = 0xffff };

/*
 * We limit filesize to 4TB.
This is arbitrary as the on-disk format supports * more but because the file space is described by a linked list of extents, * each of which can have at most 1GB, the creation and handling of extents * gets unusably slow beyond certain point... */ #define UDF_MAX_FILESIZE (1ULL << 42) /* These are the "meat" - everything else is stuffing */ static int udf_fill_super(struct super_block *sb, struct fs_context *fc); static void udf_put_super(struct super_block *); static int udf_sync_fs(struct super_block *, int); static void udf_load_logicalvolint(struct super_block *, struct kernel_extent_ad); static void udf_open_lvid(struct super_block *); static void udf_close_lvid(struct super_block *); static unsigned int udf_count_free(struct super_block *); static int udf_statfs(struct dentry *, struct kstatfs *); static int udf_show_options(struct seq_file *, struct dentry *); static int udf_init_fs_context(struct fs_context *fc); static int udf_parse_param(struct fs_context *fc, struct fs_parameter *param); static int udf_reconfigure(struct fs_context *fc); static void udf_free_fc(struct fs_context *fc); static const struct fs_parameter_spec udf_param_spec[]; struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb) { struct logicalVolIntegrityDesc *lvid; unsigned int partnum; unsigned int offset; if (!UDF_SB(sb)->s_lvid_bh) return NULL; lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data; partnum = le32_to_cpu(lvid->numOfPartitions); /* The offset is to skip freeSpaceTable and sizeTable arrays */ offset = partnum * 2 * sizeof(uint32_t); return (struct logicalVolIntegrityDescImpUse *) (((uint8_t *)(lvid + 1)) + offset); } /* UDF filesystem type */ static int udf_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, udf_fill_super); } static const struct fs_context_operations udf_context_ops = { .parse_param = udf_parse_param, .get_tree = udf_get_tree, .reconfigure = udf_reconfigure, .free = udf_free_fc, }; static struct file_system_type udf_fstype = { .owner = THIS_MODULE, .name = "udf", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = udf_init_fs_context, .parameters = udf_param_spec, }; MODULE_ALIAS_FS("udf"); static struct kmem_cache *udf_inode_cachep; static struct inode *udf_alloc_inode(struct super_block *sb) { struct udf_inode_info *ei; ei = alloc_inode_sb(sb, udf_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->i_unique = 0; ei->i_lenExtents = 0; ei->i_lenStreams = 0; ei->i_next_alloc_block = 0; ei->i_next_alloc_goal = 0; ei->i_strat4096 = 0; ei->i_streamdir = 0; ei->i_hidden = 0; init_rwsem(&ei->i_data_sem); ei->cached_extent.lstart = -1; spin_lock_init(&ei->i_extent_cache_lock); inode_set_iversion(&ei->vfs_inode, 1); return &ei->vfs_inode; } static void udf_free_in_core_inode(struct inode *inode) { kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } static void init_once(void *foo) { struct udf_inode_info *ei = foo; ei->i_data = NULL; inode_init_once(&ei->vfs_inode); } static int __init init_inodecache(void) { udf_inode_cachep = kmem_cache_create("udf_inode_cache", sizeof(struct udf_inode_info), 0, (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), init_once); if (!udf_inode_cachep) return -ENOMEM; return 0; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. 
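 * Inodes are released through an RCU callback by the VFS, so without this
 * barrier the cache could be destroyed while free callbacks are still
 * pending.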
*/ rcu_barrier(); kmem_cache_destroy(udf_inode_cachep); } /* Superblock operations */ static const struct super_operations udf_sb_ops = { .alloc_inode = udf_alloc_inode, .free_inode = udf_free_in_core_inode, .write_inode = udf_write_inode, .evict_inode = udf_evict_inode, .put_super = udf_put_super, .sync_fs = udf_sync_fs, .statfs = udf_statfs, .show_options = udf_show_options, }; struct udf_options { unsigned int blocksize; unsigned int session; unsigned int lastblock; unsigned int anchor; unsigned int flags; umode_t umask; kgid_t gid; kuid_t uid; umode_t fmode; umode_t dmode; struct nls_table *nls_map; }; /* * UDF has historically preserved prior mount options across * a remount, so copy those here if remounting, otherwise set * initial mount defaults. */ static void udf_init_options(struct fs_context *fc, struct udf_options *uopt) { if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { struct super_block *sb = fc->root->d_sb; struct udf_sb_info *sbi = UDF_SB(sb); uopt->flags = sbi->s_flags; uopt->uid = sbi->s_uid; uopt->gid = sbi->s_gid; uopt->umask = sbi->s_umask; uopt->fmode = sbi->s_fmode; uopt->dmode = sbi->s_dmode; uopt->nls_map = NULL; } else { uopt->flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); /* * By default we'll use overflow[ug]id when UDF * inode [ug]id == -1 */ uopt->uid = make_kuid(current_user_ns(), overflowuid); uopt->gid = make_kgid(current_user_ns(), overflowgid); uopt->umask = 0; uopt->fmode = UDF_INVALID_MODE; uopt->dmode = UDF_INVALID_MODE; uopt->nls_map = NULL; uopt->session = 0xFFFFFFFF; } } static int udf_init_fs_context(struct fs_context *fc) { struct udf_options *uopt; uopt = kzalloc(sizeof(*uopt), GFP_KERNEL); if (!uopt) return -ENOMEM; udf_init_options(fc, uopt); fc->fs_private = uopt; fc->ops = &udf_context_ops; return 0; } static void udf_free_fc(struct fs_context *fc) { struct udf_options *uopt = fc->fs_private; unload_nls(uopt->nls_map); kfree(fc->fs_private); } static int __init init_udf_fs(void) { int err; err = init_inodecache(); if (err) goto out1; err = register_filesystem(&udf_fstype); if (err) goto out; return 0; out: destroy_inodecache(); out1: return err; } static void __exit exit_udf_fs(void) { unregister_filesystem(&udf_fstype); destroy_inodecache(); } static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count) { struct udf_sb_info *sbi = UDF_SB(sb); sbi->s_partmaps = kcalloc(count, sizeof(*sbi->s_partmaps), GFP_KERNEL); if (!sbi->s_partmaps) { sbi->s_partitions = 0; return -ENOMEM; } sbi->s_partitions = count; return 0; } static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) { int i; int nr_groups = bitmap->s_nr_groups; for (i = 0; i < nr_groups; i++) if (!IS_ERR_OR_NULL(bitmap->s_block_bitmap[i])) brelse(bitmap->s_block_bitmap[i]); kvfree(bitmap); } static void udf_free_partition(struct udf_part_map *map) { int i; struct udf_meta_data *mdata; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) iput(map->s_uspace.s_table); if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) udf_sb_free_bitmap(map->s_uspace.s_bitmap); if (map->s_partition_type == UDF_SPARABLE_MAP15) for (i = 0; i < 4; i++) brelse(map->s_type_specific.s_sparing.s_spar_map[i]); else if (map->s_partition_type == UDF_METADATA_MAP25) { mdata = &map->s_type_specific.s_metadata; iput(mdata->s_metadata_fe); mdata->s_metadata_fe = NULL; iput(mdata->s_mirror_fe); mdata->s_mirror_fe = NULL; iput(mdata->s_bitmap_fe); mdata->s_bitmap_fe = NULL; } } static void udf_sb_free_partitions(struct super_block *sb) { struct udf_sb_info *sbi = UDF_SB(sb); 
int i; if (!sbi->s_partmaps) return; for (i = 0; i < sbi->s_partitions; i++) udf_free_partition(&sbi->s_partmaps[i]); kfree(sbi->s_partmaps); sbi->s_partmaps = NULL; } static int udf_show_options(struct seq_file *seq, struct dentry *root) { struct super_block *sb = root->d_sb; struct udf_sb_info *sbi = UDF_SB(sb); if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) seq_puts(seq, ",nostrict"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_BLOCKSIZE_SET)) seq_printf(seq, ",bs=%lu", sb->s_blocksize); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE)) seq_puts(seq, ",unhide"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNDELETE)) seq_puts(seq, ",undelete"); if (!UDF_QUERY_FLAG(sb, UDF_FLAG_USE_AD_IN_ICB)) seq_puts(seq, ",noadinicb"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_USE_SHORT_AD)) seq_puts(seq, ",shortad"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_FORGET)) seq_puts(seq, ",uid=forget"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_FORGET)) seq_puts(seq, ",gid=forget"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid)); if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) seq_printf(seq, ",gid=%u", from_kgid(&init_user_ns, sbi->s_gid)); if (sbi->s_umask != 0) seq_printf(seq, ",umask=%ho", sbi->s_umask); if (sbi->s_fmode != UDF_INVALID_MODE) seq_printf(seq, ",mode=%ho", sbi->s_fmode); if (sbi->s_dmode != UDF_INVALID_MODE) seq_printf(seq, ",dmode=%ho", sbi->s_dmode); if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET)) seq_printf(seq, ",session=%d", sbi->s_session); if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET)) seq_printf(seq, ",lastblock=%u", sbi->s_last_block); if (sbi->s_anchor != 0) seq_printf(seq, ",anchor=%u", sbi->s_anchor); if (sbi->s_nls_map) seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset); else seq_puts(seq, ",iocharset=utf8"); return 0; } /* * udf_parse_param * * PURPOSE * Parse mount options. * * DESCRIPTION * The following mount options are supported: * * gid= Set the default group. * umask= Set the default umask. * mode= Set the default file permissions. * dmode= Set the default directory permissions. * uid= Set the default user. * bs= Set the block size. * unhide Show otherwise hidden files. * undelete Show deleted files in lists. * adinicb Embed data in the inode (default) * noadinicb Don't embed data in the inode * shortad Use short ad's * longad Use long ad's (default) * nostrict Unset strict conformance * iocharset= Set the NLS character set * * The remaining are for debugging and disaster recovery: * * novrs Skip volume sequence recognition * * The following expect a offset from 0. * * session= Set the CDROM session (default= last session) * anchor= Override standard anchor location. (default= 256) * volume= Override the VolumeDesc location. (unused) * partition= Override the PartitionDesc location. (unused) * lastblock= Set the last block of the filesystem/ * * The following expect a offset from the partition root. * * fileset= Override the fileset block location. (unused) * rootdir= Override the root directory location. (unused) * WARNING: overriding the rootdir to a non-directory may * yield highly unpredictable results. * * PRE-CONDITIONS * fc fs_context with pointer to mount options variable. * param Pointer to fs_parameter being parsed. * * POST-CONDITIONS * <return> 0 Mount options parsed okay. * <return> errno Error parsing mount options. * * HISTORY * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. 
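 *
 * EXAMPLE
 *  A typical invocation overriding ownership and the character set could
 *  look like this (device and mount point are only illustrative):
 *
 *      mount -t udf -o uid=1000,gid=1000,iocharset=utf8 /dev/sr0 /mnt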
*/ enum { Opt_novrs, Opt_nostrict, Opt_bs, Opt_unhide, Opt_undelete, Opt_noadinicb, Opt_adinicb, Opt_shortad, Opt_longad, Opt_gid, Opt_uid, Opt_umask, Opt_session, Opt_lastblock, Opt_anchor, Opt_volume, Opt_partition, Opt_fileset, Opt_rootdir, Opt_utf8, Opt_iocharset, Opt_err, Opt_fmode, Opt_dmode }; static const struct fs_parameter_spec udf_param_spec[] = { fsparam_flag ("novrs", Opt_novrs), fsparam_flag ("nostrict", Opt_nostrict), fsparam_u32 ("bs", Opt_bs), fsparam_flag ("unhide", Opt_unhide), fsparam_flag ("undelete", Opt_undelete), fsparam_flag_no ("adinicb", Opt_adinicb), fsparam_flag ("shortad", Opt_shortad), fsparam_flag ("longad", Opt_longad), fsparam_string ("gid", Opt_gid), fsparam_string ("uid", Opt_uid), fsparam_u32 ("umask", Opt_umask), fsparam_u32 ("session", Opt_session), fsparam_u32 ("lastblock", Opt_lastblock), fsparam_u32 ("anchor", Opt_anchor), fsparam_u32 ("volume", Opt_volume), fsparam_u32 ("partition", Opt_partition), fsparam_u32 ("fileset", Opt_fileset), fsparam_u32 ("rootdir", Opt_rootdir), fsparam_flag ("utf8", Opt_utf8), fsparam_string ("iocharset", Opt_iocharset), fsparam_u32 ("mode", Opt_fmode), fsparam_u32 ("dmode", Opt_dmode), {} }; static int udf_parse_param(struct fs_context *fc, struct fs_parameter *param) { unsigned int uv; unsigned int n; struct udf_options *uopt = fc->fs_private; struct fs_parse_result result; int token; bool remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE); token = fs_parse(fc, udf_param_spec, param, &result); if (token < 0) return token; switch (token) { case Opt_novrs: uopt->flags |= (1 << UDF_FLAG_NOVRS); break; case Opt_bs: n = result.uint_32; if (n != 512 && n != 1024 && n != 2048 && n != 4096) return -EINVAL; uopt->blocksize = n; uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET); break; case Opt_unhide: uopt->flags |= (1 << UDF_FLAG_UNHIDE); break; case Opt_undelete: uopt->flags |= (1 << UDF_FLAG_UNDELETE); break; case Opt_adinicb: if (result.negated) uopt->flags &= ~(1 << UDF_FLAG_USE_AD_IN_ICB); else uopt->flags |= (1 << UDF_FLAG_USE_AD_IN_ICB); break; case Opt_shortad: uopt->flags |= (1 << UDF_FLAG_USE_SHORT_AD); break; case Opt_longad: uopt->flags &= ~(1 << UDF_FLAG_USE_SHORT_AD); break; case Opt_gid: if (kstrtoint(param->string, 10, &uv) == 0) { kgid_t gid = make_kgid(current_user_ns(), uv); if (!gid_valid(gid)) return -EINVAL; uopt->gid = gid; uopt->flags |= (1 << UDF_FLAG_GID_SET); } else if (!strcmp(param->string, "forget")) { uopt->flags |= (1 << UDF_FLAG_GID_FORGET); } else if (!strcmp(param->string, "ignore")) { /* this option is superseded by gid=<number> */ ; } else { return -EINVAL; } break; case Opt_uid: if (kstrtoint(param->string, 10, &uv) == 0) { kuid_t uid = make_kuid(current_user_ns(), uv); if (!uid_valid(uid)) return -EINVAL; uopt->uid = uid; uopt->flags |= (1 << UDF_FLAG_UID_SET); } else if (!strcmp(param->string, "forget")) { uopt->flags |= (1 << UDF_FLAG_UID_FORGET); } else if (!strcmp(param->string, "ignore")) { /* this option is superseded by uid=<number> */ ; } else { return -EINVAL; } break; case Opt_umask: uopt->umask = result.uint_32; break; case Opt_nostrict: uopt->flags &= ~(1 << UDF_FLAG_STRICT); break; case Opt_session: uopt->session = result.uint_32; if (!remount) uopt->flags |= (1 << UDF_FLAG_SESSION_SET); break; case Opt_lastblock: uopt->lastblock = result.uint_32; if (!remount) uopt->flags |= (1 << UDF_FLAG_LASTBLOCK_SET); break; case Opt_anchor: uopt->anchor = result.uint_32; break; case Opt_volume: case Opt_partition: case Opt_fileset: case Opt_rootdir: /* Ignored (never implemented 
properly) */ break; case Opt_utf8: if (!remount) { unload_nls(uopt->nls_map); uopt->nls_map = NULL; } break; case Opt_iocharset: if (!remount) { unload_nls(uopt->nls_map); uopt->nls_map = NULL; } /* When nls_map is not loaded then UTF-8 is used */ if (!remount && strcmp(param->string, "utf8") != 0) { uopt->nls_map = load_nls(param->string); if (!uopt->nls_map) { errorf(fc, "iocharset %s not found", param->string); return -EINVAL; } } break; case Opt_fmode: uopt->fmode = result.uint_32 & 0777; break; case Opt_dmode: uopt->dmode = result.uint_32 & 0777; break; default: return -EINVAL; } return 0; } static int udf_reconfigure(struct fs_context *fc) { struct udf_options *uopt = fc->fs_private; struct super_block *sb = fc->root->d_sb; struct udf_sb_info *sbi = UDF_SB(sb); int readonly = fc->sb_flags & SB_RDONLY; int error = 0; if (!readonly && UDF_QUERY_FLAG(sb, UDF_FLAG_RW_INCOMPAT)) return -EACCES; sync_filesystem(sb); write_lock(&sbi->s_cred_lock); sbi->s_flags = uopt->flags; sbi->s_uid = uopt->uid; sbi->s_gid = uopt->gid; sbi->s_umask = uopt->umask; sbi->s_fmode = uopt->fmode; sbi->s_dmode = uopt->dmode; write_unlock(&sbi->s_cred_lock); if (readonly == sb_rdonly(sb)) goto out_unlock; if (readonly) udf_close_lvid(sb); else udf_open_lvid(sb); out_unlock: return error; } /* * Check VSD descriptor. Returns -1 in case we are at the end of volume * recognition area, 0 if the descriptor is valid but non-interesting, 1 if * we found one of NSR descriptors we are looking for. */ static int identify_vsd(const struct volStructDesc *vsd) { int ret = 0; if (!memcmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) { switch (vsd->structType) { case 0: udf_debug("ISO9660 Boot Record found\n"); break; case 1: udf_debug("ISO9660 Primary Volume Descriptor found\n"); break; case 2: udf_debug("ISO9660 Supplementary Volume Descriptor found\n"); break; case 3: udf_debug("ISO9660 Volume Partition Descriptor found\n"); break; case 255: udf_debug("ISO9660 Volume Descriptor Set Terminator found\n"); break; default: udf_debug("ISO9660 VRS (%u) found\n", vsd->structType); break; } } else if (!memcmp(vsd->stdIdent, VSD_STD_ID_BEA01, VSD_STD_ID_LEN)) ; /* ret = 0 */ else if (!memcmp(vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN)) ret = 1; else if (!memcmp(vsd->stdIdent, VSD_STD_ID_NSR03, VSD_STD_ID_LEN)) ret = 1; else if (!memcmp(vsd->stdIdent, VSD_STD_ID_BOOT2, VSD_STD_ID_LEN)) ; /* ret = 0 */ else if (!memcmp(vsd->stdIdent, VSD_STD_ID_CDW02, VSD_STD_ID_LEN)) ; /* ret = 0 */ else { /* TEA01 or invalid id : end of volume recognition area */ ret = -1; } return ret; } /* * Check Volume Structure Descriptors (ECMA 167 2/9.1) * We also check any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) * @return 1 if NSR02 or NSR03 found, * -1 if first sector read error, 0 otherwise */ static int udf_check_vsd(struct super_block *sb) { struct volStructDesc *vsd = NULL; loff_t sector = VSD_FIRST_SECTOR_OFFSET; int sectorsize; struct buffer_head *bh = NULL; int nsr = 0; struct udf_sb_info *sbi; loff_t session_offset; sbi = UDF_SB(sb); if (sb->s_blocksize < sizeof(struct volStructDesc)) sectorsize = sizeof(struct volStructDesc); else sectorsize = sb->s_blocksize; session_offset = (loff_t)sbi->s_session << sb->s_blocksize_bits; sector += session_offset; udf_debug("Starting at sector %u (%lu byte sectors)\n", (unsigned int)(sector >> sb->s_blocksize_bits), sb->s_blocksize); /* Process the sequence (if applicable). 
The hard limit on the sector * offset is arbitrary, hopefully large enough so that all valid UDF * filesystems will be recognised. There is no mention of an upper * bound to the size of the volume recognition area in the standard. * The limit will prevent the code to read all the sectors of a * specially crafted image (like a bluray disc full of CD001 sectors), * potentially causing minutes or even hours of uninterruptible I/O * activity. This actually happened with uninitialised SSD partitions * (all 0xFF) before the check for the limit and all valid IDs were * added */ for (; !nsr && sector < VSD_MAX_SECTOR_OFFSET; sector += sectorsize) { /* Read a block */ bh = sb_bread(sb, sector >> sb->s_blocksize_bits); if (!bh) break; vsd = (struct volStructDesc *)(bh->b_data + (sector & (sb->s_blocksize - 1))); nsr = identify_vsd(vsd); /* Found NSR or end? */ if (nsr) { brelse(bh); break; } /* * Special handling for improperly formatted VRS (e.g., Win10) * where components are separated by 2048 bytes even though * sectors are 4K */ if (sb->s_blocksize == 4096) { nsr = identify_vsd(vsd + 1); /* Ignore unknown IDs... */ if (nsr < 0) nsr = 0; } brelse(bh); } if (nsr > 0) return 1; else if (!bh && sector - session_offset == VSD_FIRST_SECTOR_OFFSET) return -1; else return 0; } static int udf_verify_domain_identifier(struct super_block *sb, struct regid *ident, char *dname) { struct domainIdentSuffix *suffix; if (memcmp(ident->ident, UDF_ID_COMPLIANT, strlen(UDF_ID_COMPLIANT))) { udf_warn(sb, "Not OSTA UDF compliant %s descriptor.\n", dname); goto force_ro; } if (ident->flags & ENTITYID_FLAGS_DIRTY) { udf_warn(sb, "Possibly not OSTA UDF compliant %s descriptor.\n", dname); goto force_ro; } suffix = (struct domainIdentSuffix *)ident->identSuffix; if ((suffix->domainFlags & DOMAIN_FLAGS_HARD_WRITE_PROTECT) || (suffix->domainFlags & DOMAIN_FLAGS_SOFT_WRITE_PROTECT)) { if (!sb_rdonly(sb)) { udf_warn(sb, "Descriptor for %s marked write protected." " Forcing read only mount.\n", dname); } goto force_ro; } return 0; force_ro: if (!sb_rdonly(sb)) return -EACCES; UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT); return 0; } static int udf_load_fileset(struct super_block *sb, struct fileSetDesc *fset, struct kernel_lb_addr *root) { int ret; ret = udf_verify_domain_identifier(sb, &fset->domainIdent, "file set"); if (ret < 0) return ret; *root = lelb_to_cpu(fset->rootDirectoryICB.extLocation); UDF_SB(sb)->s_serial_number = le16_to_cpu(fset->descTag.tagSerialNum); udf_debug("Rootdir at block=%u, partition=%u\n", root->logicalBlockNum, root->partitionReferenceNum); return 0; } static int udf_find_fileset(struct super_block *sb, struct kernel_lb_addr *fileset, struct kernel_lb_addr *root) { struct buffer_head *bh; uint16_t ident; int ret; if (fileset->logicalBlockNum == 0xFFFFFFFF && fileset->partitionReferenceNum == 0xFFFF) return -EINVAL; bh = udf_read_ptagged(sb, fileset, 0, &ident); if (!bh) return -EIO; if (ident != TAG_IDENT_FSD) { brelse(bh); return -EINVAL; } udf_debug("Fileset at block=%u, partition=%u\n", fileset->logicalBlockNum, fileset->partitionReferenceNum); UDF_SB(sb)->s_partition = fileset->partitionReferenceNum; ret = udf_load_fileset(sb, (struct fileSetDesc *)bh->b_data, root); brelse(bh); return ret; } /* * Load primary Volume Descriptor Sequence * * Return <0 on error, 0 on success. -EAGAIN is special meaning next sequence * should be tried. 
*/ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) { struct primaryVolDesc *pvoldesc; uint8_t *outstr; struct buffer_head *bh; uint16_t ident; int ret; struct timestamp *ts; outstr = kzalloc(128, GFP_KERNEL); if (!outstr) return -ENOMEM; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) { ret = -EAGAIN; goto out2; } if (ident != TAG_IDENT_PVD) { ret = -EIO; goto out_bh; } pvoldesc = (struct primaryVolDesc *)bh->b_data; udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, pvoldesc->recordingDateAndTime); ts = &pvoldesc->recordingDateAndTime; udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n", le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, ts->minute, le16_to_cpu(ts->typeAndTimezone)); ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32); if (ret < 0) { strscpy_pad(UDF_SB(sb)->s_volume_ident, "InvalidName"); pr_warn("incorrect volume identification, setting to " "'InvalidName'\n"); } else { strscpy_pad(UDF_SB(sb)->s_volume_ident, outstr); } udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident); ret = udf_dstrCS0toChar(sb, outstr, 127, pvoldesc->volSetIdent, 128); if (ret < 0) { ret = 0; goto out_bh; } outstr[ret] = 0; udf_debug("volSetIdent[] = '%s'\n", outstr); ret = 0; out_bh: brelse(bh); out2: kfree(outstr); return ret; } struct inode *udf_find_metadata_inode_efe(struct super_block *sb, u32 meta_file_loc, u32 partition_ref) { struct kernel_lb_addr addr; struct inode *metadata_fe; addr.logicalBlockNum = meta_file_loc; addr.partitionReferenceNum = partition_ref; metadata_fe = udf_iget_special(sb, &addr); if (IS_ERR(metadata_fe)) { udf_warn(sb, "metadata inode efe not found\n"); return metadata_fe; } if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n"); iput(metadata_fe); return ERR_PTR(-EIO); } return metadata_fe; } static int udf_load_metadata_files(struct super_block *sb, int partition, int type1_index) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; struct udf_meta_data *mdata; struct kernel_lb_addr addr; struct inode *fe; map = &sbi->s_partmaps[partition]; mdata = &map->s_type_specific.s_metadata; mdata->s_phys_partition_ref = type1_index; /* metadata address */ udf_debug("Metadata file location: block = %u part = %u\n", mdata->s_meta_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc, mdata->s_phys_partition_ref); if (IS_ERR(fe)) { /* mirror file entry */ udf_debug("Mirror metadata file location: block = %u part = %u\n", mdata->s_mirror_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, mdata->s_phys_partition_ref); if (IS_ERR(fe)) { udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); return PTR_ERR(fe); } mdata->s_mirror_fe = fe; } else mdata->s_metadata_fe = fe; /* * bitmap file entry * Note: * Load only if bitmap file location differs from 0xFFFFFFFF (DCN-5102) */ if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) { addr.logicalBlockNum = mdata->s_bitmap_file_loc; addr.partitionReferenceNum = mdata->s_phys_partition_ref; udf_debug("Bitmap file location: block = %u part = %u\n", addr.logicalBlockNum, addr.partitionReferenceNum); fe = udf_iget_special(sb, &addr); if (IS_ERR(fe)) { if (sb_rdonly(sb)) udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); else { udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); return PTR_ERR(fe); } } else 
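/* Bitmap file entry read successfully; remember it for this metadata partition. */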
mdata->s_bitmap_fe = fe; } udf_debug("udf_load_metadata_files Ok\n"); return 0; } int udf_compute_nr_groups(struct super_block *sb, u32 partition) { struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; return DIV_ROUND_UP(map->s_partition_len + (sizeof(struct spaceBitmapDesc) << 3), sb->s_blocksize * 8); } static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) { struct udf_bitmap *bitmap; int nr_groups = udf_compute_nr_groups(sb, index); bitmap = kvzalloc(struct_size(bitmap, s_block_bitmap, nr_groups), GFP_KERNEL); if (!bitmap) return NULL; bitmap->s_nr_groups = nr_groups; return bitmap; } static int check_partition_desc(struct super_block *sb, struct partitionDesc *p, struct udf_part_map *map) { bool umap, utable, fmap, ftable; struct partitionHeaderDesc *phd; switch (le32_to_cpu(p->accessType)) { case PD_ACCESS_TYPE_READ_ONLY: case PD_ACCESS_TYPE_WRITE_ONCE: case PD_ACCESS_TYPE_NONE: goto force_ro; } /* No Partition Header Descriptor? */ if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) && strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) goto force_ro; phd = (struct partitionHeaderDesc *)p->partitionContentsUse; utable = phd->unallocSpaceTable.extLength; umap = phd->unallocSpaceBitmap.extLength; ftable = phd->freedSpaceTable.extLength; fmap = phd->freedSpaceBitmap.extLength; /* No allocation info? */ if (!utable && !umap && !ftable && !fmap) goto force_ro; /* We don't support blocks that require erasing before overwrite */ if (ftable || fmap) goto force_ro; /* UDF 2.60: 2.3.3 - no mixing of tables & bitmaps, no VAT. */ if (utable && umap) goto force_ro; if (map->s_partition_type == UDF_VIRTUAL_MAP15 || map->s_partition_type == UDF_VIRTUAL_MAP20 || map->s_partition_type == UDF_METADATA_MAP25) goto force_ro; return 0; force_ro: if (!sb_rdonly(sb)) return -EACCES; UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT); return 0; } static int udf_fill_partdesc_info(struct super_block *sb, struct partitionDesc *p, int p_index) { struct udf_part_map *map; struct udf_sb_info *sbi = UDF_SB(sb); struct partitionHeaderDesc *phd; u32 sum; int err; map = &sbi->s_partmaps[p_index]; map->s_partition_len = le32_to_cpu(p->partitionLength); /* blocks */ map->s_partition_root = le32_to_cpu(p->partitionStartingLocation); if (check_add_overflow(map->s_partition_root, map->s_partition_len, &sum)) { udf_err(sb, "Partition %d has invalid location %u + %u\n", p_index, map->s_partition_root, map->s_partition_len); return -EFSCORRUPTED; } if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY)) map->s_partition_flags |= UDF_PART_FLAG_READ_ONLY; if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE)) map->s_partition_flags |= UDF_PART_FLAG_WRITE_ONCE; if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE)) map->s_partition_flags |= UDF_PART_FLAG_REWRITABLE; if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE; udf_debug("Partition (%d type %x) starts at physical %u, block length %u\n", p_index, map->s_partition_type, map->s_partition_root, map->s_partition_len); err = check_partition_desc(sb, p, map); if (err) return err; /* * Skip loading allocation info it we cannot ever write to the fs. * This is a correctness thing as we may have decided to force ro mount * to avoid allocation info we don't support. 
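 * (For example, check_partition_desc() above forces a read-only mount when
 * it sees freed-space tables or bitmaps, which require erase-before-write
 * handling we do not implement.)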
*/ if (UDF_QUERY_FLAG(sb, UDF_FLAG_RW_INCOMPAT)) return 0; phd = (struct partitionHeaderDesc *)p->partitionContentsUse; if (phd->unallocSpaceTable.extLength) { struct kernel_lb_addr loc = { .logicalBlockNum = le32_to_cpu( phd->unallocSpaceTable.extPosition), .partitionReferenceNum = p_index, }; struct inode *inode; inode = udf_iget_special(sb, &loc); if (IS_ERR(inode)) { udf_debug("cannot load unallocSpaceTable (part %d)\n", p_index); return PTR_ERR(inode); } map->s_uspace.s_table = inode; map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; udf_debug("unallocSpaceTable (part %d) @ %lu\n", p_index, map->s_uspace.s_table->i_ino); } if (phd->unallocSpaceBitmap.extLength) { struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index); if (!bitmap) return -ENOMEM; map->s_uspace.s_bitmap = bitmap; bitmap->s_extPosition = le32_to_cpu( phd->unallocSpaceBitmap.extPosition); map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; /* Check whether math over bitmap won't overflow. */ if (check_add_overflow(map->s_partition_len, sizeof(struct spaceBitmapDesc) << 3, &sum)) { udf_err(sb, "Partition %d is too long (%u)\n", p_index, map->s_partition_len); return -EFSCORRUPTED; } udf_debug("unallocSpaceBitmap (part %d) @ %u\n", p_index, bitmap->s_extPosition); } return 0; } static void udf_find_vat_block(struct super_block *sb, int p_index, int type1_index, sector_t start_block) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map = &sbi->s_partmaps[p_index]; sector_t vat_block; struct kernel_lb_addr ino; struct inode *inode; /* * VAT file entry is in the last recorded block. Some broken disks have * it a few blocks before so try a bit harder... */ ino.partitionReferenceNum = type1_index; for (vat_block = start_block; vat_block >= map->s_partition_root && vat_block >= start_block - 3; vat_block--) { ino.logicalBlockNum = vat_block - map->s_partition_root; inode = udf_iget_special(sb, &ino); if (!IS_ERR(inode)) { sbi->s_vat_inode = inode; break; } } } static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map = &sbi->s_partmaps[p_index]; struct buffer_head *bh = NULL; struct udf_inode_info *vati; struct virtualAllocationTable20 *vat20; sector_t blocks = sb_bdev_nr_blocks(sb); udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block); if (!sbi->s_vat_inode && sbi->s_last_block != blocks - 1) { pr_notice("Failed to read VAT inode from the last recorded block (%lu), retrying with the last block of the device (%lu).\n", (unsigned long)sbi->s_last_block, (unsigned long)blocks - 1); udf_find_vat_block(sb, p_index, type1_index, blocks - 1); } if (!sbi->s_vat_inode) return -EIO; if (map->s_partition_type == UDF_VIRTUAL_MAP15) { map->s_type_specific.s_virtual.s_start_offset = 0; map->s_type_specific.s_virtual.s_num_entries = (sbi->s_vat_inode->i_size - 36) >> 2; } else if (map->s_partition_type == UDF_VIRTUAL_MAP20) { vati = UDF_I(sbi->s_vat_inode); if (vati->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { int err = 0; bh = udf_bread(sbi->s_vat_inode, 0, 0, &err); if (!bh) { if (!err) err = -EFSCORRUPTED; return err; } vat20 = (struct virtualAllocationTable20 *)bh->b_data; } else { vat20 = (struct virtualAllocationTable20 *) vati->i_data; } map->s_type_specific.s_virtual.s_start_offset = le16_to_cpu(vat20->lengthHeader); map->s_type_specific.s_virtual.s_num_entries = (sbi->s_vat_inode->i_size - map->s_type_specific.s_virtual. 
s_start_offset) >> 2; brelse(bh); } return 0; } /* * Load partition descriptor block * * Returns <0 on error, 0 on success, -EAGAIN is special - try next descriptor * sequence. */ static int udf_load_partdesc(struct super_block *sb, sector_t block) { struct buffer_head *bh; struct partitionDesc *p; struct udf_part_map *map; struct udf_sb_info *sbi = UDF_SB(sb); int i, type1_idx; uint16_t partitionNumber; uint16_t ident; int ret; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) return -EAGAIN; if (ident != TAG_IDENT_PD) { ret = 0; goto out_bh; } p = (struct partitionDesc *)bh->b_data; partitionNumber = le16_to_cpu(p->partitionNumber); /* First scan for TYPE1 and SPARABLE partitions */ for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; udf_debug("Searching map: (%u == %u)\n", map->s_partition_num, partitionNumber); if (map->s_partition_num == partitionNumber && (map->s_partition_type == UDF_TYPE1_MAP15 || map->s_partition_type == UDF_SPARABLE_MAP15)) break; } if (i >= sbi->s_partitions) { udf_debug("Partition (%u) not found in partition map\n", partitionNumber); ret = 0; goto out_bh; } ret = udf_fill_partdesc_info(sb, p, i); if (ret < 0) goto out_bh; /* * Now rescan for VIRTUAL or METADATA partitions when SPARABLE and * PHYSICAL partitions are already set up */ type1_idx = i; map = NULL; /* supress 'maybe used uninitialized' warning */ for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; if (map->s_partition_num == partitionNumber && (map->s_partition_type == UDF_VIRTUAL_MAP15 || map->s_partition_type == UDF_VIRTUAL_MAP20 || map->s_partition_type == UDF_METADATA_MAP25)) break; } if (i >= sbi->s_partitions) { ret = 0; goto out_bh; } ret = udf_fill_partdesc_info(sb, p, i); if (ret < 0) goto out_bh; if (map->s_partition_type == UDF_METADATA_MAP25) { ret = udf_load_metadata_files(sb, i, type1_idx); if (ret < 0) { udf_err(sb, "error loading MetaData partition map %d\n", i); goto out_bh; } } else { /* * If we have a partition with virtual map, we don't handle * writing to it (we overwrite blocks instead of relocating * them). 
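 * Such media can therefore only be mounted read-only; a read-write mount
 * attempt is rejected with -EACCES just below.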
*/ if (!sb_rdonly(sb)) { ret = -EACCES; goto out_bh; } UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT); ret = udf_load_vat(sb, i, type1_idx); if (ret < 0) goto out_bh; } ret = 0; out_bh: /* In case loading failed, we handle cleanup in udf_fill_super */ brelse(bh); return ret; } static int udf_load_sparable_map(struct super_block *sb, struct udf_part_map *map, struct sparablePartitionMap *spm) { uint32_t loc; uint16_t ident; struct sparingTable *st; struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; int i; struct buffer_head *bh; map->s_partition_type = UDF_SPARABLE_MAP15; sdata->s_packet_len = le16_to_cpu(spm->packetLength); if (!is_power_of_2(sdata->s_packet_len)) { udf_err(sb, "error loading logical volume descriptor: " "Invalid packet length %u\n", (unsigned)sdata->s_packet_len); return -EIO; } if (spm->numSparingTables > 4) { udf_err(sb, "error loading logical volume descriptor: " "Too many sparing tables (%d)\n", (int)spm->numSparingTables); return -EIO; } if (le32_to_cpu(spm->sizeSparingTable) > sb->s_blocksize) { udf_err(sb, "error loading logical volume descriptor: " "Too big sparing table size (%u)\n", le32_to_cpu(spm->sizeSparingTable)); return -EIO; } for (i = 0; i < spm->numSparingTables; i++) { loc = le32_to_cpu(spm->locSparingTable[i]); bh = udf_read_tagged(sb, loc, loc, &ident); if (!bh) continue; st = (struct sparingTable *)bh->b_data; if (ident != 0 || strncmp(st->sparingIdent.ident, UDF_ID_SPARING, strlen(UDF_ID_SPARING)) || sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > sb->s_blocksize) { brelse(bh); continue; } sdata->s_spar_map[i] = bh; } map->s_partition_func = udf_get_pblock_spar15; return 0; } static int udf_load_logicalvol(struct super_block *sb, sector_t block, struct kernel_lb_addr *fileset) { struct logicalVolDesc *lvd; int i, offset; uint8_t type; struct udf_sb_info *sbi = UDF_SB(sb); struct genericPartitionMap *gpm; uint16_t ident; struct buffer_head *bh; unsigned int table_len; int ret; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) return -EAGAIN; BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; table_len = le32_to_cpu(lvd->mapTableLength); if (table_len > sb->s_blocksize - sizeof(*lvd)) { udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); ret = -EIO; goto out_bh; } ret = udf_verify_domain_identifier(sb, &lvd->domainIdent, "logical volume"); if (ret) goto out_bh; ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); if (ret) goto out_bh; for (i = 0, offset = 0; i < sbi->s_partitions && offset < table_len; i++, offset += gpm->partitionMapLength) { struct udf_part_map *map = &sbi->s_partmaps[i]; gpm = (struct genericPartitionMap *) &(lvd->partitionMaps[offset]); type = gpm->partitionMapType; if (type == 1) { struct genericPartitionMap1 *gpm1 = (struct genericPartitionMap1 *)gpm; map->s_partition_type = UDF_TYPE1_MAP15; map->s_volumeseqnum = le16_to_cpu(gpm1->volSeqNum); map->s_partition_num = le16_to_cpu(gpm1->partitionNum); map->s_partition_func = NULL; } else if (type == 2) { struct udfPartitionMap2 *upm2 = (struct udfPartitionMap2 *)gpm; if (!strncmp(upm2->partIdent.ident, UDF_ID_VIRTUAL, strlen(UDF_ID_VIRTUAL))) { u16 suf = le16_to_cpu(((__le16 *)upm2->partIdent. 
identSuffix)[0]); if (suf < 0x0200) { map->s_partition_type = UDF_VIRTUAL_MAP15; map->s_partition_func = udf_get_pblock_virt15; } else { map->s_partition_type = UDF_VIRTUAL_MAP20; map->s_partition_func = udf_get_pblock_virt20; } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { ret = udf_load_sparable_map(sb, map, (struct sparablePartitionMap *)gpm); if (ret < 0) goto out_bh; } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { struct udf_meta_data *mdata = &map->s_type_specific.s_metadata; struct metadataPartitionMap *mdm = (struct metadataPartitionMap *) &(lvd->partitionMaps[offset]); udf_debug("Parsing Logical vol part %d type %u id=%s\n", i, type, UDF_ID_METADATA); map->s_partition_type = UDF_METADATA_MAP25; map->s_partition_func = udf_get_pblock_meta25; mdata->s_meta_file_loc = le32_to_cpu(mdm->metadataFileLoc); mdata->s_mirror_file_loc = le32_to_cpu(mdm->metadataMirrorFileLoc); mdata->s_bitmap_file_loc = le32_to_cpu(mdm->metadataBitmapFileLoc); mdata->s_alloc_unit_size = le32_to_cpu(mdm->allocUnitSize); mdata->s_align_unit_size = le16_to_cpu(mdm->alignUnitSize); if (mdm->flags & 0x01) mdata->s_flags |= MF_DUPLICATE_MD; udf_debug("Metadata Ident suffix=0x%x\n", le16_to_cpu(*(__le16 *) mdm->partIdent.identSuffix)); udf_debug("Metadata part num=%u\n", le16_to_cpu(mdm->partitionNum)); udf_debug("Metadata part alloc unit size=%u\n", le32_to_cpu(mdm->allocUnitSize)); udf_debug("Metadata file loc=%u\n", le32_to_cpu(mdm->metadataFileLoc)); udf_debug("Mirror file loc=%u\n", le32_to_cpu(mdm->metadataMirrorFileLoc)); udf_debug("Bitmap file loc=%u\n", le32_to_cpu(mdm->metadataBitmapFileLoc)); udf_debug("Flags: %d %u\n", mdata->s_flags, mdm->flags); } else { udf_debug("Unknown ident: %s\n", upm2->partIdent.ident); continue; } map->s_volumeseqnum = le16_to_cpu(upm2->volSeqNum); map->s_partition_num = le16_to_cpu(upm2->partitionNum); } udf_debug("Partition (%d:%u) type %u on volume %u\n", i, map->s_partition_num, type, map->s_volumeseqnum); } if (fileset) { struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]); *fileset = lelb_to_cpu(la->extLocation); udf_debug("FileSet found in LogicalVolDesc at block=%u, partition=%u\n", fileset->logicalBlockNum, fileset->partitionReferenceNum); } if (lvd->integritySeqExt.extLength) udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt)); ret = 0; if (!sbi->s_lvid_bh) { /* We can't generate unique IDs without a valid LVID */ if (sb_rdonly(sb)) { UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT); } else { udf_warn(sb, "Damaged or missing LVID, forcing " "readonly mount\n"); ret = -EACCES; } } out_bh: brelse(bh); return ret; } static bool udf_lvid_valid(struct super_block *sb, struct logicalVolIntegrityDesc *lvid) { u32 parts, impuselen; parts = le32_to_cpu(lvid->numOfPartitions); impuselen = le32_to_cpu(lvid->lengthOfImpUse); if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize || sizeof(struct logicalVolIntegrityDesc) + impuselen + 2 * parts * sizeof(u32) > sb->s_blocksize) return false; return true; } /* * Find the prevailing Logical Volume Integrity Descriptor. 
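 * "Prevailing" means the last valid LVID reachable from the integrity
 * sequence extent, following nextIntegrityExt links at most
 * UDF_MAX_LVID_NESTING times.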
*/ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc) { struct buffer_head *bh, *final_bh; uint16_t ident; struct udf_sb_info *sbi = UDF_SB(sb); struct logicalVolIntegrityDesc *lvid; int indirections = 0; while (++indirections <= UDF_MAX_LVID_NESTING) { final_bh = NULL; while (loc.extLength > 0 && (bh = udf_read_tagged(sb, loc.extLocation, loc.extLocation, &ident))) { if (ident != TAG_IDENT_LVID) { brelse(bh); break; } brelse(final_bh); final_bh = bh; loc.extLength -= sb->s_blocksize; loc.extLocation++; } if (!final_bh) return; lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data; if (udf_lvid_valid(sb, lvid)) { brelse(sbi->s_lvid_bh); sbi->s_lvid_bh = final_bh; } else { udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), " "ignoring.\n", le32_to_cpu(lvid->numOfPartitions), le32_to_cpu(lvid->lengthOfImpUse)); } if (lvid->nextIntegrityExt.extLength == 0) return; loc = leea_to_cpu(lvid->nextIntegrityExt); } udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n", UDF_MAX_LVID_NESTING); brelse(sbi->s_lvid_bh); sbi->s_lvid_bh = NULL; } /* * Step for reallocation of table of partition descriptor sequence numbers. * Must be power of 2. */ #define PART_DESC_ALLOC_STEP 32 struct part_desc_seq_scan_data { struct udf_vds_record rec; u32 partnum; }; struct desc_seq_scan_data { struct udf_vds_record vds[VDS_POS_LENGTH]; unsigned int size_part_descs; unsigned int num_part_descs; struct part_desc_seq_scan_data *part_descs_loc; }; static struct udf_vds_record *handle_partition_descriptor( struct buffer_head *bh, struct desc_seq_scan_data *data) { struct partitionDesc *desc = (struct partitionDesc *)bh->b_data; int partnum; int i; partnum = le16_to_cpu(desc->partitionNumber); for (i = 0; i < data->num_part_descs; i++) if (partnum == data->part_descs_loc[i].partnum) return &(data->part_descs_loc[i].rec); if (data->num_part_descs >= data->size_part_descs) { struct part_desc_seq_scan_data *new_loc; unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP); new_loc = kcalloc(new_size, sizeof(*new_loc), GFP_KERNEL); if (!new_loc) return ERR_PTR(-ENOMEM); memcpy(new_loc, data->part_descs_loc, data->size_part_descs * sizeof(*new_loc)); kfree(data->part_descs_loc); data->part_descs_loc = new_loc; data->size_part_descs = new_size; } return &(data->part_descs_loc[data->num_part_descs++].rec); } static struct udf_vds_record *get_volume_descriptor_record(uint16_t ident, struct buffer_head *bh, struct desc_seq_scan_data *data) { switch (ident) { case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ return &(data->vds[VDS_POS_PRIMARY_VOL_DESC]); case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ return &(data->vds[VDS_POS_IMP_USE_VOL_DESC]); case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ return &(data->vds[VDS_POS_LOGICAL_VOL_DESC]); case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ return &(data->vds[VDS_POS_UNALLOC_SPACE_DESC]); case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ return handle_partition_descriptor(bh, data); } return NULL; } /* * Process a main/reserve volume descriptor sequence. * @block First block of first extent of the sequence. * @lastblock Lastblock of first extent of the sequence. * @fileset There we store extent containing root fileset * * Returns <0 on error, 0 on success. 
-EAGAIN is special - try next descriptor * sequence */ static noinline int udf_process_sequence( struct super_block *sb, sector_t block, sector_t lastblock, struct kernel_lb_addr *fileset) { struct buffer_head *bh = NULL; struct udf_vds_record *curr; struct generic_desc *gd; struct volDescPtr *vdp; bool done = false; uint32_t vdsn; uint16_t ident; int ret; unsigned int indirections = 0; struct desc_seq_scan_data data; unsigned int i; memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); data.size_part_descs = PART_DESC_ALLOC_STEP; data.num_part_descs = 0; data.part_descs_loc = kcalloc(data.size_part_descs, sizeof(*data.part_descs_loc), GFP_KERNEL); if (!data.part_descs_loc) return -ENOMEM; /* * Read the main descriptor sequence and find which descriptors * are in it. */ for (; (!done && block <= lastblock); block++) { bh = udf_read_tagged(sb, block, block, &ident); if (!bh) break; /* Process each descriptor (ISO 13346 3/8.3-8.4) */ gd = (struct generic_desc *)bh->b_data; vdsn = le32_to_cpu(gd->volDescSeqNum); switch (ident) { case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */ if (++indirections > UDF_MAX_TD_NESTING) { udf_err(sb, "too many Volume Descriptor " "Pointers (max %u supported)\n", UDF_MAX_TD_NESTING); brelse(bh); ret = -EIO; goto out; } vdp = (struct volDescPtr *)bh->b_data; block = le32_to_cpu(vdp->nextVolDescSeqExt.extLocation); lastblock = le32_to_cpu( vdp->nextVolDescSeqExt.extLength) >> sb->s_blocksize_bits; lastblock += block - 1; /* For loop is going to increment 'block' again */ block--; break; case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ curr = get_volume_descriptor_record(ident, bh, &data); if (IS_ERR(curr)) { brelse(bh); ret = PTR_ERR(curr); goto out; } /* Descriptor we don't care about? 
*/ if (!curr) break; if (vdsn >= curr->volDescSeqNum) { curr->volDescSeqNum = vdsn; curr->block = block; } break; case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ done = true; break; } brelse(bh); } /* * Now read interesting descriptors again and process them * in a suitable order */ if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) { udf_err(sb, "Primary Volume Descriptor not found!\n"); ret = -EAGAIN; goto out; } ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block); if (ret < 0) goto out; if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) { ret = udf_load_logicalvol(sb, data.vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset); if (ret < 0) goto out; } /* Now handle prevailing Partition Descriptors */ for (i = 0; i < data.num_part_descs; i++) { ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block); if (ret < 0) goto out; } ret = 0; out: kfree(data.part_descs_loc); return ret; } /* * Load Volume Descriptor Sequence described by anchor in bh * * Returns <0 on error, 0 on success */ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, struct kernel_lb_addr *fileset) { struct anchorVolDescPtr *anchor; sector_t main_s, main_e, reserve_s, reserve_e; int ret; anchor = (struct anchorVolDescPtr *)bh->b_data; /* Locate the main sequence */ main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength); main_e = main_e >> sb->s_blocksize_bits; main_e += main_s - 1; /* Locate the reserve sequence */ reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation); reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); reserve_e = reserve_e >> sb->s_blocksize_bits; reserve_e += reserve_s - 1; /* Process the main & reserve sequences */ /* responsible for finding the PartitionDesc(s) */ ret = udf_process_sequence(sb, main_s, main_e, fileset); if (ret != -EAGAIN) return ret; udf_sb_free_partitions(sb); ret = udf_process_sequence(sb, reserve_s, reserve_e, fileset); if (ret < 0) { udf_sb_free_partitions(sb); /* No sequence was OK, return -EIO */ if (ret == -EAGAIN) ret = -EIO; } return ret; } /* * Check whether there is an anchor block in the given block and * load Volume Descriptor Sequence if so. * * Returns <0 on error, 0 on success, -EAGAIN is special - try next anchor * block */ static int udf_check_anchor_block(struct super_block *sb, sector_t block, struct kernel_lb_addr *fileset) { struct buffer_head *bh; uint16_t ident; int ret; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) return -EAGAIN; if (ident != TAG_IDENT_AVDP) { brelse(bh); return -EAGAIN; } ret = udf_load_sequence(sb, bh, fileset); brelse(bh); return ret; } /* * Search for an anchor volume descriptor pointer. * * Returns < 0 on error, 0 on success. -EAGAIN is special - try next set * of anchors. */ static int udf_scan_anchors(struct super_block *sb, udf_pblk_t *lastblock, struct kernel_lb_addr *fileset) { udf_pblk_t last[6]; int i; struct udf_sb_info *sbi = UDF_SB(sb); int last_count = 0; int ret; /* First try user provided anchor */ if (sbi->s_anchor) { ret = udf_check_anchor_block(sb, sbi->s_anchor, fileset); if (ret != -EAGAIN) return ret; } /* * according to spec, anchor is in either: * block 256 * lastblock-256 * lastblock * however, if the disc isn't closed, it could be 512. */ ret = udf_check_anchor_block(sb, sbi->s_session + 256, fileset); if (ret != -EAGAIN) return ret; /* * The trouble is which block is the last one. Drives often misreport * this so we try various possibilities. 
 */
*/ last[last_count++] = *lastblock; if (*lastblock >= 1) last[last_count++] = *lastblock - 1; last[last_count++] = *lastblock + 1; if (*lastblock >= 2) last[last_count++] = *lastblock - 2; if (*lastblock >= 150) last[last_count++] = *lastblock - 150; if (*lastblock >= 152) last[last_count++] = *lastblock - 152; for (i = 0; i < last_count; i++) { if (last[i] >= sb_bdev_nr_blocks(sb)) continue; ret = udf_check_anchor_block(sb, last[i], fileset); if (ret != -EAGAIN) { if (!ret) *lastblock = last[i]; return ret; } if (last[i] < 256) continue; ret = udf_check_anchor_block(sb, last[i] - 256, fileset); if (ret != -EAGAIN) { if (!ret) *lastblock = last[i]; return ret; } } /* Finally try block 512 in case media is open */ return udf_check_anchor_block(sb, sbi->s_session + 512, fileset); } /* * Check Volume Structure Descriptor, find Anchor block and load Volume * Descriptor Sequence. * * Returns < 0 on error, 0 on success. -EAGAIN is special meaning anchor * block was not found. */ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, int silent, struct kernel_lb_addr *fileset) { struct udf_sb_info *sbi = UDF_SB(sb); int nsr = 0; int ret; if (!sb_set_blocksize(sb, uopt->blocksize)) { if (!silent) udf_warn(sb, "Bad block size\n"); return -EINVAL; } sbi->s_last_block = uopt->lastblock; if (!UDF_QUERY_FLAG(sb, UDF_FLAG_NOVRS)) { /* Check that it is NSR02 compliant */ nsr = udf_check_vsd(sb); if (!nsr) { if (!silent) udf_warn(sb, "No VRS found\n"); return -EINVAL; } if (nsr == -1) udf_debug("Failed to read sector at offset %d. " "Assuming open disc. Skipping validity " "check\n", VSD_FIRST_SECTOR_OFFSET); if (!sbi->s_last_block) sbi->s_last_block = udf_get_last_block(sb); } else { udf_debug("Validity check skipped because of novrs option\n"); } /* Look for anchor block and load Volume Descriptor Sequence */ sbi->s_anchor = uopt->anchor; ret = udf_scan_anchors(sb, &sbi->s_last_block, fileset); if (ret < 0) { if (!silent && ret == -EAGAIN) udf_warn(sb, "No anchor found\n"); return ret; } return 0; } static void udf_finalize_lvid(struct logicalVolIntegrityDesc *lvid) { struct timespec64 ts; ktime_get_real_ts64(&ts); udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts); lvid->descTag.descCRC = cpu_to_le16( crc_itu_t(0, (char *)lvid + sizeof(struct tag), le16_to_cpu(lvid->descTag.descCRCLength))); lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); } static void udf_open_lvid(struct super_block *sb) { struct udf_sb_info *sbi = UDF_SB(sb); struct buffer_head *bh = sbi->s_lvid_bh; struct logicalVolIntegrityDesc *lvid; struct logicalVolIntegrityDescImpUse *lvidiu; if (!bh) return; lvid = (struct logicalVolIntegrityDesc *)bh->b_data; lvidiu = udf_sb_lvidiu(sb); if (!lvidiu) return; mutex_lock(&sbi->s_alloc_mutex); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; if (le32_to_cpu(lvid->integrityType) == LVID_INTEGRITY_TYPE_CLOSE) lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN); else UDF_SET_FLAG(sb, UDF_FLAG_INCONSISTENT); udf_finalize_lvid(lvid); mark_buffer_dirty(bh); sbi->s_lvid_dirty = 0; mutex_unlock(&sbi->s_alloc_mutex); /* Make opening of filesystem visible on the media immediately */ sync_dirty_buffer(bh); } static void udf_close_lvid(struct super_block *sb) { struct udf_sb_info *sbi = UDF_SB(sb); struct buffer_head *bh = sbi->s_lvid_bh; struct logicalVolIntegrityDesc *lvid; struct logicalVolIntegrityDescImpUse *lvidiu; if (!bh) return; lvid = (struct logicalVolIntegrityDesc *)bh->b_data; lvidiu = 
udf_sb_lvidiu(sb); if (!lvidiu) return; mutex_lock(&sbi->s_alloc_mutex); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev)) lvidiu->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION); if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev)) lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); if (!UDF_QUERY_FLAG(sb, UDF_FLAG_INCONSISTENT)) lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); /* * We set buffer uptodate unconditionally here to avoid spurious * warnings from mark_buffer_dirty() when previous EIO has marked * the buffer as !uptodate */ set_buffer_uptodate(bh); udf_finalize_lvid(lvid); mark_buffer_dirty(bh); sbi->s_lvid_dirty = 0; mutex_unlock(&sbi->s_alloc_mutex); /* Make closing of filesystem visible on the media immediately */ sync_dirty_buffer(bh); } u64 lvid_get_unique_id(struct super_block *sb) { struct buffer_head *bh; struct udf_sb_info *sbi = UDF_SB(sb); struct logicalVolIntegrityDesc *lvid; struct logicalVolHeaderDesc *lvhd; u64 uniqueID; u64 ret; bh = sbi->s_lvid_bh; if (!bh) return 0; lvid = (struct logicalVolIntegrityDesc *)bh->b_data; lvhd = (struct logicalVolHeaderDesc *)lvid->logicalVolContentsUse; mutex_lock(&sbi->s_alloc_mutex); ret = uniqueID = le64_to_cpu(lvhd->uniqueID); if (!(++uniqueID & 0xFFFFFFFF)) uniqueID += 16; lvhd->uniqueID = cpu_to_le64(uniqueID); udf_updated_lvid(sb); mutex_unlock(&sbi->s_alloc_mutex); return ret; } static int udf_fill_super(struct super_block *sb, struct fs_context *fc) { int ret = -EINVAL; struct inode *inode = NULL; struct udf_options *uopt = fc->fs_private; struct kernel_lb_addr rootdir, fileset; struct udf_sb_info *sbi; bool lvid_open = false; int silent = fc->sb_flags & SB_SILENT; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; mutex_init(&sbi->s_alloc_mutex); fileset.logicalBlockNum = 0xFFFFFFFF; fileset.partitionReferenceNum = 0xFFFF; sbi->s_flags = uopt->flags; sbi->s_uid = uopt->uid; sbi->s_gid = uopt->gid; sbi->s_umask = uopt->umask; sbi->s_fmode = uopt->fmode; sbi->s_dmode = uopt->dmode; sbi->s_nls_map = uopt->nls_map; uopt->nls_map = NULL; rwlock_init(&sbi->s_cred_lock); if (uopt->session == 0xFFFFFFFF) sbi->s_session = udf_get_last_session(sb); else sbi->s_session = uopt->session; udf_debug("Multi-session=%d\n", sbi->s_session); /* Fill in the rest of the superblock */ sb->s_op = &udf_sb_ops; sb->s_export_op = &udf_export_ops; sb->s_magic = UDF_SUPER_MAGIC; sb->s_time_gran = 1000; if (uopt->flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) { ret = udf_load_vrs(sb, uopt, silent, &fileset); } else { uopt->blocksize = bdev_logical_block_size(sb->s_bdev); while (uopt->blocksize <= 4096) { ret = udf_load_vrs(sb, uopt, silent, &fileset); if (ret < 0) { if (!silent && ret != -EACCES) { pr_notice("Scanning with blocksize %u failed\n", uopt->blocksize); } brelse(sbi->s_lvid_bh); sbi->s_lvid_bh = NULL; /* * EACCES is special - we want to propagate to * upper layers that we cannot handle RW mount. 
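 * (The probe loop starts at the device's logical block size and keeps
 * doubling it, up to 4096 bytes, until udf_load_vrs() succeeds.)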
*/ if (ret == -EACCES) break; } else break; uopt->blocksize <<= 1; } } if (ret < 0) { if (ret == -EAGAIN) { udf_warn(sb, "No partition found (1)\n"); ret = -EINVAL; } goto error_out; } udf_debug("Lastblock=%u\n", sbi->s_last_block); if (sbi->s_lvid_bh) { struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); uint16_t minUDFReadRev; uint16_t minUDFWriteRev; if (!lvidiu) { ret = -EINVAL; goto error_out; } minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev); minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev); if (minUDFReadRev > UDF_MAX_READ_VERSION) { udf_err(sb, "minUDFReadRev=%x (max is %x)\n", minUDFReadRev, UDF_MAX_READ_VERSION); ret = -EINVAL; goto error_out; } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) { if (!sb_rdonly(sb)) { ret = -EACCES; goto error_out; } UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT); } sbi->s_udfrev = minUDFWriteRev; if (minUDFReadRev >= UDF_VERS_USE_EXTENDED_FE) UDF_SET_FLAG(sb, UDF_FLAG_USE_EXTENDED_FE); if (minUDFReadRev >= UDF_VERS_USE_STREAMS) UDF_SET_FLAG(sb, UDF_FLAG_USE_STREAMS); } if (!sbi->s_partitions) { udf_warn(sb, "No partition found (2)\n"); ret = -EINVAL; goto error_out; } if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & UDF_PART_FLAG_READ_ONLY) { if (!sb_rdonly(sb)) { ret = -EACCES; goto error_out; } UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT); } ret = udf_find_fileset(sb, &fileset, &rootdir); if (ret < 0) { udf_warn(sb, "No fileset found\n"); goto error_out; } if (!silent) { struct timestamp ts; udf_time_to_disk_stamp(&ts, sbi->s_record_time); udf_info("Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n", sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day, ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); } if (!sb_rdonly(sb)) { udf_open_lvid(sb); lvid_open = true; } /* Assign the root inode */ /* assign inodes by physical block number */ /* perhaps it's not extensible enough, but for now ... */ inode = udf_iget(sb, &rootdir); if (IS_ERR(inode)) { udf_err(sb, "Error in udf_iget, block=%u, partition=%u\n", rootdir.logicalBlockNum, rootdir.partitionReferenceNum); ret = PTR_ERR(inode); goto error_out; } /* Allocate a dentry for the root inode */ sb->s_root = d_make_root(inode); if (!sb->s_root) { udf_err(sb, "Couldn't allocate root dentry\n"); ret = -ENOMEM; goto error_out; } sb->s_maxbytes = UDF_MAX_FILESIZE; sb->s_max_links = UDF_MAX_LINKS; return 0; error_out: iput(sbi->s_vat_inode); unload_nls(uopt->nls_map); if (lvid_open) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); udf_sb_free_partitions(sb); kfree(sbi); sb->s_fs_info = NULL; return ret; } void _udf_err(struct super_block *sb, const char *function, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_err("error (device %s): %s: %pV", sb->s_id, function, &vaf); va_end(args); } void _udf_warn(struct super_block *sb, const char *function, const char *fmt, ...) 
{ struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_warn("warning (device %s): %s: %pV", sb->s_id, function, &vaf); va_end(args); } static void udf_put_super(struct super_block *sb) { struct udf_sb_info *sbi; sbi = UDF_SB(sb); iput(sbi->s_vat_inode); unload_nls(sbi->s_nls_map); if (!sb_rdonly(sb)) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); udf_sb_free_partitions(sb); mutex_destroy(&sbi->s_alloc_mutex); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } static int udf_sync_fs(struct super_block *sb, int wait) { struct udf_sb_info *sbi = UDF_SB(sb); mutex_lock(&sbi->s_alloc_mutex); if (sbi->s_lvid_dirty) { struct buffer_head *bh = sbi->s_lvid_bh; struct logicalVolIntegrityDesc *lvid; lvid = (struct logicalVolIntegrityDesc *)bh->b_data; udf_finalize_lvid(lvid); /* * Blockdevice will be synced later so we don't have to submit * the buffer for IO */ mark_buffer_dirty(bh); sbi->s_lvid_dirty = 0; } mutex_unlock(&sbi->s_alloc_mutex); return 0; } static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct udf_sb_info *sbi = UDF_SB(sb); struct logicalVolIntegrityDescImpUse *lvidiu; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); lvidiu = udf_sb_lvidiu(sb); buf->f_type = UDF_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len; buf->f_bfree = udf_count_free(sb); buf->f_bavail = buf->f_bfree; /* * Let's pretend each free block is also a free 'inode' since UDF does * not have separate preallocated table of inodes. */ buf->f_files = (lvidiu != NULL ? (le32_to_cpu(lvidiu->numFiles) + le32_to_cpu(lvidiu->numDirs)) : 0) + buf->f_bfree; buf->f_ffree = buf->f_bfree; buf->f_namelen = UDF_NAME_LEN; buf->f_fsid = u64_to_fsid(id); return 0; } static unsigned int udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) { struct buffer_head *bh = NULL; unsigned int accum = 0; int index; udf_pblk_t block = 0, newblock; struct kernel_lb_addr loc; uint32_t bytes; uint8_t *ptr; uint16_t ident; struct spaceBitmapDesc *bm; loc.logicalBlockNum = bitmap->s_extPosition; loc.partitionReferenceNum = UDF_SB(sb)->s_partition; bh = udf_read_ptagged(sb, &loc, 0, &ident); if (!bh) { udf_err(sb, "udf_count_free failed\n"); goto out; } else if (ident != TAG_IDENT_SBD) { brelse(bh); udf_err(sb, "udf_count_free failed\n"); goto out; } bm = (struct spaceBitmapDesc *)bh->b_data; bytes = le32_to_cpu(bm->numOfBytes); index = sizeof(struct spaceBitmapDesc); /* offset in first block only */ ptr = (uint8_t *)bh->b_data; while (bytes > 0) { u32 cur_bytes = min_t(u32, bytes, sb->s_blocksize - index); accum += bitmap_weight((const unsigned long *)(ptr + index), cur_bytes * 8); bytes -= cur_bytes; if (bytes) { brelse(bh); newblock = udf_get_lb_pblock(sb, &loc, ++block); bh = sb_bread(sb, newblock); if (!bh) { udf_debug("read failed\n"); goto out; } index = 0; ptr = (uint8_t *)bh->b_data; } } brelse(bh); out: return accum; } static unsigned int udf_count_free_table(struct super_block *sb, struct inode *table) { unsigned int accum = 0; uint32_t elen; struct kernel_lb_addr eloc; struct extent_position epos; int8_t etype; mutex_lock(&UDF_SB(sb)->s_alloc_mutex); epos.block = UDF_I(table)->i_location; epos.offset = sizeof(struct unallocSpaceEntry); epos.bh = NULL; while (udf_next_aext(table, &epos, &eloc, &elen, &etype, 1) > 0) accum += (elen >> table->i_sb->s_blocksize_bits); brelse(epos.bh); mutex_unlock(&UDF_SB(sb)->s_alloc_mutex); return accum; } static unsigned int 
udf_count_free(struct super_block *sb)
{
	unsigned int accum = 0;
	struct udf_sb_info *sbi = UDF_SB(sb);
	struct udf_part_map *map;
	unsigned int part = sbi->s_partition;
	int ptype = sbi->s_partmaps[part].s_partition_type;

	if (ptype == UDF_METADATA_MAP25) {
		part = sbi->s_partmaps[part].s_type_specific.s_metadata.
							s_phys_partition_ref;
	} else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) {
		/*
		 * Filesystems with VAT are append-only and we cannot write to
		 * them. Let's just report 0 here.
		 */
		return 0;
	}

	if (sbi->s_lvid_bh) {
		struct logicalVolIntegrityDesc *lvid =
			(struct logicalVolIntegrityDesc *)
			sbi->s_lvid_bh->b_data;

		if (le32_to_cpu(lvid->numOfPartitions) > part) {
			accum = le32_to_cpu(lvid->freeSpaceTable[part]);
			if (accum == 0xFFFFFFFF)
				accum = 0;
		}
	}

	if (accum)
		return accum;

	map = &sbi->s_partmaps[part];
	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
		accum += udf_count_free_bitmap(sb, map->s_uspace.s_bitmap);
	}
	if (accum)
		return accum;

	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
		accum += udf_count_free_table(sb, map->s_uspace.s_table);
	}
	return accum;
}

MODULE_AUTHOR("Ben Fennema");
MODULE_DESCRIPTION("Universal Disk Format Filesystem");
MODULE_LICENSE("GPL");
module_init(init_udf_fs)
module_exit(exit_udf_fs)
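/*
 * Illustrative sketch (not from this file): lvid_get_unique_id() above hands
 * out the current LVID uniqueID and then advances the stored counter,
 * skipping to 16 whenever the low 32 bits would wrap to 0, since UDF treats
 * the low-32-bit values 0..15 as reserved. The next_unique_id() name below
 * is made up for the example; this is standalone userspace code, not part of
 * the driver.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t next_unique_id(uint64_t *stored)
{
	uint64_t ret = *stored;
	uint64_t next = *stored + 1;

	if (!(next & 0xFFFFFFFFULL))	/* low 32 bits wrapped to 0 */
		next += 16;		/* skip the reserved 0..15 range */
	*stored = next;
	return ret;
}

int main(void)
{
	uint64_t id = 0xFFFFFFFFULL;	/* low 32 bits about to wrap */

	/* hands out 0xffffffff; the stored counter becomes 0x100000010 */
	printf("got %#llx, next stored %#llx\n",
	       (unsigned long long)next_unique_id(&id),
	       (unsigned long long)id);
	return 0;
}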
/*
 * videobuf2-v4l2.c - V4L2 driver helper framework
 *
 * Copyright (C) 2010 Samsung Electronics
 *
 * Author: Pawel Osciak <pawel@osciak.com>
 *	   Marek Szyprowski <m.szyprowski@samsung.com>
 *
 * The vb2_thread implementation was based on code from videobuf-dvb.c:
 *	(c) 2004 Gerd Knorr <kraxel@bytesex.org> [SUSE Labs]
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.
*/ #include <linux/device.h> #include <linux/err.h> #include <linux/freezer.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/slab.h> #include <media/v4l2-common.h> #include <media/v4l2-dev.h> #include <media/v4l2-device.h> #include <media/v4l2-event.h> #include <media/v4l2-fh.h> #include <media/videobuf2-v4l2.h> static int debug; module_param(debug, int, 0644); #define dprintk(q, level, fmt, arg...) \ do { \ if (debug >= level) \ pr_info("vb2-v4l2: [%p] %s: " fmt, \ (q)->name, __func__, ## arg); \ } while (0) /* Flags that are set by us */ #define V4L2_BUFFER_MASK_FLAGS (V4L2_BUF_FLAG_MAPPED | V4L2_BUF_FLAG_QUEUED | \ V4L2_BUF_FLAG_DONE | V4L2_BUF_FLAG_ERROR | \ V4L2_BUF_FLAG_PREPARED | \ V4L2_BUF_FLAG_IN_REQUEST | \ V4L2_BUF_FLAG_REQUEST_FD | \ V4L2_BUF_FLAG_TIMESTAMP_MASK) /* Output buffer flags that should be passed on to the driver */ #define V4L2_BUFFER_OUT_FLAGS (V4L2_BUF_FLAG_PFRAME | \ V4L2_BUF_FLAG_BFRAME | \ V4L2_BUF_FLAG_KEYFRAME | \ V4L2_BUF_FLAG_TIMECODE | \ V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) /* * __verify_planes_array() - verify that the planes array passed in struct * v4l2_buffer from userspace can be safely used */ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer *b) { if (!V4L2_TYPE_IS_MULTIPLANAR(b->type)) return 0; /* Is memory for copying plane information present? */ if (b->m.planes == NULL) { dprintk(vb->vb2_queue, 1, "multi-planar buffer passed but planes array not provided\n"); return -EINVAL; } if (b->length < vb->num_planes || b->length > VB2_MAX_PLANES) { dprintk(vb->vb2_queue, 1, "incorrect planes array length, expected %d, got %d\n", vb->num_planes, b->length); return -EINVAL; } return 0; } static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb) { return __verify_planes_array(vb, pb); } /* * __verify_length() - Verify that the bytesused value for each plane fits in * the plane length and that the data offset doesn't exceed the bytesused value. */ static int __verify_length(struct vb2_buffer *vb, const struct v4l2_buffer *b) { unsigned int length; unsigned int bytesused; unsigned int plane; if (V4L2_TYPE_IS_CAPTURE(b->type)) return 0; if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) { for (plane = 0; plane < vb->num_planes; ++plane) { length = (b->memory == VB2_MEMORY_USERPTR || b->memory == VB2_MEMORY_DMABUF) ? b->m.planes[plane].length : vb->planes[plane].length; bytesused = b->m.planes[plane].bytesused ? b->m.planes[plane].bytesused : length; if (b->m.planes[plane].bytesused > length) return -EINVAL; if (b->m.planes[plane].data_offset > 0 && b->m.planes[plane].data_offset >= bytesused) return -EINVAL; } } else { length = (b->memory == VB2_MEMORY_USERPTR) ? b->length : vb->planes[0].length; if (b->bytesused > length) return -EINVAL; } return 0; } /* * __init_vb2_v4l2_buffer() - initialize the vb2_v4l2_buffer struct */ static void __init_vb2_v4l2_buffer(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); vbuf->request_fd = -1; } static void __copy_timestamp(struct vb2_buffer *vb, const void *pb) { const struct v4l2_buffer *b = pb; struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vb2_queue *q = vb->vb2_queue; if (q->is_output) { /* * For output buffers copy the timestamp if needed, * and the timecode field and flag if needed. 
*/ if (q->copy_timestamp) vb->timestamp = v4l2_buffer_get_timestamp(b); vbuf->flags |= b->flags & V4L2_BUF_FLAG_TIMECODE; if (b->flags & V4L2_BUF_FLAG_TIMECODE) vbuf->timecode = b->timecode; } }; static void vb2_warn_zero_bytesused(struct vb2_buffer *vb) { static bool check_once; if (check_once) return; check_once = true; pr_warn("use of bytesused == 0 is deprecated and will be removed in the future,\n"); if (vb->vb2_queue->allow_zero_bytesused) pr_warn("use VIDIOC_DECODER_CMD(V4L2_DEC_CMD_STOP) instead.\n"); else pr_warn("use the actual size instead.\n"); } static int vb2_fill_vb2_v4l2_buffer(struct vb2_buffer *vb, struct v4l2_buffer *b) { struct vb2_queue *q = vb->vb2_queue; struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vb2_plane *planes = vbuf->planes; unsigned int plane; int ret; ret = __verify_length(vb, b); if (ret < 0) { dprintk(q, 1, "plane parameters verification failed: %d\n", ret); return ret; } if (b->field == V4L2_FIELD_ALTERNATE && q->is_output) { /* * If the format's field is ALTERNATE, then the buffer's field * should be either TOP or BOTTOM, not ALTERNATE since that * makes no sense. The driver has to know whether the * buffer represents a top or a bottom field in order to * program any DMA correctly. Using ALTERNATE is wrong, since * that just says that it is either a top or a bottom field, * but not which of the two it is. */ dprintk(q, 1, "the field is incorrectly set to ALTERNATE for an output buffer\n"); return -EINVAL; } vbuf->sequence = 0; vbuf->request_fd = -1; vbuf->is_held = false; if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) { switch (b->memory) { case VB2_MEMORY_USERPTR: for (plane = 0; plane < vb->num_planes; ++plane) { planes[plane].m.userptr = b->m.planes[plane].m.userptr; planes[plane].length = b->m.planes[plane].length; } break; case VB2_MEMORY_DMABUF: for (plane = 0; plane < vb->num_planes; ++plane) { planes[plane].m.fd = b->m.planes[plane].m.fd; planes[plane].length = b->m.planes[plane].length; } break; default: for (plane = 0; plane < vb->num_planes; ++plane) { planes[plane].m.offset = vb->planes[plane].m.offset; planes[plane].length = vb->planes[plane].length; } break; } /* Fill in user-provided information for OUTPUT types */ if (V4L2_TYPE_IS_OUTPUT(b->type)) { /* * Will have to go up to b->length when API starts * accepting variable number of planes. * * If bytesused == 0 for the output buffer, then fall * back to the full buffer size. In that case * userspace clearly never bothered to set it and * it's a safe assumption that they really meant to * use the full plane sizes. * * Some drivers, e.g. old codec drivers, use bytesused == 0 * as a way to indicate that streaming is finished. * In that case, the driver should use the * allow_zero_bytesused flag to keep old userspace * applications working. */ for (plane = 0; plane < vb->num_planes; ++plane) { struct vb2_plane *pdst = &planes[plane]; struct v4l2_plane *psrc = &b->m.planes[plane]; if (psrc->bytesused == 0) vb2_warn_zero_bytesused(vb); if (vb->vb2_queue->allow_zero_bytesused) pdst->bytesused = psrc->bytesused; else pdst->bytesused = psrc->bytesused ? psrc->bytesused : pdst->length; pdst->data_offset = psrc->data_offset; } } } else { /* * Single-planar buffers do not use planes array, * so fill in relevant v4l2_buffer struct fields instead. * In vb2 we use our internal V4l2_planes struct for * single-planar buffers as well, for simplicity. * * If bytesused == 0 for the output buffer, then fall back * to the full buffer size as that's a sensible default. * * Some drivers, e.g. 
old codec drivers, use bytesused == 0 as * a way to indicate that streaming is finished. In that case, * the driver should use the allow_zero_bytesused flag to keep * old userspace applications working. */ switch (b->memory) { case VB2_MEMORY_USERPTR: planes[0].m.userptr = b->m.userptr; planes[0].length = b->length; break; case VB2_MEMORY_DMABUF: planes[0].m.fd = b->m.fd; planes[0].length = b->length; break; default: planes[0].m.offset = vb->planes[0].m.offset; planes[0].length = vb->planes[0].length; break; } planes[0].data_offset = 0; if (V4L2_TYPE_IS_OUTPUT(b->type)) { if (b->bytesused == 0) vb2_warn_zero_bytesused(vb); if (vb->vb2_queue->allow_zero_bytesused) planes[0].bytesused = b->bytesused; else planes[0].bytesused = b->bytesused ? b->bytesused : planes[0].length; } else planes[0].bytesused = 0; } /* Zero flags that we handle */ vbuf->flags = b->flags & ~V4L2_BUFFER_MASK_FLAGS; if (!vb->vb2_queue->copy_timestamp || V4L2_TYPE_IS_CAPTURE(b->type)) { /* * Non-COPY timestamps and non-OUTPUT queues will get * their timestamp and timestamp source flags from the * queue. */ vbuf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK; } if (V4L2_TYPE_IS_OUTPUT(b->type)) { /* * For output buffers mask out the timecode flag: * this will be handled later in vb2_qbuf(). * The 'field' is valid metadata for this output buffer * and so that needs to be copied here. */ vbuf->flags &= ~V4L2_BUF_FLAG_TIMECODE; vbuf->field = b->field; if (!(q->subsystem_flags & VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF)) vbuf->flags &= ~V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; } else { /* Zero any output buffer flags as this is a capture buffer */ vbuf->flags &= ~V4L2_BUFFER_OUT_FLAGS; /* Zero last flag, this is a signal from driver to userspace */ vbuf->flags &= ~V4L2_BUF_FLAG_LAST; } return 0; } static void set_buffer_cache_hints(struct vb2_queue *q, struct vb2_buffer *vb, struct v4l2_buffer *b) { if (!vb2_queue_allows_cache_hints(q)) { /* * Clear buffer cache flags if queue does not support user * space hints. That's to indicate to userspace that these * flags won't work. */ b->flags &= ~V4L2_BUF_FLAG_NO_CACHE_INVALIDATE; b->flags &= ~V4L2_BUF_FLAG_NO_CACHE_CLEAN; return; } if (b->flags & V4L2_BUF_FLAG_NO_CACHE_INVALIDATE) vb->skip_cache_sync_on_finish = 1; if (b->flags & V4L2_BUF_FLAG_NO_CACHE_CLEAN) vb->skip_cache_sync_on_prepare = 1; } static int vb2_queue_or_prepare_buf(struct vb2_queue *q, struct media_device *mdev, struct vb2_buffer *vb, struct v4l2_buffer *b, bool is_prepare, struct media_request **p_req) { const char *opname = is_prepare ? 
"prepare_buf" : "qbuf"; struct media_request *req; struct vb2_v4l2_buffer *vbuf; int ret; if (b->type != q->type) { dprintk(q, 1, "%s: invalid buffer type\n", opname); return -EINVAL; } if (b->memory != q->memory) { dprintk(q, 1, "%s: invalid memory type\n", opname); return -EINVAL; } vbuf = to_vb2_v4l2_buffer(vb); ret = __verify_planes_array(vb, b); if (ret) return ret; if (!is_prepare && (b->flags & V4L2_BUF_FLAG_REQUEST_FD) && vb->state != VB2_BUF_STATE_DEQUEUED) { dprintk(q, 1, "%s: buffer is not in dequeued state\n", opname); return -EINVAL; } if (!vb->prepared) { set_buffer_cache_hints(q, vb, b); /* Copy relevant information provided by the userspace */ memset(vbuf->planes, 0, sizeof(vbuf->planes[0]) * vb->num_planes); ret = vb2_fill_vb2_v4l2_buffer(vb, b); if (ret) return ret; } if (is_prepare) return 0; if (!(b->flags & V4L2_BUF_FLAG_REQUEST_FD)) { if (q->requires_requests) { dprintk(q, 1, "%s: queue requires requests\n", opname); return -EBADR; } if (q->uses_requests) { dprintk(q, 1, "%s: queue uses requests\n", opname); return -EBUSY; } return 0; } else if (!q->supports_requests) { dprintk(q, 1, "%s: queue does not support requests\n", opname); return -EBADR; } else if (q->uses_qbuf) { dprintk(q, 1, "%s: queue does not use requests\n", opname); return -EBUSY; } /* * For proper locking when queueing a request you need to be able * to lock access to the vb2 queue, so check that there is a lock * that we can use. In addition p_req must be non-NULL. */ if (WARN_ON(!q->lock || !p_req)) return -EINVAL; /* * Make sure this op is implemented by the driver. It's easy to forget * this callback, but is it important when canceling a buffer in a * queued request. */ if (WARN_ON(!q->ops->buf_request_complete)) return -EINVAL; /* * Make sure this op is implemented by the driver for the output queue. * It's easy to forget this callback, but is it important to correctly * validate the 'field' value at QBUF time. */ if (WARN_ON((q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT || q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) && !q->ops->buf_out_validate)) return -EINVAL; req = media_request_get_by_fd(mdev, b->request_fd); if (IS_ERR(req)) { dprintk(q, 1, "%s: invalid request_fd\n", opname); return PTR_ERR(req); } /* * Early sanity check. This is checked again when the buffer * is bound to the request in vb2_core_qbuf(). */ if (req->state != MEDIA_REQUEST_STATE_IDLE && req->state != MEDIA_REQUEST_STATE_UPDATING) { dprintk(q, 1, "%s: request is not idle\n", opname); media_request_put(req); return -EBUSY; } *p_req = req; vbuf->request_fd = b->request_fd; return 0; } /* * __fill_v4l2_buffer() - fill in a struct v4l2_buffer with information to be * returned to userspace */ static void __fill_v4l2_buffer(struct vb2_buffer *vb, void *pb) { struct v4l2_buffer *b = pb; struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vb2_queue *q = vb->vb2_queue; unsigned int plane; /* Copy back data such as timestamp, flags, etc. */ b->index = vb->index; b->type = vb->type; b->memory = vb->memory; b->bytesused = 0; b->flags = vbuf->flags; b->field = vbuf->field; v4l2_buffer_set_timestamp(b, vb->timestamp); b->timecode = vbuf->timecode; b->sequence = vbuf->sequence; b->reserved2 = 0; b->request_fd = 0; if (q->is_multiplanar) { /* * Fill in plane-related data if userspace provided an array * for it. The caller has already verified memory and size. 
*/ b->length = vb->num_planes; for (plane = 0; plane < vb->num_planes; ++plane) { struct v4l2_plane *pdst = &b->m.planes[plane]; struct vb2_plane *psrc = &vb->planes[plane]; pdst->bytesused = psrc->bytesused; pdst->length = psrc->length; if (q->memory == VB2_MEMORY_MMAP) pdst->m.mem_offset = psrc->m.offset; else if (q->memory == VB2_MEMORY_USERPTR) pdst->m.userptr = psrc->m.userptr; else if (q->memory == VB2_MEMORY_DMABUF) pdst->m.fd = psrc->m.fd; pdst->data_offset = psrc->data_offset; memset(pdst->reserved, 0, sizeof(pdst->reserved)); } } else { /* * We use length and offset in v4l2_planes array even for * single-planar buffers, but userspace does not. */ b->length = vb->planes[0].length; b->bytesused = vb->planes[0].bytesused; if (q->memory == VB2_MEMORY_MMAP) b->m.offset = vb->planes[0].m.offset; else if (q->memory == VB2_MEMORY_USERPTR) b->m.userptr = vb->planes[0].m.userptr; else if (q->memory == VB2_MEMORY_DMABUF) b->m.fd = vb->planes[0].m.fd; } /* * Clear any buffer state related flags. */ b->flags &= ~V4L2_BUFFER_MASK_FLAGS; b->flags |= q->timestamp_flags & V4L2_BUF_FLAG_TIMESTAMP_MASK; if (!q->copy_timestamp) { /* * For non-COPY timestamps, drop timestamp source bits * and obtain the timestamp source from the queue. */ b->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK; b->flags |= q->timestamp_flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK; } switch (vb->state) { case VB2_BUF_STATE_QUEUED: case VB2_BUF_STATE_ACTIVE: b->flags |= V4L2_BUF_FLAG_QUEUED; break; case VB2_BUF_STATE_IN_REQUEST: b->flags |= V4L2_BUF_FLAG_IN_REQUEST; break; case VB2_BUF_STATE_ERROR: b->flags |= V4L2_BUF_FLAG_ERROR; fallthrough; case VB2_BUF_STATE_DONE: b->flags |= V4L2_BUF_FLAG_DONE; break; case VB2_BUF_STATE_PREPARING: case VB2_BUF_STATE_DEQUEUED: /* nothing */ break; } if ((vb->state == VB2_BUF_STATE_DEQUEUED || vb->state == VB2_BUF_STATE_IN_REQUEST) && vb->synced && vb->prepared) b->flags |= V4L2_BUF_FLAG_PREPARED; if (vb2_buffer_in_use(q, vb)) b->flags |= V4L2_BUF_FLAG_MAPPED; if (vbuf->request_fd >= 0) { b->flags |= V4L2_BUF_FLAG_REQUEST_FD; b->request_fd = vbuf->request_fd; } } /* * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a * v4l2_buffer by the userspace. It also verifies that struct * v4l2_buffer has a valid number of planes. */ static int __fill_vb2_buffer(struct vb2_buffer *vb, struct vb2_plane *planes) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); unsigned int plane; if (!vb->vb2_queue->copy_timestamp) vb->timestamp = 0; for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->vb2_queue->memory != VB2_MEMORY_MMAP) { planes[plane].m = vbuf->planes[plane].m; planes[plane].length = vbuf->planes[plane].length; } planes[plane].bytesused = vbuf->planes[plane].bytesused; planes[plane].data_offset = vbuf->planes[plane].data_offset; } return 0; } static const struct vb2_buf_ops v4l2_buf_ops = { .verify_planes_array = __verify_planes_array_core, .init_buffer = __init_vb2_v4l2_buffer, .fill_user_buffer = __fill_v4l2_buffer, .fill_vb2_buffer = __fill_vb2_buffer, .copy_timestamp = __copy_timestamp, }; struct vb2_buffer *vb2_find_buffer(struct vb2_queue *q, u64 timestamp) { unsigned int i; struct vb2_buffer *vb2; /* * This loop doesn't scale if there is a really large number of buffers. * Maybe something more efficient will be needed in this case. 
*/ for (i = 0; i < q->max_num_buffers; i++) { vb2 = vb2_get_buffer(q, i); if (!vb2) continue; if (vb2->copied_timestamp && vb2->timestamp == timestamp) return vb2; } return NULL; } EXPORT_SYMBOL_GPL(vb2_find_buffer); /* * vb2_querybuf() - query video buffer information * @q: vb2 queue * @b: buffer struct passed from userspace to vidioc_querybuf handler * in driver * * Should be called from vidioc_querybuf ioctl handler in driver. * This function will verify the passed v4l2_buffer structure and fill the * relevant information for the userspace. * * The return values from this function are intended to be directly returned * from vidioc_querybuf handler in driver. */ int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b) { struct vb2_buffer *vb; int ret; if (b->type != q->type) { dprintk(q, 1, "wrong buffer type\n"); return -EINVAL; } vb = vb2_get_buffer(q, b->index); if (!vb) { dprintk(q, 1, "can't find the requested buffer %u\n", b->index); return -EINVAL; } ret = __verify_planes_array(vb, b); if (!ret) vb2_core_querybuf(q, vb, b); return ret; } EXPORT_SYMBOL(vb2_querybuf); static void vb2_set_flags_and_caps(struct vb2_queue *q, u32 memory, u32 *flags, u32 *caps, u32 *max_num_bufs) { if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) { /* * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only, * but in order to avoid bugs we zero out all bits. */ *flags = 0; } else { /* Clear all unknown flags. */ *flags &= V4L2_MEMORY_FLAG_NON_COHERENT; } *caps |= V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS; if (q->io_modes & VB2_MMAP) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP; if (q->io_modes & VB2_USERPTR) *caps |= V4L2_BUF_CAP_SUPPORTS_USERPTR; if (q->io_modes & VB2_DMABUF) *caps |= V4L2_BUF_CAP_SUPPORTS_DMABUF; if (q->subsystem_flags & VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF) *caps |= V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF; if (q->allow_cache_hints && q->io_modes & VB2_MMAP) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS; if (q->supports_requests) *caps |= V4L2_BUF_CAP_SUPPORTS_REQUESTS; if (max_num_bufs) { *max_num_bufs = q->max_num_buffers; *caps |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS; } } int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req) { int ret = vb2_verify_memory_type(q, req->memory, req->type); u32 flags = req->flags; vb2_set_flags_and_caps(q, req->memory, &flags, &req->capabilities, NULL); req->flags = flags; return ret ? ret : vb2_core_reqbufs(q, req->memory, req->flags, &req->count); } EXPORT_SYMBOL_GPL(vb2_reqbufs); int vb2_prepare_buf(struct vb2_queue *q, struct media_device *mdev, struct v4l2_buffer *b) { struct vb2_buffer *vb; int ret; if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } if (b->flags & V4L2_BUF_FLAG_REQUEST_FD) return -EINVAL; vb = vb2_get_buffer(q, b->index); if (!vb) { dprintk(q, 1, "can't find the requested buffer %u\n", b->index); return -EINVAL; } ret = vb2_queue_or_prepare_buf(q, mdev, vb, b, true, NULL); return ret ? ret : vb2_core_prepare_buf(q, vb, b); } EXPORT_SYMBOL_GPL(vb2_prepare_buf); int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create) { unsigned requested_planes = 1; unsigned requested_sizes[VIDEO_MAX_PLANES]; struct v4l2_format *f = &create->format; int ret = vb2_verify_memory_type(q, create->memory, f->type); unsigned i; create->index = vb2_get_num_buffers(q); vb2_set_flags_and_caps(q, create->memory, &create->flags, &create->capabilities, &create->max_num_buffers); if (create->count == 0) return ret != -EBUSY ? 
ret : 0; switch (f->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: requested_planes = f->fmt.pix_mp.num_planes; if (requested_planes == 0 || requested_planes > VIDEO_MAX_PLANES) return -EINVAL; for (i = 0; i < requested_planes; i++) requested_sizes[i] = f->fmt.pix_mp.plane_fmt[i].sizeimage; break; case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: requested_sizes[0] = f->fmt.pix.sizeimage; break; case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: requested_sizes[0] = f->fmt.vbi.samples_per_line * (f->fmt.vbi.count[0] + f->fmt.vbi.count[1]); break; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: requested_sizes[0] = f->fmt.sliced.io_size; break; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: requested_sizes[0] = f->fmt.sdr.buffersize; break; case V4L2_BUF_TYPE_META_CAPTURE: case V4L2_BUF_TYPE_META_OUTPUT: requested_sizes[0] = f->fmt.meta.buffersize; break; default: return -EINVAL; } for (i = 0; i < requested_planes; i++) if (requested_sizes[i] == 0) return -EINVAL; if (ret) return ret; return vb2_core_create_bufs(q, create->memory, create->flags, &create->count, requested_planes, requested_sizes, &create->index); } EXPORT_SYMBOL_GPL(vb2_create_bufs); int vb2_qbuf(struct vb2_queue *q, struct media_device *mdev, struct v4l2_buffer *b) { struct media_request *req = NULL; struct vb2_buffer *vb; int ret; if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } vb = vb2_get_buffer(q, b->index); if (!vb) { dprintk(q, 1, "can't find the requested buffer %u\n", b->index); return -EINVAL; } ret = vb2_queue_or_prepare_buf(q, mdev, vb, b, false, &req); if (ret) return ret; ret = vb2_core_qbuf(q, vb, b, req); if (req) media_request_put(req); return ret; } EXPORT_SYMBOL_GPL(vb2_qbuf); int vb2_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b, bool nonblocking) { int ret; if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } if (b->type != q->type) { dprintk(q, 1, "invalid buffer type\n"); return -EINVAL; } ret = vb2_core_dqbuf(q, NULL, b, nonblocking); if (!q->is_output && b->flags & V4L2_BUF_FLAG_DONE && b->flags & V4L2_BUF_FLAG_LAST) q->last_buffer_dequeued = true; /* * After calling the VIDIOC_DQBUF V4L2_BUF_FLAG_DONE must be * cleared. 
*/ b->flags &= ~V4L2_BUF_FLAG_DONE; return ret; } EXPORT_SYMBOL_GPL(vb2_dqbuf); int vb2_streamon(struct vb2_queue *q, enum v4l2_buf_type type) { if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } return vb2_core_streamon(q, type); } EXPORT_SYMBOL_GPL(vb2_streamon); int vb2_streamoff(struct vb2_queue *q, enum v4l2_buf_type type) { if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } return vb2_core_streamoff(q, type); } EXPORT_SYMBOL_GPL(vb2_streamoff); int vb2_expbuf(struct vb2_queue *q, struct v4l2_exportbuffer *eb) { struct vb2_buffer *vb; vb = vb2_get_buffer(q, eb->index); if (!vb) { dprintk(q, 1, "can't find the requested buffer %u\n", eb->index); return -EINVAL; } return vb2_core_expbuf(q, &eb->fd, eb->type, vb, eb->plane, eb->flags); } EXPORT_SYMBOL_GPL(vb2_expbuf); int vb2_queue_init_name(struct vb2_queue *q, const char *name) { /* * Sanity check */ if (WARN_ON(!q) || WARN_ON(q->timestamp_flags & ~(V4L2_BUF_FLAG_TIMESTAMP_MASK | V4L2_BUF_FLAG_TSTAMP_SRC_MASK))) return -EINVAL; /* Warn that the driver should choose an appropriate timestamp type */ WARN_ON((q->timestamp_flags & V4L2_BUF_FLAG_TIMESTAMP_MASK) == V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN); /* Warn that vb2_memory should match with v4l2_memory */ if (WARN_ON(VB2_MEMORY_MMAP != (int)V4L2_MEMORY_MMAP) || WARN_ON(VB2_MEMORY_USERPTR != (int)V4L2_MEMORY_USERPTR) || WARN_ON(VB2_MEMORY_DMABUF != (int)V4L2_MEMORY_DMABUF)) return -EINVAL; if (q->buf_struct_size == 0) q->buf_struct_size = sizeof(struct vb2_v4l2_buffer); q->buf_ops = &v4l2_buf_ops; q->is_multiplanar = V4L2_TYPE_IS_MULTIPLANAR(q->type); q->is_output = V4L2_TYPE_IS_OUTPUT(q->type); q->copy_timestamp = (q->timestamp_flags & V4L2_BUF_FLAG_TIMESTAMP_MASK) == V4L2_BUF_FLAG_TIMESTAMP_COPY; /* * For compatibility with vb1: if QBUF hasn't been called yet, then * return EPOLLERR as well. This only affects capture queues, output * queues will always initialize waiting_for_buffers to false. */ q->quirk_poll_must_check_waiting_for_buffers = true; if (name) strscpy(q->name, name, sizeof(q->name)); else q->name[0] = '\0'; return vb2_core_queue_init(q); } EXPORT_SYMBOL_GPL(vb2_queue_init_name); int vb2_queue_init(struct vb2_queue *q) { return vb2_queue_init_name(q, NULL); } EXPORT_SYMBOL_GPL(vb2_queue_init); void vb2_queue_release(struct vb2_queue *q) { vb2_core_queue_release(q); } EXPORT_SYMBOL_GPL(vb2_queue_release); int vb2_queue_change_type(struct vb2_queue *q, unsigned int type) { if (type == q->type) return 0; if (vb2_is_busy(q)) return -EBUSY; q->type = type; return 0; } EXPORT_SYMBOL_GPL(vb2_queue_change_type); __poll_t vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait) { struct video_device *vfd = video_devdata(file); __poll_t res; res = vb2_core_poll(q, file, wait); if (test_bit(V4L2_FL_USES_V4L2_FH, &vfd->flags)) { struct v4l2_fh *fh = file->private_data; poll_wait(file, &fh->wait, wait); if (v4l2_event_pending(fh)) res |= EPOLLPRI; } return res; } EXPORT_SYMBOL_GPL(vb2_poll); /* * The following functions are not part of the vb2 core API, but are helper * functions that plug into struct v4l2_ioctl_ops, struct v4l2_file_operations * and struct vb2_ops. * They contain boilerplate code that most if not all drivers have to do * and so they simplify the driver code. 
*/ /* vb2 ioctl helpers */ int vb2_ioctl_remove_bufs(struct file *file, void *priv, struct v4l2_remove_buffers *d) { struct video_device *vdev = video_devdata(file); if (vdev->queue->type != d->type) return -EINVAL; if (d->count == 0) return 0; if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; return vb2_core_remove_bufs(vdev->queue, d->index, d->count); } EXPORT_SYMBOL_GPL(vb2_ioctl_remove_bufs); int vb2_ioctl_reqbufs(struct file *file, void *priv, struct v4l2_requestbuffers *p) { struct video_device *vdev = video_devdata(file); int res = vb2_verify_memory_type(vdev->queue, p->memory, p->type); u32 flags = p->flags; vb2_set_flags_and_caps(vdev->queue, p->memory, &flags, &p->capabilities, NULL); p->flags = flags; if (res) return res; if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; res = vb2_core_reqbufs(vdev->queue, p->memory, p->flags, &p->count); /* If count == 0, then the owner has released all buffers and he is no longer owner of the queue. Otherwise we have a new owner. */ if (res == 0) vdev->queue->owner = p->count ? file->private_data : NULL; return res; } EXPORT_SYMBOL_GPL(vb2_ioctl_reqbufs); int vb2_ioctl_create_bufs(struct file *file, void *priv, struct v4l2_create_buffers *p) { struct video_device *vdev = video_devdata(file); int res = vb2_verify_memory_type(vdev->queue, p->memory, p->format.type); p->index = vb2_get_num_buffers(vdev->queue); vb2_set_flags_and_caps(vdev->queue, p->memory, &p->flags, &p->capabilities, &p->max_num_buffers); /* * If count == 0, then just check if memory and type are valid. * Any -EBUSY result from vb2_verify_memory_type can be mapped to 0. */ if (p->count == 0) return res != -EBUSY ? res : 0; if (res) return res; if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; res = vb2_create_bufs(vdev->queue, p); if (res == 0) vdev->queue->owner = file->private_data; return res; } EXPORT_SYMBOL_GPL(vb2_ioctl_create_bufs); int vb2_ioctl_prepare_buf(struct file *file, void *priv, struct v4l2_buffer *p) { struct video_device *vdev = video_devdata(file); if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; return vb2_prepare_buf(vdev->queue, vdev->v4l2_dev->mdev, p); } EXPORT_SYMBOL_GPL(vb2_ioctl_prepare_buf); int vb2_ioctl_querybuf(struct file *file, void *priv, struct v4l2_buffer *p) { struct video_device *vdev = video_devdata(file); /* No need to call vb2_queue_is_busy(), anyone can query buffers. 
*/ return vb2_querybuf(vdev->queue, p); } EXPORT_SYMBOL_GPL(vb2_ioctl_querybuf); int vb2_ioctl_qbuf(struct file *file, void *priv, struct v4l2_buffer *p) { struct video_device *vdev = video_devdata(file); if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; return vb2_qbuf(vdev->queue, vdev->v4l2_dev->mdev, p); } EXPORT_SYMBOL_GPL(vb2_ioctl_qbuf); int vb2_ioctl_dqbuf(struct file *file, void *priv, struct v4l2_buffer *p) { struct video_device *vdev = video_devdata(file); if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; return vb2_dqbuf(vdev->queue, p, file->f_flags & O_NONBLOCK); } EXPORT_SYMBOL_GPL(vb2_ioctl_dqbuf); int vb2_ioctl_streamon(struct file *file, void *priv, enum v4l2_buf_type i) { struct video_device *vdev = video_devdata(file); if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; return vb2_streamon(vdev->queue, i); } EXPORT_SYMBOL_GPL(vb2_ioctl_streamon); int vb2_ioctl_streamoff(struct file *file, void *priv, enum v4l2_buf_type i) { struct video_device *vdev = video_devdata(file); if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; return vb2_streamoff(vdev->queue, i); } EXPORT_SYMBOL_GPL(vb2_ioctl_streamoff); int vb2_ioctl_expbuf(struct file *file, void *priv, struct v4l2_exportbuffer *p) { struct video_device *vdev = video_devdata(file); if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; return vb2_expbuf(vdev->queue, p); } EXPORT_SYMBOL_GPL(vb2_ioctl_expbuf); /* v4l2_file_operations helpers */ int vb2_fop_mmap(struct file *file, struct vm_area_struct *vma) { struct video_device *vdev = video_devdata(file); return vb2_mmap(vdev->queue, vma); } EXPORT_SYMBOL_GPL(vb2_fop_mmap); int _vb2_fop_release(struct file *file, struct mutex *lock) { struct video_device *vdev = video_devdata(file); if (lock) mutex_lock(lock); if (!vdev->queue->owner || file->private_data == vdev->queue->owner) { vb2_queue_release(vdev->queue); vdev->queue->owner = NULL; } if (lock) mutex_unlock(lock); return v4l2_fh_release(file); } EXPORT_SYMBOL_GPL(_vb2_fop_release); int vb2_fop_release(struct file *file) { struct video_device *vdev = video_devdata(file); struct mutex *lock = vdev->queue->lock ? vdev->queue->lock : vdev->lock; return _vb2_fop_release(file, lock); } EXPORT_SYMBOL_GPL(vb2_fop_release); ssize_t vb2_fop_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct video_device *vdev = video_devdata(file); struct mutex *lock = vdev->queue->lock ? vdev->queue->lock : vdev->lock; int err = -EBUSY; if (!(vdev->queue->io_modes & VB2_WRITE)) return -EINVAL; if (lock && mutex_lock_interruptible(lock)) return -ERESTARTSYS; if (vb2_queue_is_busy(vdev->queue, file)) goto exit; err = vb2_write(vdev->queue, buf, count, ppos, file->f_flags & O_NONBLOCK); if (vdev->queue->fileio) vdev->queue->owner = file->private_data; exit: if (lock) mutex_unlock(lock); return err; } EXPORT_SYMBOL_GPL(vb2_fop_write); ssize_t vb2_fop_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct video_device *vdev = video_devdata(file); struct mutex *lock = vdev->queue->lock ? 
vdev->queue->lock : vdev->lock; int err = -EBUSY; if (!(vdev->queue->io_modes & VB2_READ)) return -EINVAL; if (lock && mutex_lock_interruptible(lock)) return -ERESTARTSYS; if (vb2_queue_is_busy(vdev->queue, file)) goto exit; vdev->queue->owner = file->private_data; err = vb2_read(vdev->queue, buf, count, ppos, file->f_flags & O_NONBLOCK); if (!vdev->queue->fileio) vdev->queue->owner = NULL; exit: if (lock) mutex_unlock(lock); return err; } EXPORT_SYMBOL_GPL(vb2_fop_read); __poll_t vb2_fop_poll(struct file *file, poll_table *wait) { struct video_device *vdev = video_devdata(file); struct vb2_queue *q = vdev->queue; struct mutex *lock = q->lock ? q->lock : vdev->lock; __poll_t res; void *fileio; /* * If this helper doesn't know how to lock, then you shouldn't be using * it but you should write your own. */ WARN_ON(!lock); if (lock && mutex_lock_interruptible(lock)) return EPOLLERR; fileio = q->fileio; res = vb2_poll(vdev->queue, file, wait); /* If fileio was started, then we have a new queue owner. */ if (!fileio && q->fileio) q->owner = file->private_data; if (lock) mutex_unlock(lock); return res; } EXPORT_SYMBOL_GPL(vb2_fop_poll); #ifndef CONFIG_MMU unsigned long vb2_fop_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct video_device *vdev = video_devdata(file); return vb2_get_unmapped_area(vdev->queue, addr, len, pgoff, flags); } EXPORT_SYMBOL_GPL(vb2_fop_get_unmapped_area); #endif void vb2_video_unregister_device(struct video_device *vdev) { /* Check if vdev was ever registered at all */ if (!vdev || !video_is_registered(vdev)) return; /* * Calling this function only makes sense if vdev->queue is set. * If it is NULL, then just call video_unregister_device() instead. */ WARN_ON(!vdev->queue); /* * Take a reference to the device since video_unregister_device() * calls device_unregister(), but we don't want that to release * the device since we want to clean up the queue first. */ get_device(&vdev->dev); video_unregister_device(vdev); if (vdev->queue) { struct mutex *lock = vdev->queue->lock ? vdev->queue->lock : vdev->lock; if (lock) mutex_lock(lock); vb2_queue_release(vdev->queue); vdev->queue->owner = NULL; if (lock) mutex_unlock(lock); } /* * Now we put the device, and in most cases this will release * everything. */ put_device(&vdev->dev); } EXPORT_SYMBOL_GPL(vb2_video_unregister_device); /* vb2_ops helpers. Only use if vq->lock is non-NULL. */ void vb2_ops_wait_prepare(struct vb2_queue *vq) { mutex_unlock(vq->lock); } EXPORT_SYMBOL_GPL(vb2_ops_wait_prepare); void vb2_ops_wait_finish(struct vb2_queue *vq) { mutex_lock(vq->lock); } EXPORT_SYMBOL_GPL(vb2_ops_wait_finish); /* * Note that this function is called during validation time and * thus the req_queue_mutex is held to ensure no request objects * can be added or deleted while validating. So there is no need * to protect the objects list. */ int vb2_request_validate(struct media_request *req) { struct media_request_object *obj; int ret = 0; if (!vb2_request_buffer_cnt(req)) return -ENOENT; list_for_each_entry(obj, &req->objects, list) { if (!obj->ops->prepare) continue; ret = obj->ops->prepare(obj); if (ret) break; } if (ret) { list_for_each_entry_continue_reverse(obj, &req->objects, list) if (obj->ops->unprepare) obj->ops->unprepare(obj); return ret; } return 0; } EXPORT_SYMBOL_GPL(vb2_request_validate); void vb2_request_queue(struct media_request *req) { struct media_request_object *obj, *obj_safe; /* * Queue all objects. 
Note that buffer objects are at the end of the
 * objects list, after all other object types. Once buffer objects
 * are queued, the driver might delete them immediately (if the driver
 * processes the buffer at once), so we have to use
 * list_for_each_entry_safe() to handle the case where the object we
 * queue is deleted.
 */
	list_for_each_entry_safe(obj, obj_safe, &req->objects, list)
		if (obj->ops->queue)
			obj->ops->queue(obj);
}
EXPORT_SYMBOL_GPL(vb2_request_queue);

MODULE_DESCRIPTION("Driver helper framework for Video for Linux 2");
MODULE_AUTHOR("Pawel Osciak <pawel@osciak.com>, Marek Szyprowski");
MODULE_LICENSE("GPL");
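/*
 * Illustrative sketch (not from this file): the comment above notes that the
 * vb2_ioctl_* and vb2_fop_* functions are boilerplate helpers meant to be
 * plugged into a driver's v4l2_ioctl_ops and v4l2_file_operations. A minimal
 * wiring for a hypothetical capture driver might look like the following;
 * the my_drv_* names, the vmalloc mem_ops and the queue settings are
 * assumptions for the example, not something this file prescribes.
 * my_drv_qops (queue_setup/buf_queue/...) is assumed to be defined elsewhere.
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <media/v4l2-fh.h>
#include <media/v4l2-ioctl.h>
#include <media/videobuf2-v4l2.h>
#include <media/videobuf2-vmalloc.h>

extern const struct vb2_ops my_drv_qops;	/* assumed driver callbacks */

static const struct v4l2_ioctl_ops my_drv_ioctl_ops = {
	.vidioc_reqbufs		= vb2_ioctl_reqbufs,
	.vidioc_create_bufs	= vb2_ioctl_create_bufs,
	.vidioc_prepare_buf	= vb2_ioctl_prepare_buf,
	.vidioc_querybuf	= vb2_ioctl_querybuf,
	.vidioc_qbuf		= vb2_ioctl_qbuf,
	.vidioc_dqbuf		= vb2_ioctl_dqbuf,
	.vidioc_expbuf		= vb2_ioctl_expbuf,
	.vidioc_streamon	= vb2_ioctl_streamon,
	.vidioc_streamoff	= vb2_ioctl_streamoff,
};

static const struct v4l2_file_operations my_drv_fops = {
	.owner		= THIS_MODULE,
	.open		= v4l2_fh_open,
	.release	= vb2_fop_release,	/* releases the queue owner */
	.read		= vb2_fop_read,
	.poll		= vb2_fop_poll,
	.mmap		= vb2_fop_mmap,
	.unlocked_ioctl	= video_ioctl2,
};

/* Fill in a vb2_queue so the helpers above can do their own locking. */
static int my_drv_init_queue(struct vb2_queue *q, struct mutex *lock, void *priv)
{
	q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
	q->io_modes = VB2_MMAP | VB2_DMABUF | VB2_READ;
	q->drv_priv = priv;
	q->ops = &my_drv_qops;
	q->mem_ops = &vb2_vmalloc_memops;
	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
	q->lock = lock;		/* also taken by vb2_fop_read/poll/release */

	/* buf_struct_size of 0 defaults to sizeof(struct vb2_v4l2_buffer) */
	return vb2_queue_init_name(q, "my_drv capture");
}
/*
 * A real driver would then point its video_device's fops, ioctl_ops, queue
 * and lock fields at these objects before registering the device.
 */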
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * vivid-core.h - core datastructures
 *
 * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 */

#ifndef _VIVID_CORE_H_
#define _VIVID_CORE_H_

#include <linux/fb.h>
#include <linux/workqueue.h>
#include <media/cec.h>
#include <media/videobuf2-v4l2.h>
#include <media/v4l2-device.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-ctrls.h>
#include <media/tpg/v4l2-tpg.h>

#include "vivid-rds-gen.h"
#include "vivid-vbi-gen.h"

#define dprintk(dev, level, fmt, arg...)
\ v4l2_dbg(level, vivid_debug, &dev->v4l2_dev, fmt, ## arg) /* The maximum number of inputs */ #define MAX_INPUTS 16 /* The maximum number of outputs */ #define MAX_OUTPUTS 16 /* The maximum number of video capture buffers */ #define MAX_VID_CAP_BUFFERS 64 /* The maximum up or down scaling factor is 4 */ #define MAX_ZOOM 4 /* The maximum image width/height are set to 4K DMT */ #define MAX_WIDTH 4096 #define MAX_HEIGHT 2160 /* The minimum image width/height */ #define MIN_WIDTH 16 #define MIN_HEIGHT MIN_WIDTH /* Pixel Array control divider */ #define PIXEL_ARRAY_DIV MIN_WIDTH /* The data_offset of plane 0 for the multiplanar formats */ #define PLANE0_DATA_OFFSET 128 /* The supported TV frequency range in MHz */ #define MIN_TV_FREQ (44U * 16U) #define MAX_TV_FREQ (958U * 16U) /* The number of samples returned in every SDR buffer */ #define SDR_CAP_SAMPLES_PER_BUF 0x4000 /* used by the threads to know when to resync internal counters */ #define JIFFIES_PER_DAY (3600U * 24U * HZ) #define JIFFIES_RESYNC (JIFFIES_PER_DAY * (0xf0000000U / JIFFIES_PER_DAY)) /* * Maximum number of HDMI inputs allowed by vivid, due to limitations * of the Physical Address in the EDID and used by CEC we stop at 15 * inputs and outputs. */ #define MAX_HDMI_INPUTS 15 #define MAX_HDMI_OUTPUTS 15 /* Maximum number of S-Video inputs allowed by vivid */ #define MAX_SVID_INPUTS 16 /* The maximum number of items in a menu control */ #define MAX_MENU_ITEMS BITS_PER_LONG_LONG /* Number of fixed menu items in the 'Connected To' menu controls */ #define FIXED_MENU_ITEMS 2 /* The maximum number of vivid devices */ #define VIVID_MAX_DEVS CONFIG_VIDEO_VIVID_MAX_DEVS extern const struct v4l2_rect vivid_min_rect; extern const struct v4l2_rect vivid_max_rect; extern unsigned vivid_debug; /* * NULL-terminated string array for the HDMI 'Connected To' menu controls * with the list of possible HDMI outputs. * * The first two items are fixed ("TPG" and "None"). */ extern char *vivid_ctrl_hdmi_to_output_strings[1 + MAX_MENU_ITEMS]; /* Menu control skip mask of all HDMI outputs that are in use */ extern u64 hdmi_to_output_menu_skip_mask; /* * Bitmask of which vivid instances need to update any connected * HDMI outputs. */ extern u64 hdmi_input_update_outputs_mask; /* * Spinlock for access to hdmi_to_output_menu_skip_mask and * hdmi_input_update_outputs_mask. */ extern spinlock_t hdmi_output_skip_mask_lock; /* * Workqueue that updates the menu controls whenever the HDMI menu skip mask * changes. */ extern struct workqueue_struct *update_hdmi_ctrls_workqueue; /* * The HDMI menu control value (index in the menu list) maps to an HDMI * output that is part of the given vivid_dev instance and has the given * output index (as returned by VIDIOC_G_OUTPUT). * * NULL/0 if not available. */ extern struct vivid_dev *vivid_ctrl_hdmi_to_output_instance[MAX_MENU_ITEMS]; extern unsigned int vivid_ctrl_hdmi_to_output_index[MAX_MENU_ITEMS]; /* * NULL-terminated string array for the S-Video 'Connected To' menu controls * with the list of possible S-Video outputs. * * The first two items are fixed ("TPG" and "None"). */ extern char *vivid_ctrl_svid_to_output_strings[1 + MAX_MENU_ITEMS]; /* Menu control skip mask of all S-Video outputs that are in use */ extern u64 svid_to_output_menu_skip_mask; /* Spinlock for access to svid_to_output_menu_skip_mask */ extern spinlock_t svid_output_skip_mask_lock; /* * Workqueue that updates the menu controls whenever the S-Video menu skip mask * changes. 
*/ extern struct workqueue_struct *update_svid_ctrls_workqueue; /* * The S-Video menu control value (index in the menu list) maps to an S-Video * output that is part of the given vivid_dev instance and has the given * output index (as returned by VIDIOC_G_OUTPUT). * * NULL/0 if not available. */ extern struct vivid_dev *vivid_ctrl_svid_to_output_instance[MAX_MENU_ITEMS]; extern unsigned int vivid_ctrl_svid_to_output_index[MAX_MENU_ITEMS]; extern struct vivid_dev *vivid_devs[VIVID_MAX_DEVS]; extern unsigned int n_devs; struct vivid_fmt { u32 fourcc; /* v4l2 format id */ enum tgp_color_enc color_enc; bool can_do_overlay; u8 vdownsampling[TPG_MAX_PLANES]; u32 alpha_mask; u8 planes; u8 buffers; u32 data_offset[TPG_MAX_PLANES]; u32 bit_depth[TPG_MAX_PLANES]; }; extern struct vivid_fmt vivid_formats[]; /* buffer for one video frame */ struct vivid_buffer { /* common v4l buffer stuff -- must be first */ struct vb2_v4l2_buffer vb; struct list_head list; }; enum vivid_input { WEBCAM, TV, SVID, HDMI, }; enum vivid_signal_mode { CURRENT_DV_TIMINGS, CURRENT_STD = CURRENT_DV_TIMINGS, NO_SIGNAL, NO_LOCK, OUT_OF_RANGE, SELECTED_DV_TIMINGS, SELECTED_STD = SELECTED_DV_TIMINGS, CYCLE_DV_TIMINGS, CYCLE_STD = CYCLE_DV_TIMINGS, CUSTOM_DV_TIMINGS, }; enum vivid_colorspace { VIVID_CS_170M, VIVID_CS_709, VIVID_CS_SRGB, VIVID_CS_OPRGB, VIVID_CS_2020, VIVID_CS_DCI_P3, VIVID_CS_240M, VIVID_CS_SYS_M, VIVID_CS_SYS_BG, }; #define VIVID_INVALID_SIGNAL(mode) \ ((mode) == NO_SIGNAL || (mode) == NO_LOCK || (mode) == OUT_OF_RANGE) struct vivid_cec_xfer { struct cec_adapter *adap; u8 msg[CEC_MAX_MSG_SIZE]; u32 len; u32 sft; }; struct vivid_dev { u8 inst; struct v4l2_device v4l2_dev; #ifdef CONFIG_MEDIA_CONTROLLER struct media_device mdev; struct media_pad vid_cap_pad; struct media_pad vid_out_pad; struct media_pad vbi_cap_pad; struct media_pad vbi_out_pad; struct media_pad sdr_cap_pad; struct media_pad meta_cap_pad; struct media_pad meta_out_pad; struct media_pad touch_cap_pad; #endif struct v4l2_ctrl_handler ctrl_hdl_user_gen; struct v4l2_ctrl_handler ctrl_hdl_user_vid; struct v4l2_ctrl_handler ctrl_hdl_user_aud; struct v4l2_ctrl_handler ctrl_hdl_streaming; struct v4l2_ctrl_handler ctrl_hdl_sdtv_cap; struct v4l2_ctrl_handler ctrl_hdl_loop_cap; struct v4l2_ctrl_handler ctrl_hdl_fb; struct video_device vid_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_vid_cap; struct video_device vid_out_dev; struct v4l2_ctrl_handler ctrl_hdl_vid_out; struct video_device vbi_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_vbi_cap; struct video_device vbi_out_dev; struct v4l2_ctrl_handler ctrl_hdl_vbi_out; struct video_device radio_rx_dev; struct v4l2_ctrl_handler ctrl_hdl_radio_rx; struct video_device radio_tx_dev; struct v4l2_ctrl_handler ctrl_hdl_radio_tx; struct video_device sdr_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_sdr_cap; struct video_device meta_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_meta_cap; struct video_device meta_out_dev; struct v4l2_ctrl_handler ctrl_hdl_meta_out; struct video_device touch_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_touch_cap; spinlock_t slock; struct mutex mutex; struct work_struct update_hdmi_ctrl_work; struct work_struct update_svid_ctrl_work; /* capabilities */ u32 vid_cap_caps; u32 vid_out_caps; u32 vbi_cap_caps; u32 vbi_out_caps; u32 sdr_cap_caps; u32 radio_rx_caps; u32 radio_tx_caps; u32 meta_cap_caps; u32 meta_out_caps; u32 touch_cap_caps; /* supported features */ bool multiplanar; u8 num_inputs; u8 num_hdmi_inputs; u8 num_svid_inputs; u8 input_type[MAX_INPUTS]; u8 input_name_counter[MAX_INPUTS]; u8 
num_outputs; u8 num_hdmi_outputs; u8 output_type[MAX_OUTPUTS]; u8 output_name_counter[MAX_OUTPUTS]; bool has_audio_inputs; bool has_audio_outputs; bool has_vid_cap; bool has_vid_out; bool has_vbi_cap; bool has_raw_vbi_cap; bool has_sliced_vbi_cap; bool has_vbi_out; bool has_raw_vbi_out; bool has_sliced_vbi_out; bool has_radio_rx; bool has_radio_tx; bool has_sdr_cap; bool has_fb; bool has_meta_cap; bool has_meta_out; bool has_tv_tuner; bool has_touch_cap; /* Output index (0-MAX_OUTPUTS) to vivid instance of connected input */ struct vivid_dev *output_to_input_instance[MAX_OUTPUTS]; /* Output index (0-MAX_OUTPUTS) to input index (0-MAX_INPUTS) of connected input */ u8 output_to_input_index[MAX_OUTPUTS]; /* Output index (0-MAX_OUTPUTS) to HDMI or S-Video output index (0-MAX_HDMI/SVID_OUTPUTS) */ u8 output_to_iface_index[MAX_OUTPUTS]; /* ctrl_hdmi_to_output or ctrl_svid_to_output control value for each input */ s32 input_is_connected_to_output[MAX_INPUTS]; /* HDMI index (0-MAX_HDMI_OUTPUTS) to output index (0-MAX_OUTPUTS) */ u8 hdmi_index_to_output_index[MAX_HDMI_OUTPUTS]; /* HDMI index (0-MAX_HDMI_INPUTS) to input index (0-MAX_INPUTS) */ u8 hdmi_index_to_input_index[MAX_HDMI_INPUTS]; /* S-Video index (0-MAX_SVID_INPUTS) to input index (0-MAX_INPUTS) */ u8 svid_index_to_input_index[MAX_SVID_INPUTS]; /* controls */ struct v4l2_ctrl *brightness; struct v4l2_ctrl *contrast; struct v4l2_ctrl *saturation; struct v4l2_ctrl *hue; struct { /* autogain/gain cluster */ struct v4l2_ctrl *autogain; struct v4l2_ctrl *gain; }; struct v4l2_ctrl *volume; struct v4l2_ctrl *mute; struct v4l2_ctrl *alpha; struct v4l2_ctrl *button; struct v4l2_ctrl *boolean; struct v4l2_ctrl *int32; struct v4l2_ctrl *int64; struct v4l2_ctrl *menu; struct v4l2_ctrl *string; struct v4l2_ctrl *bitmask; struct v4l2_ctrl *int_menu; struct v4l2_ctrl *ro_int32; struct v4l2_ctrl *pixel_array; struct v4l2_ctrl *test_pattern; struct v4l2_ctrl *colorspace; struct v4l2_ctrl *rgb_range_cap; struct v4l2_ctrl *real_rgb_range_cap; struct { /* std_signal_mode/standard cluster */ struct v4l2_ctrl *ctrl_std_signal_mode; struct v4l2_ctrl *ctrl_standard; }; struct { /* dv_timings_signal_mode/timings cluster */ struct v4l2_ctrl *ctrl_dv_timings_signal_mode; struct v4l2_ctrl *ctrl_dv_timings; }; struct v4l2_ctrl *ctrl_has_crop_cap; struct v4l2_ctrl *ctrl_has_compose_cap; struct v4l2_ctrl *ctrl_has_scaler_cap; struct v4l2_ctrl *ctrl_has_crop_out; struct v4l2_ctrl *ctrl_has_compose_out; struct v4l2_ctrl *ctrl_has_scaler_out; struct v4l2_ctrl *ctrl_tx_mode; struct v4l2_ctrl *ctrl_tx_rgb_range; struct v4l2_ctrl *ctrl_tx_edid_present; struct v4l2_ctrl *ctrl_tx_hotplug; struct v4l2_ctrl *ctrl_tx_rxsense; struct v4l2_ctrl *ctrl_rx_power_present; struct v4l2_ctrl *radio_tx_rds_pi; struct v4l2_ctrl *radio_tx_rds_pty; struct v4l2_ctrl *radio_tx_rds_mono_stereo; struct v4l2_ctrl *radio_tx_rds_art_head; struct v4l2_ctrl *radio_tx_rds_compressed; struct v4l2_ctrl *radio_tx_rds_dyn_pty; struct v4l2_ctrl *radio_tx_rds_ta; struct v4l2_ctrl *radio_tx_rds_tp; struct v4l2_ctrl *radio_tx_rds_ms; struct v4l2_ctrl *radio_tx_rds_psname; struct v4l2_ctrl *radio_tx_rds_radiotext; struct v4l2_ctrl *radio_rx_rds_pty; struct v4l2_ctrl *radio_rx_rds_ta; struct v4l2_ctrl *radio_rx_rds_tp; struct v4l2_ctrl *radio_rx_rds_ms; struct v4l2_ctrl *radio_rx_rds_psname; struct v4l2_ctrl *radio_rx_rds_radiotext; struct v4l2_ctrl *ctrl_hdmi_to_output[MAX_HDMI_INPUTS]; char ctrl_hdmi_to_output_names[MAX_HDMI_INPUTS][32]; struct v4l2_ctrl *ctrl_svid_to_output[MAX_SVID_INPUTS]; char 
ctrl_svid_to_output_names[MAX_SVID_INPUTS][32]; unsigned input_brightness[MAX_INPUTS]; unsigned osd_mode; unsigned button_pressed; bool sensor_hflip; bool sensor_vflip; bool hflip; bool vflip; bool vbi_cap_interlaced; bool loop_video; bool reduced_fps; /* Framebuffer */ unsigned long video_pbase; void *video_vbase; u32 video_buffer_size; int display_width; int display_height; int display_byte_stride; int bits_per_pixel; int bytes_per_pixel; #ifdef CONFIG_VIDEO_VIVID_OSD struct fb_info fb_info; struct fb_var_screeninfo fb_defined; struct fb_fix_screeninfo fb_fix; #endif /* Error injection */ bool disconnect_error; bool queue_setup_error; bool buf_prepare_error; bool start_streaming_error; bool dqbuf_error; bool req_validate_error; bool seq_wrap; u64 time_wrap; u64 time_wrap_offset; unsigned perc_dropped_buffers; enum vivid_signal_mode std_signal_mode[MAX_INPUTS]; unsigned int query_std_last[MAX_INPUTS]; v4l2_std_id query_std[MAX_INPUTS]; enum tpg_video_aspect std_aspect_ratio[MAX_INPUTS]; enum vivid_signal_mode dv_timings_signal_mode[MAX_INPUTS]; char **query_dv_timings_qmenu; char *query_dv_timings_qmenu_strings; unsigned query_dv_timings_size; unsigned int query_dv_timings_last[MAX_INPUTS]; unsigned int query_dv_timings[MAX_INPUTS]; enum tpg_video_aspect dv_timings_aspect_ratio[MAX_INPUTS]; /* Input */ unsigned input; v4l2_std_id std_cap[MAX_INPUTS]; struct v4l2_dv_timings dv_timings_cap[MAX_INPUTS]; int dv_timings_cap_sel[MAX_INPUTS]; u32 service_set_cap; struct vivid_vbi_gen_data vbi_gen; u8 *edid; unsigned edid_blocks; unsigned edid_max_blocks; unsigned webcam_size_idx; unsigned webcam_ival_idx; unsigned tv_freq; unsigned tv_audmode; unsigned tv_field_cap; unsigned tv_audio_input; u32 power_present; /* Output */ unsigned output; v4l2_std_id std_out; struct v4l2_dv_timings dv_timings_out; u32 colorspace_out; u32 ycbcr_enc_out; u32 hsv_enc_out; u32 quantization_out; u32 xfer_func_out; u32 service_set_out; unsigned bytesperline_out[TPG_MAX_PLANES]; unsigned tv_field_out; unsigned tv_audio_output; bool vbi_out_have_wss; u8 vbi_out_wss[2]; bool vbi_out_have_cc[2]; u8 vbi_out_cc[2][2]; bool dvi_d_out; u8 *scaled_line; u8 *blended_line; unsigned cur_scaled_line; /* Output Overlay */ void *fb_vbase_out; bool overlay_out_enabled; int overlay_out_top, overlay_out_left; unsigned fbuf_out_flags; u32 chromakey_out; u8 global_alpha_out; /* video capture */ struct tpg_data tpg; unsigned ms_vid_cap; bool must_blank[MAX_VID_CAP_BUFFERS]; const struct vivid_fmt *fmt_cap; struct v4l2_fract timeperframe_vid_cap; enum v4l2_field field_cap; struct v4l2_rect src_rect; struct v4l2_rect fmt_cap_rect; struct v4l2_rect crop_cap; struct v4l2_rect compose_cap; struct v4l2_rect crop_bounds_cap; struct vb2_queue vb_vid_cap_q; struct list_head vid_cap_active; struct vb2_queue vb_vbi_cap_q; struct list_head vbi_cap_active; struct vb2_queue vb_meta_cap_q; struct list_head meta_cap_active; struct vb2_queue vb_touch_cap_q; struct list_head touch_cap_active; /* thread for generating video capture stream */ struct task_struct *kthread_vid_cap; unsigned long jiffies_vid_cap; u64 cap_stream_start; u64 cap_frame_period; u64 cap_frame_eof_offset; u32 cap_seq_offset; u32 cap_seq_count; bool cap_seq_resync; u32 vid_cap_seq_start; u32 vid_cap_seq_count; bool vid_cap_streaming; u32 vbi_cap_seq_start; u32 vbi_cap_seq_count; bool vbi_cap_streaming; u32 meta_cap_seq_start; u32 meta_cap_seq_count; bool meta_cap_streaming; /* Touch capture */ struct task_struct *kthread_touch_cap; unsigned long jiffies_touch_cap; u64 
touch_cap_stream_start; u32 touch_cap_seq_offset; bool touch_cap_seq_resync; u32 touch_cap_seq_start; u32 touch_cap_seq_count; u32 touch_cap_with_seq_wrap_count; bool touch_cap_streaming; struct v4l2_fract timeperframe_tch_cap; struct v4l2_pix_format tch_format; int tch_pat_random; /* video output */ const struct vivid_fmt *fmt_out; struct v4l2_fract timeperframe_vid_out; enum v4l2_field field_out; struct v4l2_rect sink_rect; struct v4l2_rect fmt_out_rect; struct v4l2_rect crop_out; struct v4l2_rect compose_out; struct v4l2_rect compose_bounds_out; struct vb2_queue vb_vid_out_q; struct list_head vid_out_active; struct vb2_queue vb_vbi_out_q; struct list_head vbi_out_active; struct vb2_queue vb_meta_out_q; struct list_head meta_out_active; /* video loop precalculated rectangles */ /* * Intersection between what the output side composes and the capture side * crops. I.e., what actually needs to be copied from the output buffer to * the capture buffer. */ struct v4l2_rect loop_vid_copy; /* The part of the output buffer that (after scaling) corresponds to loop_vid_copy. */ struct v4l2_rect loop_vid_out; /* The part of the capture buffer that (after scaling) corresponds to loop_vid_copy. */ struct v4l2_rect loop_vid_cap; /* * The intersection of the framebuffer, the overlay output window and * loop_vid_copy. I.e., the part of the framebuffer that actually should be * blended with the compose_out rectangle. This uses the framebuffer origin. */ struct v4l2_rect loop_fb_copy; /* The same as loop_fb_copy but with compose_out origin. */ struct v4l2_rect loop_vid_overlay; /* * The part of the capture buffer that (after scaling) corresponds * to loop_vid_overlay. */ struct v4l2_rect loop_vid_overlay_cap; /* thread for generating video output stream */ struct task_struct *kthread_vid_out; unsigned long jiffies_vid_out; u32 out_seq_offset; u32 out_seq_count; bool out_seq_resync; u32 vid_out_seq_start; u32 vid_out_seq_count; bool vid_out_streaming; u32 vbi_out_seq_start; u32 vbi_out_seq_count; bool vbi_out_streaming; bool stream_sliced_vbi_out; u32 meta_out_seq_start; u32 meta_out_seq_count; bool meta_out_streaming; /* SDR capture */ struct vb2_queue vb_sdr_cap_q; struct list_head sdr_cap_active; u32 sdr_pixelformat; /* v4l2 format id */ unsigned sdr_buffersize; unsigned sdr_adc_freq; unsigned sdr_fm_freq; unsigned sdr_fm_deviation; int sdr_fixp_src_phase; int sdr_fixp_mod_phase; bool tstamp_src_is_soe; bool has_crop_cap; bool has_compose_cap; bool has_scaler_cap; bool has_crop_out; bool has_compose_out; bool has_scaler_out; /* thread for generating SDR stream */ struct task_struct *kthread_sdr_cap; unsigned long jiffies_sdr_cap; u32 sdr_cap_seq_offset; u32 sdr_cap_seq_start; u32 sdr_cap_seq_count; u32 sdr_cap_with_seq_wrap_count; bool sdr_cap_seq_resync; /* RDS generator */ struct vivid_rds_gen rds_gen; /* Radio receiver */ unsigned radio_rx_freq; unsigned radio_rx_audmode; int radio_rx_sig_qual; unsigned radio_rx_hw_seek_mode; bool radio_rx_hw_seek_prog_lim; bool radio_rx_rds_controls; bool radio_rx_rds_enabled; unsigned radio_rx_rds_use_alternates; unsigned radio_rx_rds_last_block; struct v4l2_fh *radio_rx_rds_owner; /* Radio transmitter */ unsigned radio_tx_freq; unsigned radio_tx_subchans; bool radio_tx_rds_controls; unsigned radio_tx_rds_last_block; struct v4l2_fh *radio_tx_rds_owner; /* Shared between radio receiver and transmitter */ bool radio_rds_loop; ktime_t radio_rds_init_time; /* CEC */ struct cec_adapter *cec_rx_adap; struct cec_adapter *cec_tx_adap[MAX_HDMI_OUTPUTS]; struct task_struct 
						*kthread_cec;
	wait_queue_head_t		kthread_waitq_cec;
	struct vivid_cec_xfer		xfers[MAX_OUTPUTS];
	spinlock_t			cec_xfers_slock; /* read and write cec messages */
	u32				cec_sft; /* bus signal free time, in bit periods */
	u8				last_initiator;

	/* CEC OSD String */
	char				osd[14];
	unsigned long			osd_jiffies;

	bool				meta_pts;
	bool				meta_scr;
};

static inline bool vivid_is_webcam(const struct vivid_dev *dev)
{
	return dev->input_type[dev->input] == WEBCAM;
}

static inline bool vivid_is_tv_cap(const struct vivid_dev *dev)
{
	return dev->input_type[dev->input] == TV;
}

static inline bool vivid_is_svid_cap(const struct vivid_dev *dev)
{
	return dev->input_type[dev->input] == SVID;
}

static inline bool vivid_is_hdmi_cap(const struct vivid_dev *dev)
{
	return dev->input_type[dev->input] == HDMI;
}

static inline bool vivid_is_sdtv_cap(const struct vivid_dev *dev)
{
	return vivid_is_tv_cap(dev) || vivid_is_svid_cap(dev);
}

static inline bool vivid_is_svid_out(const struct vivid_dev *dev)
{
	return dev->output_type[dev->output] == SVID;
}

static inline bool vivid_is_hdmi_out(const struct vivid_dev *dev)
{
	return dev->output_type[dev->output] == HDMI;
}

#endif
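/*
 * A minimal usage sketch, not part of the vivid sources: it only
 * illustrates how capture-side code can branch on the active input type
 * with the inline helpers above. The function name and the returned
 * strings are hypothetical.
 */
static const char *example_timing_source(const struct vivid_dev *dev)
{
	if (vivid_is_webcam(dev))
		return "webcam framesizes/frame intervals";
	if (vivid_is_sdtv_cap(dev))	/* TV or S-Video input */
		return "SDTV standard (std_cap[])";
	if (vivid_is_hdmi_cap(dev))
		return "DV timings (dv_timings_cap[])";
	return "none";
}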
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM capability

#if !defined(_TRACE_CAPABILITY_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_CAPABILITY_H

#include <linux/cred.h>
#include <linux/tracepoint.h>
#include <linux/user_namespace.h>

/**
 * cap_capable - called after it's determined if a task has a particular
 * effective capability
 *
 * @cred: The credentials used
 * @target_ns: The user namespace of the resource being accessed
 * @capable_ns: The user namespace in which the credential provides the
 *              capability to access the targeted resource.
 *              This will be NULL if ret is not 0.
 * @cap: The capability to check for
 * @ret: The return value of the check: 0 if it does, -ve if it does not
 *
 * Allows to trace calls to cap_capable in commoncap.c
 */
TRACE_EVENT(cap_capable,

	TP_PROTO(const struct cred *cred, struct user_namespace *target_ns,
		 const struct user_namespace *capable_ns, int cap, int ret),

	TP_ARGS(cred, target_ns, capable_ns, cap, ret),

	TP_STRUCT__entry(
		__field(const struct cred *, cred)
		__field(struct user_namespace *, target_ns)
		__field(const struct user_namespace *, capable_ns)
		__field(int, cap)
		__field(int, ret)
	),

	TP_fast_assign(
		__entry->cred = cred;
		__entry->target_ns = target_ns;
		__entry->capable_ns = ret == 0 ? capable_ns : NULL;
		__entry->cap = cap;
		__entry->ret = ret;
	),

	TP_printk("cred %p, target_ns %p, capable_ns %p, cap %d, ret %d",
		  __entry->cred, __entry->target_ns, __entry->capable_ns,
		  __entry->cap, __entry->ret)
);

#endif /* _TRACE_CAPABILITY_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
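/*
 * A hedged sketch of how the tracepoint above is typically emitted:
 * TRACE_EVENT(cap_capable, ...) generates a trace_cap_capable() call
 * matching TP_PROTO. The wrapper below is illustrative only and is not
 * the real cap_capable() from commoncap.c; the include path is assumed
 * from TRACE_SYSTEM.
 */
#include <trace/events/capability.h>

static int example_trace_capability_check(const struct cred *cred,
					  struct user_namespace *target_ns,
					  const struct user_namespace *capable_ns,
					  int cap, int ret)
{
	/* TP_fast_assign() above records capable_ns only when ret == 0 */
	trace_cap_capable(cred, target_ns, capable_ns, cap, ret);
	return ret;
}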
// SPDX-License-Identifier: GPL-2.0+
/*
 * Driver for USB Mass Storage compliant devices
 *
 * Current development and maintenance by:
 *   (c) 1999-2002 Matthew Dharm (mdharm-usb@one-eyed-alien.net)
 *
 * Developed with the assistance of:
 *   (c) 2000 David L. Brown, Jr. (usb-storage@davidb.org)
 *   (c) 2000 Stephen J. Gowdy (SGowdy@lbl.gov)
 *   (c) 2002 Alan Stern <stern@rowland.org>
 *
 * Initial work by:
 *   (c) 1999 Michael Gee (michael@linuxspecific.com)
 *
 * This driver is based on the 'USB Mass Storage Class' document. This
 * describes in detail the protocol used to communicate with such
 * devices. Clearly, the designers had SCSI and ATAPI commands in
 * mind when they created this document. The commands are all very
 * similar to commands in the SCSI-II and ATAPI specifications.
 *
 * It is important to note that in a number of cases this class
 * exhibits class-specific exemptions from the USB specification.
* Notably the usage of NAK, STALL and ACK differs from the norm, in * that they are used to communicate wait, failed and OK on commands. * * Also, for certain devices, the interrupt endpoint is used to convey * status of a command. */ #include <linux/sched.h> #include <linux/gfp.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/usb/quirks.h> #include <scsi/scsi.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_device.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "scsiglue.h" #include "debug.h" #include <linux/blkdev.h> #include "../../scsi/sd.h" /*********************************************************************** * Data transfer routines ***********************************************************************/ /* * This is subtle, so pay attention: * --------------------------------- * We're very concerned about races with a command abort. Hanging this code * is a sure fire way to hang the kernel. (Note that this discussion applies * only to transactions resulting from a scsi queued-command, since only * these transactions are subject to a scsi abort. Other transactions, such * as those occurring during device-specific initialization, must be handled * by a separate code path.) * * The abort function (usb_storage_command_abort() in scsiglue.c) first * sets the machine state and the ABORTING bit in us->dflags to prevent * new URBs from being submitted. It then calls usb_stor_stop_transport() * below, which atomically tests-and-clears the URB_ACTIVE bit in us->dflags * to see if the current_urb needs to be stopped. Likewise, the SG_ACTIVE * bit is tested to see if the current_sg scatter-gather request needs to be * stopped. The timeout callback routine does much the same thing. * * When a disconnect occurs, the DISCONNECTING bit in us->dflags is set to * prevent new URBs from being submitted, and usb_stor_stop_transport() is * called to stop any ongoing requests. * * The submit function first verifies that the submitting is allowed * (neither ABORTING nor DISCONNECTING bits are set) and that the submit * completes without errors, and only then sets the URB_ACTIVE bit. This * prevents the stop_transport() function from trying to cancel the URB * while the submit call is underway. Next, the submit function must test * the flags to see if an abort or disconnect occurred during the submission * or before the URB_ACTIVE bit was set. If so, it's essential to cancel * the URB if it hasn't been cancelled already (i.e., if the URB_ACTIVE bit * is still set). Either way, the function must then wait for the URB to * finish. Note that the URB can still be in progress even after a call to * usb_unlink_urb() returns. * * The idea is that (1) once the ABORTING or DISCONNECTING bit is set, * either the stop_transport() function or the submitting function * is guaranteed to call usb_unlink_urb() for an active URB, * and (2) test_and_clear_bit() prevents usb_unlink_urb() from being * called more than once or from being called during usb_submit_urb(). */ /* * This is the completion handler which will wake us up when an URB * completes. */ static void usb_stor_blocking_completion(struct urb *urb) { struct completion *urb_done_ptr = urb->context; complete(urb_done_ptr); } /* * This is the common part of the URB message submission code * * All URBs from the usb-storage driver involved in handling a queued scsi * command _must_ pass through this function (or something like it) for the * abort mechanisms to work properly. 
*/ static int usb_stor_msg_common(struct us_data *us, int timeout) { struct completion urb_done; long timeleft; int status; /* don't submit URBs during abort processing */ if (test_bit(US_FLIDX_ABORTING, &us->dflags)) return -EIO; /* set up data structures for the wakeup system */ init_completion(&urb_done); /* fill the common fields in the URB */ us->current_urb->context = &urb_done; us->current_urb->transfer_flags = 0; /* * we assume that if transfer_buffer isn't us->iobuf then it * hasn't been mapped for DMA. Yes, this is clunky, but it's * easier than always having the caller tell us whether the * transfer buffer has already been mapped. */ if (us->current_urb->transfer_buffer == us->iobuf) us->current_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; us->current_urb->transfer_dma = us->iobuf_dma; /* submit the URB */ status = usb_submit_urb(us->current_urb, GFP_NOIO); if (status) { /* something went wrong */ return status; } /* * since the URB has been submitted successfully, it's now okay * to cancel it */ set_bit(US_FLIDX_URB_ACTIVE, &us->dflags); /* did an abort occur during the submission? */ if (test_bit(US_FLIDX_ABORTING, &us->dflags)) { /* cancel the URB, if it hasn't been cancelled already */ if (test_and_clear_bit(US_FLIDX_URB_ACTIVE, &us->dflags)) { usb_stor_dbg(us, "-- cancelling URB\n"); usb_unlink_urb(us->current_urb); } } /* wait for the completion of the URB */ timeleft = wait_for_completion_interruptible_timeout( &urb_done, timeout ? : MAX_SCHEDULE_TIMEOUT); clear_bit(US_FLIDX_URB_ACTIVE, &us->dflags); if (timeleft <= 0) { usb_stor_dbg(us, "%s -- cancelling URB\n", timeleft == 0 ? "Timeout" : "Signal"); usb_kill_urb(us->current_urb); } /* return the URB status */ return us->current_urb->status; } /* * Transfer one control message, with timeouts, and allowing early * termination. Return codes are usual -Exxx, *not* USB_STOR_XFER_xxx. */ int usb_stor_control_msg(struct us_data *us, unsigned int pipe, u8 request, u8 requesttype, u16 value, u16 index, void *data, u16 size, int timeout) { int status; usb_stor_dbg(us, "rq=%02x rqtype=%02x value=%04x index=%02x len=%u\n", request, requesttype, value, index, size); /* fill in the devrequest structure */ us->cr->bRequestType = requesttype; us->cr->bRequest = request; us->cr->wValue = cpu_to_le16(value); us->cr->wIndex = cpu_to_le16(index); us->cr->wLength = cpu_to_le16(size); /* fill and submit the URB */ usb_fill_control_urb(us->current_urb, us->pusb_dev, pipe, (unsigned char*) us->cr, data, size, usb_stor_blocking_completion, NULL); status = usb_stor_msg_common(us, timeout); /* return the actual length of the data transferred if no error */ if (status == 0) status = us->current_urb->actual_length; return status; } EXPORT_SYMBOL_GPL(usb_stor_control_msg); /* * This is a version of usb_clear_halt() that allows early termination and * doesn't read the status from the device -- this is because some devices * crash their internal firmware when the status is requested after a halt. * * A definitive list of these 'bad' devices is too difficult to maintain or * make complete enough to be useful. This problem was first observed on the * Hagiwara FlashGate DUAL unit. However, bus traces reveal that neither * MacOS nor Windows checks the status after clearing a halt. * * Since many vendors in this space limit their testing to interoperability * with these two OSes, specification violations like this one are common. 
*/ int usb_stor_clear_halt(struct us_data *us, unsigned int pipe) { int result; int endp = usb_pipeendpoint(pipe); if (usb_pipein (pipe)) endp |= USB_DIR_IN; result = usb_stor_control_msg(us, us->send_ctrl_pipe, USB_REQ_CLEAR_FEATURE, USB_RECIP_ENDPOINT, USB_ENDPOINT_HALT, endp, NULL, 0, 3*HZ); if (result >= 0) usb_reset_endpoint(us->pusb_dev, endp); usb_stor_dbg(us, "result = %d\n", result); return result; } EXPORT_SYMBOL_GPL(usb_stor_clear_halt); /* * Interpret the results of a URB transfer * * This function prints appropriate debugging messages, clears halts on * non-control endpoints, and translates the status to the corresponding * USB_STOR_XFER_xxx return code. */ static int interpret_urb_result(struct us_data *us, unsigned int pipe, unsigned int length, int result, unsigned int partial) { usb_stor_dbg(us, "Status code %d; transferred %u/%u\n", result, partial, length); switch (result) { /* no error code; did we send all the data? */ case 0: if (partial != length) { usb_stor_dbg(us, "-- short transfer\n"); return USB_STOR_XFER_SHORT; } usb_stor_dbg(us, "-- transfer complete\n"); return USB_STOR_XFER_GOOD; /* stalled */ case -EPIPE: /* * for control endpoints, (used by CB[I]) a stall indicates * a failed command */ if (usb_pipecontrol(pipe)) { usb_stor_dbg(us, "-- stall on control pipe\n"); return USB_STOR_XFER_STALLED; } /* for other sorts of endpoint, clear the stall */ usb_stor_dbg(us, "clearing endpoint halt for pipe 0x%x\n", pipe); if (usb_stor_clear_halt(us, pipe) < 0) return USB_STOR_XFER_ERROR; return USB_STOR_XFER_STALLED; /* babble - the device tried to send more than we wanted to read */ case -EOVERFLOW: usb_stor_dbg(us, "-- babble\n"); return USB_STOR_XFER_LONG; /* the transfer was cancelled by abort, disconnect, or timeout */ case -ECONNRESET: usb_stor_dbg(us, "-- transfer cancelled\n"); return USB_STOR_XFER_ERROR; /* short scatter-gather read transfer */ case -EREMOTEIO: usb_stor_dbg(us, "-- short read transfer\n"); return USB_STOR_XFER_SHORT; /* abort or disconnect in progress */ case -EIO: usb_stor_dbg(us, "-- abort or disconnect in progress\n"); return USB_STOR_XFER_ERROR; /* the catch-all error case */ default: usb_stor_dbg(us, "-- unknown error\n"); return USB_STOR_XFER_ERROR; } } /* * Transfer one control message, without timeouts, but allowing early * termination. Return codes are USB_STOR_XFER_xxx. */ int usb_stor_ctrl_transfer(struct us_data *us, unsigned int pipe, u8 request, u8 requesttype, u16 value, u16 index, void *data, u16 size) { int result; usb_stor_dbg(us, "rq=%02x rqtype=%02x value=%04x index=%02x len=%u\n", request, requesttype, value, index, size); /* fill in the devrequest structure */ us->cr->bRequestType = requesttype; us->cr->bRequest = request; us->cr->wValue = cpu_to_le16(value); us->cr->wIndex = cpu_to_le16(index); us->cr->wLength = cpu_to_le16(size); /* fill and submit the URB */ usb_fill_control_urb(us->current_urb, us->pusb_dev, pipe, (unsigned char*) us->cr, data, size, usb_stor_blocking_completion, NULL); result = usb_stor_msg_common(us, 0); return interpret_urb_result(us, pipe, size, result, us->current_urb->actual_length); } EXPORT_SYMBOL_GPL(usb_stor_ctrl_transfer); /* * Receive one interrupt buffer, without timeouts, but allowing early * termination. Return codes are USB_STOR_XFER_xxx. * * This routine always uses us->recv_intr_pipe as the pipe and * us->ep_bInterval as the interrupt interval. 
*/ static int usb_stor_intr_transfer(struct us_data *us, void *buf, unsigned int length) { int result; unsigned int pipe = us->recv_intr_pipe; unsigned int maxp; usb_stor_dbg(us, "xfer %u bytes\n", length); /* calculate the max packet size */ maxp = usb_maxpacket(us->pusb_dev, pipe); if (maxp > length) maxp = length; /* fill and submit the URB */ usb_fill_int_urb(us->current_urb, us->pusb_dev, pipe, buf, maxp, usb_stor_blocking_completion, NULL, us->ep_bInterval); result = usb_stor_msg_common(us, 0); return interpret_urb_result(us, pipe, length, result, us->current_urb->actual_length); } /* * Transfer one buffer via bulk pipe, without timeouts, but allowing early * termination. Return codes are USB_STOR_XFER_xxx. If the bulk pipe * stalls during the transfer, the halt is automatically cleared. */ int usb_stor_bulk_transfer_buf(struct us_data *us, unsigned int pipe, void *buf, unsigned int length, unsigned int *act_len) { int result; usb_stor_dbg(us, "xfer %u bytes\n", length); /* fill and submit the URB */ usb_fill_bulk_urb(us->current_urb, us->pusb_dev, pipe, buf, length, usb_stor_blocking_completion, NULL); result = usb_stor_msg_common(us, 0); /* store the actual length of the data transferred */ if (act_len) *act_len = us->current_urb->actual_length; return interpret_urb_result(us, pipe, length, result, us->current_urb->actual_length); } EXPORT_SYMBOL_GPL(usb_stor_bulk_transfer_buf); /* * Transfer a scatter-gather list via bulk transfer * * This function does basically the same thing as usb_stor_bulk_transfer_buf() * above, but it uses the usbcore scatter-gather library. */ static int usb_stor_bulk_transfer_sglist(struct us_data *us, unsigned int pipe, struct scatterlist *sg, int num_sg, unsigned int length, unsigned int *act_len) { int result; /* don't submit s-g requests during abort processing */ if (test_bit(US_FLIDX_ABORTING, &us->dflags)) goto usb_stor_xfer_error; /* initialize the scatter-gather request block */ usb_stor_dbg(us, "xfer %u bytes, %d entries\n", length, num_sg); result = usb_sg_init(&us->current_sg, us->pusb_dev, pipe, 0, sg, num_sg, length, GFP_NOIO); if (result) { usb_stor_dbg(us, "usb_sg_init returned %d\n", result); goto usb_stor_xfer_error; } /* * since the block has been initialized successfully, it's now * okay to cancel it */ set_bit(US_FLIDX_SG_ACTIVE, &us->dflags); /* did an abort occur during the submission? */ if (test_bit(US_FLIDX_ABORTING, &us->dflags)) { /* cancel the request, if it hasn't been cancelled already */ if (test_and_clear_bit(US_FLIDX_SG_ACTIVE, &us->dflags)) { usb_stor_dbg(us, "-- cancelling sg request\n"); usb_sg_cancel(&us->current_sg); } } /* wait for the completion of the transfer */ usb_sg_wait(&us->current_sg); clear_bit(US_FLIDX_SG_ACTIVE, &us->dflags); result = us->current_sg.status; if (act_len) *act_len = us->current_sg.bytes; return interpret_urb_result(us, pipe, length, result, us->current_sg.bytes); usb_stor_xfer_error: if (act_len) *act_len = 0; return USB_STOR_XFER_ERROR; } /* * Common used function. Transfer a complete command * via usb_stor_bulk_transfer_sglist() above. Set cmnd resid */ int usb_stor_bulk_srb(struct us_data* us, unsigned int pipe, struct scsi_cmnd* srb) { unsigned int partial; int result = usb_stor_bulk_transfer_sglist(us, pipe, scsi_sglist(srb), scsi_sg_count(srb), scsi_bufflen(srb), &partial); scsi_set_resid(srb, scsi_bufflen(srb) - partial); return result; } EXPORT_SYMBOL_GPL(usb_stor_bulk_srb); /* * Transfer an entire SCSI command's worth of data payload over the bulk * pipe. 
* * Note that this uses usb_stor_bulk_transfer_buf() and * usb_stor_bulk_transfer_sglist() to achieve its goals -- * this function simply determines whether we're going to use * scatter-gather or not, and acts appropriately. */ int usb_stor_bulk_transfer_sg(struct us_data* us, unsigned int pipe, void *buf, unsigned int length_left, int use_sg, int *residual) { int result; unsigned int partial; /* are we scatter-gathering? */ if (use_sg) { /* use the usb core scatter-gather primitives */ result = usb_stor_bulk_transfer_sglist(us, pipe, (struct scatterlist *) buf, use_sg, length_left, &partial); length_left -= partial; } else { /* no scatter-gather, just make the request */ result = usb_stor_bulk_transfer_buf(us, pipe, buf, length_left, &partial); length_left -= partial; } /* store the residual and return the error code */ if (residual) *residual = length_left; return result; } EXPORT_SYMBOL_GPL(usb_stor_bulk_transfer_sg); /*********************************************************************** * Transport routines ***********************************************************************/ /* * There are so many devices that report the capacity incorrectly, * this routine was written to counteract some of the resulting * problems. */ static void last_sector_hacks(struct us_data *us, struct scsi_cmnd *srb) { struct gendisk *disk; struct scsi_disk *sdkp; u32 sector; /* To Report "Medium Error: Record Not Found */ static const unsigned char record_not_found[18] = { [0] = 0x70, /* current error */ [2] = MEDIUM_ERROR, /* = 0x03 */ [7] = 0x0a, /* additional length */ [12] = 0x14 /* Record Not Found */ }; /* * If last-sector problems can't occur, whether because the * capacity was already decremented or because the device is * known to report the correct capacity, then we don't need * to do anything. */ if (!us->use_last_sector_hacks) return; /* Was this command a READ(10) or a WRITE(10)? */ if (srb->cmnd[0] != READ_10 && srb->cmnd[0] != WRITE_10) goto done; /* Did this command access the last sector? */ sector = (srb->cmnd[2] << 24) | (srb->cmnd[3] << 16) | (srb->cmnd[4] << 8) | (srb->cmnd[5]); disk = scsi_cmd_to_rq(srb)->q->disk; if (!disk) goto done; sdkp = scsi_disk(disk); if (!sdkp) goto done; if (sector + 1 != sdkp->capacity) goto done; if (srb->result == SAM_STAT_GOOD && scsi_get_resid(srb) == 0) { /* * The command succeeded. We know this device doesn't * have the last-sector bug, so stop checking it. */ us->use_last_sector_hacks = 0; } else { /* * The command failed. Allow up to 3 retries in case this * is some normal sort of failure. After that, assume the * capacity is wrong and we're trying to access the sector * beyond the end. Replace the result code and sense data * with values that will cause the SCSI core to fail the * command immediately, instead of going into an infinite * (or even just a very long) retry loop. */ if (++us->last_sector_retries < 3) return; srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, record_not_found, sizeof(record_not_found)); } done: /* * Don't reset the retry counter for TEST UNIT READY commands, * because they get issued after device resets which might be * caused by a failed last-sector access. */ if (srb->cmnd[0] != TEST_UNIT_READY) us->last_sector_retries = 0; } /* * Invoke the transport and basic error-handling/recovery methods * * This is used by the protocol layers to actually send the message to * the device and receive the response. 
*/ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) { int need_auto_sense; int result; /* send the command to the transport layer */ scsi_set_resid(srb, 0); result = us->transport(srb, us); /* * if the command gets aborted by the higher layers, we need to * short-circuit all other processing */ if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { usb_stor_dbg(us, "-- command was aborted\n"); srb->result = DID_ABORT << 16; goto Handle_Errors; } /* if there is a transport error, reset and don't auto-sense */ if (result == USB_STOR_TRANSPORT_ERROR) { usb_stor_dbg(us, "-- transport indicates error, resetting\n"); srb->result = DID_ERROR << 16; goto Handle_Errors; } /* if the transport provided its own sense data, don't auto-sense */ if (result == USB_STOR_TRANSPORT_NO_SENSE) { srb->result = SAM_STAT_CHECK_CONDITION; last_sector_hacks(us, srb); return; } srb->result = SAM_STAT_GOOD; /* * Determine if we need to auto-sense * * I normally don't use a flag like this, but it's almost impossible * to understand what's going on here if I don't. */ need_auto_sense = 0; /* * If we're running the CB transport, which is incapable * of determining status on its own, we will auto-sense * unless the operation involved a data-in transfer. Devices * can signal most data-in errors by stalling the bulk-in pipe. */ if ((us->protocol == USB_PR_CB || us->protocol == USB_PR_DPCM_USB) && srb->sc_data_direction != DMA_FROM_DEVICE) { usb_stor_dbg(us, "-- CB transport device requiring auto-sense\n"); need_auto_sense = 1; } /* Some devices (Kindle) require another command after SYNC CACHE */ if ((us->fflags & US_FL_SENSE_AFTER_SYNC) && srb->cmnd[0] == SYNCHRONIZE_CACHE) { usb_stor_dbg(us, "-- sense after SYNC CACHE\n"); need_auto_sense = 1; } /* * If we have a failure, we're going to do a REQUEST_SENSE * automatically. Note that we differentiate between a command * "failure" and an "error" in the transport mechanism. */ if (result == USB_STOR_TRANSPORT_FAILED) { usb_stor_dbg(us, "-- transport indicates command failure\n"); need_auto_sense = 1; } /* * Determine if this device is SAT by seeing if the * command executed successfully. Otherwise we'll have * to wait for at least one CHECK_CONDITION to determine * SANE_SENSE support */ if (unlikely((srb->cmnd[0] == ATA_16 || srb->cmnd[0] == ATA_12) && result == USB_STOR_TRANSPORT_GOOD && !(us->fflags & US_FL_SANE_SENSE) && !(us->fflags & US_FL_BAD_SENSE) && !(srb->cmnd[2] & 0x20))) { usb_stor_dbg(us, "-- SAT supported, increasing auto-sense\n"); us->fflags |= US_FL_SANE_SENSE; } /* * A short transfer on a command where we don't expect it * is unusual, but it doesn't mean we need to auto-sense. 
*/ if ((scsi_get_resid(srb) > 0) && !((srb->cmnd[0] == REQUEST_SENSE) || (srb->cmnd[0] == INQUIRY) || (srb->cmnd[0] == MODE_SENSE) || (srb->cmnd[0] == LOG_SENSE) || (srb->cmnd[0] == MODE_SENSE_10))) { usb_stor_dbg(us, "-- unexpectedly short transfer\n"); } /* Now, if we need to do the auto-sense, let's do it */ if (need_auto_sense) { int temp_result; struct scsi_eh_save ses; int sense_size = US_SENSE_SIZE; struct scsi_sense_hdr sshdr; const u8 *scdd; u8 fm_ili; /* device supports and needs bigger sense buffer */ if (us->fflags & US_FL_SANE_SENSE) sense_size = ~0; Retry_Sense: usb_stor_dbg(us, "Issuing auto-REQUEST_SENSE\n"); scsi_eh_prep_cmnd(srb, &ses, NULL, 0, sense_size); /* FIXME: we must do the protocol translation here */ if (us->subclass == USB_SC_RBC || us->subclass == USB_SC_SCSI || us->subclass == USB_SC_CYP_ATACB) srb->cmd_len = 6; else srb->cmd_len = 12; /* issue the auto-sense command */ scsi_set_resid(srb, 0); temp_result = us->transport(us->srb, us); /* let's clean up right away */ scsi_eh_restore_cmnd(srb, &ses); if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { usb_stor_dbg(us, "-- auto-sense aborted\n"); srb->result = DID_ABORT << 16; /* If SANE_SENSE caused this problem, disable it */ if (sense_size != US_SENSE_SIZE) { us->fflags &= ~US_FL_SANE_SENSE; us->fflags |= US_FL_BAD_SENSE; } goto Handle_Errors; } /* * Some devices claim to support larger sense but fail when * trying to request it. When a transport failure happens * using US_FS_SANE_SENSE, we always retry with a standard * (small) sense request. This fixes some USB GSM modems */ if (temp_result == USB_STOR_TRANSPORT_FAILED && sense_size != US_SENSE_SIZE) { usb_stor_dbg(us, "-- auto-sense failure, retry small sense\n"); sense_size = US_SENSE_SIZE; us->fflags &= ~US_FL_SANE_SENSE; us->fflags |= US_FL_BAD_SENSE; goto Retry_Sense; } /* Other failures */ if (temp_result != USB_STOR_TRANSPORT_GOOD) { usb_stor_dbg(us, "-- auto-sense failure\n"); /* * we skip the reset if this happens to be a * multi-target device, since failure of an * auto-sense is perfectly valid */ srb->result = DID_ERROR << 16; if (!(us->fflags & US_FL_SCM_MULT_TARG)) goto Handle_Errors; return; } /* * If the sense data returned is larger than 18-bytes then we * assume this device supports requesting more in the future. * The response code must be 70h through 73h inclusive. */ if (srb->sense_buffer[7] > (US_SENSE_SIZE - 8) && !(us->fflags & US_FL_SANE_SENSE) && !(us->fflags & US_FL_BAD_SENSE) && (srb->sense_buffer[0] & 0x7C) == 0x70) { usb_stor_dbg(us, "-- SANE_SENSE support enabled\n"); us->fflags |= US_FL_SANE_SENSE; /* * Indicate to the user that we truncated their sense * because we didn't know it supported larger sense. */ usb_stor_dbg(us, "-- Sense data truncated to %i from %i\n", US_SENSE_SIZE, srb->sense_buffer[7] + 8); srb->sense_buffer[7] = (US_SENSE_SIZE - 8); } scsi_normalize_sense(srb->sense_buffer, SCSI_SENSE_BUFFERSIZE, &sshdr); usb_stor_dbg(us, "-- Result from auto-sense is %d\n", temp_result); usb_stor_dbg(us, "-- code: 0x%x, key: 0x%x, ASC: 0x%x, ASCQ: 0x%x\n", sshdr.response_code, sshdr.sense_key, sshdr.asc, sshdr.ascq); #ifdef CONFIG_USB_STORAGE_DEBUG usb_stor_show_sense(us, sshdr.sense_key, sshdr.asc, sshdr.ascq); #endif /* set the result so the higher layers expect this data */ srb->result = SAM_STAT_CHECK_CONDITION; scdd = scsi_sense_desc_find(srb->sense_buffer, SCSI_SENSE_BUFFERSIZE, 4); fm_ili = (scdd ? scdd[3] : srb->sense_buffer[2]) & 0xA0; /* * We often get empty sense data. 
This could indicate that * everything worked or that there was an unspecified * problem. We have to decide which. */ if (sshdr.sense_key == 0 && sshdr.asc == 0 && sshdr.ascq == 0 && fm_ili == 0) { /* * If things are really okay, then let's show that. * Zero out the sense buffer so the higher layers * won't realize we did an unsolicited auto-sense. */ if (result == USB_STOR_TRANSPORT_GOOD) { srb->result = SAM_STAT_GOOD; srb->sense_buffer[0] = 0x0; } /* * ATA-passthru commands use sense data to report * the command completion status, and often devices * return Check Condition status when nothing is * wrong. */ else if (srb->cmnd[0] == ATA_16 || srb->cmnd[0] == ATA_12) { /* leave the data alone */ } /* * If there was a problem, report an unspecified * hardware error to prevent the higher layers from * entering an infinite retry loop. */ else { srb->result = DID_ERROR << 16; if ((sshdr.response_code & 0x72) == 0x72) srb->sense_buffer[1] = HARDWARE_ERROR; else srb->sense_buffer[2] = HARDWARE_ERROR; } } } /* * Some devices don't work or return incorrect data the first * time they get a READ(10) command, or for the first READ(10) * after a media change. If the INITIAL_READ10 flag is set, * keep track of whether READ(10) commands succeed. If the * previous one succeeded and this one failed, set the REDO_READ10 * flag to force a retry. */ if (unlikely((us->fflags & US_FL_INITIAL_READ10) && srb->cmnd[0] == READ_10)) { if (srb->result == SAM_STAT_GOOD) { set_bit(US_FLIDX_READ10_WORKED, &us->dflags); } else if (test_bit(US_FLIDX_READ10_WORKED, &us->dflags)) { clear_bit(US_FLIDX_READ10_WORKED, &us->dflags); set_bit(US_FLIDX_REDO_READ10, &us->dflags); } /* * Next, if the REDO_READ10 flag is set, return a result * code that will cause the SCSI core to retry the READ(10) * command immediately. */ if (test_bit(US_FLIDX_REDO_READ10, &us->dflags)) { clear_bit(US_FLIDX_REDO_READ10, &us->dflags); srb->result = DID_IMM_RETRY << 16; srb->sense_buffer[0] = 0; } } /* Did we transfer less than the minimum amount required? */ if ((srb->result == SAM_STAT_GOOD || srb->sense_buffer[2] == 0) && scsi_bufflen(srb) - scsi_get_resid(srb) < srb->underflow) srb->result = DID_ERROR << 16; last_sector_hacks(us, srb); return; /* * Error and abort processing: try to resynchronize with the device * by issuing a port reset. If that fails, try a class-specific * device reset. */ Handle_Errors: /* * Set the RESETTING bit, and clear the ABORTING bit so that * the reset may proceed. */ scsi_lock(us_to_host(us)); set_bit(US_FLIDX_RESETTING, &us->dflags); clear_bit(US_FLIDX_ABORTING, &us->dflags); scsi_unlock(us_to_host(us)); /* * We must release the device lock because the pre_reset routine * will want to acquire it. */ mutex_unlock(&us->dev_mutex); result = usb_stor_port_reset(us); mutex_lock(&us->dev_mutex); if (result < 0) { scsi_lock(us_to_host(us)); usb_stor_report_device_reset(us); scsi_unlock(us_to_host(us)); us->transport_reset(us); } clear_bit(US_FLIDX_RESETTING, &us->dflags); last_sector_hacks(us, srb); } /* Stop the current URB transfer */ void usb_stor_stop_transport(struct us_data *us) { /* * If the state machine is blocked waiting for an URB, * let's wake it up. The test_and_clear_bit() call * guarantees that if a URB has just been submitted, * it won't be cancelled more than once. */ if (test_and_clear_bit(US_FLIDX_URB_ACTIVE, &us->dflags)) { usb_stor_dbg(us, "-- cancelling URB\n"); usb_unlink_urb(us->current_urb); } /* If we are waiting for a scatter-gather operation, cancel it. 
*/ if (test_and_clear_bit(US_FLIDX_SG_ACTIVE, &us->dflags)) { usb_stor_dbg(us, "-- cancelling sg request\n"); usb_sg_cancel(&us->current_sg); } } /* * Control/Bulk and Control/Bulk/Interrupt transport */ int usb_stor_CB_transport(struct scsi_cmnd *srb, struct us_data *us) { unsigned int transfer_length = scsi_bufflen(srb); unsigned int pipe = 0; int result; /* COMMAND STAGE */ /* let's send the command via the control pipe */ /* * Command is sometime (f.e. after scsi_eh_prep_cmnd) on the stack. * Stack may be vmallocated. So no DMA for us. Make a copy. */ memcpy(us->iobuf, srb->cmnd, srb->cmd_len); result = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, US_CBI_ADSC, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, us->ifnum, us->iobuf, srb->cmd_len); /* check the return code for the command */ usb_stor_dbg(us, "Call to usb_stor_ctrl_transfer() returned %d\n", result); /* if we stalled the command, it means command failed */ if (result == USB_STOR_XFER_STALLED) { return USB_STOR_TRANSPORT_FAILED; } /* Uh oh... serious problem here */ if (result != USB_STOR_XFER_GOOD) { return USB_STOR_TRANSPORT_ERROR; } /* DATA STAGE */ /* transfer the data payload for this command, if one exists*/ if (transfer_length) { pipe = srb->sc_data_direction == DMA_FROM_DEVICE ? us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_srb(us, pipe, srb); usb_stor_dbg(us, "CBI data stage result is 0x%x\n", result); /* if we stalled the data transfer it means command failed */ if (result == USB_STOR_XFER_STALLED) return USB_STOR_TRANSPORT_FAILED; if (result > USB_STOR_XFER_STALLED) return USB_STOR_TRANSPORT_ERROR; } /* STATUS STAGE */ /* * NOTE: CB does not have a status stage. Silly, I know. So * we have to catch this at a higher level. */ if (us->protocol != USB_PR_CBI) return USB_STOR_TRANSPORT_GOOD; result = usb_stor_intr_transfer(us, us->iobuf, 2); usb_stor_dbg(us, "Got interrupt data (0x%x, 0x%x)\n", us->iobuf[0], us->iobuf[1]); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* * UFI gives us ASC and ASCQ, like a request sense * * REQUEST_SENSE and INQUIRY don't affect the sense data on UFI * devices, so we ignore the information for those commands. Note * that this means we could be ignoring a real error on these * commands, but that can't be helped. */ if (us->subclass == USB_SC_UFI) { if (srb->cmnd[0] == REQUEST_SENSE || srb->cmnd[0] == INQUIRY) return USB_STOR_TRANSPORT_GOOD; if (us->iobuf[0]) goto Failed; return USB_STOR_TRANSPORT_GOOD; } /* * If not UFI, we interpret the data as a result code * The first byte should always be a 0x0. * * Some bogus devices don't follow that rule. They stuff the ASC * into the first byte -- so if it's non-zero, call it a failure. 
*/ if (us->iobuf[0]) { usb_stor_dbg(us, "CBI IRQ data showed reserved bType 0x%x\n", us->iobuf[0]); goto Failed; } /* The second byte & 0x0F should be 0x0 for good, otherwise error */ switch (us->iobuf[1] & 0x0F) { case 0x00: return USB_STOR_TRANSPORT_GOOD; case 0x01: goto Failed; } return USB_STOR_TRANSPORT_ERROR; /* * the CBI spec requires that the bulk pipe must be cleared * following any data-in/out command failure (section 2.4.3.1.3) */ Failed: if (pipe) usb_stor_clear_halt(us, pipe); return USB_STOR_TRANSPORT_FAILED; } EXPORT_SYMBOL_GPL(usb_stor_CB_transport); /* * Bulk only transport */ /* Determine what the maximum LUN supported is */ int usb_stor_Bulk_max_lun(struct us_data *us) { int result; /* issue the command */ us->iobuf[0] = 0; result = usb_stor_control_msg(us, us->recv_ctrl_pipe, US_BULK_GET_MAX_LUN, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, us->ifnum, us->iobuf, 1, 10*HZ); usb_stor_dbg(us, "GetMaxLUN command result is %d, data is %d\n", result, us->iobuf[0]); /* If we have a successful request, return the result if valid. */ if (result > 0) { if (us->iobuf[0] <= US_BULK_MAX_LUN_LIMIT) { return us->iobuf[0]; } else { dev_info(&us->pusb_intf->dev, "Max LUN %d is not valid, using 0 instead", us->iobuf[0]); } } /* * Some devices don't like GetMaxLUN. They may STALL the control * pipe, they may return a zero-length result, they may do nothing at * all and timeout, or they may fail in even more bizarrely creative * ways. In these cases the best approach is to use the default * value: only one LUN. */ return 0; } int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *) us->iobuf; unsigned int transfer_length = scsi_bufflen(srb); unsigned int residue; int result; int fake_sense = 0; unsigned int cswlen; unsigned int cbwlen = US_BULK_CB_WRAP_LEN; /* Take care of BULK32 devices; set extra byte to 0 */ if (unlikely(us->fflags & US_FL_BULK32)) { cbwlen = 32; us->iobuf[31] = 0; } /* set up the command wrapper */ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->DataTransferLength = cpu_to_le32(transfer_length); bcb->Flags = srb->sc_data_direction == DMA_FROM_DEVICE ? US_BULK_FLAG_IN : US_BULK_FLAG_OUT; bcb->Tag = ++us->tag; bcb->Lun = srb->device->lun; if (us->fflags & US_FL_SCM_MULT_TARG) bcb->Lun |= srb->device->id << 4; bcb->Length = srb->cmd_len; /* copy the command payload */ memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, srb->cmnd, bcb->Length); /* send it to out endpoint */ usb_stor_dbg(us, "Bulk Command S 0x%x T 0x%x L %d F %d Trg %d LUN %d CL %d\n", le32_to_cpu(bcb->Signature), bcb->Tag, le32_to_cpu(bcb->DataTransferLength), bcb->Flags, (bcb->Lun >> 4), (bcb->Lun & 0x0F), bcb->Length); result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, cbwlen, NULL); usb_stor_dbg(us, "Bulk command transfer result=%d\n", result); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* DATA STAGE */ /* send/receive data payload, if there is any */ /* * Some USB-IDE converter chips need a 100us delay between the * command phase and the data phase. Some devices need a little * more than that, probably because of clock rate inaccuracies. */ if (unlikely(us->fflags & US_FL_GO_SLOW)) usleep_range(125, 150); if (transfer_length) { unsigned int pipe = srb->sc_data_direction == DMA_FROM_DEVICE ? 
us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_srb(us, pipe, srb); usb_stor_dbg(us, "Bulk data transfer result 0x%x\n", result); if (result == USB_STOR_XFER_ERROR) return USB_STOR_TRANSPORT_ERROR; /* * If the device tried to send back more data than the * amount requested, the spec requires us to transfer * the CSW anyway. Since there's no point retrying * the command, we'll return fake sense data indicating * Illegal Request, Invalid Field in CDB. */ if (result == USB_STOR_XFER_LONG) fake_sense = 1; /* * Sometimes a device will mistakenly skip the data phase * and go directly to the status phase without sending a * zero-length packet. If we get a 13-byte response here, * check whether it really is a CSW. */ if (result == USB_STOR_XFER_SHORT && srb->sc_data_direction == DMA_FROM_DEVICE && transfer_length - scsi_get_resid(srb) == US_BULK_CS_WRAP_LEN) { struct scatterlist *sg = NULL; unsigned int offset = 0; if (usb_stor_access_xfer_buf((unsigned char *) bcs, US_BULK_CS_WRAP_LEN, srb, &sg, &offset, FROM_XFER_BUF) == US_BULK_CS_WRAP_LEN && bcs->Signature == cpu_to_le32(US_BULK_CS_SIGN)) { usb_stor_dbg(us, "Device skipped data phase\n"); scsi_set_resid(srb, transfer_length); goto skipped_data_phase; } } } /* * See flow chart on pg 15 of the Bulk Only Transport spec for * an explanation of how this code works. */ /* get CSW for device status */ usb_stor_dbg(us, "Attempting to get CSW...\n"); result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen); /* * Some broken devices add unnecessary zero-length packets to the * end of their data transfers. Such packets show up as 0-length * CSWs. If we encounter such a thing, try to read the CSW again. */ if (result == USB_STOR_XFER_SHORT && cswlen == 0) { usb_stor_dbg(us, "Received 0-length CSW; retrying...\n"); result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen); } /* did the attempt to read the CSW fail? */ if (result == USB_STOR_XFER_STALLED) { /* get the status again */ usb_stor_dbg(us, "Attempting to get CSW (2nd try)...\n"); result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, NULL); } /* if we still have a failure at this point, we're in trouble */ usb_stor_dbg(us, "Bulk status result = %d\n", result); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; skipped_data_phase: /* check bulk status */ residue = le32_to_cpu(bcs->Residue); usb_stor_dbg(us, "Bulk Status S 0x%x T 0x%x R %u Stat 0x%x\n", le32_to_cpu(bcs->Signature), bcs->Tag, residue, bcs->Status); if (!(bcs->Tag == us->tag || (us->fflags & US_FL_BULK_IGNORE_TAG)) || bcs->Status > US_BULK_STAT_PHASE) { usb_stor_dbg(us, "Bulk logical error\n"); return USB_STOR_TRANSPORT_ERROR; } /* * Some broken devices report odd signatures, so we do not check them * for validity against the spec. We store the first one we see, * and check subsequent transfers for validity against this signature. 
*/ if (!us->bcs_signature) { us->bcs_signature = bcs->Signature; if (us->bcs_signature != cpu_to_le32(US_BULK_CS_SIGN)) usb_stor_dbg(us, "Learnt BCS signature 0x%08X\n", le32_to_cpu(us->bcs_signature)); } else if (bcs->Signature != us->bcs_signature) { usb_stor_dbg(us, "Signature mismatch: got %08X, expecting %08X\n", le32_to_cpu(bcs->Signature), le32_to_cpu(us->bcs_signature)); return USB_STOR_TRANSPORT_ERROR; } /* * try to compute the actual residue, based on how much data * was really transferred and what the device tells us */ if (residue && !(us->fflags & US_FL_IGNORE_RESIDUE)) { /* * Heuristically detect devices that generate bogus residues * by seeing what happens with INQUIRY and READ CAPACITY * commands. */ if (bcs->Status == US_BULK_STAT_OK && scsi_get_resid(srb) == 0 && ((srb->cmnd[0] == INQUIRY && transfer_length == 36) || (srb->cmnd[0] == READ_CAPACITY && transfer_length == 8))) { us->fflags |= US_FL_IGNORE_RESIDUE; } else { residue = min(residue, transfer_length); scsi_set_resid(srb, max(scsi_get_resid(srb), residue)); } } /* based on the status code, we report good or bad */ switch (bcs->Status) { case US_BULK_STAT_OK: /* device babbled -- return fake sense data */ if (fake_sense) { memcpy(srb->sense_buffer, usb_stor_sense_invalidCDB, sizeof(usb_stor_sense_invalidCDB)); return USB_STOR_TRANSPORT_NO_SENSE; } /* command good -- note that data could be short */ return USB_STOR_TRANSPORT_GOOD; case US_BULK_STAT_FAIL: /* command failed */ return USB_STOR_TRANSPORT_FAILED; case US_BULK_STAT_PHASE: /* * phase error -- note that a transport reset will be * invoked by the invoke_transport() function */ return USB_STOR_TRANSPORT_ERROR; } /* we should never get here, but if we do, we're in trouble */ return USB_STOR_TRANSPORT_ERROR; } EXPORT_SYMBOL_GPL(usb_stor_Bulk_transport); /*********************************************************************** * Reset routines ***********************************************************************/ /* * This is the common part of the device reset code. * * It's handy that every transport mechanism uses the control endpoint for * resets. * * Basically, we send a reset with a 5-second timeout, so we don't get * jammed attempting to do the reset. */ static int usb_stor_reset_common(struct us_data *us, u8 request, u8 requesttype, u16 value, u16 index, void *data, u16 size) { int result; int result2; if (test_bit(US_FLIDX_DISCONNECTING, &us->dflags)) { usb_stor_dbg(us, "No reset during disconnect\n"); return -EIO; } result = usb_stor_control_msg(us, us->send_ctrl_pipe, request, requesttype, value, index, data, size, 5*HZ); if (result < 0) { usb_stor_dbg(us, "Soft reset failed: %d\n", result); return result; } /* * Give the device some time to recover from the reset, * but don't delay disconnect processing. 
*/ wait_event_interruptible_timeout(us->delay_wait, test_bit(US_FLIDX_DISCONNECTING, &us->dflags), HZ*6); if (test_bit(US_FLIDX_DISCONNECTING, &us->dflags)) { usb_stor_dbg(us, "Reset interrupted by disconnect\n"); return -EIO; } usb_stor_dbg(us, "Soft reset: clearing bulk-in endpoint halt\n"); result = usb_stor_clear_halt(us, us->recv_bulk_pipe); usb_stor_dbg(us, "Soft reset: clearing bulk-out endpoint halt\n"); result2 = usb_stor_clear_halt(us, us->send_bulk_pipe); /* return a result code based on the result of the clear-halts */ if (result >= 0) result = result2; if (result < 0) usb_stor_dbg(us, "Soft reset failed\n"); else usb_stor_dbg(us, "Soft reset done\n"); return result; } /* This issues a CB[I] Reset to the device in question */ #define CB_RESET_CMD_SIZE 12 int usb_stor_CB_reset(struct us_data *us) { memset(us->iobuf, 0xFF, CB_RESET_CMD_SIZE); us->iobuf[0] = SEND_DIAGNOSTIC; us->iobuf[1] = 4; return usb_stor_reset_common(us, US_CBI_ADSC, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, us->ifnum, us->iobuf, CB_RESET_CMD_SIZE); } EXPORT_SYMBOL_GPL(usb_stor_CB_reset); /* * This issues a Bulk-only Reset to the device in question, including * clearing the subsequent endpoint halts that may occur. */ int usb_stor_Bulk_reset(struct us_data *us) { return usb_stor_reset_common(us, US_BULK_RESET_REQUEST, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, us->ifnum, NULL, 0); } EXPORT_SYMBOL_GPL(usb_stor_Bulk_reset); /* * Issue a USB port reset to the device. The caller must not hold * us->dev_mutex. */ int usb_stor_port_reset(struct us_data *us) { int result; /*for these devices we must use the class specific method */ if (us->pusb_dev->quirks & USB_QUIRK_RESET) return -EPERM; result = usb_lock_device_for_reset(us->pusb_dev, us->pusb_intf); if (result < 0) usb_stor_dbg(us, "unable to lock device for reset: %d\n", result); else { /* Were we disconnected while waiting for the lock? */ if (test_bit(US_FLIDX_DISCONNECTING, &us->dflags)) { result = -EIO; usb_stor_dbg(us, "No reset during disconnect\n"); } else { result = usb_reset_device(us->pusb_dev); usb_stor_dbg(us, "usb_reset_device returns %d\n", result); } usb_unlock_device(us->pusb_dev); } return result; }
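/*
 * Illustrative user-space sketch (not part of the kernel source): it mirrors
 * the CSW checks performed above in usb_stor_Bulk_transport() -- 13-byte
 * length, "USBS" signature, matching tag, status no greater than the phase
 * error code, and clamping the reported residue to the requested transfer
 * length.  The struct and helper names below are hypothetical; only the wire
 * layout and constants follow the Bulk-Only Transport spec.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <endian.h>

#define CSW_LEN        13
#define CSW_SIGNATURE  0x53425355u	/* "USBS", little-endian on the wire */
#define CSW_STAT_PHASE 2		/* highest valid status code */

struct csw {				/* packed wire layout of a CSW */
	uint32_t signature;
	uint32_t tag;
	uint32_t residue;
	uint8_t  status;
} __attribute__((packed));

/* Returns the CSW status on success, -1 on a "bulk logical error". */
static int check_csw(const uint8_t *buf, size_t len, uint32_t expected_tag,
		     uint32_t transfer_length, uint32_t *residue_out)
{
	struct csw csw;

	if (len != CSW_LEN)
		return -1;
	memcpy(&csw, buf, CSW_LEN);

	if (le32toh(csw.signature) != CSW_SIGNATURE)
		return -1;
	if (le32toh(csw.tag) != expected_tag || csw.status > CSW_STAT_PHASE)
		return -1;

	/* Never report more residue than was requested in the first place. */
	*residue_out = le32toh(csw.residue);
	if (*residue_out > transfer_length)
		*residue_out = transfer_length;

	return csw.status;		/* 0 = good, 1 = failed, 2 = phase error */
}

int main(void)
{
	const uint8_t wire[CSW_LEN] = {
		0x55, 0x53, 0x42, 0x53,	/* "USBS" */
		0x01, 0x00, 0x00, 0x00,	/* tag = 1 */
		0x00, 0x02, 0x00, 0x00,	/* residue = 512 */
		0x00,			/* status = good */
	};
	uint32_t residue;
	int status = check_csw(wire, sizeof(wire), 1, 4096, &residue);

	printf("status=%d residue=%u\n", status, residue);
	return 0;
}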
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
#include <linux/bitfield.h>
#include <xen/xen.h>

#include <asm/fpu/api.h>
#include <asm/fred.h>
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
#include <asm/insn-eval.h>
#include <asm/sgx.h>

static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr)
{
	int reg_offset = pt_regs_offset(regs, nr);
	static unsigned long __dummy;

	if (WARN_ON_ONCE(reg_offset < 0))
		return &__dummy;

	return (unsigned long *)((unsigned long)regs + reg_offset);
}

static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}

static bool ex_handler_default(const struct exception_table_entry *e,
			       struct pt_regs *regs)
{
	if (e->data & EX_FLAG_CLEAR_AX)
		regs->ax = 0;
	if (e->data & EX_FLAG_CLEAR_DX)
		regs->dx = 0;

	regs->ip = ex_fixup_addr(e);
	return true;
}

/*
 * This is the *very* rare case where we do a "load_unaligned_zeropad()"
 * and it's a page crosser into a non-existent page.
 *
 * This happens when we optimistically load a pathname a word-at-a-time
 * and the name is less than the full word and the next page is not
 * mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC.
 *
 * NOTE! The faulting address is always a 'mov mem,reg' type instruction
 * of size 'long', and the exception fixup must always point to right
 * after the instruction.
*/ static bool ex_handler_zeropad(const struct exception_table_entry *e, struct pt_regs *regs, unsigned long fault_addr) { struct insn insn; const unsigned long mask = sizeof(long) - 1; unsigned long offset, addr, next_ip, len; unsigned long *reg; next_ip = ex_fixup_addr(e); len = next_ip - regs->ip; if (len > MAX_INSN_SIZE) return false; if (insn_decode(&insn, (void *) regs->ip, len, INSN_MODE_KERN)) return false; if (insn.length != len) return false; if (insn.opcode.bytes[0] != 0x8b) return false; if (insn.opnd_bytes != sizeof(long)) return false; addr = (unsigned long) insn_get_addr_ref(&insn, regs); if (addr == ~0ul) return false; offset = addr & mask; addr = addr & ~mask; if (fault_addr != addr + sizeof(long)) return false; reg = insn_get_modrm_reg_ptr(&insn, regs); if (!reg) return false; *reg = *(unsigned long *)addr >> (offset * 8); return ex_handler_default(e, regs); } static bool ex_handler_fault(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { regs->ax = trapnr; return ex_handler_default(fixup, regs); } static bool ex_handler_sgx(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { regs->ax = trapnr | SGX_ENCLS_FAULT_FLAG; return ex_handler_default(fixup, regs); } /* * Handler for when we fail to restore a task's FPU state. We should never get * here because the FPU state of a task using the FPU (task->thread.fpu.state) * should always be valid. However, past bugs have allowed userspace to set * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn(). * These caused XRSTOR to fail when switching to the task, leaking the FPU * registers of the task previously executing on the CPU. Mitigate this class * of vulnerability by restoring from the initial state (essentially, zeroing * out all the FPU registers) if we can't restore from the task's FPU state. */ static bool ex_handler_fprestore(const struct exception_table_entry *fixup, struct pt_regs *regs) { regs->ip = ex_fixup_addr(fixup); WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", (void *)instruction_pointer(regs)); fpu_reset_from_exception_fixup(); return true; } /* * On x86-64, we end up being imprecise with 'access_ok()', and allow * non-canonical user addresses to make the range comparisons simpler, * and to not have to worry about LAM being enabled. * * In fact, we allow up to one page of "slop" at the sign boundary, * which means that we can do access_ok() by just checking the sign * of the pointer for the common case of having a small access size. */ static bool gp_fault_address_ok(unsigned long fault_address) { #ifdef CONFIG_X86_64 /* Is it in the "user space" part of the non-canonical space? */ if (valid_user_address(fault_address)) return true; /* .. or just above it? */ fault_address -= PAGE_SIZE; if (valid_user_address(fault_address)) return true; #endif return false; } static bool ex_handler_uaccess(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long fault_address) { WARN_ONCE(trapnr == X86_TRAP_GP && !gp_fault_address_ok(fault_address), "General protection fault in user access. 
Non-canonical address?"); return ex_handler_default(fixup, regs); } static bool ex_handler_msr(const struct exception_table_entry *fixup, struct pt_regs *regs, bool wrmsr, bool safe, int reg) { if (__ONCE_LITE_IF(!safe && wrmsr)) { pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, regs->ip, (void *)regs->ip); show_stack_regs(regs); } if (__ONCE_LITE_IF(!safe && !wrmsr)) { pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip); show_stack_regs(regs); } if (!wrmsr) { /* Pretend that the read succeeded and returned 0. */ regs->ax = 0; regs->dx = 0; } if (safe) *pt_regs_nr(regs, reg) = -EIO; return ex_handler_default(fixup, regs); } static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, struct pt_regs *regs) { if (static_cpu_has(X86_BUG_NULL_SEG)) asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); asm volatile ("mov %0, %%fs" : : "rm" (0)); return ex_handler_default(fixup, regs); } static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, struct pt_regs *regs, int reg, int imm) { *pt_regs_nr(regs, reg) = (long)imm; return ex_handler_default(fixup, regs); } static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long fault_address, int reg, int imm) { regs->cx = imm * regs->cx + *pt_regs_nr(regs, reg); return ex_handler_uaccess(fixup, regs, trapnr, fault_address); } #ifdef CONFIG_X86_FRED static bool ex_handler_eretu(const struct exception_table_entry *fixup, struct pt_regs *regs, unsigned long error_code) { struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, orig_ax)); unsigned short ss = uregs->ss; unsigned short cs = uregs->cs; /* * Move the NMI bit from the invalid stack frame, which caused ERETU * to fault, to the fault handler's stack frame, thus to unblock NMI * with the fault handler's ERETS instruction ASAP if NMI is blocked. */ regs->fred_ss.nmi = uregs->fred_ss.nmi; /* * Sync event information to uregs, i.e., the ERETU return frame, but * is it safe to write to the ERETU return frame which is just above * current event stack frame? * * The RSP used by FRED to push a stack frame is not the value in %rsp, * it is calculated from %rsp with the following 2 steps: * 1) RSP = %rsp - (IA32_FRED_CONFIG & 0x1c0) // Reserve N*64 bytes * 2) RSP = RSP & ~0x3f // Align to a 64-byte cache line * when an event delivery doesn't trigger a stack level change. * * Here is an example with N*64 (N=1) bytes reserved: * * 64-byte cache line ==> ______________ * |___Reserved___| * |__Event_data__| * |_____SS_______| * |_____RSP______| * |_____FLAGS____| * |_____CS_______| * |_____IP_______| * 64-byte cache line ==> |__Error_code__| <== ERETU return frame * |______________| * |______________| * |______________| * |______________| * |______________| * |______________| * |______________| * 64-byte cache line ==> |______________| <== RSP after step 1) and 2) * |___Reserved___| * |__Event_data__| * |_____SS_______| * |_____RSP______| * |_____FLAGS____| * |_____CS_______| * |_____IP_______| * 64-byte cache line ==> |__Error_code__| <== ERETS return frame * * Thus a new FRED stack frame will always be pushed below a previous * FRED stack frame ((N*64) bytes may be reserved between), and it is * safe to write to a previous FRED stack frame as they never overlap. 
*/ fred_info(uregs)->edata = fred_event_data(regs); uregs->ssx = regs->ssx; uregs->fred_ss.ss = ss; /* The NMI bit was moved away above */ uregs->fred_ss.nmi = 0; uregs->csx = regs->csx; uregs->fred_cs.sl = 0; uregs->fred_cs.wfe = 0; uregs->cs = cs; uregs->orig_ax = error_code; return ex_handler_default(fixup, regs); } #endif int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE; } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct exception_table_entry *e; int type, reg, imm; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; pnp_bios_is_utter_crap = 1; printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n"); __asm__ volatile( "movl %0, %%esp\n\t" "jmp *%1\n\t" : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip)); panic("do_trap: can't hit this"); } #endif e = search_exception_tables(regs->ip); if (!e) return 0; type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); reg = FIELD_GET(EX_DATA_REG_MASK, e->data); imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); switch (type) { case EX_TYPE_DEFAULT: case EX_TYPE_DEFAULT_MCE_SAFE: return ex_handler_default(e, regs); case EX_TYPE_FAULT: case EX_TYPE_FAULT_MCE_SAFE: return ex_handler_fault(e, regs, trapnr); case EX_TYPE_UACCESS: return ex_handler_uaccess(e, regs, trapnr, fault_addr); case EX_TYPE_CLEAR_FS: return ex_handler_clear_fs(e, regs); case EX_TYPE_FPU_RESTORE: return ex_handler_fprestore(e, regs); case EX_TYPE_BPF: return ex_handler_bpf(e, regs); case EX_TYPE_WRMSR: return ex_handler_msr(e, regs, true, false, reg); case EX_TYPE_RDMSR: return ex_handler_msr(e, regs, false, false, reg); case EX_TYPE_WRMSR_SAFE: return ex_handler_msr(e, regs, true, true, reg); case EX_TYPE_RDMSR_SAFE: return ex_handler_msr(e, regs, false, true, reg); case EX_TYPE_WRMSR_IN_MCE: ex_handler_msr_mce(regs, true); break; case EX_TYPE_RDMSR_IN_MCE: ex_handler_msr_mce(regs, false); break; case EX_TYPE_POP_REG: regs->sp += sizeof(long); fallthrough; case EX_TYPE_IMM_REG: return ex_handler_imm_reg(e, regs, reg, imm); case EX_TYPE_FAULT_SGX: return ex_handler_sgx(e, regs, trapnr); case EX_TYPE_UCOPY_LEN: return ex_handler_ucopy_len(e, regs, trapnr, fault_addr, reg, imm); case EX_TYPE_ZEROPAD: return ex_handler_zeropad(e, regs, fault_addr); #ifdef CONFIG_X86_FRED case EX_TYPE_ERETU: return ex_handler_eretu(e, regs, error_code); #endif } BUG(); } extern unsigned int early_recursion_flag; /* Restricted version used during very early boot */ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { /* Ignore early NMIs. */ if (trapnr == X86_TRAP_NMI) return; if (early_recursion_flag > 2) goto halt_loop; /* * Old CPUs leave the high bits of CS on the stack * undefined. I'm not sure which CPUs do this, but at least * the 486 DX works this way. * Xen pv domains are not using the default __KERNEL_CS. */ if (!xen_pv_domain() && regs->cs != __KERNEL_CS) goto fail; /* * The full exception fixup machinery is available as soon as * the early IDT is loaded. This means that it is the * responsibility of extable users to either function correctly * when handlers are invoked early or to simply avoid causing * exceptions before they're ready to handle them. 
* * This is better than filtering which handlers can be used, * because refusing to call a handler here is guaranteed to * result in a hard-to-debug panic. * * Keep in mind that not all vectors actually get here. Early * page faults, for example, are special. */ if (fixup_exception(regs, trapnr, regs->orig_ax, 0)) return; if (trapnr == X86_TRAP_UD) { if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { /* Skip the ud2. */ regs->ip += LEN_UD2; return; } /* * If this was a BUG and report_bug returns or if this * was just a normal #UD, we want to continue onward and * crash. */ } fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, regs->orig_ax, read_cr2()); show_regs(regs); halt_loop: while (true) halt(); }
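/*
 * Illustrative user-space sketch (not part of the kernel source): the x86
 * exception table stores each target as a 32-bit offset relative to the
 * address of the field holding it, which is what ex_fixup_addr() above
 * resolves with "(unsigned long)&x->fixup + x->fixup".  The entry layout and
 * helper names here are simplified stand-ins, not the kernel's definitions.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_extable_entry {
	int32_t insn;	/* self-relative offset to the faulting instruction */
	int32_t fixup;	/* self-relative offset to the fixup landing pad */
};

/* target = address of the field + the signed offset stored in it */
static uintptr_t resolve(const int32_t *field)
{
	return (uintptr_t)field + *field;
}

/* Only valid when the displacement fits in 32 bits, as in the kernel image. */
static void encode(int32_t *field, uintptr_t target)
{
	*field = (int32_t)(target - (uintptr_t)field);
}

static struct fake_extable_entry e;
static char insn_site[16];
static char fixup_site[16];

int main(void)
{
	encode(&e.insn, (uintptr_t)insn_site);
	encode(&e.fixup, (uintptr_t)fixup_site);

	printf("insn  resolves correctly: %d\n",
	       resolve(&e.insn) == (uintptr_t)insn_site);
	printf("fixup resolves correctly: %d\n",
	       resolve(&e.fixup) == (uintptr_t)fixup_site);
	return 0;
}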
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TIMENS_H
#define _LINUX_TIMENS_H

#include <linux/sched.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/err.h>
#include <linux/time64.h>

struct user_namespace;
extern struct user_namespace init_user_ns;

struct vm_area_struct;

struct timens_offsets {
	struct timespec64 monotonic;
	struct timespec64 boottime;
};

struct time_namespace {
	struct user_namespace	*user_ns;
	struct ucounts		*ucounts;
	struct ns_common	ns;
	struct timens_offsets	offsets;
	struct page		*vvar_page;
	/* If set prevents changing offsets after any task joined namespace. */
	bool			frozen_offsets;
} __randomize_layout;

extern struct time_namespace init_time_ns;

#ifdef CONFIG_TIME_NS
extern int vdso_join_timens(struct task_struct *task,
			    struct time_namespace *ns);
extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);

static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
	refcount_inc(&ns->ns.count);
	return ns;
}

struct time_namespace *copy_time_ns(unsigned long flags,
				    struct user_namespace *user_ns,
				    struct time_namespace *old_ns);
void free_time_ns(struct time_namespace *ns);
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
struct page *find_timens_vvar_page(struct vm_area_struct *vma);

static inline void put_time_ns(struct time_namespace *ns)
{
	if (refcount_dec_and_test(&ns->ns.count))
		free_time_ns(ns);
}

void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m);

struct proc_timens_offset {
	int			clockid;
	struct timespec64	val;
};

int proc_timens_set_offset(struct file *file, struct task_struct *p,
			   struct proc_timens_offset *offsets, int n);

static inline void timens_add_monotonic(struct timespec64 *ts)
{
	struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;

	*ts = timespec64_add(*ts, ns_offsets->monotonic);
}

static inline void timens_add_boottime(struct timespec64 *ts)
{
	struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;

	*ts = timespec64_add(*ts, ns_offsets->boottime);
}

static inline u64 timens_add_boottime_ns(u64 nsec)
{
	struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;

	return nsec + timespec64_to_ns(&ns_offsets->boottime);
}

static inline void timens_sub_boottime(struct timespec64 *ts)
{
	struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;

	*ts = timespec64_sub(*ts, ns_offsets->boottime);
}

ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
				struct timens_offsets *offsets);

static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
{
	struct time_namespace *ns = current->nsproxy->time_ns;

	if (likely(ns == &init_time_ns))
		return tim;

	return do_timens_ktime_to_host(clockid, tim, &ns->offsets);
}

#else
static inline int vdso_join_timens(struct task_struct *task,
				   struct time_namespace *ns)
{
	return 0;
}

static inline void timens_commit(struct task_struct *tsk,
				 struct time_namespace *ns)
{
}
static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { return NULL; } static inline void put_time_ns(struct time_namespace *ns) { } static inline struct time_namespace *copy_time_ns(unsigned long flags, struct user_namespace *user_ns, struct time_namespace *old_ns) { if (flags & CLONE_NEWTIME) return ERR_PTR(-EINVAL); return old_ns; } static inline void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) { return; } static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) { return NULL; } static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { } static inline u64 timens_add_boottime_ns(u64 nsec) { return nsec; } static inline void timens_sub_boottime(struct timespec64 *ts) { } static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) { return tim; } #endif #endif /* _LINUX_TIMENS_H */
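/*
 * Illustrative user-space sketch (not part of the kernel source): a time
 * namespace simply biases CLOCK_MONOTONIC/CLOCK_BOOTTIME readings by a fixed
 * per-namespace offset, as the timens_add_*() helpers above do through
 * timespec64_add().  timespec_add_norm() below is a hypothetical stand-in
 * for the kernel's timespec64 arithmetic.
 */
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000L

/* Add an offset to a timespec and renormalize the nanosecond field. */
static struct timespec timespec_add_norm(struct timespec ts,
					 struct timespec offset)
{
	ts.tv_sec += offset.tv_sec;
	ts.tv_nsec += offset.tv_nsec;
	if (ts.tv_nsec >= NSEC_PER_SEC) {
		ts.tv_sec++;
		ts.tv_nsec -= NSEC_PER_SEC;
	} else if (ts.tv_nsec < 0) {
		ts.tv_sec--;
		ts.tv_nsec += NSEC_PER_SEC;
	}
	return ts;
}

int main(void)
{
	/* Pretend this namespace sees a monotonic clock 30 days ahead. */
	struct timespec offset = { .tv_sec = 30 * 24 * 3600, .tv_nsec = 0 };
	struct timespec now, shifted;

	clock_gettime(CLOCK_MONOTONIC, &now);
	shifted = timespec_add_norm(now, offset);

	printf("host mono: %lld.%09ld\n", (long long)now.tv_sec, now.tv_nsec);
	printf("ns   mono: %lld.%09ld\n", (long long)shifted.tv_sec,
	       shifted.tv_nsec);
	return 0;
}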
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mdio

#if !defined(_TRACE_MDIO_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MDIO_H

#include <linux/tracepoint.h>

TRACE_EVENT_CONDITION(mdio_access,

	TP_PROTO(struct mii_bus *bus, char read,
		 u8 addr, unsigned regnum, u16 val, int err),

	TP_ARGS(bus, read, addr, regnum, val, err),

	TP_CONDITION(err >= 0),

	TP_STRUCT__entry(
		__array(char, busid, MII_BUS_ID_SIZE)
		__field(char, read)
		__field(u8, addr)
		__field(u16, val)
		__field(unsigned, regnum)
	),

	TP_fast_assign(
		strscpy(__entry->busid, bus->id, MII_BUS_ID_SIZE);
		__entry->read = read;
		__entry->addr = addr;
		__entry->regnum = regnum;
		__entry->val = val;
	),

	TP_printk("%s %-5s phy:0x%02hhx reg:0x%02x val:0x%04hx",
		  __entry->busid, __entry->read ? "read" : "write",
		  __entry->addr, __entry->regnum, __entry->val)
);

#endif /* if !defined(_TRACE_MDIO_H) || defined(TRACE_HEADER_MULTI_READ) */

/* This part must be outside protection */
#include <trace/define_trace.h>
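/*
 * Illustrative user-space sketch (not part of the kernel source): it formats
 * an MDIO access record the same way the TP_printk() template above does, so
 * you can see what the resulting trace line looks like.  The struct name is
 * hypothetical; MII_BUS_ID_SIZE is assumed to be 61 as in current kernels,
 * and the sample bus id and PHY id values are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define MII_BUS_ID_SIZE 61

struct mdio_access_record {
	char		busid[MII_BUS_ID_SIZE];
	char		read;		/* 1 = read, 0 = write */
	uint8_t		addr;		/* PHY address on the bus */
	uint16_t	val;
	unsigned int	regnum;		/* register number */
};

static void print_mdio_access(const struct mdio_access_record *r)
{
	/* Mirrors: "%s %-5s phy:0x%02hhx reg:0x%02x val:0x%04hx" */
	printf("%s %-5s phy:0x%02hhx reg:0x%02x val:0x%04hx\n",
	       r->busid, r->read ? "read" : "write",
	       r->addr, r->regnum, r->val);
}

int main(void)
{
	struct mdio_access_record rec = {
		.busid	= "stmmac-0:00",
		.read	= 1,
		.addr	= 0x01,
		.regnum	= 0x02,		/* MII_PHYSID1 */
		.val	= 0x0141,
	};

	print_mdio_access(&rec);
	return 0;
}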
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct array_buffer;
struct tracer;
struct dentry;
struct bpf_prog;
union bpf_attr;

/* Used for event string fields when they are NULL */
#define EVENT_NULL_STR		"(null)"

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
				  unsigned long flags,
				  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
				    const struct trace_print_flags *symbol_array);

#if BITS_PER_LONG == 32
const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
				      unsigned long long flags,
				      const struct trace_print_flags_u64 *flag_array);

const char *trace_print_symbols_seq_u64(struct trace_seq *p,
					unsigned long long val,
					const struct trace_print_flags_u64 *symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
				    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
				const unsigned char *buf, int len,
				bool concatenate);

const char *trace_print_array_seq(struct trace_seq *p,
				  const void *buf, int count,
				  size_t el_size);

const char *
trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str,
			 int prefix_type, int rowsize, int groupsize,
			 const void *buf, size_t len, bool ascii);

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
			  struct trace_event *event);
extern __printf(2, 3)
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);

/* Used to find the offset and length of dynamic fields in trace events */
struct trace_dynamic_info {
#ifdef CONFIG_CPU_BIG_ENDIAN
	u16	len;
	u16	offset;
#else
	u16	offset;
	u16	len;
#endif
} __packed;

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01] 235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
	unsigned short		type;
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
};

#define TRACE_EVENT_TYPE_MAX						\
	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)

/*
 * Trace iterator - used by printout routines who present trace
 * results to users and which routines might sleep, etc:
 */
struct trace_iterator {
	struct trace_array	*tr;
	struct tracer		*trace;
	struct array_buffer	*array_buffer;
	void			*private;
	int			cpu_file;
	struct mutex		mutex;
	struct ring_buffer_iter	**buffer_iter;
	unsigned long		iter_flags;
	void			*temp;	/* temp holder */
	unsigned int		temp_size;
	char			*fmt;	/* modified format holder */
	unsigned int		fmt_size;
	atomic_t		wait_index;

	/* trace_seq for __print_flags() and __print_symbolic() etc.
*/ struct trace_seq tmp_seq; cpumask_var_t started; /* Set when the file is closed to prevent new waiters */ bool closed; /* it's true when current open file is snapshot */ bool snapshot; /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; unsigned long lost_events; int leftover; int ent_size; int cpu; u64 ts; loff_t pos; long idx; /* All new field here will be zeroed out in pipe_read */ }; enum trace_iter_flags { TRACE_FILE_LAT_FMT = 1, TRACE_FILE_ANNOTATE = 2, TRACE_FILE_TIME_IN_NS = 4, }; typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, int flags, struct trace_event *event); struct trace_event_functions { trace_print_func trace; trace_print_func raw; trace_print_func hex; trace_print_func binary; }; struct trace_event { struct hlist_node node; int type; struct trace_event_functions *funcs; }; extern int register_trace_event(struct trace_event *event); extern int unregister_trace_event(struct trace_event *event); /* Return values for print_line callback */ enum print_line_t { TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ TRACE_TYPE_HANDLED = 1, TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; enum print_line_t trace_handle_return(struct trace_seq *s); static inline void tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, unsigned int trace_ctx) { entry->preempt_count = trace_ctx & 0xff; entry->pid = current->pid; entry->type = type; entry->flags = trace_ctx >> 16; } unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_NEED_RESCHED_LAZY = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, TRACE_FLAG_BH_OFF = 0x80, }; static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) { unsigned int irq_status = irqs_disabled_flags(irqflags) ? TRACE_FLAG_IRQS_OFF : 0; return tracing_gen_ctx_irq_test(irq_status); } static inline unsigned int tracing_gen_ctx(void) { unsigned long irqflags; local_save_flags(irqflags); return tracing_gen_ctx_flags(irqflags); } static inline unsigned int tracing_gen_ctx_dec(void) { unsigned int trace_ctx; trace_ctx = tracing_gen_ctx(); /* * Subtract one from the preemption counter if preemption is enabled, * see trace_event_buffer_reserve()for details. */ if (IS_ENABLED(CONFIG_PREEMPTION)) trace_ctx--; return trace_ctx; } struct trace_event_file; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_buffer, struct trace_event_file *trace_file, int type, unsigned long len, unsigned int trace_ctx); #define TRACE_RECORD_CMDLINE BIT(0) #define TRACE_RECORD_TGID BIT(1) void tracing_record_taskinfo(struct task_struct *task, int flags); void tracing_record_taskinfo_sched_switch(struct task_struct *prev, struct task_struct *next, int flags); void tracing_record_cmdline(struct task_struct *task); void tracing_record_tgid(struct task_struct *task); int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) 
__printf(3, 4); struct event_filter; enum trace_reg { TRACE_REG_REGISTER, TRACE_REG_UNREGISTER, #ifdef CONFIG_PERF_EVENTS TRACE_REG_PERF_REGISTER, TRACE_REG_PERF_UNREGISTER, TRACE_REG_PERF_OPEN, TRACE_REG_PERF_CLOSE, /* * These (ADD/DEL) use a 'boolean' return value, where 1 (true) means a * custom action was taken and the default action is not to be * performed. */ TRACE_REG_PERF_ADD, TRACE_REG_PERF_DEL, #endif }; struct trace_event_call; #define TRACE_FUNCTION_TYPE ((const char *)~0UL) struct trace_event_fields { const char *type; union { struct { const char *name; const int size; const int align; const unsigned int is_signed:1; unsigned int needs_test:1; const int filter_type; const int len; }; int (*define_fields)(struct trace_event_call *); }; }; struct trace_event_class { const char *system; void *probe; #ifdef CONFIG_PERF_EVENTS void *perf_probe; #endif int (*reg)(struct trace_event_call *event, enum trace_reg type, void *data); struct trace_event_fields *fields_array; struct list_head *(*get_fields)(struct trace_event_call *); struct list_head fields; int (*raw_init)(struct trace_event_call *); }; extern int trace_event_reg(struct trace_event_call *event, enum trace_reg type, void *data); struct trace_event_buffer { struct trace_buffer *buffer; struct ring_buffer_event *event; struct trace_event_file *trace_file; void *entry; unsigned int trace_ctx; struct pt_regs *regs; }; void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len); void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); enum { TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_DYNAMIC_BIT, TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, TRACE_EVENT_FL_EPROBE_BIT, TRACE_EVENT_FL_FPROBE_BIT, TRACE_EVENT_FL_CUSTOM_BIT, TRACE_EVENT_FL_TEST_STR_BIT, }; /* * Event flags: * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file * TRACEPOINT - Event is a tracepoint * DYNAMIC - Event is a dynamic event (created at run time) * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe * EPROBE - Event is an event probe * FPROBE - Event is an function probe * CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint) * This is set when the custom event has not been attached * to a tracepoint yet, then it is cleared when it is. 
* TEST_STR - The event has a "%s" that points to a string outside the event */ enum { TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_DYNAMIC = (1 << TRACE_EVENT_FL_DYNAMIC_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT), TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT), TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT), TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT), }; #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) struct trace_event_call { struct list_head list; struct trace_event_class *class; union { const char *name; /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ struct tracepoint *tp; }; struct trace_event event; char *print_fmt; /* * Static events can disappear with modules, * where as dynamic ones need their own ref count. */ union { void *module; atomic_t refcnt; }; void *data; /* See the TRACE_EVENT_FL_* flags above */ int flags; /* static flags of different events */ #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; #ifdef CONFIG_DYNAMIC_EVENTS bool trace_event_dyn_try_get_ref(struct trace_event_call *call); void trace_event_dyn_put_ref(struct trace_event_call *call); bool trace_event_dyn_busy(struct trace_event_call *call); #else static inline bool trace_event_dyn_try_get_ref(struct trace_event_call *call) { /* Without DYNAMIC_EVENTS configured, nothing should be calling this */ return false; } static inline void trace_event_dyn_put_ref(struct trace_event_call *call) { } static inline bool trace_event_dyn_busy(struct trace_event_call *call) { /* Nothing should call this without DYNAIMIC_EVENTS configured. */ return true; } #endif static inline bool trace_event_try_get_ref(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_DYNAMIC) return trace_event_dyn_try_get_ref(call); else return try_module_get(call->module); } static inline void trace_event_put_ref(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_DYNAMIC) trace_event_dyn_put_ref(call); else module_put(call->module); } #ifdef CONFIG_PERF_EVENTS static inline bool bpf_prog_array_valid(struct trace_event_call *call) { /* * This inline function checks whether call->prog_array * is valid or not. The function is called in various places, * outside rcu_read_lock/unlock, as a heuristic to speed up execution. * * If this function returns true, and later call->prog_array * becomes false inside rcu_read_lock/unlock region, * we bail out then. If this function return false, * there is a risk that we might miss a few events if the checking * were delayed until inside rcu_read_lock/unlock region and * call->prog_array happened to become non-NULL then. * * Here, READ_ONCE() is used instead of rcu_access_pointer(). * rcu_access_pointer() requires the actual definition of * "struct bpf_prog_array" while READ_ONCE() only needs * a declaration of the same type. 
*/ return !!READ_ONCE(call->prog_array); } #endif static inline const char * trace_event_name(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_CUSTOM) return call->name; else if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? call->tp->name : NULL; else return call->name; } static inline struct list_head * trace_get_fields(struct trace_event_call *event_call) { if (!event_call->class->get_fields) return &event_call->class->fields; return event_call->class->get_fields(event_call); } struct trace_subsystem_dir; enum { EVENT_FILE_FL_ENABLED_BIT, EVENT_FILE_FL_RECORDED_CMD_BIT, EVENT_FILE_FL_RECORDED_TGID_BIT, EVENT_FILE_FL_FILTERED_BIT, EVENT_FILE_FL_NO_SET_FILTER_BIT, EVENT_FILE_FL_SOFT_MODE_BIT, EVENT_FILE_FL_SOFT_DISABLED_BIT, EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, EVENT_FILE_FL_PID_FILTER_BIT, EVENT_FILE_FL_WAS_ENABLED_BIT, EVENT_FILE_FL_FREED_BIT, }; extern struct trace_event_file *trace_get_event_file(const char *instance, const char *system, const char *event); extern void trace_put_event_file(struct trace_event_file *file); #define MAX_DYNEVENT_CMD_LEN (2048) enum dynevent_type { DYNEVENT_TYPE_SYNTH = 1, DYNEVENT_TYPE_KPROBE, DYNEVENT_TYPE_NONE, }; struct dynevent_cmd; typedef int (*dynevent_create_fn_t)(struct dynevent_cmd *cmd); struct dynevent_cmd { struct seq_buf seq; const char *event_name; unsigned int n_fields; enum dynevent_type type; dynevent_create_fn_t run_command; void *private_data; }; extern int dynevent_create(struct dynevent_cmd *cmd); extern int synth_event_delete(const char *name); extern void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen); extern int __synth_event_gen_cmd_start(struct dynevent_cmd *cmd, const char *name, struct module *mod, ...); #define synth_event_gen_cmd_start(cmd, name, mod, ...) 
\ __synth_event_gen_cmd_start(cmd, name, mod, ## __VA_ARGS__, NULL) struct synth_field_desc { const char *type; const char *name; }; extern int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, struct module *mod, struct synth_field_desc *fields, unsigned int n_fields); extern int synth_event_create(const char *name, struct synth_field_desc *fields, unsigned int n_fields, struct module *mod); extern int synth_event_add_field(struct dynevent_cmd *cmd, const char *type, const char *name); extern int synth_event_add_field_str(struct dynevent_cmd *cmd, const char *type_name); extern int synth_event_add_fields(struct dynevent_cmd *cmd, struct synth_field_desc *fields, unsigned int n_fields); #define synth_event_gen_cmd_end(cmd) \ dynevent_create(cmd) struct synth_event; struct synth_event_trace_state { struct trace_event_buffer fbuffer; struct synth_trace_event *entry; struct trace_buffer *buffer; struct synth_event *event; unsigned int cur_field; unsigned int n_u64; bool disabled; bool add_next; bool add_name; }; extern int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...); extern int synth_event_trace_array(struct trace_event_file *file, u64 *vals, unsigned int n_vals); extern int synth_event_trace_start(struct trace_event_file *file, struct synth_event_trace_state *trace_state); extern int synth_event_add_next_val(u64 val, struct synth_event_trace_state *trace_state); extern int synth_event_add_val(const char *field_name, u64 val, struct synth_event_trace_state *trace_state); extern int synth_event_trace_end(struct synth_event_trace_state *trace_state); extern int kprobe_event_delete(const char *name); extern void kprobe_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen); #define kprobe_event_gen_cmd_start(cmd, name, loc, ...) \ __kprobe_event_gen_cmd_start(cmd, false, name, loc, ## __VA_ARGS__, NULL) #define kretprobe_event_gen_cmd_start(cmd, name, loc, ...) \ __kprobe_event_gen_cmd_start(cmd, true, name, loc, ## __VA_ARGS__, NULL) extern int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, const char *name, const char *loc, ...); #define kprobe_event_add_fields(cmd, ...) 
\ __kprobe_event_add_fields(cmd, ## __VA_ARGS__, NULL) #define kprobe_event_add_field(cmd, field) \ __kprobe_event_add_fields(cmd, field, NULL) extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); #define kprobe_event_gen_cmd_end(cmd) \ dynevent_create(cmd) #define kretprobe_event_gen_cmd_end(cmd) \ dynevent_create(cmd) /* * Event file flags: * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch * RECORDED_TGID - The tgids should be recorded at sched_switch * FILTERED - The event has a filter attached * NO_SET_FILTER - Set when filter has error and is to be ignored * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED * SOFT_DISABLED - When set, do not trace the event (even though its * tracepoint may be enabled) * TRIGGER_MODE - When set, invoke the triggers associated with the event * TRIGGER_COND - When set, one or more triggers has an associated filter * PID_FILTER - When set, the event is filtered based on pid * WAS_ENABLED - Set when enabled to know to clear trace on module removal * FREED - File descriptor is freed, all fields should be considered invalid */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT), EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT), EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT), }; struct trace_event_file { struct list_head list; struct trace_event_call *event_call; struct event_filter __rcu *filter; struct eventfs_inode *ei; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; /* * 32 bit flags: * bit 0: enabled * bit 1: enabled cmd record * bit 2: enable/disable with the soft disable bit * bit 3: soft disabled * bit 4: trigger enabled * * Note: The bits must be set atomically to prevent races * from other writers. Reads of flags do not need to be in * sync as they occur in critical sections. But the way flags * is currently used, these changes do not affect the code * except that when a change is made, it may have a slight * delay in propagating the changes to other CPUs due to * caching and such. Which is mostly OK ;-) */ unsigned long flags; refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; #ifdef CONFIG_HIST_TRIGGERS extern struct irq_work hist_poll_work; extern wait_queue_head_t hist_poll_wq; static inline void hist_poll_wakeup(void) { if (wq_has_sleeper(&hist_poll_wq)) irq_work_queue(&hist_poll_work); } #define hist_poll_wait(file, wait) \ poll_wait(file, &hist_poll_wq, wait) #endif #define __TRACE_EVENT_FLAGS(name, value) \ static int __init trace_init_flags_##name(void) \ { \ event_##name.flags |= value; \ return 0; \ } \ early_initcall(trace_init_flags_##name); #define __TRACE_EVENT_PERF_PERM(name, expr...) 
\ static int perf_perm_##name(struct trace_event_call *tp_event, \ struct perf_event *p_event) \ { \ return ({ expr; }); \ } \ static int __init trace_init_perf_perm_##name(void) \ { \ event_##name.perf_perm = &perf_perm_##name; \ return 0; \ } \ early_initcall(trace_init_perf_perm_##name); #define PERF_MAX_TRACE_SIZE 8192 #define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ enum event_trigger_type { ETT_NONE = (0), ETT_TRACE_ONOFF = (1 << 0), ETT_SNAPSHOT = (1 << 1), ETT_STACKTRACE = (1 << 2), ETT_EVENT_ENABLE = (1 << 3), ETT_EVENT_HIST = (1 << 4), ETT_HIST_ENABLE = (1 << 5), ETT_EVENT_EPROBE = (1 << 6), }; extern int filter_match_preds(struct event_filter *filter, void *rec); extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event); extern void event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); bool __trace_trigger_soft_disabled(struct trace_event_file *file); /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test * * If any triggers without filters are attached to this event, they * will be called here. If the event is soft disabled and has no * triggers that require testing the fields, it will return true, * otherwise false. */ static __always_inline bool trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; if (likely(!(eflags & (EVENT_FILE_FL_TRIGGER_MODE | EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_PID_FILTER)))) return false; if (likely(eflags & EVENT_FILE_FL_TRIGGER_COND)) return false; return __trace_trigger_soft_disabled(file); } #ifdef CONFIG_BPF_EVENTS unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_detach_bpf_prog(struct perf_event *event); int perf_event_query_prog_array(struct perf_event *event, void __user *info); struct bpf_raw_tp_link; int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, u64 *probe_addr, unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } static inline int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie) { return -EOPNOTSUPP; } static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } static inline int perf_event_query_prog_array(struct perf_event *event, void __user *info) { return -EOPNOTSUPP; } struct bpf_raw_tp_link; static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { return -EOPNOTSUPP; } static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { return -EOPNOTSUPP; } static inline struct bpf_raw_event_map *bpf_get_raw_tracepoint(const 
char *name) { return NULL; } static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) { } static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } static inline int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EOPNOTSUPP; } static inline int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EOPNOTSUPP; } #endif enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, FILTER_RDYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, FILTER_CPUMASK, FILTER_COMM, FILTER_CPU, FILTER_STACKTRACE, }; extern int trace_event_raw_init(struct trace_event_call *call); extern int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); extern int trace_event_get_offsets(struct trace_event_call *call); int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, const char *event, bool enable); #ifdef CONFIG_PERF_EVENTS struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); extern int perf_trace_add(struct perf_event *event, int flags); extern void perf_trace_del(struct perf_event *event, int flags); #ifdef CONFIG_KPROBE_EVENTS extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe); extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS extern int perf_uprobe_init(struct perf_event *event, unsigned long ref_ctr_offset, bool is_retprobe); extern void perf_uprobe_destroy(struct perf_event *event); extern int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, u64 *probe_offset, u64 *probe_addr, bool perf_type_tracepoint); #endif extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_free_bpf_prog(struct perf_event *event); void bpf_trace_run1(struct bpf_raw_tp_link *link, u64 arg1); void bpf_trace_run2(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2); void bpf_trace_run3(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3); void bpf_trace_run4(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4); void bpf_trace_run5(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5); void bpf_trace_run6(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6); void bpf_trace_run7(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); void 
bpf_trace_run8(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8); void bpf_trace_run9(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9); void bpf_trace_run10(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10); void bpf_trace_run11(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11); void bpf_trace_run12(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12); void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct trace_event_call *call, u64 count, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task); static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, struct task_struct *task) { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif #define TRACE_EVENT_STR_MAX 512 /* * gcc warns that you can not use a va_list in an inlined * function. But lets me make it into a macro :-/ */ #define __trace_event_vstr_len(fmt, va) \ ({ \ va_list __ap; \ int __ret; \ \ va_copy(__ap, *(va)); \ __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ va_end(__ap); \ \ min(__ret, TRACE_EVENT_STR_MAX); \ }) #endif /* _LINUX_TRACE_EVENT_H */ /* * Note: we keep the TRACE_CUSTOM_EVENT outside the include file ifdef protection. * This is due to the way trace custom events work. If a file includes two * trace event headers under one "CREATE_CUSTOM_TRACE_EVENTS" the first include * will override the TRACE_CUSTOM_EVENT and break the second include. */ #ifndef TRACE_CUSTOM_EVENT #define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_CUSTOM_EVENT(template, name, proto, args) #define TRACE_CUSTOM_EVENT(name, proto, args, struct, assign, print) #endif /* ifdef TRACE_CUSTOM_EVENT (see note above) */
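/*
 * Illustrative user-space sketch (not part of the kernel source): the
 * __trace_event_vstr_len() macro above sizes a formatted string by calling
 * vsnprintf(NULL, 0, ...) on a copy of the va_list and clamping the result
 * to TRACE_EVENT_STR_MAX.  vstr_len() below is a hypothetical stand-in that
 * shows the same idiom with a plain variadic function.
 */
#include <stdarg.h>
#include <stdio.h>

#define STR_MAX 512

static int vstr_len(const char *fmt, ...)
{
	va_list ap, copy;
	int len;

	va_start(ap, fmt);
	va_copy(copy, ap);			/* never reuse a consumed va_list */
	len = vsnprintf(NULL, 0, fmt, copy) + 1;	/* +1 for the NUL */
	va_end(copy);
	va_end(ap);

	return len < STR_MAX ? len : STR_MAX;	/* clamp like the macro */
}

int main(void)
{
	printf("need %d bytes\n", vstr_len("pid=%d comm=%s", 1234, "bash"));
	return 0;
}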
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/user-return-notifier.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/export.h>

static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);

/*
 * Request a notification when the current cpu returns to userspace. Must be
 * called in atomic context. The notifier will also be called in atomic
 * context.
 */
void user_return_notifier_register(struct user_return_notifier *urn)
{
	set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
	hlist_add_head(&urn->link, this_cpu_ptr(&return_notifier_list));
}
EXPORT_SYMBOL_GPL(user_return_notifier_register);

/*
 * Removes a registered user return notifier. Must be called from atomic
 * context, and from the same cpu registration occurred in.
 */
void user_return_notifier_unregister(struct user_return_notifier *urn)
{
	hlist_del(&urn->link);
	if (hlist_empty(this_cpu_ptr(&return_notifier_list)))
		clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
}
EXPORT_SYMBOL_GPL(user_return_notifier_unregister);

/* Calls registered user return notifiers */
void fire_user_return_notifiers(void)
{
	struct user_return_notifier *urn;
	struct hlist_node *tmp2;
	struct hlist_head *head;

	head = &get_cpu_var(return_notifier_list);
	hlist_for_each_entry_safe(urn, tmp2, head, link)
		urn->on_user_return(urn);
	put_cpu_var(return_notifier_list);
}
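/*
 * Illustrative user-space sketch (not part of the kernel source): the
 * user-return notifier above is a per-CPU list of callbacks walked with a
 * "safe" iterator, so a callback may unregister itself while the list is
 * being fired.  The list and type names below are hypothetical stand-ins
 * for the kernel's hlist machinery.
 */
#include <stdio.h>

struct notifier {
	struct notifier *next;
	void (*on_fire)(struct notifier *n);
};

static struct notifier *head;

static void notifier_register(struct notifier *n)
{
	n->next = head;
	head = n;
}

static void notifier_unregister(struct notifier *n)
{
	struct notifier **pp;

	for (pp = &head; *pp; pp = &(*pp)->next) {
		if (*pp == n) {
			*pp = n->next;
			return;
		}
	}
}

/* Save the next pointer first, like hlist_for_each_entry_safe() does. */
static void fire_notifiers(void)
{
	struct notifier *n = head, *next;

	while (n) {
		next = n->next;		/* safe even if n unregisters itself */
		n->on_fire(n);
		n = next;
	}
}

static void say_and_leave(struct notifier *n)
{
	printf("fired %p, unregistering\n", (void *)n);
	notifier_unregister(n);
}

int main(void)
{
	struct notifier a = { .on_fire = say_and_leave };
	struct notifier b = { .on_fire = say_and_leave };

	notifier_register(&a);
	notifier_register(&b);
	fire_notifiers();
	fire_notifiers();		/* list is now empty: nothing fires */
	return 0;
}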
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NF_CONNTRACK_TSTAMP_H
#define _NF_CONNTRACK_TSTAMP_H

#include <net/net_namespace.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>

struct nf_conn_tstamp {
	u_int64_t start;
	u_int64_t stop;
};

static inline
struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
#else
	return NULL;
#endif
}

static inline
struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	struct net *net = nf_ct_net(ct);

	if (!net->ct.sysctl_tstamp)
		return NULL;

	return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
#else
	return NULL;
#endif
};

#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
void nf_conntrack_tstamp_pernet_init(struct net *net);
#else
static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {}
#endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */

#endif /* _NF_CONNTRACK_TSTAMP_H */
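/*
 * Illustrative user-space sketch (not part of the kernel source): the
 * nf_conn_tstamp extension above just records two u64 nanosecond stamps,
 * one when the flow is first seen and one when it is destroyed, so the flow
 * lifetime is a single subtraction.  The struct and helper names here are
 * hypothetical.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct flow_tstamp {
	uint64_t start;	/* nanoseconds, set when the flow is created */
	uint64_t stop;	/* nanoseconds, set when the flow is destroyed */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

int main(void)
{
	struct flow_tstamp t;
	struct timespec pause = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000 };

	t.start = now_ns();
	nanosleep(&pause, NULL);	/* stand-in for the flow's lifetime */
	t.stop = now_ns();

	printf("flow lived for %llu ns\n",
	       (unsigned long long)(t.stop - t.start));
	return 0;
}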
// SPDX-License-Identifier: GPL-2.0-or-later /* AFS dynamic root handling * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/fs.h> #include <linux/namei.h> #include <linux/dns_resolver.h> #include "internal.h" #define AFS_MIN_DYNROOT_CELL_INO 4 /* Allow for ., .., @cell, .@cell */ #define AFS_MAX_DYNROOT_CELL_INO ((unsigned int)INT_MAX) static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino); /* * iget5() comparator for inode created by autocell operations */ static int afs_iget5_pseudo_test(struct inode *inode, void *opaque) { struct afs_fid *fid = opaque; return inode->i_ino == fid->vnode; } /* * iget5() inode initialiser */ static int afs_iget5_pseudo_set(struct inode *inode, void *opaque) { struct afs_super_info *as = AFS_FS_S(inode->i_sb); struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_fid *fid = opaque; vnode->volume = as->volume; vnode->fid = *fid; inode->i_ino = fid->vnode; inode->i_generation = fid->unique; return 0; } /* * Create an inode for an autocell dynamic automount dir.
*/ static struct inode *afs_iget_pseudo_dir(struct super_block *sb, ino_t ino) { struct afs_vnode *vnode; struct inode *inode; struct afs_fid fid = { .vnode = ino, .unique = 1, }; _enter(""); inode = iget5_locked(sb, fid.vnode, afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", inode, inode->i_ino, fid.vid, fid.vnode, fid.unique); vnode = AFS_FS_I(inode); if (inode->i_state & I_NEW) { netfs_inode_init(&vnode->netfs, NULL, false); simple_inode_init_ts(inode); set_nlink(inode, 2); inode->i_size = 0; inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_autocell_inode_operations; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_generation = 0; inode->i_flags |= S_AUTOMOUNT | S_NOATIME; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); unlock_new_inode(inode); } _leave(" = %p", inode); return inode; } /* * Try to automount the mountpoint with pseudo directory, if the autocell * option is set. */ static struct dentry *afs_dynroot_lookup_cell(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct afs_cell *cell = NULL; struct afs_net *net = afs_d2net(dentry); struct inode *inode = NULL; const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; bool dotted = false; int ret = -ENOENT; /* Names prefixed with a dot are R/W mounts. */ if (name[0] == '.') { name++; len--; dotted = true; } cell = afs_lookup_cell(net, name, len, NULL, false, afs_cell_trace_use_lookup_dynroot); if (IS_ERR(cell)) { ret = PTR_ERR(cell); goto out_no_cell; } inode = afs_iget_pseudo_dir(dir->i_sb, cell->dynroot_ino * 2 + dotted); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto out; } dentry->d_fsdata = cell; return d_splice_alias(inode, dentry); out: afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_dynroot); out_no_cell: if (!inode) return d_splice_alias(inode, dentry); return ret == -ENOENT ? NULL : ERR_PTR(ret); } /* * Look up an entry in a dynroot directory. */ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { _enter("%pd", dentry); if (flags & LOOKUP_CREATE) return ERR_PTR(-EOPNOTSUPP); if (dentry->d_name.len >= AFSNAMEMAX) { _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } if (dentry->d_name.len == 5 && memcmp(dentry->d_name.name, "@cell", 5) == 0) return afs_lookup_atcell(dir, dentry, 2); if (dentry->d_name.len == 6 && memcmp(dentry->d_name.name, ".@cell", 6) == 0) return afs_lookup_atcell(dir, dentry, 3); return afs_dynroot_lookup_cell(dir, dentry, flags); } const struct inode_operations afs_dynroot_inode_operations = { .lookup = afs_dynroot_lookup, }; static void afs_dynroot_d_release(struct dentry *dentry) { struct afs_cell *cell = dentry->d_fsdata; afs_unuse_cell(cell, afs_cell_trace_unuse_dynroot_mntpt); } /* * Keep @cell symlink dentries around, but only keep cell autodirs when they're * being used. 
*/ static int afs_dynroot_delete_dentry(const struct dentry *dentry) { const struct qstr *name = &dentry->d_name; if (name->len == 5 && memcmp(name->name, "@cell", 5) == 0) return 0; if (name->len == 6 && memcmp(name->name, ".@cell", 6) == 0) return 0; return 1; } const struct dentry_operations afs_dynroot_dentry_operations = { .d_delete = afs_dynroot_delete_dentry, .d_release = afs_dynroot_d_release, .d_automount = afs_d_automount, }; static void afs_atcell_delayed_put_cell(void *arg) { struct afs_cell *cell = arg; afs_put_cell(cell, afs_cell_trace_put_atcell); } /* * Read @cell or .@cell symlinks. */ static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_cell *cell; struct afs_net *net = afs_i2net(inode); const char *name; bool dotted = vnode->fid.vnode == 3; if (!rcu_access_pointer(net->ws_cell)) return ERR_PTR(-ENOENT); if (!dentry) { /* We're in RCU-pathwalk. */ cell = rcu_dereference(net->ws_cell); if (dotted) name = cell->name - 1; else name = cell->name; /* Shouldn't need to set a delayed call. */ return name; } down_read(&net->cells_lock); cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (dotted) name = cell->name - 1; else name = cell->name; afs_get_cell(cell, afs_cell_trace_get_atcell); set_delayed_call(done, afs_atcell_delayed_put_cell, cell); up_read(&net->cells_lock); return name; } static const struct inode_operations afs_atcell_inode_operations = { .get_link = afs_atcell_get_link, }; /* * Create an inode for the @cell or .@cell symlinks. */ static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino) { struct afs_vnode *vnode; struct inode *inode; struct afs_fid fid = { .vnode = ino, .unique = 1, }; inode = iget5_locked(dir->i_sb, fid.vnode, afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); if (!inode) return ERR_PTR(-ENOMEM); vnode = AFS_FS_I(inode); if (inode->i_state & I_NEW) { netfs_inode_init(&vnode->netfs, NULL, false); simple_inode_init_ts(inode); set_nlink(inode, 1); inode->i_size = 0; inode->i_mode = S_IFLNK | 0555; inode->i_op = &afs_atcell_inode_operations; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_generation = 0; inode->i_flags |= S_NOATIME; unlock_new_inode(inode); } return d_splice_alias(inode, dentry); } /* * Transcribe the cell database into readdir content under the RCU read lock. * Each cell produces two entries, one prefixed with a dot and one not. */ static int afs_dynroot_readdir_cells(struct afs_net *net, struct dir_context *ctx) { const struct afs_cell *cell; loff_t newpos; _enter("%llu", ctx->pos); for (;;) { unsigned int ix = ctx->pos >> 1; cell = idr_get_next(&net->cells_dyn_ino, &ix); if (!cell) return 0; if (READ_ONCE(cell->state) == AFS_CELL_REMOVING || READ_ONCE(cell->state) == AFS_CELL_DEAD) { ctx->pos += 2; ctx->pos &= ~1; continue; } newpos = ix << 1; if (newpos > ctx->pos) ctx->pos = newpos; _debug("pos %llu -> cell %u", ctx->pos, cell->dynroot_ino); if ((ctx->pos & 1) == 0) { if (!dir_emit(ctx, cell->name, cell->name_len, cell->dynroot_ino, DT_DIR)) return 0; ctx->pos++; } if ((ctx->pos & 1) == 1) { if (!dir_emit(ctx, cell->name - 1, cell->name_len + 1, cell->dynroot_ino + 1, DT_DIR)) return 0; ctx->pos++; } } return 0; } /* * Read the AFS dynamic root directory. This produces a list of cellnames, * dotted and undotted, along with @cell and .@cell links if configured. 
*/ static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx) { struct afs_net *net = afs_d2net(file->f_path.dentry); int ret = 0; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) { if (rcu_access_pointer(net->ws_cell) && !dir_emit(ctx, "@cell", 5, 2, DT_LNK)) return 0; ctx->pos = 3; } if (ctx->pos == 3) { if (rcu_access_pointer(net->ws_cell) && !dir_emit(ctx, ".@cell", 6, 3, DT_LNK)) return 0; ctx->pos = 4; } if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) { down_read(&net->cells_lock); ret = afs_dynroot_readdir_cells(net, ctx); up_read(&net->cells_lock); } return ret; } static const struct file_operations afs_dynroot_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = afs_dynroot_readdir, .fsync = noop_fsync, }; /* * Create an inode for a dynamic root directory. */ struct inode *afs_dynroot_iget_root(struct super_block *sb) { struct afs_super_info *as = AFS_FS_S(sb); struct afs_vnode *vnode; struct inode *inode; struct afs_fid fid = { .vid = 0, .vnode = 1, .unique = 1,}; if (as->volume) fid.vid = as->volume->vid; inode = iget5_locked(sb, fid.vnode, afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); if (!inode) return ERR_PTR(-ENOMEM); vnode = AFS_FS_I(inode); /* there shouldn't be an existing inode */ if (inode->i_state & I_NEW) { netfs_inode_init(&vnode->netfs, NULL, false); simple_inode_init_ts(inode); set_nlink(inode, 2); inode->i_size = 0; inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_dynroot_inode_operations; inode->i_fop = &afs_dynroot_file_operations; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_generation = 0; inode->i_flags |= S_NOATIME; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); unlock_new_inode(inode); } _leave(" = %p", inode); return inode; }
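One detail worth spelling out from the readdir code above: each cell occupies a pair of directory positions, with the low bit of ctx->pos selecting the plain (read-only) or dotted (read/write) form of the name and the remaining bits indexing the cell. The standalone sketch below demonstrates only that encoding; it ignores the positions the real code reserves for ., .., @cell and .@cell, and the cell names are made up.

#include <stdio.h>

int main(void)
{
    const char *cells[] = { "example.org", "grand.central.org" };
    unsigned long long pos = 0;         /* stands in for ctx->pos */

    while ((pos >> 1) < 2) {
        unsigned int ix = pos >> 1;     /* which cell */
        int dotted = pos & 1;           /* 0 = plain name, 1 = dotted (R/W) name */

        printf("pos=%llu -> %s%s\n", pos, dotted ? "." : "", cells[ix]);
        pos++;
    }
    return 0;
}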
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H #include <linux/bug.h> #include <linux/minmax.h> #include <linux/seq_file.h> #include <linux/types.h> /* * Trace sequences are used to allow a function to call several other functions * to create a string of data to use. */ /** * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer */ struct seq_buf { char *buffer; size_t size; size_t len; }; #define DECLARE_SEQ_BUF(NAME, SIZE) \ struct seq_buf NAME = { \ .buffer = (char[SIZE]) { 0 }, \ .size = SIZE, \ } static inline void seq_buf_clear(struct seq_buf *s) { s->len = 0; if (s->size) s->buffer[0] = '\0'; } static inline void seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) { s->buffer = buf; s->size = size; seq_buf_clear(s); } /* * A seq_buf has a buffer that might overflow. When this happens * len is set to be greater than size. */ static inline bool seq_buf_has_overflowed(struct seq_buf *s) { return s->len > s->size; } static inline void seq_buf_set_overflow(struct seq_buf *s) { s->len = s->size + 1; } /* * How much buffer is left on the seq_buf? */ static inline unsigned int seq_buf_buffer_left(struct seq_buf *s) { if (seq_buf_has_overflowed(s)) return 0; return s->size - s->len; } /* How much buffer was written? */ static inline unsigned int seq_buf_used(struct seq_buf *s) { return min(s->len, s->size); } /** * seq_buf_str - get NUL-terminated C string from seq_buf * @s: the seq_buf handle * * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then * the last byte of the buffer is zeroed, and the len will still * point past it. * * After this function is called, s->buffer is safe to use * in string operations. * * Returns: @s->buffer after making sure it is terminated. */ static inline const char *seq_buf_str(struct seq_buf *s) { if (WARN_ON(s->size == 0)) return ""; if (seq_buf_buffer_left(s)) s->buffer[s->len] = 0; else s->buffer[s->size - 1] = 0; return s->buffer; } /** * seq_buf_get_buf - get buffer to write arbitrary data to * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) { WARN_ON(s->len > s->size + 1); if (s->len < s->size) { *bufp = s->buffer + s->len; return s->size - s->len; } *bufp = NULL; return 0; } /** * seq_buf_commit - commit data to the buffer * @s: the seq_buf handle * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value.
*/ static inline void seq_buf_commit(struct seq_buf *s, int num) { if (num < 0) { seq_buf_set_overflow(s); } else { /* num must be negative on overflow */ BUG_ON(s->len + num > s->size); s->len += num; } } extern __printf(2, 3) int seq_buf_printf(struct seq_buf *s, const char *fmt, ...); extern __printf(2, 0) int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args); extern int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s); extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt); extern int seq_buf_puts(struct seq_buf *s, const char *str); extern int seq_buf_putc(struct seq_buf *s, unsigned char c); extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len); extern int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len); extern int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc); extern int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); #ifdef CONFIG_BINARY_PRINTF __printf(2, 0) int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); #endif void seq_buf_do_printk(struct seq_buf *s, const char *lvl); #endif /* _LINUX_SEQ_BUF_H */
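Typical use of this API is to declare a buffer, append to it with seq_buf_printf() or seq_buf_puts(), and read the result back with seq_buf_str(), checking seq_buf_has_overflowed() when truncation matters. A short sketch follows, under the assumption that it sits in kernel code which can use the declarations above; my_report() is a made-up caller.

#include <linux/seq_buf.h>
#include <linux/printk.h>

static void my_report(int cpu, unsigned long hits)
{
    DECLARE_SEQ_BUF(sb, 128);           /* 128-byte on-stack buffer */

    seq_buf_printf(&sb, "cpu%d: ", cpu);
    seq_buf_printf(&sb, "%lu hits", hits);

    if (seq_buf_has_overflowed(&sb))    /* message did not fit */
        pr_warn("report truncated\n");

    pr_info("%s\n", seq_buf_str(&sb));  /* guaranteed NUL-terminated */
}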
/* * Non-physical true random number generator based on timing jitter -- * Jitter RNG standalone code.
* * Copyright Stephan Mueller <smueller@chronox.de>, 2015 - 2023 * * Design * ====== * * See https://www.chronox.de/jent.html * * License * ======= * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL2 are * required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * This Jitterentropy RNG is based on the jitterentropy library * version 3.4.0 provided at https://www.chronox.de/jent.html */ #ifdef __OPTIMIZE__ #error "The CPU Jitter random number generator must not be compiled with optimizations. See documentation. Use the compiler switch -O0 for compiling jitterentropy.c." #endif typedef unsigned long long __u64; typedef long long __s64; typedef unsigned int __u32; typedef unsigned char u8; #define NULL ((void *) 0) /* The entropy pool */ struct rand_data { /* SHA3-256 is used as conditioner */ #define DATA_SIZE_BITS 256 /* all data values that are vital to maintain the security * of the RNG are marked as SENSITIVE. A user must not * access that information while the RNG executes its loops to * calculate the next random value. 
*/ void *hash_state; /* SENSITIVE hash state entropy pool */ __u64 prev_time; /* SENSITIVE Previous time stamp */ __u64 last_delta; /* SENSITIVE stuck test */ __s64 last_delta2; /* SENSITIVE stuck test */ unsigned int flags; /* Flags used to initialize */ unsigned int osr; /* Oversample rate */ #define JENT_MEMORY_ACCESSLOOPS 128 #define JENT_MEMORY_SIZE \ (CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS * \ CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE) unsigned char *mem; /* Memory access location with size of * memblocks * memblocksize */ unsigned int memlocation; /* Pointer to byte in *mem */ unsigned int memblocks; /* Number of memory blocks in *mem */ unsigned int memblocksize; /* Size of one memory block in bytes */ unsigned int memaccessloops; /* Number of memory accesses per random * bit generation */ /* Repetition Count Test */ unsigned int rct_count; /* Number of stuck values */ /* Adaptive Proportion Test cutoff values */ unsigned int apt_cutoff; /* Intermittent health test failure */ unsigned int apt_cutoff_permanent; /* Permanent health test failure */ #define JENT_APT_WINDOW_SIZE 512 /* Data window size */ /* LSB of time stamp to process */ #define JENT_APT_LSB 16 #define JENT_APT_WORD_MASK (JENT_APT_LSB - 1) unsigned int apt_observations; /* Number of collected observations */ unsigned int apt_count; /* APT counter */ unsigned int apt_base; /* APT base reference */ unsigned int health_failure; /* Record health failure */ unsigned int apt_base_set:1; /* APT base reference set? */ }; /* Flags that can be used to initialize the RNG */ #define JENT_DISABLE_MEMORY_ACCESS (1<<2) /* Disable memory access for more * entropy, saves MEMORY_SIZE RAM for * entropy collector */ /* -- error codes for init function -- */ #define JENT_ENOTIME 1 /* Timer service not available */ #define JENT_ECOARSETIME 2 /* Timer too coarse for RNG */ #define JENT_ENOMONOTONIC 3 /* Timer is not monotonic increasing */ #define JENT_EVARVAR 5 /* Timer does not produce variations of * variations (2nd derivation of time is * zero). */ #define JENT_ESTUCK 8 /* Too many stuck results during init. */ #define JENT_EHEALTH 9 /* Health test failed during initialization */ #define JENT_ERCT 10 /* RCT failed during initialization */ #define JENT_EHASH 11 /* Hash self test failed */ #define JENT_EMEM 12 /* Can't allocate memory for initialization */ #define JENT_RCT_FAILURE 1 /* Failure in RCT health test. */ #define JENT_APT_FAILURE 2 /* Failure in APT health test. */ #define JENT_PERMANENT_FAILURE_SHIFT 16 #define JENT_PERMANENT_FAILURE(x) (x << JENT_PERMANENT_FAILURE_SHIFT) #define JENT_RCT_FAILURE_PERMANENT JENT_PERMANENT_FAILURE(JENT_RCT_FAILURE) #define JENT_APT_FAILURE_PERMANENT JENT_PERMANENT_FAILURE(JENT_APT_FAILURE) /* * The output n bits can receive more than n bits of min entropy, of course, * but the fixed output of the conditioning function can only asymptotically * approach the output size bits of min entropy, not attain that bound. Random * maps will tend to have output collisions, which reduces the creditable * output entropy (that is what SP 800-90B Section 3.1.5.1.2 attempts to bound). * * The value "64" is justified in Appendix A.4 of the current 90C draft, * and aligns with NIST's in "epsilon" definition in this document, which is * that a string can be considered "full entropy" if you can bound the min * entropy in each bit of output to at least 1-epsilon, where epsilon is * required to be <= 2^(-32). 
*/ #define JENT_ENTROPY_SAFETY_FACTOR 64 #include <linux/fips.h> #include <linux/minmax.h> #include "jitterentropy.h" /*************************************************************************** * Adaptive Proportion Test * * This test complies with SP800-90B section 4.4.2. ***************************************************************************/ /* * See the SP 800-90B comment #10b for the corrected cutoff for the SP 800-90B * APT. * https://www.untruth.org/~josh/sp80090b/UL%20SP800-90B-final%20comments%20v1.9%2020191212.pdf * In the syntax of R, this is C = 2 + qbinom(1 − 2^(−30), 511, 2^(-1/osr)). * (The original formula wasn't correct because the first symbol must * necessarily have been observed, so there is no chance of observing 0 of these * symbols.) * * For the alpha < 2^-53, R cannot be used as it uses a float data type without * arbitrary precision. A SageMath script is used to calculate those cutoff * values. * * For any value above 14, this yields the maximal allowable value of 512 * (by FIPS 140-2 IG 7.19 Resolution # 16, we cannot choose a cutoff value that * renders the test unable to fail). */ static const unsigned int jent_apt_cutoff_lookup[15] = { 325, 422, 459, 477, 488, 494, 499, 502, 505, 507, 508, 509, 510, 511, 512 }; static const unsigned int jent_apt_cutoff_permanent_lookup[15] = { 355, 447, 479, 494, 502, 507, 510, 512, 512, 512, 512, 512, 512, 512, 512 }; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static void jent_apt_init(struct rand_data *ec, unsigned int osr) { /* * Establish the apt_cutoff based on the presumed entropy rate of * 1/osr. */ if (osr >= ARRAY_SIZE(jent_apt_cutoff_lookup)) { ec->apt_cutoff = jent_apt_cutoff_lookup[ ARRAY_SIZE(jent_apt_cutoff_lookup) - 1]; ec->apt_cutoff_permanent = jent_apt_cutoff_permanent_lookup[ ARRAY_SIZE(jent_apt_cutoff_permanent_lookup) - 1]; } else { ec->apt_cutoff = jent_apt_cutoff_lookup[osr - 1]; ec->apt_cutoff_permanent = jent_apt_cutoff_permanent_lookup[osr - 1]; } } /* * Reset the APT counter * * @ec [in] Reference to entropy collector */ static void jent_apt_reset(struct rand_data *ec, unsigned int delta_masked) { /* Reset APT counter */ ec->apt_count = 0; ec->apt_base = delta_masked; ec->apt_observations = 0; } /* * Insert a new entropy event into APT * * @ec [in] Reference to entropy collector * @delta_masked [in] Masked time delta to process */ static void jent_apt_insert(struct rand_data *ec, unsigned int delta_masked) { /* Initialize the base reference */ if (!ec->apt_base_set) { ec->apt_base = delta_masked; ec->apt_base_set = 1; return; } if (delta_masked == ec->apt_base) { ec->apt_count++; /* Note, ec->apt_count starts with one. */ if (ec->apt_count >= ec->apt_cutoff_permanent) ec->health_failure |= JENT_APT_FAILURE_PERMANENT; else if (ec->apt_count >= ec->apt_cutoff) ec->health_failure |= JENT_APT_FAILURE; } ec->apt_observations++; if (ec->apt_observations >= JENT_APT_WINDOW_SIZE) jent_apt_reset(ec, delta_masked); } /*************************************************************************** * Stuck Test and its use as Repetition Count Test * * The Jitter RNG uses an enhanced version of the Repetition Count Test * (RCT) specified in SP800-90B section 4.4.1. Instead of counting identical * back-to-back values, the input to the RCT is the counting of the stuck * values during the generation of one Jitter RNG output block. * * The RCT is applied with an alpha of 2^{-30} compliant to FIPS 140-2 IG 9.8. * * During the counting operation, the Jitter RNG always calculates the RCT * cut-off value of C. 
If that value exceeds the allowed cut-off value, * the Jitter RNG output block will be calculated completely but discarded at * the end. The caller of the Jitter RNG is informed with an error code. ***************************************************************************/ /* * Repetition Count Test as defined in SP800-90B section 4.4.1 * * @ec [in] Reference to entropy collector * @stuck [in] Indicator whether the value is stuck */ static void jent_rct_insert(struct rand_data *ec, int stuck) { if (stuck) { ec->rct_count++; /* * The cutoff value is based on the following consideration: * alpha = 2^-30 or 2^-60 as recommended in SP800-90B. * In addition, we require an entropy value H of 1/osr as this * is the minimum entropy required to provide full entropy. * Note, we collect (DATA_SIZE_BITS + ENTROPY_SAFETY_FACTOR)*osr * deltas for inserting them into the entropy pool which should * then have (close to) DATA_SIZE_BITS bits of entropy in the * conditioned output. * * Note, ec->rct_count (which equals to value B in the pseudo * code of SP800-90B section 4.4.1) starts with zero. Hence * we need to subtract one from the cutoff value as calculated * following SP800-90B. Thus C = ceil(-log_2(alpha)/H) = 30*osr * or 60*osr. */ if ((unsigned int)ec->rct_count >= (60 * ec->osr)) { ec->rct_count = -1; ec->health_failure |= JENT_RCT_FAILURE_PERMANENT; } else if ((unsigned int)ec->rct_count >= (30 * ec->osr)) { ec->rct_count = -1; ec->health_failure |= JENT_RCT_FAILURE; } } else { /* Reset RCT */ ec->rct_count = 0; } } static inline __u64 jent_delta(__u64 prev, __u64 next) { #define JENT_UINT64_MAX (__u64)(~((__u64) 0)) return (prev < next) ? (next - prev) : (JENT_UINT64_MAX - prev + 1 + next); } /* * Stuck test by checking the: * 1st derivative of the jitter measurement (time delta) * 2nd derivative of the jitter measurement (delta of time deltas) * 3rd derivative of the jitter measurement (delta of delta of time deltas) * * All values must always be non-zero. * * @ec [in] Reference to entropy collector * @current_delta [in] Jitter time delta * * @return * 0 jitter measurement not stuck (good bit) * 1 jitter measurement stuck (reject bit) */ static int jent_stuck(struct rand_data *ec, __u64 current_delta) { __u64 delta2 = jent_delta(ec->last_delta, current_delta); __u64 delta3 = jent_delta(ec->last_delta2, delta2); ec->last_delta = current_delta; ec->last_delta2 = delta2; /* * Insert the result of the comparison of two back-to-back time * deltas. */ jent_apt_insert(ec, current_delta); if (!current_delta || !delta2 || !delta3) { /* RCT with a stuck bit */ jent_rct_insert(ec, 1); return 1; } /* RCT with a non-stuck bit */ jent_rct_insert(ec, 0); return 0; } /* * Report any health test failures * * @ec [in] Reference to entropy collector * * @return a bitmask indicating which tests failed * 0 No health test failure * 1 RCT failure * 2 APT failure * 1<<JENT_PERMANENT_FAILURE_SHIFT RCT permanent failure * 2<<JENT_PERMANENT_FAILURE_SHIFT APT permanent failure */ static unsigned int jent_health_failure(struct rand_data *ec) { /* Test is only enabled in FIPS mode */ if (!fips_enabled) return 0; return ec->health_failure; } /*************************************************************************** * Noise sources ***************************************************************************/ /* * Update of the loop count used for the next round of * an entropy collection. 
* * Input: * @bits is the number of low bits of the timer to consider * @min is the number of bits we shift the timer value to the right at * the end to make sure we have a guaranteed minimum value * * @return Newly calculated loop counter */ static __u64 jent_loop_shuffle(unsigned int bits, unsigned int min) { __u64 time = 0; __u64 shuffle = 0; unsigned int i = 0; unsigned int mask = (1<<bits) - 1; jent_get_nstime(&time); /* * We fold the time value as much as possible to ensure that as many * bits of the time stamp are included as possible. */ for (i = 0; ((DATA_SIZE_BITS + bits - 1) / bits) > i; i++) { shuffle ^= time & mask; time = time >> bits; } /* * We add a lower boundary value to ensure we have a minimum * RNG loop count. */ return (shuffle + (1<<min)); } /* * CPU Jitter noise source -- this is the noise source based on the CPU * execution time jitter * * This function injects the individual bits of the time value into the * entropy pool using a hash. * * ec [in] entropy collector * time [in] time stamp to be injected * stuck [in] Is the time stamp identified as stuck? * * Output: * updated hash context in the entropy collector or error code */ static int jent_condition_data(struct rand_data *ec, __u64 time, int stuck) { #define SHA3_HASH_LOOP (1<<3) struct { int rct_count; unsigned int apt_observations; unsigned int apt_count; unsigned int apt_base; } addtl = { ec->rct_count, ec->apt_observations, ec->apt_count, ec->apt_base }; return jent_hash_time(ec->hash_state, time, (u8 *)&addtl, sizeof(addtl), SHA3_HASH_LOOP, stuck); } /* * Memory Access noise source -- this is a noise source based on variations in * memory access times * * This function performs memory accesses which will add to the timing * variations due to an unknown amount of CPU wait states that need to be * added when accessing memory. The memory size should be larger than the L1 * caches as outlined in the documentation and the associated testing. * * The L1 cache has a very high bandwidth, albeit its access rate is usually * slower than accessing CPU registers. Therefore, L1 accesses only add minimal * variations as the CPU has hardly to wait. Starting with L2, significant * variations are added because L2 typically does not belong to the CPU any more * and therefore a wider range of CPU wait states is necessary for accesses. * L3 and real memory accesses have even a wider range of wait states. However, * to reliably access either L3 or memory, the ec->mem memory must be quite * large which is usually not desirable. 
* * @ec [in] Reference to the entropy collector with the memory access data -- if * the reference to the memory block to be accessed is NULL, this noise * source is disabled * @loop_cnt [in] if a value not equal to 0 is set, use the given value * number of loops to perform the LFSR */ static void jent_memaccess(struct rand_data *ec, __u64 loop_cnt) { unsigned int wrap = 0; __u64 i = 0; #define MAX_ACC_LOOP_BIT 7 #define MIN_ACC_LOOP_BIT 0 __u64 acc_loop_cnt = jent_loop_shuffle(MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT); if (NULL == ec || NULL == ec->mem) return; wrap = ec->memblocksize * ec->memblocks; /* * testing purposes -- allow test app to set the counter, not * needed during runtime */ if (loop_cnt) acc_loop_cnt = loop_cnt; for (i = 0; i < (ec->memaccessloops + acc_loop_cnt); i++) { unsigned char *tmpval = ec->mem + ec->memlocation; /* * memory access: just add 1 to one byte, * wrap at 255 -- memory access implies read * from and write to memory location */ *tmpval = (*tmpval + 1) & 0xff; /* * Addition of memblocksize - 1 to pointer * with wrap around logic to ensure that every * memory location is hit evenly */ ec->memlocation = ec->memlocation + ec->memblocksize - 1; ec->memlocation = ec->memlocation % wrap; } } /*************************************************************************** * Start of entropy processing logic ***************************************************************************/ /* * This is the heart of the entropy generation: calculate time deltas and * use the CPU jitter in the time deltas. The jitter is injected into the * entropy pool. * * WARNING: ensure that ->prev_time is primed before using the output * of this function! This can be done by calling this function * and not using its result. * * @ec [in] Reference to entropy collector * * @return result of stuck test */ static int jent_measure_jitter(struct rand_data *ec, __u64 *ret_current_delta) { __u64 time = 0; __u64 current_delta = 0; int stuck; /* Invoke one noise source before time measurement to add variations */ jent_memaccess(ec, 0); /* * Get time stamp and calculate time delta to previous * invocation to measure the timing variations */ jent_get_nstime(&time); current_delta = jent_delta(ec->prev_time, time); ec->prev_time = time; /* Check whether we have a stuck measurement. */ stuck = jent_stuck(ec, current_delta); /* Now call the next noise sources which also injects the data */ if (jent_condition_data(ec, current_delta, stuck)) stuck = 1; /* return the raw entropy value */ if (ret_current_delta) *ret_current_delta = current_delta; return stuck; } /* * Generator of one 64 bit random number * Function fills rand_data->hash_state * * @ec [in] Reference to entropy collector */ static void jent_gen_entropy(struct rand_data *ec) { unsigned int k = 0, safety_factor = 0; if (fips_enabled) safety_factor = JENT_ENTROPY_SAFETY_FACTOR; /* priming of the ->prev_time value */ jent_measure_jitter(ec, NULL); while (!jent_health_failure(ec)) { /* If a stuck measurement is received, repeat measurement */ if (jent_measure_jitter(ec, NULL)) continue; /* * We multiply the loop value with ->osr to obtain the * oversampling rate requested by the caller */ if (++k >= ((DATA_SIZE_BITS + safety_factor) * ec->osr)) break; } } /* * Entry function: Obtain entropy for the caller. * * This function invokes the entropy gathering logic as often to generate * as many bytes as requested by the caller. The entropy gathering logic * creates 64 bit per invocation. 
* * This function truncates the last 64 bit entropy value output to the exact * size specified by the caller. * * @ec [in] Reference to entropy collector * @data [in] pointer to buffer for storing random data -- buffer must already * exist * @len [in] size of the buffer, specifying also the requested number of random * in bytes * * @return 0 when request is fulfilled or an error * * The following error codes can occur: * -1 entropy_collector is NULL or the generation failed * -2 Intermittent health failure * -3 Permanent health failure */ int jent_read_entropy(struct rand_data *ec, unsigned char *data, unsigned int len) { unsigned char *p = data; if (!ec) return -1; while (len > 0) { unsigned int tocopy, health_test_result; jent_gen_entropy(ec); health_test_result = jent_health_failure(ec); if (health_test_result > JENT_PERMANENT_FAILURE_SHIFT) { /* * At this point, the Jitter RNG instance is considered * as a failed instance. There is no rerun of the * startup test any more, because the caller * is assumed to not further use this instance. */ return -3; } else if (health_test_result) { /* * Perform startup health tests and return permanent * error if it fails. */ if (jent_entropy_init(0, 0, NULL, ec)) { /* Mark the permanent error */ ec->health_failure &= JENT_RCT_FAILURE_PERMANENT | JENT_APT_FAILURE_PERMANENT; return -3; } return -2; } tocopy = min(DATA_SIZE_BITS / 8, len); if (jent_read_random_block(ec->hash_state, p, tocopy)) return -1; len -= tocopy; p += tocopy; } return 0; } /*************************************************************************** * Initialization logic ***************************************************************************/ struct rand_data *jent_entropy_collector_alloc(unsigned int osr, unsigned int flags, void *hash_state) { struct rand_data *entropy_collector; entropy_collector = jent_zalloc(sizeof(struct rand_data)); if (!entropy_collector) return NULL; if (!(flags & JENT_DISABLE_MEMORY_ACCESS)) { /* Allocate memory for adding variations based on memory * access */ entropy_collector->mem = jent_kvzalloc(JENT_MEMORY_SIZE); if (!entropy_collector->mem) { jent_zfree(entropy_collector); return NULL; } entropy_collector->memblocksize = CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE; entropy_collector->memblocks = CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS; entropy_collector->memaccessloops = JENT_MEMORY_ACCESSLOOPS; } /* verify and set the oversampling rate */ if (osr == 0) osr = 1; /* H_submitter = 1 / osr */ entropy_collector->osr = osr; entropy_collector->flags = flags; entropy_collector->hash_state = hash_state; /* Initialize the APT */ jent_apt_init(entropy_collector, osr); /* fill the data pad with non-zero values */ jent_gen_entropy(entropy_collector); return entropy_collector; } void jent_entropy_collector_free(struct rand_data *entropy_collector) { jent_kvzfree(entropy_collector->mem, JENT_MEMORY_SIZE); entropy_collector->mem = NULL; jent_zfree(entropy_collector); } int jent_entropy_init(unsigned int osr, unsigned int flags, void *hash_state, struct rand_data *p_ec) { /* * If caller provides an allocated ec, reuse it which implies that the * health test entropy data is used to further still the available * entropy pool. 
*/ struct rand_data *ec = p_ec; int i, time_backwards = 0, ret = 0, ec_free = 0; unsigned int health_test_result; if (!ec) { ec = jent_entropy_collector_alloc(osr, flags, hash_state); if (!ec) return JENT_EMEM; ec_free = 1; } else { /* Reset the APT */ jent_apt_reset(ec, 0); /* Ensure that a new APT base is obtained */ ec->apt_base_set = 0; /* Reset the RCT */ ec->rct_count = 0; /* Reset intermittent, leave permanent health test result */ ec->health_failure &= (~JENT_RCT_FAILURE); ec->health_failure &= (~JENT_APT_FAILURE); } /* We could perform statistical tests here, but the problem is * that we only have a few loop counts to do testing. These * loop counts may show some slight skew and we produce * false positives. * * Moreover, only old systems show potentially problematic * jitter entropy that could potentially be caught here. But * the RNG is intended for hardware that is available or widely * used, but not old systems that are long out of favor. Thus, * no statistical tests. */ /* * We could add a check for system capabilities such as clock_getres or * check for CONFIG_X86_TSC, but it does not make much sense as the * following sanity checks verify that we have a high-resolution * timer. */ /* * TESTLOOPCOUNT needs some loops to identify edge systems. 100 is * definitely too little. * * SP800-90B requires at least 1024 initial test cycles. */ #define TESTLOOPCOUNT 1024 #define CLEARCACHE 100 for (i = 0; (TESTLOOPCOUNT + CLEARCACHE) > i; i++) { __u64 start_time = 0, end_time = 0, delta = 0; /* Invoke core entropy collection logic */ jent_measure_jitter(ec, &delta); end_time = ec->prev_time; start_time = ec->prev_time - delta; /* test whether timer works */ if (!start_time || !end_time) { ret = JENT_ENOTIME; goto out; } /* * test whether timer is fine grained enough to provide * delta even when called shortly after each other -- this * implies that we also have a high resolution timer */ if (!delta || (end_time == start_time)) { ret = JENT_ECOARSETIME; goto out; } /* * up to here we did not modify any variable that will be * evaluated later, but we already performed some work. Thus we * already have had an impact on the caches, branch prediction, * etc. with the goal to clear it to get the worst case * measurements. */ if (i < CLEARCACHE) continue; /* test whether we have an increasing timer */ if (!(end_time > start_time)) time_backwards++; } /* * we allow up to three times the time running backwards. * CLOCK_REALTIME is affected by adjtime and NTP operations. Thus, * if such an operation just happens to interfere with our test, it * should not fail. The value of 3 should cover the NTP case being * performed during our test run. */ if (time_backwards > 3) { ret = JENT_ENOMONOTONIC; goto out; } /* Did we encounter a health test failure? */ health_test_result = jent_health_failure(ec); if (health_test_result) { ret = (health_test_result & JENT_RCT_FAILURE) ? JENT_ERCT : JENT_EHEALTH; goto out; } out: if (ec_free) jent_entropy_collector_free(ec); return ret; }
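Two small pieces of arithmetic above are easy to gloss over: jent_delta() makes the time delta wrap-safe when the 64-bit time stamp rolls over, and the stuck test then demands that the first, second and third derivatives of the time stamps are all non-zero. The standalone snippet below re-implements just the delta step to show the wraparound case; it mirrors jent_delta() rather than calling the kernel code.

#include <stdint.h>
#include <stdio.h>

static uint64_t delta(uint64_t prev, uint64_t next)
{
    /*
     * Same arithmetic as jent_delta(): if the time stamp wrapped between
     * the two reads, count the distance across the wrap point explicitly.
     */
    return (prev < next) ? (next - prev)
                         : (UINT64_MAX - prev + 1 + next);
}

int main(void)
{
    printf("%llu\n", (unsigned long long)delta(100, 250));              /* 150 */
    printf("%llu\n", (unsigned long long)delta(UINT64_MAX - 5, 10));    /* 16  */
    return 0;
}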
// SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-mul_2.c - MPI helper functions * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" #include "longlong.h" mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t cy_limb; mpi_size_t j; mpi_limb_t prod_high, prod_low; mpi_limb_t x; /* The loop counter and index J goes from -SIZE to -1. This way * the loop becomes faster. */ j = -s1_size; res_ptr -= j; s1_ptr -= j; cy_limb = 0; do { umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); prod_low += cy_limb; cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; x = res_ptr[j]; prod_low = x + prod_low; cy_limb += prod_low < x ? 1 : 0; res_ptr[j] = prod_low; } while (++j); return cy_limb; }
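mpihelp_addmul_1() computes res_ptr[0..n-1] += s1_ptr[0..n-1] * s2_limb and returns the final carry-out limb; the negative-index trick only exists so the loop condition can be the cheap ++j test against zero. A standalone reference of the same multiply-accumulate on 64-bit limbs follows, using the compiler's unsigned __int128 (a GCC/Clang extension) where the kernel code uses umul_ppmm(); it is illustrative only and does not use the kernel's MPI types.

#include <stdint.h>
#include <stdio.h>

/* Reference version of "res += s1 * limb, return the carry-out limb". */
static uint64_t addmul_1(uint64_t *res, const uint64_t *s1, int n, uint64_t limb)
{
    uint64_t carry = 0;
    int i;

    for (i = 0; i < n; i++) {
        /* 64x64 -> 128-bit multiply stands in for umul_ppmm(). */
        unsigned __int128 t = (unsigned __int128)s1[i] * limb
                              + res[i] + carry;

        res[i] = (uint64_t)t;
        carry  = (uint64_t)(t >> 64);
    }
    return carry;
}

int main(void)
{
    uint64_t res[2] = { UINT64_MAX, 0 };
    uint64_t s1[2]  = { 2, 0 };
    /* res += s1 * 3: the low limb wraps and the carry lands in res[1]. */
    uint64_t cy = addmul_1(res, s1, 2, 3);

    printf("res = { %llu, %llu }, carry out = %llu\n",
           (unsigned long long)res[0], (unsigned long long)res[1],
           (unsigned long long)cy);
    return 0;
}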
// SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/condition.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/slab.h> /* List of "struct tomoyo_condition". */ LIST_HEAD(tomoyo_condition_list); /** * tomoyo_argv - Check argv[] in "struct linux_binprm". * * @index: Index number of @arg_ptr. * @arg_ptr: Contents of argv[@index]. * @argc: Length of @argv. * @argv: Pointer to "struct tomoyo_argv". * @checked: Set to true if @argv[@index] was found. * * Returns true on success, false otherwise. */ static bool tomoyo_argv(const unsigned int index, const char *arg_ptr, const int argc, const struct tomoyo_argv *argv, u8 *checked) { int i; struct tomoyo_path_info arg; arg.name = arg_ptr; for (i = 0; i < argc; argv++, checked++, i++) { bool result; if (index != argv->index) continue; *checked = 1; tomoyo_fill_path_info(&arg); result = tomoyo_path_matches_pattern(&arg, argv->value); if (argv->is_not) result = !result; if (!result) return false; } return true; } /** * tomoyo_envp - Check envp[] in "struct linux_binprm". * * @env_name: The name of environment variable. * @env_value: The value of environment variable. * @envc: Length of @envp. * @envp: Pointer to "struct tomoyo_envp". * @checked: Set to true if @envp[@env_name] was found. * * Returns true on success, false otherwise. */ static bool tomoyo_envp(const char *env_name, const char *env_value, const int envc, const struct tomoyo_envp *envp, u8 *checked) { int i; struct tomoyo_path_info name; struct tomoyo_path_info value; name.name = env_name; tomoyo_fill_path_info(&name); value.name = env_value; tomoyo_fill_path_info(&value); for (i = 0; i < envc; envp++, checked++, i++) { bool result; if (!tomoyo_path_matches_pattern(&name, envp->name)) continue; *checked = 1; if (envp->value) { result = tomoyo_path_matches_pattern(&value, envp->value); if (envp->is_not) result = !result; } else { result = true; if (!envp->is_not) result = !result; } if (!result) return false; } return true; } /** * tomoyo_scan_bprm - Scan "struct linux_binprm". * * @ee: Pointer to "struct tomoyo_execve". * @argc: Length of @argv. * @argv: Pointer to "struct tomoyo_argv". * @envc: Length of @envp. * @envp: Pointer to "struct tomoyo_envp". * * Returns true on success, false otherwise.
*/ static bool tomoyo_scan_bprm(struct tomoyo_execve *ee, const u16 argc, const struct tomoyo_argv *argv, const u16 envc, const struct tomoyo_envp *envp) { struct linux_binprm *bprm = ee->bprm; struct tomoyo_page_dump *dump = &ee->dump; char *arg_ptr = ee->tmp; int arg_len = 0; unsigned long pos = bprm->p; int offset = pos % PAGE_SIZE; int argv_count = bprm->argc; int envp_count = bprm->envc; bool result = true; u8 local_checked[32]; u8 *checked; if (argc + envc <= sizeof(local_checked)) { checked = local_checked; memset(local_checked, 0, sizeof(local_checked)); } else { checked = kzalloc(argc + envc, GFP_NOFS); if (!checked) return false; } while (argv_count || envp_count) { if (!tomoyo_dump_page(bprm, pos, dump)) { result = false; goto out; } pos += PAGE_SIZE - offset; while (offset < PAGE_SIZE) { /* Read. */ const char *kaddr = dump->data; const unsigned char c = kaddr[offset++]; if (c && arg_len < TOMOYO_EXEC_TMPSIZE - 10) { if (c == '\\') { arg_ptr[arg_len++] = '\\'; arg_ptr[arg_len++] = '\\'; } else if (c > ' ' && c < 127) { arg_ptr[arg_len++] = c; } else { arg_ptr[arg_len++] = '\\'; arg_ptr[arg_len++] = (c >> 6) + '0'; arg_ptr[arg_len++] = ((c >> 3) & 7) + '0'; arg_ptr[arg_len++] = (c & 7) + '0'; } } else { arg_ptr[arg_len] = '\0'; } if (c) continue; /* Check. */ if (argv_count) { if (!tomoyo_argv(bprm->argc - argv_count, arg_ptr, argc, argv, checked)) { result = false; break; } argv_count--; } else if (envp_count) { char *cp = strchr(arg_ptr, '='); if (cp) { *cp = '\0'; if (!tomoyo_envp(arg_ptr, cp + 1, envc, envp, checked + argc)) { result = false; break; } } envp_count--; } else { break; } arg_len = 0; } offset = 0; if (!result) break; } out: if (result) { int i; /* Check not-yet-checked entries. */ for (i = 0; i < argc; i++) { if (checked[i]) continue; /* * Return true only if all unchecked indexes in * bprm->argv[] are not matched. */ if (argv[i].is_not) continue; result = false; break; } for (i = 0; i < envc; envp++, i++) { if (checked[argc + i]) continue; /* * Return true only if all unchecked environ variables * in bprm->envp[] are either undefined or not matched. */ if ((!envp->value && !envp->is_not) || (envp->value && envp->is_not)) continue; result = false; break; } } if (checked != local_checked) kfree(checked); return result; } /** * tomoyo_scan_exec_realpath - Check "exec.realpath" parameter of "struct tomoyo_condition". * * @file: Pointer to "struct file". * @ptr: Pointer to "struct tomoyo_name_union". * @match: True if "exec.realpath=", false if "exec.realpath!=". * * Returns true on success, false otherwise. */ static bool tomoyo_scan_exec_realpath(struct file *file, const struct tomoyo_name_union *ptr, const bool match) { bool result; struct tomoyo_path_info exe; if (!file) return false; exe.name = tomoyo_realpath_from_path(&file->f_path); if (!exe.name) return false; tomoyo_fill_path_info(&exe); result = tomoyo_compare_name_union(&exe, ptr); kfree(exe.name); return result == match; } /** * tomoyo_get_dqword - tomoyo_get_name() for a quoted string. * * @start: String to save. * * Returns pointer to "struct tomoyo_path_info" on success, NULL otherwise. */ static const struct tomoyo_path_info *tomoyo_get_dqword(char *start) { char *cp = start + strlen(start) - 1; if (cp == start || *start++ != '"' || *cp != '"') return NULL; *cp = '\0'; if (*start && !tomoyo_correct_word(start)) return NULL; return tomoyo_get_name(start); } /** * tomoyo_parse_name_union_quoted - Parse a quoted word. * * @param: Pointer to "struct tomoyo_acl_param". 
* @ptr: Pointer to "struct tomoyo_name_union". * * Returns true on success, false otherwise. */ static bool tomoyo_parse_name_union_quoted(struct tomoyo_acl_param *param, struct tomoyo_name_union *ptr) { char *filename = param->data; if (*filename == '@') return tomoyo_parse_name_union(param, ptr); ptr->filename = tomoyo_get_dqword(filename); return ptr->filename != NULL; } /** * tomoyo_parse_argv - Parse an argv[] condition part. * * @left: Lefthand value. * @right: Righthand value. * @argv: Pointer to "struct tomoyo_argv". * * Returns true on success, false otherwise. */ static bool tomoyo_parse_argv(char *left, char *right, struct tomoyo_argv *argv) { if (tomoyo_parse_ulong(&argv->index, &left) != TOMOYO_VALUE_TYPE_DECIMAL || *left++ != ']' || *left) return false; argv->value = tomoyo_get_dqword(right); return argv->value != NULL; } /** * tomoyo_parse_envp - Parse an envp[] condition part. * * @left: Lefthand value. * @right: Righthand value. * @envp: Pointer to "struct tomoyo_envp". * * Returns true on success, false otherwise. */ static bool tomoyo_parse_envp(char *left, char *right, struct tomoyo_envp *envp) { const struct tomoyo_path_info *name; const struct tomoyo_path_info *value; char *cp = left + strlen(left) - 1; if (*cp-- != ']' || *cp != '"') goto out; *cp = '\0'; if (!tomoyo_correct_word(left)) goto out; name = tomoyo_get_name(left); if (!name) goto out; if (!strcmp(right, "NULL")) { value = NULL; } else { value = tomoyo_get_dqword(right); if (!value) { tomoyo_put_name(name); goto out; } } envp->name = name; envp->value = value; return true; out: return false; } /** * tomoyo_same_condition - Check for duplicated "struct tomoyo_condition" entry. * * @a: Pointer to "struct tomoyo_condition". * @b: Pointer to "struct tomoyo_condition". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_condition(const struct tomoyo_condition *a, const struct tomoyo_condition *b) { return a->size == b->size && a->condc == b->condc && a->numbers_count == b->numbers_count && a->names_count == b->names_count && a->argc == b->argc && a->envc == b->envc && a->grant_log == b->grant_log && a->transit == b->transit && !memcmp(a + 1, b + 1, a->size - sizeof(*a)); } /** * tomoyo_condition_type - Get condition type. * * @word: Keyword string. * * Returns one of values in "enum tomoyo_conditions_index" on success, * TOMOYO_MAX_CONDITION_KEYWORD otherwise. */ static u8 tomoyo_condition_type(const char *word) { u8 i; for (i = 0; i < TOMOYO_MAX_CONDITION_KEYWORD; i++) { if (!strcmp(word, tomoyo_condition_keyword[i])) break; } return i; } /* Define this to enable debug mode. */ /* #define DEBUG_CONDITION */ #ifdef DEBUG_CONDITION #define dprintk printk #else #define dprintk(...) do { } while (0) #endif /** * tomoyo_commit_condition - Commit "struct tomoyo_condition". * * @entry: Pointer to "struct tomoyo_condition". * * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise. * * This function merges duplicated entries. This function returns NULL if * @entry is not duplicated but memory quota for policy has exceeded. 
*/ static struct tomoyo_condition *tomoyo_commit_condition (struct tomoyo_condition *entry) { struct tomoyo_condition *ptr; bool found = false; if (mutex_lock_interruptible(&tomoyo_policy_lock)) { dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__); ptr = NULL; found = true; goto out; } list_for_each_entry(ptr, &tomoyo_condition_list, head.list) { if (!tomoyo_same_condition(ptr, entry) || atomic_read(&ptr->head.users) == TOMOYO_GC_IN_PROGRESS) continue; /* Same entry found. Share this entry. */ atomic_inc(&ptr->head.users); found = true; break; } if (!found) { if (tomoyo_memory_ok(entry)) { atomic_set(&entry->head.users, 1); list_add(&entry->head.list, &tomoyo_condition_list); } else { found = true; ptr = NULL; } } mutex_unlock(&tomoyo_policy_lock); out: if (found) { tomoyo_del_condition(&entry->head.list); kfree(entry); entry = ptr; } return entry; } /** * tomoyo_get_transit_preference - Parse domain transition preference for execve(). * * @param: Pointer to "struct tomoyo_acl_param". * @e: Pointer to "struct tomoyo_condition". * * Returns the condition string part. */ static char *tomoyo_get_transit_preference(struct tomoyo_acl_param *param, struct tomoyo_condition *e) { char * const pos = param->data; bool flag; if (*pos == '<') { e->transit = tomoyo_get_domainname(param); goto done; } { char *cp = strchr(pos, ' '); if (cp) *cp = '\0'; flag = tomoyo_correct_path(pos) || !strcmp(pos, "keep") || !strcmp(pos, "initialize") || !strcmp(pos, "reset") || !strcmp(pos, "child") || !strcmp(pos, "parent"); if (cp) *cp = ' '; } if (!flag) return pos; e->transit = tomoyo_get_name(tomoyo_read_token(param)); done: if (e->transit) return param->data; /* * Return a bad read-only condition string that will let * tomoyo_get_condition() return NULL. */ return "/"; } /** * tomoyo_get_condition - Parse condition part. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise. */ struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param) { struct tomoyo_condition *entry = NULL; struct tomoyo_condition_element *condp = NULL; struct tomoyo_number_union *numbers_p = NULL; struct tomoyo_name_union *names_p = NULL; struct tomoyo_argv *argv = NULL; struct tomoyo_envp *envp = NULL; struct tomoyo_condition e = { }; char * const start_of_string = tomoyo_get_transit_preference(param, &e); char * const end_of_string = start_of_string + strlen(start_of_string); char *pos; rerun: pos = start_of_string; while (1) { u8 left = -1; u8 right = -1; char *left_word = pos; char *cp; char *right_word; bool is_not; if (!*left_word) break; /* * Since left-hand condition does not allow use of "path_group" * or "number_group" and environment variable's names do not * accept '=', it is guaranteed that the original line consists * of one or more repetition of $left$operator$right blocks * where "$left is free from '=' and ' '" and "$operator is * either '=' or '!='" and "$right is free from ' '". * Therefore, we can reconstruct the original line at the end * of dry run even if we overwrite $operator with '\0'. */ cp = strchr(pos, ' '); if (cp) { *cp = '\0'; /* Will restore later. */ pos = cp + 1; } else { pos = ""; } right_word = strchr(left_word, '='); if (!right_word || right_word == left_word) goto out; is_not = *(right_word - 1) == '!'; if (is_not) *(right_word++ - 1) = '\0'; /* Will restore later. */ else if (*(right_word + 1) != '=') *right_word++ = '\0'; /* Will restore later. 
*/ else goto out; dprintk(KERN_WARNING "%u: <%s>%s=<%s>\n", __LINE__, left_word, is_not ? "!" : "", right_word); if (!strcmp(left_word, "grant_log")) { if (entry) { if (is_not || entry->grant_log != TOMOYO_GRANTLOG_AUTO) goto out; else if (!strcmp(right_word, "yes")) entry->grant_log = TOMOYO_GRANTLOG_YES; else if (!strcmp(right_word, "no")) entry->grant_log = TOMOYO_GRANTLOG_NO; else goto out; } continue; } if (!strncmp(left_word, "exec.argv[", 10)) { if (!argv) { e.argc++; e.condc++; } else { e.argc--; e.condc--; left = TOMOYO_ARGV_ENTRY; argv->is_not = is_not; if (!tomoyo_parse_argv(left_word + 10, right_word, argv++)) goto out; } goto store_value; } if (!strncmp(left_word, "exec.envp[\"", 11)) { if (!envp) { e.envc++; e.condc++; } else { e.envc--; e.condc--; left = TOMOYO_ENVP_ENTRY; envp->is_not = is_not; if (!tomoyo_parse_envp(left_word + 11, right_word, envp++)) goto out; } goto store_value; } left = tomoyo_condition_type(left_word); dprintk(KERN_WARNING "%u: <%s> left=%u\n", __LINE__, left_word, left); if (left == TOMOYO_MAX_CONDITION_KEYWORD) { if (!numbers_p) { e.numbers_count++; } else { e.numbers_count--; left = TOMOYO_NUMBER_UNION; param->data = left_word; if (*left_word == '@' || !tomoyo_parse_number_union(param, numbers_p++)) goto out; } } if (!condp) e.condc++; else e.condc--; if (left == TOMOYO_EXEC_REALPATH || left == TOMOYO_SYMLINK_TARGET) { if (!names_p) { e.names_count++; } else { e.names_count--; right = TOMOYO_NAME_UNION; param->data = right_word; if (!tomoyo_parse_name_union_quoted(param, names_p++)) goto out; } goto store_value; } right = tomoyo_condition_type(right_word); if (right == TOMOYO_MAX_CONDITION_KEYWORD) { if (!numbers_p) { e.numbers_count++; } else { e.numbers_count--; right = TOMOYO_NUMBER_UNION; param->data = right_word; if (!tomoyo_parse_number_union(param, numbers_p++)) goto out; } } store_value: if (!condp) { dprintk(KERN_WARNING "%u: dry_run left=%u right=%u match=%u\n", __LINE__, left, right, !is_not); continue; } condp->left = left; condp->right = right; condp->equals = !is_not; dprintk(KERN_WARNING "%u: left=%u right=%u match=%u\n", __LINE__, condp->left, condp->right, condp->equals); condp++; } dprintk(KERN_INFO "%u: cond=%u numbers=%u names=%u ac=%u ec=%u\n", __LINE__, e.condc, e.numbers_count, e.names_count, e.argc, e.envc); if (entry) { BUG_ON(e.names_count | e.numbers_count | e.argc | e.envc | e.condc); return tomoyo_commit_condition(entry); } e.size = sizeof(*entry) + e.condc * sizeof(struct tomoyo_condition_element) + e.numbers_count * sizeof(struct tomoyo_number_union) + e.names_count * sizeof(struct tomoyo_name_union) + e.argc * sizeof(struct tomoyo_argv) + e.envc * sizeof(struct tomoyo_envp); entry = kzalloc(e.size, GFP_NOFS); if (!entry) goto out2; *entry = e; e.transit = NULL; condp = (struct tomoyo_condition_element *) (entry + 1); numbers_p = (struct tomoyo_number_union *) (condp + e.condc); names_p = (struct tomoyo_name_union *) (numbers_p + e.numbers_count); argv = (struct tomoyo_argv *) (names_p + e.names_count); envp = (struct tomoyo_envp *) (argv + e.argc); { bool flag = false; for (pos = start_of_string; pos < end_of_string; pos++) { if (*pos) continue; if (flag) /* Restore " ". */ *pos = ' '; else if (*(pos + 1) == '=') /* Restore "!=". */ *pos = '!'; else /* Restore "=". 
*/ *pos = '='; flag = !flag; } } goto rerun; out: dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__); if (entry) { tomoyo_del_condition(&entry->head.list); kfree(entry); } out2: tomoyo_put_name(e.transit); return NULL; } /** * tomoyo_get_attributes - Revalidate "struct inode". * * @obj: Pointer to "struct tomoyo_obj_info". * * Returns nothing. */ void tomoyo_get_attributes(struct tomoyo_obj_info *obj) { u8 i; struct dentry *dentry = NULL; for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) { struct inode *inode; switch (i) { case TOMOYO_PATH1: dentry = obj->path1.dentry; if (!dentry) continue; break; case TOMOYO_PATH2: dentry = obj->path2.dentry; if (!dentry) continue; break; default: if (!dentry) continue; dentry = dget_parent(dentry); break; } inode = d_backing_inode(dentry); if (inode) { struct tomoyo_mini_stat *stat = &obj->stat[i]; stat->uid = inode->i_uid; stat->gid = inode->i_gid; stat->ino = inode->i_ino; stat->mode = inode->i_mode; stat->dev = inode->i_sb->s_dev; stat->rdev = inode->i_rdev; obj->stat_valid[i] = true; } if (i & 1) /* TOMOYO_PATH1_PARENT or TOMOYO_PATH2_PARENT */ dput(dentry); } } /** * tomoyo_condition - Check condition part. * * @r: Pointer to "struct tomoyo_request_info". * @cond: Pointer to "struct tomoyo_condition". Maybe NULL. * * Returns true on success, false otherwise. * * Caller holds tomoyo_read_lock(). */ bool tomoyo_condition(struct tomoyo_request_info *r, const struct tomoyo_condition *cond) { u32 i; unsigned long min_v[2] = { 0, 0 }; unsigned long max_v[2] = { 0, 0 }; const struct tomoyo_condition_element *condp; const struct tomoyo_number_union *numbers_p; const struct tomoyo_name_union *names_p; const struct tomoyo_argv *argv; const struct tomoyo_envp *envp; struct tomoyo_obj_info *obj; u16 condc; u16 argc; u16 envc; struct linux_binprm *bprm = NULL; if (!cond) return true; condc = cond->condc; argc = cond->argc; envc = cond->envc; obj = r->obj; if (r->ee) bprm = r->ee->bprm; if (!bprm && (argc || envc)) return false; condp = (struct tomoyo_condition_element *) (cond + 1); numbers_p = (const struct tomoyo_number_union *) (condp + condc); names_p = (const struct tomoyo_name_union *) (numbers_p + cond->numbers_count); argv = (const struct tomoyo_argv *) (names_p + cond->names_count); envp = (const struct tomoyo_envp *) (argv + argc); for (i = 0; i < condc; i++) { const bool match = condp->equals; const u8 left = condp->left; const u8 right = condp->right; bool is_bitop[2] = { false, false }; u8 j; condp++; /* Check argv[] and envp[] later. */ if (left == TOMOYO_ARGV_ENTRY || left == TOMOYO_ENVP_ENTRY) continue; /* Check string expressions. */ if (right == TOMOYO_NAME_UNION) { const struct tomoyo_name_union *ptr = names_p++; struct tomoyo_path_info *symlink; struct tomoyo_execve *ee; struct file *file; switch (left) { case TOMOYO_SYMLINK_TARGET: symlink = obj ? obj->symlink_target : NULL; if (!symlink || !tomoyo_compare_name_union(symlink, ptr) == match) goto out; break; case TOMOYO_EXEC_REALPATH: ee = r->ee; file = ee ? ee->bprm->file : NULL; if (!tomoyo_scan_exec_realpath(file, ptr, match)) goto out; break; } continue; } /* Check numeric or bit-op expressions. */ for (j = 0; j < 2; j++) { const u8 index = j ? 
right : left; unsigned long value = 0; switch (index) { case TOMOYO_TASK_UID: value = from_kuid(&init_user_ns, current_uid()); break; case TOMOYO_TASK_EUID: value = from_kuid(&init_user_ns, current_euid()); break; case TOMOYO_TASK_SUID: value = from_kuid(&init_user_ns, current_suid()); break; case TOMOYO_TASK_FSUID: value = from_kuid(&init_user_ns, current_fsuid()); break; case TOMOYO_TASK_GID: value = from_kgid(&init_user_ns, current_gid()); break; case TOMOYO_TASK_EGID: value = from_kgid(&init_user_ns, current_egid()); break; case TOMOYO_TASK_SGID: value = from_kgid(&init_user_ns, current_sgid()); break; case TOMOYO_TASK_FSGID: value = from_kgid(&init_user_ns, current_fsgid()); break; case TOMOYO_TASK_PID: value = tomoyo_sys_getpid(); break; case TOMOYO_TASK_PPID: value = tomoyo_sys_getppid(); break; case TOMOYO_TYPE_IS_SOCKET: value = S_IFSOCK; break; case TOMOYO_TYPE_IS_SYMLINK: value = S_IFLNK; break; case TOMOYO_TYPE_IS_FILE: value = S_IFREG; break; case TOMOYO_TYPE_IS_BLOCK_DEV: value = S_IFBLK; break; case TOMOYO_TYPE_IS_DIRECTORY: value = S_IFDIR; break; case TOMOYO_TYPE_IS_CHAR_DEV: value = S_IFCHR; break; case TOMOYO_TYPE_IS_FIFO: value = S_IFIFO; break; case TOMOYO_MODE_SETUID: value = S_ISUID; break; case TOMOYO_MODE_SETGID: value = S_ISGID; break; case TOMOYO_MODE_STICKY: value = S_ISVTX; break; case TOMOYO_MODE_OWNER_READ: value = 0400; break; case TOMOYO_MODE_OWNER_WRITE: value = 0200; break; case TOMOYO_MODE_OWNER_EXECUTE: value = 0100; break; case TOMOYO_MODE_GROUP_READ: value = 0040; break; case TOMOYO_MODE_GROUP_WRITE: value = 0020; break; case TOMOYO_MODE_GROUP_EXECUTE: value = 0010; break; case TOMOYO_MODE_OTHERS_READ: value = 0004; break; case TOMOYO_MODE_OTHERS_WRITE: value = 0002; break; case TOMOYO_MODE_OTHERS_EXECUTE: value = 0001; break; case TOMOYO_EXEC_ARGC: if (!bprm) goto out; value = bprm->argc; break; case TOMOYO_EXEC_ENVC: if (!bprm) goto out; value = bprm->envc; break; case TOMOYO_NUMBER_UNION: /* Fetch values later. 
*/ break; default: if (!obj) goto out; if (!obj->validate_done) { tomoyo_get_attributes(obj); obj->validate_done = true; } { u8 stat_index; struct tomoyo_mini_stat *stat; switch (index) { case TOMOYO_PATH1_UID: case TOMOYO_PATH1_GID: case TOMOYO_PATH1_INO: case TOMOYO_PATH1_MAJOR: case TOMOYO_PATH1_MINOR: case TOMOYO_PATH1_TYPE: case TOMOYO_PATH1_DEV_MAJOR: case TOMOYO_PATH1_DEV_MINOR: case TOMOYO_PATH1_PERM: stat_index = TOMOYO_PATH1; break; case TOMOYO_PATH2_UID: case TOMOYO_PATH2_GID: case TOMOYO_PATH2_INO: case TOMOYO_PATH2_MAJOR: case TOMOYO_PATH2_MINOR: case TOMOYO_PATH2_TYPE: case TOMOYO_PATH2_DEV_MAJOR: case TOMOYO_PATH2_DEV_MINOR: case TOMOYO_PATH2_PERM: stat_index = TOMOYO_PATH2; break; case TOMOYO_PATH1_PARENT_UID: case TOMOYO_PATH1_PARENT_GID: case TOMOYO_PATH1_PARENT_INO: case TOMOYO_PATH1_PARENT_PERM: stat_index = TOMOYO_PATH1_PARENT; break; case TOMOYO_PATH2_PARENT_UID: case TOMOYO_PATH2_PARENT_GID: case TOMOYO_PATH2_PARENT_INO: case TOMOYO_PATH2_PARENT_PERM: stat_index = TOMOYO_PATH2_PARENT; break; default: goto out; } if (!obj->stat_valid[stat_index]) goto out; stat = &obj->stat[stat_index]; switch (index) { case TOMOYO_PATH1_UID: case TOMOYO_PATH2_UID: case TOMOYO_PATH1_PARENT_UID: case TOMOYO_PATH2_PARENT_UID: value = from_kuid(&init_user_ns, stat->uid); break; case TOMOYO_PATH1_GID: case TOMOYO_PATH2_GID: case TOMOYO_PATH1_PARENT_GID: case TOMOYO_PATH2_PARENT_GID: value = from_kgid(&init_user_ns, stat->gid); break; case TOMOYO_PATH1_INO: case TOMOYO_PATH2_INO: case TOMOYO_PATH1_PARENT_INO: case TOMOYO_PATH2_PARENT_INO: value = stat->ino; break; case TOMOYO_PATH1_MAJOR: case TOMOYO_PATH2_MAJOR: value = MAJOR(stat->dev); break; case TOMOYO_PATH1_MINOR: case TOMOYO_PATH2_MINOR: value = MINOR(stat->dev); break; case TOMOYO_PATH1_TYPE: case TOMOYO_PATH2_TYPE: value = stat->mode & S_IFMT; break; case TOMOYO_PATH1_DEV_MAJOR: case TOMOYO_PATH2_DEV_MAJOR: value = MAJOR(stat->rdev); break; case TOMOYO_PATH1_DEV_MINOR: case TOMOYO_PATH2_DEV_MINOR: value = MINOR(stat->rdev); break; case TOMOYO_PATH1_PERM: case TOMOYO_PATH2_PERM: case TOMOYO_PATH1_PARENT_PERM: case TOMOYO_PATH2_PARENT_PERM: value = stat->mode & S_IALLUGO; break; } } break; } max_v[j] = value; min_v[j] = value; switch (index) { case TOMOYO_MODE_SETUID: case TOMOYO_MODE_SETGID: case TOMOYO_MODE_STICKY: case TOMOYO_MODE_OWNER_READ: case TOMOYO_MODE_OWNER_WRITE: case TOMOYO_MODE_OWNER_EXECUTE: case TOMOYO_MODE_GROUP_READ: case TOMOYO_MODE_GROUP_WRITE: case TOMOYO_MODE_GROUP_EXECUTE: case TOMOYO_MODE_OTHERS_READ: case TOMOYO_MODE_OTHERS_WRITE: case TOMOYO_MODE_OTHERS_EXECUTE: is_bitop[j] = true; } } if (left == TOMOYO_NUMBER_UNION) { /* Fetch values now. */ const struct tomoyo_number_union *ptr = numbers_p++; min_v[0] = ptr->values[0]; max_v[0] = ptr->values[1]; } if (right == TOMOYO_NUMBER_UNION) { /* Fetch values now. */ const struct tomoyo_number_union *ptr = numbers_p++; if (ptr->group) { if (tomoyo_number_matches_group(min_v[0], max_v[0], ptr->group) == match) continue; } else { if ((min_v[0] <= ptr->values[1] && max_v[0] >= ptr->values[0]) == match) continue; } goto out; } /* * Bit operation is valid only when counterpart value * represents permission. 
*/ if (is_bitop[0] && is_bitop[1]) { goto out; } else if (is_bitop[0]) { switch (right) { case TOMOYO_PATH1_PERM: case TOMOYO_PATH1_PARENT_PERM: case TOMOYO_PATH2_PERM: case TOMOYO_PATH2_PARENT_PERM: if (!(max_v[0] & max_v[1]) == !match) continue; } goto out; } else if (is_bitop[1]) { switch (left) { case TOMOYO_PATH1_PERM: case TOMOYO_PATH1_PARENT_PERM: case TOMOYO_PATH2_PERM: case TOMOYO_PATH2_PARENT_PERM: if (!(max_v[0] & max_v[1]) == !match) continue; } goto out; } /* Normal value range comparison. */ if ((min_v[0] <= max_v[1] && max_v[0] >= min_v[1]) == match) continue; out: return false; } /* Check argv[] and envp[] now. */ if (r->ee && (argc || envc)) return tomoyo_scan_bprm(r->ee, argc, argv, envc, envp); return true; }
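The two-pass structure of tomoyo_get_condition() above is easy to miss in the flattened listing: a dry run only counts condc/numbers_count/names_count/argc/envc, a single kzalloc() is sized from those counts, the operator bytes that were overwritten with '\0' are restored, and a rerun fills the flat arrays. The sketch below is a minimal userspace illustration of that count-allocate-rerun idea only; the "name=value" token format and struct cond are invented for the example and are not TOMOYO's.

/* Illustrative only: the dry-run/rerun pattern used by tomoyo_get_condition().
 * Pass 1 counts "name=value" blocks, one allocation is sized from that count,
 * and pass 2 fills the flat array in place. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cond { char name[32]; char value[32]; };

static int parse(const char *line, struct cond *out)
{
    int n = 0;
    char *copy = strdup(line);
    char *tok = strtok(copy, " ");

    while (tok) {
        char *eq = strchr(tok, '=');

        if (eq) {
            if (out) {
                snprintf(out[n].name, sizeof(out[n].name), "%.*s",
                         (int)(eq - tok), tok);
                snprintf(out[n].value, sizeof(out[n].value), "%s", eq + 1);
            }
            n++;
        }
        tok = strtok(NULL, " ");
    }
    free(copy);
    return n;
}

int main(void)
{
    const char *line = "task.uid=0 exec.argc=2 path1.perm=0755";
    int count = parse(line, NULL);               /* dry run: count only */
    struct cond *conds = calloc(count, sizeof(*conds));

    parse(line, conds);                          /* rerun: fill the array */
    for (int i = 0; i < count; i++)
        printf("%s -> %s\n", conds[i].name, conds[i].value);
    free(conds);
    return 0;
}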
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/usb/uvc.h>
#include <linux/videodev2.h>

/* ------------------------------------------------------------------------
 * Video formats
 */

static const struct uvc_format_desc uvc_fmts[] = {
    { .guid = UVC_GUID_FORMAT_YUY2, .fcc = V4L2_PIX_FMT_YUYV, },
    { .guid = UVC_GUID_FORMAT_YUY2_ISIGHT, .fcc = V4L2_PIX_FMT_YUYV, },
    { .guid = UVC_GUID_FORMAT_NV12, .fcc = V4L2_PIX_FMT_NV12, },
    { .guid = UVC_GUID_FORMAT_MJPEG, .fcc = V4L2_PIX_FMT_MJPEG, },
    { .guid = UVC_GUID_FORMAT_YV12, .fcc = V4L2_PIX_FMT_YVU420, },
    { .guid = UVC_GUID_FORMAT_I420, .fcc = V4L2_PIX_FMT_YUV420, },
    { .guid = UVC_GUID_FORMAT_M420, .fcc = V4L2_PIX_FMT_M420, },
    { .guid = UVC_GUID_FORMAT_UYVY, .fcc = V4L2_PIX_FMT_UYVY, },
    { .guid = UVC_GUID_FORMAT_Y800, .fcc = V4L2_PIX_FMT_GREY, },
    { .guid = UVC_GUID_FORMAT_Y8, .fcc = V4L2_PIX_FMT_GREY, },
    { .guid = UVC_GUID_FORMAT_D3DFMT_L8, .fcc = V4L2_PIX_FMT_GREY, },
    { .guid = UVC_GUID_FORMAT_KSMEDIA_L8_IR, .fcc = V4L2_PIX_FMT_GREY, },
    { .guid = UVC_GUID_FORMAT_Y10, .fcc = V4L2_PIX_FMT_Y10, },
    { .guid = UVC_GUID_FORMAT_Y12, .fcc = V4L2_PIX_FMT_Y12, },
    { .guid = UVC_GUID_FORMAT_Y16, .fcc = V4L2_PIX_FMT_Y16, },
    { .guid = UVC_GUID_FORMAT_BY8, .fcc = V4L2_PIX_FMT_SBGGR8, },
    { .guid = UVC_GUID_FORMAT_BA81, .fcc = V4L2_PIX_FMT_SBGGR8, },
    { .guid = UVC_GUID_FORMAT_GBRG, .fcc = V4L2_PIX_FMT_SGBRG8, },
    { .guid = UVC_GUID_FORMAT_GRBG, .fcc = V4L2_PIX_FMT_SGRBG8, },
    { .guid = UVC_GUID_FORMAT_RGGB, .fcc = V4L2_PIX_FMT_SRGGB8, },
    { .guid = UVC_GUID_FORMAT_RGBP, .fcc = V4L2_PIX_FMT_RGB565, },
    { .guid = UVC_GUID_FORMAT_D3DFMT_R5G6B5, .fcc = V4L2_PIX_FMT_RGB565, },
    { .guid = UVC_GUID_FORMAT_BGR3, .fcc = V4L2_PIX_FMT_BGR24, },
    { .guid = UVC_GUID_FORMAT_BGR4, .fcc = V4L2_PIX_FMT_XBGR32, },
    { .guid = UVC_GUID_FORMAT_H264, .fcc = V4L2_PIX_FMT_H264, },
    { .guid = UVC_GUID_FORMAT_H265, .fcc = V4L2_PIX_FMT_HEVC, },
    { .guid = UVC_GUID_FORMAT_Y8I, .fcc = V4L2_PIX_FMT_Y8I, },
    { .guid = UVC_GUID_FORMAT_Y12I, .fcc = V4L2_PIX_FMT_Y12I, },
    { .guid = UVC_GUID_FORMAT_Y16I, .fcc = V4L2_PIX_FMT_Y16I, },
    { .guid = UVC_GUID_FORMAT_Z16, .fcc = V4L2_PIX_FMT_Z16, },
    { .guid = UVC_GUID_FORMAT_RW10, .fcc = V4L2_PIX_FMT_SRGGB10P, },
    { .guid = UVC_GUID_FORMAT_BG16, .fcc = V4L2_PIX_FMT_SBGGR16, },
    { .guid = UVC_GUID_FORMAT_GB16, .fcc = V4L2_PIX_FMT_SGBRG16, },
    { .guid = UVC_GUID_FORMAT_RG16, .fcc = V4L2_PIX_FMT_SRGGB16, },
    { .guid = UVC_GUID_FORMAT_GR16, .fcc = V4L2_PIX_FMT_SGRBG16, },
    { .guid = UVC_GUID_FORMAT_INVZ, .fcc = V4L2_PIX_FMT_Z16, },
    { .guid = UVC_GUID_FORMAT_INVI, .fcc = V4L2_PIX_FMT_Y10, },
    { .guid = UVC_GUID_FORMAT_INZI, .fcc = V4L2_PIX_FMT_INZI, },
    { .guid = UVC_GUID_FORMAT_CNF4, .fcc = V4L2_PIX_FMT_CNF4, },
    { .guid = UVC_GUID_FORMAT_HEVC, .fcc = V4L2_PIX_FMT_HEVC, },
};

const struct uvc_format_desc
*uvc_format_by_guid(const u8 guid[16]) { unsigned int len = ARRAY_SIZE(uvc_fmts); unsigned int i; for (i = 0; i < len; ++i) { if (memcmp(guid, uvc_fmts[i].guid, 16) == 0) return &uvc_fmts[i]; } return NULL; } EXPORT_SYMBOL_GPL(uvc_format_by_guid); MODULE_DESCRIPTION("USB Video Class common code"); MODULE_LICENSE("GPL");
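uvc_format_by_guid() above is a plain linear scan that maps a 16-byte UVC stream GUID onto a V4L2 fourcc. The userspace sketch below mirrors that lookup with a two-entry table of its own; the GUID bytes and the FOURCC() helper are illustrative stand-ins for the kernel's UVC_GUID_FORMAT_* and V4L2_PIX_FMT_* definitions, not copies of them.

/* Illustrative only: the same linear GUID -> fourcc lookup in userspace,
 * using a made-up table that follows the common UVC GUID layout (the first
 * four bytes spell the format name, the rest is a fixed suffix). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FOURCC(a, b, c, d) \
    ((uint32_t)(a) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))

struct fmt_desc {
    uint8_t guid[16];
    uint32_t fcc;
};

static const struct fmt_desc fmts[] = {
    { { 'Y', 'U', 'Y', '2', 0x00, 0x00, 0x10, 0x00,
        0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71 }, FOURCC('Y', 'U', 'Y', 'V') },
    { { 'N', 'V', '1', '2', 0x00, 0x00, 0x10, 0x00,
        0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71 }, FOURCC('N', 'V', '1', '2') },
};

static const struct fmt_desc *format_by_guid(const uint8_t guid[16])
{
    for (size_t i = 0; i < sizeof(fmts) / sizeof(fmts[0]); i++)
        if (!memcmp(guid, fmts[i].guid, 16))
            return &fmts[i];
    return NULL;    /* unknown GUID: the driver skips such formats */
}

int main(void)
{
    const struct fmt_desc *d = format_by_guid(fmts[1].guid);

    if (d)
        printf("fourcc: %.4s\n", (const char *)&d->fcc);
    return 0;
}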
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * OSS compatible sequencer driver
 *
 * Timer control routines
 *
 * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de>
 */

#include "seq_oss_timer.h"
#include "seq_oss_event.h"
#include <sound/seq_oss_legacy.h>
#include <linux/slab.h>

/*
 */
#define MIN_OSS_TEMPO 8
#define MAX_OSS_TEMPO 360
#define MIN_OSS_TIMEBASE 1
#define MAX_OSS_TIMEBASE 1000

/*
 */
static void calc_alsa_tempo(struct seq_oss_timer *timer);
static int send_timer_event(struct seq_oss_devinfo *dp, int type, int value);

/*
 * create and register a new timer.
 * if queue is not started yet, start it.
 */
struct seq_oss_timer *
snd_seq_oss_timer_new(struct seq_oss_devinfo *dp)
{
    struct seq_oss_timer *rec;

    rec = kzalloc(sizeof(*rec), GFP_KERNEL);
    if (rec == NULL)
        return NULL;

    rec->dp = dp;
    rec->cur_tick = 0;
    rec->realtime = 0;
    rec->running = 0;
    rec->oss_tempo = 60;
    rec->oss_timebase = 100;
    calc_alsa_tempo(rec);

    return rec;
}

/*
 * delete timer.
 * if no more timer exists, stop the queue.
*/ void snd_seq_oss_timer_delete(struct seq_oss_timer *rec) { if (rec) { snd_seq_oss_timer_stop(rec); kfree(rec); } } /* * process one timing event * return 1 : event proceseed -- skip this event * 0 : not a timer event -- enqueue this event */ int snd_seq_oss_process_timer_event(struct seq_oss_timer *rec, union evrec *ev) { abstime_t parm = ev->t.time; if (ev->t.code == EV_TIMING) { switch (ev->t.cmd) { case TMR_WAIT_REL: parm += rec->cur_tick; rec->realtime = 0; fallthrough; case TMR_WAIT_ABS: if (parm == 0) { rec->realtime = 1; } else if (parm >= rec->cur_tick) { rec->realtime = 0; rec->cur_tick = parm; } return 1; /* skip this event */ case TMR_START: snd_seq_oss_timer_start(rec); return 1; } } else if (ev->s.code == SEQ_WAIT) { /* time = from 1 to 3 bytes */ parm = (ev->echo >> 8) & 0xffffff; if (parm > rec->cur_tick) { /* set next event time */ rec->cur_tick = parm; rec->realtime = 0; } return 1; } return 0; } /* * convert tempo units */ static void calc_alsa_tempo(struct seq_oss_timer *timer) { timer->tempo = (60 * 1000000) / timer->oss_tempo; timer->ppq = timer->oss_timebase; } /* * dispatch a timer event */ static int send_timer_event(struct seq_oss_devinfo *dp, int type, int value) { struct snd_seq_event ev; memset(&ev, 0, sizeof(ev)); ev.type = type; ev.source.client = dp->cseq; ev.source.port = 0; ev.dest.client = SNDRV_SEQ_CLIENT_SYSTEM; ev.dest.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; ev.queue = dp->queue; ev.data.queue.queue = dp->queue; ev.data.queue.param.value = value; return snd_seq_kernel_client_dispatch(dp->cseq, &ev, 1, 0); } /* * set queue tempo and start queue */ int snd_seq_oss_timer_start(struct seq_oss_timer *timer) { struct seq_oss_devinfo *dp = timer->dp; struct snd_seq_queue_tempo tmprec; if (timer->running) snd_seq_oss_timer_stop(timer); memset(&tmprec, 0, sizeof(tmprec)); tmprec.queue = dp->queue; tmprec.ppq = timer->ppq; tmprec.tempo = timer->tempo; snd_seq_set_queue_tempo(dp->cseq, &tmprec); send_timer_event(dp, SNDRV_SEQ_EVENT_START, 0); timer->running = 1; timer->cur_tick = 0; return 0; } /* * stop queue */ int snd_seq_oss_timer_stop(struct seq_oss_timer *timer) { if (! timer->running) return 0; send_timer_event(timer->dp, SNDRV_SEQ_EVENT_STOP, 0); timer->running = 0; return 0; } /* * continue queue */ int snd_seq_oss_timer_continue(struct seq_oss_timer *timer) { if (timer->running) return 0; send_timer_event(timer->dp, SNDRV_SEQ_EVENT_CONTINUE, 0); timer->running = 1; return 0; } /* * change queue tempo */ int snd_seq_oss_timer_tempo(struct seq_oss_timer *timer, int value) { if (value < MIN_OSS_TEMPO) value = MIN_OSS_TEMPO; else if (value > MAX_OSS_TEMPO) value = MAX_OSS_TEMPO; timer->oss_tempo = value; calc_alsa_tempo(timer); if (timer->running) send_timer_event(timer->dp, SNDRV_SEQ_EVENT_TEMPO, timer->tempo); return 0; } /* * ioctls */ int snd_seq_oss_timer_ioctl(struct seq_oss_timer *timer, unsigned int cmd, int __user *arg) { int value; if (cmd == SNDCTL_SEQ_CTRLRATE) { /* if *arg == 0, just return the current rate */ if (get_user(value, arg)) return -EFAULT; if (value) return -EINVAL; value = ((timer->oss_tempo * timer->oss_timebase) + 30) / 60; return put_user(value, arg) ? 
-EFAULT : 0; } if (timer->dp->seq_mode == SNDRV_SEQ_OSS_MODE_SYNTH) return 0; switch (cmd) { case SNDCTL_TMR_START: return snd_seq_oss_timer_start(timer); case SNDCTL_TMR_STOP: return snd_seq_oss_timer_stop(timer); case SNDCTL_TMR_CONTINUE: return snd_seq_oss_timer_continue(timer); case SNDCTL_TMR_TEMPO: if (get_user(value, arg)) return -EFAULT; return snd_seq_oss_timer_tempo(timer, value); case SNDCTL_TMR_TIMEBASE: if (get_user(value, arg)) return -EFAULT; if (value < MIN_OSS_TIMEBASE) value = MIN_OSS_TIMEBASE; else if (value > MAX_OSS_TIMEBASE) value = MAX_OSS_TIMEBASE; timer->oss_timebase = value; calc_alsa_tempo(timer); return 0; case SNDCTL_TMR_METRONOME: case SNDCTL_TMR_SELECT: case SNDCTL_TMR_SOURCE: /* not supported */ return 0; } return 0; }
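The timer arithmetic above reduces to two formulas: calc_alsa_tempo() turns the OSS tempo (BPM) into microseconds per quarter note and copies the OSS timebase into ppq, while SNDCTL_SEQ_CTRLRATE reports (oss_tempo * oss_timebase + 30) / 60 ticks per second. Below is a minimal standalone check of those two expressions using the driver's defaults (tempo 60, timebase 100); it is a worked example, not driver code.

/* Illustrative only: the two conversions done above, computed in userspace. */
#include <stdio.h>

int main(void)
{
    int oss_tempo = 60;     /* BPM, default set in snd_seq_oss_timer_new() */
    int oss_timebase = 100; /* ticks per quarter note */

    int alsa_tempo = (60 * 1000000) / oss_tempo;            /* 1000000 us per quarter */
    int ppq = oss_timebase;                                 /* 100 */
    int ctrlrate = ((oss_tempo * oss_timebase) + 30) / 60;  /* 100 ticks per second */

    printf("tempo=%d us/quarter ppq=%d rate=%d Hz\n", alsa_tempo, ppq, ctrlrate);
    return 0;
}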
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IP Payload Compression Protocol (IPComp) - RFC3173.
 *
 * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
 *
 * Todo:
 *   - Tunable compression parameters.
 *   - Compression stats.
 *   - Adaptive compression.
 */
#include <linux/module.h>
#include <linux/err.h>
#include <linux/rtnetlink.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/icmp.h>
#include <net/ipcomp.h>
#include <net/protocol.h>
#include <net/sock.h>

static int ipcomp4_err(struct sk_buff *skb, u32 info)
{
    struct net *net = dev_net(skb->dev);
    __be32 spi;
    const struct iphdr *iph = (const struct iphdr *)skb->data;
    struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
    struct xfrm_state *x;

    switch (icmp_hdr(skb)->type) {
    case ICMP_DEST_UNREACH:
        if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
            return 0;
        break;
    case ICMP_REDIRECT:
        break;
    default:
        return 0;
    }

    spi = htonl(ntohs(ipch->cpi));
    x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
                          spi, IPPROTO_COMP, AF_INET);
    if (!x)
        return 0;

    if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
        ipv4_update_pmtu(skb, net, info, 0, IPPROTO_COMP);
    else
        ipv4_redirect(skb, net, 0, IPPROTO_COMP);
    xfrm_state_put(x);

    return 0;
}

/* We always hold one tunnel user reference to indicate a tunnel */
static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
{
    struct net *net = xs_net(x);
    struct xfrm_state *t;

    t = xfrm_state_alloc(net);
    if (!t)
        goto out;

    t->id.proto = IPPROTO_IPIP;
    t->id.spi = x->props.saddr.a4;
    t->id.daddr.a4 = x->id.daddr.a4;
    memcpy(&t->sel, &x->sel, sizeof(t->sel));
    t->props.family = AF_INET;
    t->props.mode = x->props.mode;
    t->props.saddr.a4 = x->props.saddr.a4;
    t->props.flags = x->props.flags;
    t->props.extra_flags = x->props.extra_flags;
    memcpy(&t->mark, &x->mark, sizeof(t->mark));
    t->if_id = x->if_id;

    if (xfrm_init_state(t))
        goto error;

    atomic_set(&t->tunnel_users, 1);
out:
    return t;

error:
    t->km.state = XFRM_STATE_DEAD;
    xfrm_state_put(t);
    t = NULL;
    goto out;
}

/*
 * Must be protected by xfrm_cfg_mutex.  State and tunnel user references are
 * always incremented on success.
*/ static int ipcomp_tunnel_attach(struct xfrm_state *x) { struct net *net = xs_net(x); int err = 0; struct xfrm_state *t; u32 mark = x->mark.v & x->mark.m; t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); if (!t) { err = -EINVAL; goto out; } xfrm_state_insert(t); xfrm_state_hold(t); } x->tunnel = t; atomic_inc(&t->tunnel_users); out: return err; } static int ipcomp4_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct iphdr); break; default: NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) { NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; } } err = 0; out: return err; } static int ipcomp4_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type ipcomp_type = { .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp4_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output }; static struct xfrm4_protocol ipcomp4_protocol = { .handler = xfrm4_rcv, .input_handler = xfrm_input, .cb_handler = ipcomp4_rcv_cb, .err_handler = ipcomp4_err, .priority = 0, }; static int __init ipcomp4_init(void) { if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm4_protocol_register(&ipcomp4_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp_type, AF_INET); return -EAGAIN; } return 0; } static void __exit ipcomp4_fini(void) { if (xfrm4_protocol_deregister(&ipcomp4_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&ipcomp_type, AF_INET); } module_init(ipcomp4_init); module_exit(ipcomp4_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp/IPv4) - RFC3173"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);
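ipcomp4_err() above recovers the 16-bit IPComp CPI from the header and widens it to the 32-bit SPI used for the xfrm state lookup with htonl(ntohs(ipch->cpi)). The snippet below only walks through that byte-order round trip for one sample value; CPI 2 (DEFLATE) is taken from RFC 3173's well-known CPI range and is not pulled from any packet.

/* Illustrative only: widening a wire-order 16-bit CPI to the 32-bit SPI form
 * used by the state lookup above. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t cpi_wire = htons(0x0002);     /* CPI 2 (DEFLATE) as it sits in the header */
    uint32_t spi = htonl(ntohs(cpi_wire)); /* same value, network order, 32 bits wide */

    printf("cpi=0x%04x spi=0x%08x (host: %u)\n",
           ntohs(cpi_wire), spi, ntohl(spi));
    return 0;
}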
// SPDX-License-Identifier: GPL-2.0
/*
 * attribute_container.c - implementation of a simple container for classes
 *
 * Copyright (c) 2005 - James Bottomley <James.Bottomley@steeleye.com>
 *
 * The basic idea here is to enable a device to be attached to an
 * arbitrary number of classes without having to allocate storage for them.
 * Instead, the contained classes select the devices they need to attach
 * to via a matching function.
 */

#include <linux/attribute_container.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>

#include "base.h"

/* This is a private structure used to tie the classdev and the
 * container .. it should never be visible outside this file */
struct internal_container {
    struct klist_node node;
    struct attribute_container *cont;
    struct device classdev;
};

static void internal_container_klist_get(struct klist_node *n)
{
    struct internal_container *ic =
        container_of(n, struct internal_container, node);
    get_device(&ic->classdev);
}

static void internal_container_klist_put(struct klist_node *n)
{
    struct internal_container *ic =
        container_of(n, struct internal_container, node);
    put_device(&ic->classdev);
}

/**
 * attribute_container_classdev_to_container - given a classdev, return the container
 *
 * @classdev: the class device created by attribute_container_add_device.
 *
 * Returns the container associated with this classdev.
*/ struct attribute_container * attribute_container_classdev_to_container(struct device *classdev) { struct internal_container *ic = container_of(classdev, struct internal_container, classdev); return ic->cont; } EXPORT_SYMBOL_GPL(attribute_container_classdev_to_container); static LIST_HEAD(attribute_container_list); static DEFINE_MUTEX(attribute_container_mutex); /** * attribute_container_register - register an attribute container * * @cont: The container to register. This must be allocated by the * callee and should also be zeroed by it. */ int attribute_container_register(struct attribute_container *cont) { INIT_LIST_HEAD(&cont->node); klist_init(&cont->containers, internal_container_klist_get, internal_container_klist_put); mutex_lock(&attribute_container_mutex); list_add_tail(&cont->node, &attribute_container_list); mutex_unlock(&attribute_container_mutex); return 0; } EXPORT_SYMBOL_GPL(attribute_container_register); /** * attribute_container_unregister - remove a container registration * * @cont: previously registered container to remove */ int attribute_container_unregister(struct attribute_container *cont) { int retval = -EBUSY; mutex_lock(&attribute_container_mutex); spin_lock(&cont->containers.k_lock); if (!list_empty(&cont->containers.k_list)) goto out; retval = 0; list_del(&cont->node); out: spin_unlock(&cont->containers.k_lock); mutex_unlock(&attribute_container_mutex); return retval; } EXPORT_SYMBOL_GPL(attribute_container_unregister); /* private function used as class release */ static void attribute_container_release(struct device *classdev) { struct internal_container *ic = container_of(classdev, struct internal_container, classdev); struct device *dev = classdev->parent; kfree(ic); put_device(dev); } /** * attribute_container_add_device - see if any container is interested in dev * * @dev: device to add attributes to * @fn: function to trigger addition of class device. * * This function allocates storage for the class device(s) to be * attached to dev (one for each matching attribute_container). If no * fn is provided, the code will simply register the class device via * device_add. If a function is provided, it is expected to add * the class device at the appropriate time. One of the things that * might be necessary is to allocate and initialise the classdev and * then add it a later time. To do this, call this routine for * allocation and initialisation and then use * attribute_container_device_trigger() to call device_add() on * it. Note: after this, the class device contains a reference to dev * which is not relinquished until the release of the classdev. 
*/ void attribute_container_add_device(struct device *dev, int (*fn)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { struct internal_container *ic; if (attribute_container_no_classdevs(cont)) continue; if (!cont->match(cont, dev)) continue; ic = kzalloc(sizeof(*ic), GFP_KERNEL); if (!ic) { dev_err(dev, "failed to allocate class container\n"); continue; } ic->cont = cont; device_initialize(&ic->classdev); ic->classdev.parent = get_device(dev); ic->classdev.class = cont->class; cont->class->dev_release = attribute_container_release; dev_set_name(&ic->classdev, "%s", dev_name(dev)); if (fn) fn(cont, dev, &ic->classdev); else attribute_container_add_class_device(&ic->classdev); klist_add_tail(&ic->node, &cont->containers); } mutex_unlock(&attribute_container_mutex); } /* FIXME: can't break out of this unless klist_iter_exit is also * called before doing the break */ #define klist_for_each_entry(pos, head, member, iter) \ for (klist_iter_init(head, iter); (pos = ({ \ struct klist_node *n = klist_next(iter); \ n ? container_of(n, typeof(*pos), member) : \ ({ klist_iter_exit(iter) ; NULL; }); \ })) != NULL;) /** * attribute_container_remove_device - make device eligible for removal. * * @dev: The generic device * @fn: A function to call to remove the device * * This routine triggers device removal. If fn is NULL, then it is * simply done via device_unregister (note that if something * still has a reference to the classdev, then the memory occupied * will not be freed until the classdev is released). If you want a * two phase release: remove from visibility and then delete the * device, then you should use this routine with a fn that calls * device_del() and then use attribute_container_device_trigger() * to do the final put on the classdev. */ void attribute_container_remove_device(struct device *dev, void (*fn)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { struct internal_container *ic; struct klist_iter iter; if (attribute_container_no_classdevs(cont)) continue; if (!cont->match(cont, dev)) continue; klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev != ic->classdev.parent) continue; klist_del(&ic->node); if (fn) fn(cont, dev, &ic->classdev); else { attribute_container_remove_attrs(&ic->classdev); device_unregister(&ic->classdev); } } } mutex_unlock(&attribute_container_mutex); } static int do_attribute_container_device_trigger_safe(struct device *dev, struct attribute_container *cont, int (*fn)(struct attribute_container *, struct device *, struct device *), int (*undo)(struct attribute_container *, struct device *, struct device *)) { int ret; struct internal_container *ic, *failed; struct klist_iter iter; if (attribute_container_no_classdevs(cont)) return fn(cont, dev, NULL); klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev == ic->classdev.parent) { ret = fn(cont, dev, &ic->classdev); if (ret) { failed = ic; klist_iter_exit(&iter); goto fail; } } } return 0; fail: if (!undo) return ret; /* Attempt to undo the work partially done. 
*/ klist_for_each_entry(ic, &cont->containers, node, &iter) { if (ic == failed) { klist_iter_exit(&iter); break; } if (dev == ic->classdev.parent) undo(cont, dev, &ic->classdev); } return ret; } /** * attribute_container_device_trigger_safe - execute a trigger for each * matching classdev or fail all of them. * * @dev: The generic device to run the trigger for * @fn: the function to execute for each classdev. * @undo: A function to undo the work previously done in case of error * * This function is a safe version of * attribute_container_device_trigger. It stops on the first error and * undo the partial work that has been done, on previous classdev. It * is guaranteed that either they all succeeded, or none of them * succeeded. */ int attribute_container_device_trigger_safe(struct device *dev, int (*fn)(struct attribute_container *, struct device *, struct device *), int (*undo)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont, *failed = NULL; int ret = 0; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { if (!cont->match(cont, dev)) continue; ret = do_attribute_container_device_trigger_safe(dev, cont, fn, undo); if (ret) { failed = cont; break; } } if (ret && !WARN_ON(!undo)) { list_for_each_entry(cont, &attribute_container_list, node) { if (failed == cont) break; if (!cont->match(cont, dev)) continue; do_attribute_container_device_trigger_safe(dev, cont, undo, NULL); } } mutex_unlock(&attribute_container_mutex); return ret; } /** * attribute_container_device_trigger - execute a trigger for each matching classdev * * @dev: The generic device to run the trigger for * @fn: the function to execute for each classdev. * * This function is for executing a trigger when you need to know both * the container and the classdev. 
*/ void attribute_container_device_trigger(struct device *dev, int (*fn)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { struct internal_container *ic; struct klist_iter iter; if (!cont->match(cont, dev)) continue; if (attribute_container_no_classdevs(cont)) { fn(cont, dev, NULL); continue; } klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev == ic->classdev.parent) fn(cont, dev, &ic->classdev); } } mutex_unlock(&attribute_container_mutex); } /** * attribute_container_add_attrs - add attributes * * @classdev: The class device * * This simply creates all the class device sysfs files from the * attributes listed in the container */ int attribute_container_add_attrs(struct device *classdev) { struct attribute_container *cont = attribute_container_classdev_to_container(classdev); struct device_attribute **attrs = cont->attrs; int i, error; BUG_ON(attrs && cont->grp); if (!attrs && !cont->grp) return 0; if (cont->grp) return sysfs_create_group(&classdev->kobj, cont->grp); for (i = 0; attrs[i]; i++) { sysfs_attr_init(&attrs[i]->attr); error = device_create_file(classdev, attrs[i]); if (error) return error; } return 0; } /** * attribute_container_add_class_device - same function as device_add * * @classdev: the class device to add * * This performs essentially the same function as device_add except for * attribute containers, namely add the classdev to the system and then * create the attribute files */ int attribute_container_add_class_device(struct device *classdev) { int error = device_add(classdev); if (error) return error; return attribute_container_add_attrs(classdev); } /** * attribute_container_remove_attrs - remove any attribute files * * @classdev: The class device to remove the files from * */ void attribute_container_remove_attrs(struct device *classdev) { struct attribute_container *cont = attribute_container_classdev_to_container(classdev); struct device_attribute **attrs = cont->attrs; int i; if (!attrs && !cont->grp) return; if (cont->grp) { sysfs_remove_group(&classdev->kobj, cont->grp); return ; } for (i = 0; attrs[i]; i++) device_remove_file(classdev, attrs[i]); } /** * attribute_container_class_device_del - equivalent of class_device_del * * @classdev: the class device * * This function simply removes all the attribute files and then calls * device_del. */ void attribute_container_class_device_del(struct device *classdev) { attribute_container_remove_attrs(classdev); device_del(classdev); } /** * attribute_container_find_class_device - find the corresponding class_device * * @cont: the container * @dev: the generic device * * Looks up the device in the container's list of class devices and returns * the corresponding class_device. */ struct device * attribute_container_find_class_device(struct attribute_container *cont, struct device *dev) { struct device *cdev = NULL; struct internal_container *ic; struct klist_iter iter; klist_for_each_entry(ic, &cont->containers, node, &iter) { if (ic->classdev.parent == dev) { cdev = &ic->classdev; /* FIXME: must exit iterator then break */ klist_iter_exit(&iter); break; } } return cdev; } EXPORT_SYMBOL_GPL(attribute_container_find_class_device);
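Typical consumers of this file register a container with a match() callback and then call attribute_container_add_device()/attribute_container_remove_device() from their bus hooks; the SCSI transport classes wrap exactly this pattern. The module-style sketch below is only an assumed minimal wiring: the class name, the bus-name match rule, and the init/exit pairing are illustrative and not taken from any in-tree user.

/* Illustrative only: a hypothetical subsystem registering one container. */
#include <linux/attribute_container.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>

static struct class demo_class = {
    .name = "demo_transport",
};

/* Claim every device whose bus is named "demo" (made-up rule). */
static int demo_match(struct attribute_container *cont, struct device *dev)
{
    return dev->bus && !strcmp(dev->bus->name, "demo");
}

static struct attribute_container demo_container = {
    .class = &demo_class,
    .match = demo_match,
};

static int __init demo_init(void)
{
    int err = class_register(&demo_class);

    if (err)
        return err;
    /* A bus probe path would later call attribute_container_add_device(dev, NULL)
     * and the remove path attribute_container_remove_device(dev, NULL). */
    return attribute_container_register(&demo_container);
}

static void __exit demo_exit(void)
{
    attribute_container_unregister(&demo_container);
    class_unregister(&demo_class);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");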
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Politecnico di Torino, Italy
 * TORSEC group -- https://security.polito.it
 *
 * Author: Roberto Sassu <roberto.sassu@polito.it>
 *
 * File: ima_template.c
 * Helpers to manage template descriptors.
*/ #include <linux/rculist.h> #include "ima.h" #include "ima_template_lib.h" enum header_fields { HDR_PCR, HDR_DIGEST, HDR_TEMPLATE_NAME, HDR_TEMPLATE_DATA, HDR__LAST }; static struct ima_template_desc builtin_templates[] = { {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT}, {.name = "ima-ng", .fmt = "d-ng|n-ng"}, {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"}, {.name = "ima-ngv2", .fmt = "d-ngv2|n-ng"}, {.name = "ima-sigv2", .fmt = "d-ngv2|n-ng|sig"}, {.name = "ima-buf", .fmt = "d-ng|n-ng|buf"}, {.name = "ima-modsig", .fmt = "d-ng|n-ng|sig|d-modsig|modsig"}, {.name = "evm-sig", .fmt = "d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode"}, {.name = "", .fmt = ""}, /* placeholder for a custom format */ }; static LIST_HEAD(defined_templates); static DEFINE_SPINLOCK(template_list); static int template_setup_done; static const struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, .field_show = ima_show_template_digest}, {.field_id = "n", .field_init = ima_eventname_init, .field_show = ima_show_template_string}, {.field_id = "d-ng", .field_init = ima_eventdigest_ng_init, .field_show = ima_show_template_digest_ng}, {.field_id = "d-ngv2", .field_init = ima_eventdigest_ngv2_init, .field_show = ima_show_template_digest_ngv2}, {.field_id = "n-ng", .field_init = ima_eventname_ng_init, .field_show = ima_show_template_string}, {.field_id = "sig", .field_init = ima_eventsig_init, .field_show = ima_show_template_sig}, {.field_id = "buf", .field_init = ima_eventbuf_init, .field_show = ima_show_template_buf}, {.field_id = "d-modsig", .field_init = ima_eventdigest_modsig_init, .field_show = ima_show_template_digest_ng}, {.field_id = "modsig", .field_init = ima_eventmodsig_init, .field_show = ima_show_template_sig}, {.field_id = "evmsig", .field_init = ima_eventevmsig_init, .field_show = ima_show_template_sig}, {.field_id = "iuid", .field_init = ima_eventinodeuid_init, .field_show = ima_show_template_uint}, {.field_id = "igid", .field_init = ima_eventinodegid_init, .field_show = ima_show_template_uint}, {.field_id = "imode", .field_init = ima_eventinodemode_init, .field_show = ima_show_template_uint}, {.field_id = "xattrnames", .field_init = ima_eventinodexattrnames_init, .field_show = ima_show_template_string}, {.field_id = "xattrlengths", .field_init = ima_eventinodexattrlengths_init, .field_show = ima_show_template_sig}, {.field_id = "xattrvalues", .field_init = ima_eventinodexattrvalues_init, .field_show = ima_show_template_sig}, }; /* * Used when restoring measurements carried over from a kexec. 'd' and 'n' don't * need to be accounted for since they shouldn't be defined in the same template * description as 'd-ng' and 'n-ng' respectively. */ #define MAX_TEMPLATE_NAME_LEN \ sizeof("d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode") static struct ima_template_desc *ima_template; static struct ima_template_desc *ima_buf_template; /** * ima_template_has_modsig - Check whether template has modsig-related fields. * @ima_template: IMA template to check. * * Tells whether the given template has fields referencing a file's appended * signature. 
*/ bool ima_template_has_modsig(const struct ima_template_desc *ima_template) { int i; for (i = 0; i < ima_template->num_fields; i++) if (!strcmp(ima_template->fields[i]->field_id, "modsig") || !strcmp(ima_template->fields[i]->field_id, "d-modsig")) return true; return false; } static int __init ima_template_setup(char *str) { struct ima_template_desc *template_desc; int template_len = strlen(str); if (template_setup_done) return 1; if (!ima_template) ima_init_template_list(); /* * Verify that a template with the supplied name exists. * If not, use CONFIG_IMA_DEFAULT_TEMPLATE. */ template_desc = lookup_template_desc(str); if (!template_desc) { pr_err("template %s not found, using %s\n", str, CONFIG_IMA_DEFAULT_TEMPLATE); return 1; } /* * Verify whether the current hash algorithm is supported * by the 'ima' template. */ if (template_len == 3 && strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0 && ima_hash_algo != HASH_ALGO_SHA1 && ima_hash_algo != HASH_ALGO_MD5) { pr_err("template does not support hash alg\n"); return 1; } ima_template = template_desc; template_setup_done = 1; return 1; } __setup("ima_template=", ima_template_setup); static int __init ima_template_fmt_setup(char *str) { int num_templates = ARRAY_SIZE(builtin_templates); if (template_setup_done) return 1; if (template_desc_init_fields(str, NULL, NULL) < 0) { pr_err("format string '%s' not valid, using template %s\n", str, CONFIG_IMA_DEFAULT_TEMPLATE); return 1; } builtin_templates[num_templates - 1].fmt = str; ima_template = builtin_templates + num_templates - 1; template_setup_done = 1; return 1; } __setup("ima_template_fmt=", ima_template_fmt_setup); struct ima_template_desc *lookup_template_desc(const char *name) { struct ima_template_desc *template_desc; int found = 0; rcu_read_lock(); list_for_each_entry_rcu(template_desc, &defined_templates, list) { if ((strcmp(template_desc->name, name) == 0) || (strcmp(template_desc->fmt, name) == 0)) { found = 1; break; } } rcu_read_unlock(); return found ? template_desc : NULL; } static const struct ima_template_field * lookup_template_field(const char *field_id) { int i; for (i = 0; i < ARRAY_SIZE(supported_fields); i++) if (strncmp(supported_fields[i].field_id, field_id, IMA_TEMPLATE_FIELD_ID_MAX_LEN) == 0) return &supported_fields[i]; return NULL; } static int template_fmt_size(const char *template_fmt) { char c; int template_fmt_len = strlen(template_fmt); int i = 0, j = 0; while (i < template_fmt_len) { c = template_fmt[i]; if (c == '|') j++; i++; } return j + 1; } int template_desc_init_fields(const char *template_fmt, const struct ima_template_field ***fields, int *num_fields) { const char *template_fmt_ptr; const struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX]; int template_num_fields; int i, len; if (num_fields && *num_fields > 0) /* already initialized? 
*/ return 0; template_num_fields = template_fmt_size(template_fmt); if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) { pr_err("format string '%s' contains too many fields\n", template_fmt); return -EINVAL; } for (i = 0, template_fmt_ptr = template_fmt; i < template_num_fields; i++, template_fmt_ptr += len + 1) { char tmp_field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN + 1]; len = strchrnul(template_fmt_ptr, '|') - template_fmt_ptr; if (len == 0 || len > IMA_TEMPLATE_FIELD_ID_MAX_LEN) { pr_err("Invalid field with length %d\n", len); return -EINVAL; } memcpy(tmp_field_id, template_fmt_ptr, len); tmp_field_id[len] = '\0'; found_fields[i] = lookup_template_field(tmp_field_id); if (!found_fields[i]) { pr_err("field '%s' not found\n", tmp_field_id); return -ENOENT; } } if (fields && num_fields) { *fields = kmalloc_array(i, sizeof(**fields), GFP_KERNEL); if (*fields == NULL) return -ENOMEM; memcpy(*fields, found_fields, i * sizeof(**fields)); *num_fields = i; } return 0; } void ima_init_template_list(void) { int i; if (!list_empty(&defined_templates)) return; spin_lock(&template_list); for (i = 0; i < ARRAY_SIZE(builtin_templates); i++) { list_add_tail_rcu(&builtin_templates[i].list, &defined_templates); } spin_unlock(&template_list); } struct ima_template_desc *ima_template_desc_current(void) { if (!ima_template) { ima_init_template_list(); ima_template = lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE); } return ima_template; } struct ima_template_desc *ima_template_desc_buf(void) { if (!ima_buf_template) { ima_init_template_list(); ima_buf_template = lookup_template_desc("ima-buf"); } return ima_buf_template; } int __init ima_init_template(void) { struct ima_template_desc *template = ima_template_desc_current(); int result; result = template_desc_init_fields(template->fmt, &(template->fields), &(template->num_fields)); if (result < 0) { pr_err("template %s init failed, result: %d\n", (strlen(template->name) ? template->name : template->fmt), result); return result; } template = ima_template_desc_buf(); if (!template) { pr_err("Failed to get ima-buf template\n"); return -EINVAL; } result = template_desc_init_fields(template->fmt, &(template->fields), &(template->num_fields)); if (result < 0) pr_err("template %s init failed, result: %d\n", (strlen(template->name) ? 
template->name : template->fmt), result); return result; } static struct ima_template_desc *restore_template_fmt(char *template_name) { struct ima_template_desc *template_desc = NULL; int ret; ret = template_desc_init_fields(template_name, NULL, NULL); if (ret < 0) { pr_err("attempting to initialize the template \"%s\" failed\n", template_name); goto out; } template_desc = kzalloc(sizeof(*template_desc), GFP_KERNEL); if (!template_desc) goto out; template_desc->name = ""; template_desc->fmt = kstrdup(template_name, GFP_KERNEL); if (!template_desc->fmt) { kfree(template_desc); template_desc = NULL; goto out; } spin_lock(&template_list); list_add_tail_rcu(&template_desc->list, &defined_templates); spin_unlock(&template_list); out: return template_desc; } static int ima_restore_template_data(struct ima_template_desc *template_desc, void *template_data, int template_data_size, struct ima_template_entry **entry) { struct tpm_digest *digests; int ret = 0; int i; *entry = kzalloc(struct_size(*entry, template_data, template_desc->num_fields), GFP_NOFS); if (!*entry) return -ENOMEM; digests = kcalloc(NR_BANKS(ima_tpm_chip) + ima_extra_slots, sizeof(*digests), GFP_NOFS); if (!digests) { kfree(*entry); return -ENOMEM; } (*entry)->digests = digests; ret = ima_parse_buf(template_data, template_data + template_data_size, NULL, template_desc->num_fields, (*entry)->template_data, NULL, NULL, ENFORCE_FIELDS | ENFORCE_BUFEND, "template data"); if (ret < 0) { kfree((*entry)->digests); kfree(*entry); return ret; } (*entry)->template_desc = template_desc; for (i = 0; i < template_desc->num_fields; i++) { struct ima_field_data *field_data = &(*entry)->template_data[i]; u8 *data = field_data->data; (*entry)->template_data[i].data = kzalloc(field_data->len + 1, GFP_KERNEL); if (!(*entry)->template_data[i].data) { ret = -ENOMEM; break; } memcpy((*entry)->template_data[i].data, data, field_data->len); (*entry)->template_data_len += sizeof(field_data->len); (*entry)->template_data_len += field_data->len; } if (ret < 0) { ima_free_template_entry(*entry); *entry = NULL; } return ret; } /* Restore the serialized binary measurement list without extending PCRs. 
*/ int ima_restore_measurement_list(loff_t size, void *buf) { char template_name[MAX_TEMPLATE_NAME_LEN]; unsigned char zero[TPM_DIGEST_SIZE] = { 0 }; struct ima_kexec_hdr *khdr = buf; struct ima_field_data hdr[HDR__LAST] = { [HDR_PCR] = {.len = sizeof(u32)}, [HDR_DIGEST] = {.len = TPM_DIGEST_SIZE}, }; void *bufp = buf + sizeof(*khdr); void *bufendp; struct ima_template_entry *entry; struct ima_template_desc *template_desc; DECLARE_BITMAP(hdr_mask, HDR__LAST); unsigned long count = 0; int ret = 0; if (!buf || size < sizeof(*khdr)) return 0; if (ima_canonical_fmt) { khdr->version = le16_to_cpu((__force __le16)khdr->version); khdr->count = le64_to_cpu((__force __le64)khdr->count); khdr->buffer_size = le64_to_cpu((__force __le64)khdr->buffer_size); } if (khdr->version != 1) { pr_err("attempting to restore a incompatible measurement list"); return -EINVAL; } if (khdr->count > ULONG_MAX - 1) { pr_err("attempting to restore too many measurements"); return -EINVAL; } bitmap_zero(hdr_mask, HDR__LAST); bitmap_set(hdr_mask, HDR_PCR, 1); bitmap_set(hdr_mask, HDR_DIGEST, 1); /* * ima kexec buffer prefix: version, buffer size, count * v1 format: pcr, digest, template-name-len, template-name, * template-data-size, template-data */ bufendp = buf + khdr->buffer_size; while ((bufp < bufendp) && (count++ < khdr->count)) { int enforce_mask = ENFORCE_FIELDS; enforce_mask |= (count == khdr->count) ? ENFORCE_BUFEND : 0; ret = ima_parse_buf(bufp, bufendp, &bufp, HDR__LAST, hdr, NULL, hdr_mask, enforce_mask, "entry header"); if (ret < 0) break; if (hdr[HDR_TEMPLATE_NAME].len >= MAX_TEMPLATE_NAME_LEN) { pr_err("attempting to restore a template name that is too long\n"); ret = -EINVAL; break; } /* template name is not null terminated */ memcpy(template_name, hdr[HDR_TEMPLATE_NAME].data, hdr[HDR_TEMPLATE_NAME].len); template_name[hdr[HDR_TEMPLATE_NAME].len] = 0; if (strcmp(template_name, "ima") == 0) { pr_err("attempting to restore an unsupported template \"%s\" failed\n", template_name); ret = -EINVAL; break; } template_desc = lookup_template_desc(template_name); if (!template_desc) { template_desc = restore_template_fmt(template_name); if (!template_desc) break; } /* * Only the running system's template format is initialized * on boot. As needed, initialize the other template formats. */ ret = template_desc_init_fields(template_desc->fmt, &(template_desc->fields), &(template_desc->num_fields)); if (ret < 0) { pr_err("attempting to restore the template fmt \"%s\" failed\n", template_desc->fmt); ret = -EINVAL; break; } ret = ima_restore_template_data(template_desc, hdr[HDR_TEMPLATE_DATA].data, hdr[HDR_TEMPLATE_DATA].len, &entry); if (ret < 0) break; if (memcmp(hdr[HDR_DIGEST].data, zero, sizeof(zero))) { ret = ima_calc_field_array_hash( &entry->template_data[0], entry); if (ret < 0) { pr_err("cannot calculate template digest\n"); ret = -EINVAL; break; } } entry->pcr = !ima_canonical_fmt ? *(u32 *)(hdr[HDR_PCR].data) : le32_to_cpu(*(__le32 *)(hdr[HDR_PCR].data)); ret = ima_restore_measurement_entry(entry); if (ret < 0) break; } return ret; }
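The restore path above parses the kexec-carried measurement list: a small header with version, buffer size and entry count, followed by per-entry pcr, digest, template-name-len, template-name, template-data-size and template-data fields, exactly as the in-code comment describes. The following userspace sketch walks one such v1 entry; the 4-byte length prefixes, native-endian layout, 20-byte SHA-1 digest and every sketch_* name are illustrative assumptions for this sketch, not the kernel ABI.

/*
 * Minimal userspace sketch of walking one v1 entry of the kexec
 * measurement-list buffer.  Field order follows the comment in
 * ima_restore_measurement_list(); widths and endianness are assumed.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SKETCH_DIGEST_SIZE 20	/* assumed SHA-1 bank digest size */

static const uint8_t *sketch_parse_entry(const uint8_t *p, const uint8_t *end)
{
	uint32_t pcr, name_len, data_len;
	char name[64];

	if (end - p < 4 + SKETCH_DIGEST_SIZE + 4)
		return NULL;
	memcpy(&pcr, p, 4);			/* pcr */
	p += 4 + SKETCH_DIGEST_SIZE;		/* skip digest */
	memcpy(&name_len, p, 4);		/* template-name-len */
	p += 4;
	if (name_len >= sizeof(name) || end - p < name_len + 4)
		return NULL;
	memcpy(name, p, name_len);		/* template-name, not NUL terminated */
	name[name_len] = '\0';
	p += name_len;
	memcpy(&data_len, p, 4);		/* template-data-size */
	p += 4;
	if (end - p < data_len)
		return NULL;
	printf("pcr=%u template=%s data=%u bytes\n",
	       (unsigned int)pcr, name, (unsigned int)data_len);
	return p + data_len;			/* start of the next entry */
}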
// SPDX-License-Identifier: GPL-2.0-only
/*
 *	net/core/dst.c	Protocol independent destination cache.
 *
 * Authors:		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/net_namespace.h>
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>
#include <net/xfrm.h>
#include <net/dst.h>
#include <net/dst_metadata.h>

int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(dst_discard_out);

const struct dst_metrics dst_default_metrics = {
	/* This initializer is needed to force linker to place this variable
	 * into const section. Otherwise it might end into bss section.
	 * We really want to avoid false sharing on this variable, and catch
	 * any writes on it.
*/ .refcnt = REFCOUNT_INIT(1), }; EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags) { dst->dev = dev; netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif dst->input = dst_discard; dst->output = dst_discard_out; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; dst->trailer_len = 0; #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = 0; #endif dst->lwtstate = NULL; rcuref_init(&dst->__rcuref, 1); INIT_LIST_HEAD(&dst->rt_uncached); dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); } EXPORT_SYMBOL(dst_init); void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags) { struct dst_entry *dst; if (ops->gc && !(flags & DST_NOCOUNT) && dst_entries_get_fast(ops) > ops->gc_thresh) ops->gc(ops); dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; dst_init(dst, ops, dev, initial_obsolete, flags); return dst; } EXPORT_SYMBOL(dst_alloc); static void dst_destroy(struct dst_entry *dst) { struct dst_entry *child = NULL; smp_rmb(); #ifdef CONFIG_XFRM if (dst->xfrm) { struct xfrm_dst *xdst = (struct xfrm_dst *) dst; child = xdst->child; } #endif if (dst->ops->destroy) dst->ops->destroy(dst); netdev_put(dst->dev, &dst->dev_tracker); lwtstate_put(dst->lwtstate); if (dst->flags & DST_METADATA) metadata_dst_free((struct metadata_dst *)dst); else kmem_cache_free(dst->ops->kmem_cachep, dst); dst = child; if (dst) dst_release_immediate(dst); } static void dst_destroy_rcu(struct rcu_head *head) { struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); dst_destroy(dst); } /* Operations to mark dst as DEAD and clean up the net device referenced * by dst: * 1. put the dst under blackhole interface and discard all tx/rx packets * on this route. * 2. release the net_device * This function should be called when removing routes from the fib tree * in preparation for a NETDEV_DOWN/NETDEV_UNREGISTER event and also to * make the next dst_ops->check() fail. 
*/ void dst_dev_put(struct dst_entry *dst) { struct net_device *dev = dst->dev; dst->obsolete = DST_OBSOLETE_DEAD; if (dst->ops->ifdown) dst->ops->ifdown(dst, dev); dst->input = dst_discard; dst->output = dst_discard_out; dst->dev = blackhole_netdev; netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, GFP_ATOMIC); } EXPORT_SYMBOL(dst_dev_put); static void dst_count_dec(struct dst_entry *dst) { if (!(dst->flags & DST_NOCOUNT)) dst_entries_add(dst->ops, -1); } void dst_release(struct dst_entry *dst) { if (dst && rcuref_put(&dst->__rcuref)) { #ifdef CONFIG_DST_CACHE if (dst->flags & DST_METADATA) { struct metadata_dst *md_dst = (struct metadata_dst *)dst; if (md_dst->type == METADATA_IP_TUNNEL) dst_cache_reset_now(&md_dst->u.tun_info.dst_cache); } #endif dst_count_dec(dst); call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); void dst_release_immediate(struct dst_entry *dst) { if (dst && rcuref_put(&dst->__rcuref)) { dst_count_dec(dst); dst_destroy(dst); } } EXPORT_SYMBOL(dst_release_immediate); u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p) { struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; refcount_set(&p->refcnt, 1); memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { kfree(p); p = (struct dst_metrics *)__DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; } else if (prev & DST_METRICS_REFCOUNTED) { if (refcount_dec_and_test(&old_p->refcnt)) kfree(old_p); } } BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); return (u32 *)p; } EXPORT_SYMBOL(dst_cow_metrics_generic); /* Caller asserts that dst_metrics_read_only(dst) is false. */ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) { unsigned long prev, new; new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; prev = cmpxchg(&dst->_metrics, old, new); if (prev == old) kfree(__DST_METRICS_PTR(old)); } EXPORT_SYMBOL(__dst_destroy_metrics_generic); struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie) { return NULL; } u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old) { return NULL; } struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { return NULL; } void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { } EXPORT_SYMBOL_GPL(dst_blackhole_update_pmtu); void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { } EXPORT_SYMBOL_GPL(dst_blackhole_redirect); unsigned int dst_blackhole_mtu(const struct dst_entry *dst) { unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); return mtu ? 
: dst->dev->mtu; } EXPORT_SYMBOL_GPL(dst_blackhole_mtu); static struct dst_ops dst_blackhole_ops = { .family = AF_UNSPEC, .neigh_lookup = dst_blackhole_neigh_lookup, .check = dst_blackhole_check, .cow_metrics = dst_blackhole_cow_metrics, .update_pmtu = dst_blackhole_update_pmtu, .redirect = dst_blackhole_redirect, .mtu = dst_blackhole_mtu, }; static void __metadata_dst_init(struct metadata_dst *md_dst, enum metadata_type type, u8 optslen) { struct dst_entry *dst; dst = &md_dst->dst; dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE, DST_METADATA | DST_NOCOUNT); memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); md_dst->type = type; } struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags) { struct metadata_dst *md_dst; md_dst = kmalloc(struct_size(md_dst, u.tun_info.options, optslen), flags); if (!md_dst) return NULL; __metadata_dst_init(md_dst, type, optslen); return md_dst; } EXPORT_SYMBOL_GPL(metadata_dst_alloc); void metadata_dst_free(struct metadata_dst *md_dst) { #ifdef CONFIG_DST_CACHE if (md_dst->type == METADATA_IP_TUNNEL) dst_cache_destroy(&md_dst->u.tun_info.dst_cache); #endif if (md_dst->type == METADATA_XFRM) dst_release(md_dst->u.xfrm_info.dst_orig); kfree(md_dst); } EXPORT_SYMBOL_GPL(metadata_dst_free); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) { int cpu; struct metadata_dst __percpu *md_dst; md_dst = __alloc_percpu_gfp(struct_size(md_dst, u.tun_info.options, optslen), __alignof__(struct metadata_dst), flags); if (!md_dst) return NULL; for_each_possible_cpu(cpu) __metadata_dst_init(per_cpu_ptr(md_dst, cpu), type, optslen); return md_dst; } EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) { int cpu; for_each_possible_cpu(cpu) { struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); #ifdef CONFIG_DST_CACHE if (one_md_dst->type == METADATA_IP_TUNNEL) dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache); #endif if (one_md_dst->type == METADATA_XFRM) dst_release(one_md_dst->u.xfrm_info.dst_orig); } free_percpu(md_dst); } EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
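dst_cow_metrics_generic() above implements copy-on-write for the shared metrics table: duplicate the current table, then publish the private copy with a single cmpxchg and back off if another writer won the race. The standalone sketch below reproduces that publish pattern with C11 atomics; the sketch_* names and the omission of refcounting and of the DST_METRICS_READ_ONLY bit are simplifications for illustration, not the kernel API.

/*
 * Copy-on-write publish pattern, after dst_cow_metrics_generic():
 * copy the shared table, swing the pointer with one compare-and-swap,
 * and if someone else got there first, drop our copy and use theirs.
 */
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct sketch_metrics {
	unsigned int values[16];
};

/* Shared, never-freed default table, in the role of dst_default_metrics. */
static struct sketch_metrics sketch_default_metrics;

static _Atomic(struct sketch_metrics *) sketch_shared = &sketch_default_metrics;

static struct sketch_metrics *sketch_cow_metrics(void)
{
	struct sketch_metrics *old = atomic_load(&sketch_shared);
	struct sketch_metrics *copy = malloc(sizeof(*copy));

	if (!copy)
		return NULL;
	memcpy(copy, old, sizeof(*copy));

	/* Publish the private copy only if nobody raced ahead of us. */
	if (!atomic_compare_exchange_strong(&sketch_shared, &old, copy)) {
		free(copy);	/* lost the race: 'old' now holds the winner */
		copy = old;
	}
	return copy;		/* writable table (refcounting omitted here) */
}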
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Neighbour Discovery for IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Mike Shaver		<shaver@ingenia.com>
 */

/*
 *	Changes:
 *
 *	Alexey I. Froloff		:	RFC6106 (DNSSL) support
 *	Pierre Ynard			:	export userland ND options
 *						through netlink (RDNSS support)
 *	Lars Fenneberg			:	fixed MTU setting on receipt
 *						of an RA.
 *	Janos Farkas			:	kmalloc failure checks
 *	Alexey Kuznetsov		:	state machine reworked
 *						and moved to net/core.
* Pekka Savola : RFC2461 validation * YOSHIFUJI Hideaki @USAGI : Verify ND options properly */ #define pr_fmt(fmt) "ICMPv6: " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/sched.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/route.h> #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/if_addr.h> #include <linux/if_ether.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/jhash.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> #include <net/netlink.h> #include <linux/rtnetlink.h> #include <net/flow.h> #include <net/ip6_checksum.h> #include <net/inet_common.h> #include <linux/proc_fs.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); static int ndisc_is_multicast(const void *pkey); static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_connected_output, }; static const struct neigh_ops ndisc_hh_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_resolve_output, }; static const struct neigh_ops ndisc_direct_ops = { .family = AF_INET6, .output = neigh_direct_output, .connected_output = neigh_direct_output, }; struct neigh_table nd_tbl = { .family = AF_INET6, .key_len = sizeof(struct in6_addr), .protocol = cpu_to_be16(ETH_P_IPV6), .hash = ndisc_hash, .key_eq = ndisc_key_eq, .constructor = ndisc_constructor, .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, .is_multicast = ndisc_is_multicast, .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { .tbl = &nd_tbl, .reachable_time = ND_REACHABLE_TIME, .data = { [NEIGH_VAR_MCAST_PROBES] = 3, [NEIGH_VAR_UCAST_PROBES] = 3, [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, .gc_thresh2 = 512, .gc_thresh3 = 1024, }; EXPORT_SYMBOL_GPL(nd_tbl); void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad) { int space = __ndisc_opt_addr_space(data_len, pad); u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; memset(opt + 2, 0, pad); opt += pad; space 
-= pad; memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; space -= data_len; if (space > 0) memset(opt, 0, space); } EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type); } static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb, void *ha, const u8 *ops_data) { ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT); ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data); } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { int type; if (!cur || !end || cur >= end) return NULL; type = cur->nd_opt_type; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && cur->nd_opt_type != type); return cur <= end && cur->nd_opt_type == type ? cur : NULL; } static inline int ndisc_is_useropt(const struct net_device *dev, struct nd_opt_hdr *opt) { return opt->nd_opt_type == ND_OPT_PREFIX_INFO || opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || opt->nd_opt_type == ND_OPT_6CO || opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || opt->nd_opt_type == ND_OPT_PREF64; } static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev, struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { if (!cur || !end || cur >= end) return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && !ndisc_is_useropt(dev, cur)); return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL; } struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; if (!nd_opt || opt_len < 0 || !ndopts) return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { bool unknown = false; int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; l = nd_opt->nd_opt_len << 3; if (opt_len < l || l == 0) return NULL; if (ndisc_ops_parse_options(dev, nd_opt, ndopts)) goto next_opt; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: case ND_OPT_MTU: case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { ND_PRINTK(2, warn, "%s: duplicated ND6 option found: type=%d\n", __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFO: ndopts->nd_opts_pi_end = nd_opt; if (!ndopts->nd_opt_array[nd_opt->nd_opt_type]) ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; break; #ifdef CONFIG_IPV6_ROUTE_INFO case ND_OPT_ROUTE_INFO: ndopts->nd_opts_ri_end = nd_opt; if (!ndopts->nd_opts_ri) ndopts->nd_opts_ri = nd_opt; break; #endif default: unknown = true; } if (ndisc_is_useropt(dev, nd_opt)) { ndopts->nd_useropts_end = nd_opt; if (!ndopts->nd_useropts) ndopts->nd_useropts = nd_opt; } else if (unknown) { /* * Unknown options must be silently ignored, * to accommodate future extension to the * protocol. */ ND_PRINTK(2, notice, "%s: ignored unsupported option; type=%d, len=%d\n", __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } next_opt: opt_len -= l; nd_opt = ((void *)nd_opt) + l; } return ndopts; } int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { case ARPHRD_ETHER: case ARPHRD_IEEE802: /* Not sure. Check it later. 
--ANK */ case ARPHRD_FDDI: ipv6_eth_mc_map(addr, buf); return 0; case ARPHRD_ARCNET: ipv6_arcnet_mc_map(addr, buf); return 0; case ARPHRD_INFINIBAND: ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; case ARPHRD_IPGRE: return ipv6_ipgre_mc_map(addr, dev->broadcast, buf); default: if (dir) { memcpy(buf, dev->broadcast, dev->addr_len); return 0; } } return -EINVAL; } EXPORT_SYMBOL(ndisc_mc_map); static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { return ndisc_hashfn(pkey, dev, hash_rnd); } static bool ndisc_key_eq(const struct neighbour *n, const void *pkey) { return neigh_key_eq128(n, pkey); } static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; struct net_device *dev = neigh->dev; struct inet6_dev *in6_dev; struct neigh_parms *parms; bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); if (!in6_dev) { return -EINVAL; } parms = in6_dev->nd_parms; __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &ndisc_direct_ops; neigh->output = neigh_direct_output; } else { if (is_multicast) { neigh->nud_state = NUD_NOARP; ndisc_mc_map(addr, neigh->ha, dev, 1); } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->dev_addr, dev->addr_len); if (dev->flags&IFF_LOOPBACK) neigh->type = RTN_LOCAL; } else if (dev->flags&IFF_POINTOPOINT) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } if (dev->header_ops->cache) neigh->ops = &ndisc_hh_ops; else neigh->ops = &ndisc_generic_ops; if (neigh->nud_state&NUD_VALID) neigh->output = neigh->ops->connected_output; else neigh->output = neigh->ops->output; } in6_dev_put(in6_dev); return 0; } static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return -EINVAL; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); return 0; } static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); } /* called with rtnl held */ static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev || idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device"); return false; } return true; } static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int len) { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) return NULL; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ rcu_read_lock(); skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk); rcu_read_unlock(); return skb; } static void ip6_nd_hdr(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int hop_limit, int len) { struct ipv6hdr *hdr; struct inet6_dev *idev; 
unsigned tclass; rcu_read_lock(); idev = __in6_dev_get(skb->dev); tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); ip6_flow_hdr(hdr, tclass, 0); hdr->payload_len = htons(len); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = hop_limit; hdr->saddr = *saddr; hdr->daddr = *daddr; } void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); struct inet6_dev *idev; struct net *net; struct sock *sk; int err; u8 type; type = icmp6h->icmp6_type; rcu_read_lock(); net = dev_net_rcu(skb->dev); sk = net->ipv6.ndisc_sk; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { rcu_read_unlock(); kfree_skb(skb); return; } skb_dst_set(skb, dst); } icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, csum_partial(icmp6h, skb->len, 0)); ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); idev = __in6_dev_get(dst->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } rcu_read_unlock(); } EXPORT_SYMBOL(ndisc_send_skb); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt) { struct sk_buff *skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; struct nd_msg *msg; int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, &tmpaddr)) return; src_addr = &tmpaddr; } if (!dev->addr_len) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_ADVERTISEMENT); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, .icmp6_router = router, .icmp6_solicited = solicited, .icmp6_override = override, }, .target = *solicited_addr, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_ADVERTISEMENT); ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; idev = in6_dev_get(dev); if (!idev) return; read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { /* skip tentative addresses until dad completes */ if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_OPTIMISTIC)) continue; ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); } read_unlock_bh(&idev->lock); in6_dev_put(idev); } struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *saddr, u64 nonce) { int inc_opt = dev->addr_len; struct sk_buff 
*skb; struct nd_msg *msg; int optlen = 0; if (!saddr) return NULL; if (ipv6_addr_any(saddr)) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) optlen += 8; skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return NULL; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, }, .target = *solicit, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) { u8 *opt = skb_put(skb, 8); opt[0] = ND_OPT_NONCE; opt[1] = 8 >> 3; memcpy(opt + 2, &nonce, 6); } return skb; } EXPORT_SYMBOL(ndisc_ns_create); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce) { struct in6_addr addr_buf; struct sk_buff *skb; if (!saddr) { if (ipv6_get_lladdr(dev, &addr_buf, (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))) return; saddr = &addr_buf; } skb = ndisc_ns_create(dev, solicit, saddr, nonce); if (skb) ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct sk_buff *skb; struct rs_msg *msg; int send_sllao = dev->addr_len; int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* * According to section 2.2 of RFC 4429, we must not * send router solicitations with a sllao from * optimistic addresses, but we may send the solicitation * if we don't include the sllao. So here we check * if our address is optimistic, and if so, we * suppress the inclusion of the sllao. */ if (send_sllao) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr, dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { send_sllao = 0; } in6_ifa_put(ifp); } else { send_sllao = 0; } } #endif if (send_sllao) optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct rs_msg) { .icmph = { .icmp6_type = NDISC_ROUTER_SOLICITATION, }, }; if (send_sllao) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_ROUTER_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb) { /* * "The sender MUST return an ICMP * destination unreachable" */ dst_link_failure(skb); kfree_skb(skb); } /* Called with locked neigh: either read or both */ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) { struct in6_addr *saddr = NULL; struct in6_addr mcaddr; struct net_device *dev = neigh->dev; struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, false, 1, IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(dev, target, &mcaddr, saddr, 0); } } static int pndisc_is_router(const void *pkey, struct net_device *dev) { struct pneigh_entry *n; int ret = -1; read_lock_bh(&nd_tbl.lock); n = __pneigh_lookup(&nd_tbl, dev_net(dev), 
pkey, dev); if (n) ret = !!(n->flags & NTF_ROUTER); read_unlock_bh(&nd_tbl.lock); return ret; } void ndisc_update(const struct net_device *dev, struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, struct ndisc_options *ndopts) { neigh_update(neigh, lladdr, new, flags, 0); /* report ndisc ops about neighbour update */ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); } static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; int dad = ipv6_addr_any(saddr); int is_router = -1; SKB_DR(reason); u64 nonce = 0; bool inc; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return reason; } /* * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. */ if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return reason; } if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NS: invalid link-layer address length\n"); return reason; } /* RFC2461 7.1.1: * If the IP source address is the unspecified address, * there MUST NOT be source link-layer address option * in the message. */ if (dad) { ND_PRINTK(2, warn, "NS: bad DAD packet (link-layer address option)\n"); return reason; } } if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1) memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6); inc = ipv6_addr_is_multicast(daddr); ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { if (nonce != 0 && ifp->dad_nonce == nonce) { u8 *np = (u8 *)&nonce; /* Matching nonce if looped back */ ND_PRINTK(2, notice, "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", ifp->idev->dev->name, &ifp->addr, np); goto out; } /* * We are colliding with another node * who is doing DAD * so fail our DAD process */ addrconf_dad_failure(skb, ifp); return reason; } else { /* * This is not a dad solicitation. * If we are an optimistic node, * we should respond. * Otherwise, we should ignore it. */ if (!(ifp->flags & IFA_F_OPTIMISTIC)) goto out; } } idev = ifp->idev; } else { struct net *net = dev_net(dev); /* perhaps an address on the master device */ if (netif_is_l3_slave(dev)) { struct net_device *mdev; mdev = netdev_master_upper_dev_get_rcu(dev); if (mdev) { ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); if (ifp) goto have_ifp; } } idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? 
*/ return reason; } if (ipv6_chk_acast_addr(net, dev, &msg->target) || (READ_ONCE(idev->cnf.forwarding) && (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc && NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) { /* * for anycast or proxy, * sender should delay its response * by a random time between 0 and * MAX_ANYCAST_DELAY_TIME seconds. * (RFC2461) -- yoshfuji */ struct sk_buff *n = skb_clone(skb, GFP_ATOMIC); if (n) pneigh_enqueue(&nd_tbl, idev->nd_parms, n); goto out; } } else { SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST); goto out; } } if (is_router < 0) is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, !!is_router, false, (ifp != NULL), true); goto out; } if (inc) NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast); else NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast); /* * update / create cache entry * for the source address */ neigh = __neigh_lookup(&nd_tbl, saddr, dev, !inc || lladdr || !dev->addr_len); if (neigh) ndisc_update(dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE, NDISC_NEIGHBOUR_SOLICITATION, &ndopts); if (neigh || !dev->header_ops) { ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); reason = SKB_CONSUMED; } out: if (ifp) in6_ifa_put(ifp); else in6_dev_put(idev); return reason; } static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ return 1; case 2: /* Create new entries from untracked na only if saddr is in the * same subnet as an address configured on the interface that * received the na */ return !!ipv6_chk_prefix(saddr, dev); default: return 0; } } static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; SKB_DR(reason); u8 new_state; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NA: target address is multicast\n"); return reason; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); return reason; } /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. 
* drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NA: invalid link-layer address length\n"); return reason; } } ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { if (skb->pkt_type != PACKET_LOOPBACK && (ifp->flags & IFA_F_TENTATIVE)) { addrconf_dad_failure(skb, ifp); return reason; } /* What should we make now? The advertisement is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) We should not print the error if NA has been received from loopback - it is just our own unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) ND_PRINTK(1, warn, "NA: %pM advertised our address %pI6c on %s!\n", eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); in6_ifa_put(ifp); return reason; } neigh = neigh_lookup(&nd_tbl, &msg->target, dev); /* RFC 9131 updates original Neighbour Discovery RFC 4861. * NAs with Target LL Address option without a corresponding * entry in the neighbour cache can now create a STALE neighbour * cache entry on routers. * * entry accept fwding solicited behaviour * ------- ------ ------ --------- ---------------------- * present X X 0 Set state to STALE * present X X 1 Set state to REACHABLE * absent 0 X X Do nothing * absent 1 0 X Do nothing * absent 1 1 X Add a new STALE entry * * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; } } if (neigh && !IS_ERR(neigh)) { u8 old_flags = neigh->flags; struct net *net = dev_net(dev); if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* * Don't update the neighbor cache entry on a proxy NA from * ourselves because either the proxied node is off link or it * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; } ndisc_update(dev, neigh, lladdr, new_state, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| (msg->icmph.icmp6_router ? 
NEIGH_UPDATE_F_ISROUTER : 0), NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts); if ((old_flags & ~neigh->flags) & NTF_ROUTER) { /* * Change: router to host */ rt6_clean_tohost(dev_net(dev), saddr); } reason = SKB_CONSUMED; out: neigh_release(neigh); } return reason; } static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) { struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb); unsigned long ndoptlen = skb->len - sizeof(*rs_msg); struct neighbour *neigh; struct inet6_dev *idev; const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; struct ndisc_options ndopts; u8 *lladdr = NULL; SKB_DR(reason); if (skb->len < sizeof(*rs_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; idev = __in6_dev_get(skb->dev); if (!idev) { ND_PRINTK(1, err, "RS: can't find in6 device\n"); return reason; } /* Don't accept RS if we're not in router mode */ if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* * Don't update NCE if src = ::; * this implies that the source node has no ip address assigned yet. */ if (ipv6_addr_any(saddr)) goto out; /* Parse ND options */ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) goto out; } neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1); if (neigh) { ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER, NDISC_ROUTER_SOLICITATION, &ndopts); neigh_release(neigh); reason = SKB_CONSUMED; } out: return reason; } static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) { struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra); struct sk_buff *skb; struct nlmsghdr *nlh; struct nduseroptmsg *ndmsg; struct net *net = dev_net(ra->dev); int err; int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + (opt->nd_opt_len << 3)); size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); skb = nlmsg_new(msg_size, GFP_ATOMIC); if (!skb) { err = -ENOBUFS; goto errout; } nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); if (!nlh) { goto nla_put_failure; } ndmsg = nlmsg_data(nlh); ndmsg->nduseropt_family = AF_INET6; ndmsg->nduseropt_ifindex = ra->dev->ifindex; ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); } static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); bool send_ifinfo_notify = false; struct neighbour *neigh = NULL; struct ndisc_options ndopts; struct fib6_info *rt = NULL; struct inet6_dev *in6_dev; struct fib6_table *table; u32 defrtr_usr_metric; unsigned int pref = 0; __u32 old_if_flags; struct net *net; SKB_DR(reason); int lifetime; int optlen; __u8 *opt = (__u8 *)(ra_msg + 1); optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); ND_PRINTK(2, info, "RA: %s, dev: %s\n", __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); 
return reason; } if (optlen < 0) return SKB_DROP_REASON_PKT_TOO_SMALL; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); return reason; } #endif in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) { ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", skb->dev->name); return reason; } if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, did not accept ra for dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #endif if (in6_dev->if_flags & IF_RS_SENT) { /* * flag that an RA was received after an RS was sent * out on this interface. */ in6_dev->if_flags |= IF_RA_RCVD; } /* * Remember the managed/otherconf flags from most recently * received RA message (RFC 2462) -- yoshfuji */ old_if_flags = in6_dev->if_flags; in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED | IF_RA_OTHERCONF)) | (ra_msg->icmph.icmp6_addrconf_managed ? IF_RA_MANAGED : 0) | (ra_msg->icmph.icmp6_addrconf_other ? IF_RA_OTHERCONF : 0); if (old_if_flags != in6_dev->if_flags) send_ifinfo_notify = true; if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { ND_PRINTK(2, info, "RA: %s, defrtr is false for dev: %s\n", __func__, skb->dev->name); goto skip_defrtr; } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); if (lifetime != 0 && lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { ND_PRINTK(2, info, "RA: router lifetime (%ds) is too short: %s\n", lifetime, skb->dev->name); goto skip_defrtr; } /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. 
*/ net = dev_net(in6_dev->dev); if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); goto skip_defrtr; } #ifdef CONFIG_IPV6_ROUTER_PREF pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref)) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif /* routes added from RAs do not use nexthop objects */ rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } } /* Set default route metric as specified by user */ defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric; /* delete the route if lifetime is 0 or if metric needs change */ if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) { ip6_del_rt(net, rt, false); rt = NULL; } ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime, defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); if (neigh) neigh_release(neigh); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev, pref, defrtr_usr_metric, lifetime); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", __func__); return reason; } neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } neigh->flags |= NTF_ROUTER; } else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) { struct nl_info nlinfo = { .nl_net = net, }; rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } if (rt) { table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); fib6_set_expires(rt, jiffies + (HZ * lifetime)); fib6_add_gc_list(rt); spin_unlock_bh(&table->tb6_lock); } if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 && ra_msg->icmph.icmp6_hop_limit) { if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <= ra_msg->icmph.icmp6_hop_limit) { WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } } skip_defrtr: /* * Update Reachable Time and Retrans Timer */ if (in6_dev->nd_parms) { unsigned long rtime = ntohl(ra_msg->retrans_timer); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) { rtime = (rtime*HZ)/1000; if (rtime < HZ/100) rtime = HZ/100; NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } rtime = ntohl(ra_msg->reachable_time); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) { rtime = (rtime*HZ)/1000; if (rtime < HZ/10) rtime = HZ/10; if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) { NEIGH_VAR_SET(in6_dev->nd_parms, BASE_REACHABLE_TIME, rtime); NEIGH_VAR_SET(in6_dev->nd_parms, GC_STALETIME, 3 * rtime); in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } } } skip_linkparms: /* * Process options. 
*/ if (!neigh) neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr, skb->dev, 1); if (neigh) { u8 *lladdr = NULL; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { ND_PRINTK(2, warn, "RA: invalid link-layer address length\n"); goto out; } } ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| NEIGH_UPDATE_F_ISROUTER, NDISC_ROUTER_ADVERTISEMENT, &ndopts); reason = SKB_CONSUMED; } if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, accept_ra is false for dev: %s\n", __func__, skb->dev->name); goto out; } #ifdef CONFIG_IPV6_ROUTE_INFO if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: router info ignored.\n", skb->dev->name); goto skip_routeinfo; } if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; p; p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) { struct route_info *ri = (struct route_info *)p; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT && ri->prefix_len == 0) continue; #endif if (ri->prefix_len == 0 && !READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) continue; if (ri->lifetime != 0 && ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) continue; if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen)) continue; if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen)) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); } } skip_routeinfo: #endif #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", __func__, skb->dev->name); goto out; } #endif if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; p; p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) { addrconf_prefix_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, ndopts.nd_opts_src_lladdr != NULL); } } if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) { __be32 n; u32 mtu; memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); if (in6_dev->ra_mtu != mtu) { in6_dev->ra_mtu = mtu; send_ifinfo_notify = true; } if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } } if (ndopts.nd_useropts) { struct nd_opt_hdr *p; for (p = ndopts.nd_useropts; p; p = ndisc_next_useropt(skb->dev, p, ndopts.nd_useropts_end)) { ndisc_ra_useropt(skb, p); } } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: /* Send a notify if RA changed managed/otherconf flags or * timer settings or ra_mtu value */ if (send_ifinfo_notify) inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); fib6_info_release(rt); if (neigh) neigh_release(neigh); return reason; } static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb) { struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); 
struct ndisc_options ndopts; SKB_DR(reason); u8 *hdr; #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: ND_PRINTK(2, warn, "Redirect: from host or unauthorized router\n"); return reason; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: source address is not link-local\n"); return reason; } if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ndopts.nd_opts_rh) { ip6_redirect_no_header(skb, dev_net(skb->dev), skb->dev->ifindex); return reason; } hdr = (u8 *)ndopts.nd_opts_rh; hdr += 8; if (!pskb_pull(skb, hdr - skb_transport_header(skb))) return SKB_DROP_REASON_PKT_TOO_SMALL; return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, struct sk_buff *orig_skb, int rd_len) { u8 *opt = skb_put(skb, rd_len); memset(opt, 0, 8); *(opt++) = ND_OPT_REDIRECT_HDR; *(opt++) = (rd_len >> 3); opt += 6; skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt, rd_len - 8); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net_rcu(dev); struct sock *sk = net->ipv6.ndisc_sk; int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; struct rd_msg *msg; struct in6_addr saddr_buf; struct rt6_info *rt; struct dst_entry *dst; struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; bool ret; if (netif_is_l3_master(dev)) { dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) return; } if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: target address is not link-local unicast\n"); return; } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); return; } dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK(2, warn, "Redirect: destination is not a neighbour\n"); goto release; } peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (!ret) goto release; if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { ND_PRINTK(2, warn, "Redirect: no neigh for target address\n"); goto release; } read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) { memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; optlen += ndisc_redirect_opt_addr_space(dev, neigh, ops_data_buf, &ops_data); } else read_unlock_bh(&neigh->lock); neigh_release(neigh); } rd_len = min_t(unsigned int, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, skb->len + 8); rd_len &= ~0x7; optlen += rd_len; buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!buff) goto release; msg = skb_put(buff, sizeof(*msg)); *msg = (struct rd_msg) { .icmph = { .icmp6_type = NDISC_REDIRECT, }, .target = *target, .dest = ipv6_hdr(skb)->daddr, }; /* * include target_address option */ if (ha) ndisc_fill_redirect_addr_option(buff, ha, ops_data); /* * build 
redirect option and copy skb over to the new packet. */ if (rd_len) ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: dst_release(dst); } static void pndisc_redo(struct sk_buff *skb) { enum skb_drop_reason reason = ndisc_recv_ns(skb); kfree_skb_reason(skb, reason); } static int ndisc_is_multicast(const void *pkey) { return ipv6_addr_is_multicast((struct in6_addr *)pkey); } static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } return false; } enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; SKB_DR(reason); if (ndisc_suppress_frag_ndisc(skb)) return SKB_DROP_REASON_IPV6_NDISC_FRAG; if (skb_linearize(skb)) return SKB_DROP_REASON_NOMEM; msg = (struct nd_msg *)skb_transport_header(skb); __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit); return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); reason = ndisc_recv_ns(skb); break; case NDISC_NEIGHBOUR_ADVERTISEMENT: reason = ndisc_recv_na(skb); break; case NDISC_ROUTER_SOLICITATION: reason = ndisc_recv_rs(skb); break; case NDISC_ROUTER_ADVERTISEMENT: reason = ndisc_router_discovery(skb); break; case NDISC_REDIRECT: reason = ndisc_redirect_rcv(skb); break; } return reason; } static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0, net, false); fallthrough; case NETDEV_UP: idev = in6_dev_get(dev); if (!idev) break; if (READ_ONCE(idev->cnf.ndisc_notify) || READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; case NETDEV_CHANGE: idev = in6_dev_get(dev); if (!idev) evict_nocarrier = true; else { evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&nd_tbl, dev); if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&nd_tbl, dev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block ndisc_netdev_notifier = { .notifier_call = ndisc_netdev_event, .priority = ADDRCONF_NOTIFY_PRIORITY - 5, }; #ifdef CONFIG_SYSCTL static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl, const char *func, const char *dev_name) { static char warncomm[TASK_COMM_LEN]; static int warned; if (strcmp(warncomm, current->comm) && warned < 5) { strscpy(warncomm, current->comm); 
pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n", warncomm, func, dev_name, ctl->procname, dev_name, ctl->procname); warned++; } } int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; int ret; if ((strcmp(ctl->procname, "retrans_time") == 0) || (strcmp(ctl->procname, "base_reachable_time") == 0)) ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); else ret = -1; if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) idev->nd_parms->reachable_time = neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); } return ret; } #endif static int __net_init ndisc_net_init(struct net *net) { struct ipv6_pinfo *np; struct sock *sk; int err; err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { ND_PRINTK(0, err, "NDISC: Failed to initialize the control socket (err %d)\n", err); return err; } net->ipv6.ndisc_sk = sk; np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ inet6_clear_bit(MC6_LOOP, sk); return 0; } static void __net_exit ndisc_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.ndisc_sk); } static struct pernet_operations ndisc_net_ops = { .init = ndisc_net_init, .exit = ndisc_net_exit, }; int __init ndisc_init(void) { int err; err = register_pernet_subsys(&ndisc_net_ops); if (err) return err; /* * Initialize the neighbour table */ neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; out: #endif return err; #ifdef CONFIG_SYSCTL out_unregister_pernet: unregister_pernet_subsys(&ndisc_net_ops); goto out; #endif } int __init ndisc_late_init(void) { return register_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_late_cleanup(void) { unregister_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_cleanup(void) { #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); }
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __SOUND_PCM_PARAMS_H
#define __SOUND_PCM_PARAMS_H

/*
 * PCM params helpers
 * Copyright (c) by Abramo Bagnara <abramo@alsa-project.org>
 */

#include <sound/pcm.h>

int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
			   struct snd_pcm_hw_params *params,
			   snd_pcm_hw_param_t var, int *dir);
int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
			  struct snd_pcm_hw_params *params,
			  snd_pcm_hw_param_t var, int *dir);
int snd_pcm_hw_param_value(const struct snd_pcm_hw_params *params,
			   snd_pcm_hw_param_t var, int *dir);

#define SNDRV_MASK_BITS	64	/* we use so far 64bits only */
#define SNDRV_MASK_SIZE	(SNDRV_MASK_BITS / 32)
#define MASK_OFS(i)	((i) >> 5)
#define MASK_BIT(i)	(1U << ((i) & 31))

static inline void snd_mask_none(struct snd_mask *mask)
{
	memset(mask, 0, sizeof(*mask));
}

static inline void snd_mask_any(struct snd_mask *mask)
{
	memset(mask, 0xff, SNDRV_MASK_SIZE * sizeof(u_int32_t));
}

static inline int snd_mask_empty(const struct snd_mask *mask)
{
	int i;
	for (i = 0; i < SNDRV_MASK_SIZE; i++)
		if (mask->bits[i])
			return 0;
	return 1;
}

static inline unsigned int snd_mask_min(const struct snd_mask *mask)
{
	int i;
	for (i = 0; i < SNDRV_MASK_SIZE; i++) {
		if (mask->bits[i])
			return __ffs(mask->bits[i]) + (i << 5);
	}
	return 0;
}

static inline unsigned int snd_mask_max(const struct snd_mask *mask)
{
	int i;
	for (i = SNDRV_MASK_SIZE - 1; i >= 0; i--) {
		if (mask->bits[i])
			return __fls(mask->bits[i]) + (i << 5);
	}
	return 0;
}

static inline void snd_mask_set(struct snd_mask *mask, unsigned int val)
{
	mask->bits[MASK_OFS(val)] |= MASK_BIT(val);
}

/* Most of drivers need only this one */
static inline void snd_mask_set_format(struct snd_mask *mask,
				       snd_pcm_format_t format)
{
	snd_mask_set(mask, (__force unsigned int)format);
}

static inline void snd_mask_reset(struct snd_mask *mask, unsigned int val)
{
	mask->bits[MASK_OFS(val)] &= ~MASK_BIT(val);
}

static inline void snd_mask_set_range(struct snd_mask *mask,
				      unsigned int from, unsigned int to)
{
	unsigned int i;
	for (i = from; i <= to; i++)
mask->bits[MASK_OFS(i)] |= MASK_BIT(i); } static inline void snd_mask_reset_range(struct snd_mask *mask, unsigned int from, unsigned int to) { unsigned int i; for (i = from; i <= to; i++) mask->bits[MASK_OFS(i)] &= ~MASK_BIT(i); } static inline void snd_mask_leave(struct snd_mask *mask, unsigned int val) { unsigned int v; v = mask->bits[MASK_OFS(val)] & MASK_BIT(val); snd_mask_none(mask); mask->bits[MASK_OFS(val)] = v; } static inline void snd_mask_intersect(struct snd_mask *mask, const struct snd_mask *v) { int i; for (i = 0; i < SNDRV_MASK_SIZE; i++) mask->bits[i] &= v->bits[i]; } static inline int snd_mask_eq(const struct snd_mask *mask, const struct snd_mask *v) { return ! memcmp(mask, v, SNDRV_MASK_SIZE * sizeof(u_int32_t)); } static inline void snd_mask_copy(struct snd_mask *mask, const struct snd_mask *v) { *mask = *v; } static inline int snd_mask_test(const struct snd_mask *mask, unsigned int val) { return mask->bits[MASK_OFS(val)] & MASK_BIT(val); } /* Most of drivers need only this one */ static inline int snd_mask_test_format(const struct snd_mask *mask, snd_pcm_format_t format) { return snd_mask_test(mask, (__force unsigned int)format); } static inline int snd_mask_single(const struct snd_mask *mask) { int i, c = 0; for (i = 0; i < SNDRV_MASK_SIZE; i++) { if (! mask->bits[i]) continue; if (mask->bits[i] & (mask->bits[i] - 1)) return 0; if (c) return 0; c++; } return 1; } static inline int snd_mask_refine(struct snd_mask *mask, const struct snd_mask *v) { struct snd_mask old; snd_mask_copy(&old, mask); snd_mask_intersect(mask, v); if (snd_mask_empty(mask)) return -EINVAL; return !snd_mask_eq(mask, &old); } static inline int snd_mask_refine_first(struct snd_mask *mask) { if (snd_mask_single(mask)) return 0; snd_mask_leave(mask, snd_mask_min(mask)); return 1; } static inline int snd_mask_refine_last(struct snd_mask *mask) { if (snd_mask_single(mask)) return 0; snd_mask_leave(mask, snd_mask_max(mask)); return 1; } static inline int snd_mask_refine_min(struct snd_mask *mask, unsigned int val) { if (snd_mask_min(mask) >= val) return 0; snd_mask_reset_range(mask, 0, val - 1); if (snd_mask_empty(mask)) return -EINVAL; return 1; } static inline int snd_mask_refine_max(struct snd_mask *mask, unsigned int val) { if (snd_mask_max(mask) <= val) return 0; snd_mask_reset_range(mask, val + 1, SNDRV_MASK_BITS); if (snd_mask_empty(mask)) return -EINVAL; return 1; } static inline int snd_mask_refine_set(struct snd_mask *mask, unsigned int val) { int changed; changed = !snd_mask_single(mask); snd_mask_leave(mask, val); if (snd_mask_empty(mask)) return -EINVAL; return changed; } static inline int snd_mask_value(const struct snd_mask *mask) { return snd_mask_min(mask); } static inline void snd_interval_any(struct snd_interval *i) { i->min = 0; i->openmin = 0; i->max = UINT_MAX; i->openmax = 0; i->integer = 0; i->empty = 0; } static inline void snd_interval_none(struct snd_interval *i) { i->empty = 1; } static inline int snd_interval_checkempty(const struct snd_interval *i) { return (i->min > i->max || (i->min == i->max && (i->openmin || i->openmax))); } static inline int snd_interval_empty(const struct snd_interval *i) { return i->empty; } static inline int snd_interval_single(const struct snd_interval *i) { return (i->min == i->max || (i->min + 1 == i->max && (i->openmin || i->openmax))); } static inline int snd_interval_value(const struct snd_interval *i) { if (i->openmin && !i->openmax) return i->max; return i->min; } static inline int snd_interval_min(const struct snd_interval *i) { return 
i->min; } static inline int snd_interval_max(const struct snd_interval *i) { unsigned int v; v = i->max; if (i->openmax) v--; return v; } static inline int snd_interval_test(const struct snd_interval *i, unsigned int val) { return !((i->min > val || (i->min == val && i->openmin) || i->max < val || (i->max == val && i->openmax))); } static inline void snd_interval_copy(struct snd_interval *d, const struct snd_interval *s) { *d = *s; } static inline int snd_interval_setinteger(struct snd_interval *i) { if (i->integer) return 0; if (i->openmin && i->openmax && i->min == i->max) return -EINVAL; i->integer = 1; return 1; } static inline int snd_interval_eq(const struct snd_interval *i1, const struct snd_interval *i2) { if (i1->empty) return i2->empty; if (i2->empty) return i1->empty; return i1->min == i2->min && i1->openmin == i2->openmin && i1->max == i2->max && i1->openmax == i2->openmax; } /** * params_access - get the access type from the hw params * @p: hw params */ static inline snd_pcm_access_t params_access(const struct snd_pcm_hw_params *p) { return (__force snd_pcm_access_t)snd_mask_min(hw_param_mask_c(p, SNDRV_PCM_HW_PARAM_ACCESS)); } /** * params_format - get the sample format from the hw params * @p: hw params */ static inline snd_pcm_format_t params_format(const struct snd_pcm_hw_params *p) { return (__force snd_pcm_format_t)snd_mask_min(hw_param_mask_c(p, SNDRV_PCM_HW_PARAM_FORMAT)); } /** * params_subformat - get the sample subformat from the hw params * @p: hw params */ static inline snd_pcm_subformat_t params_subformat(const struct snd_pcm_hw_params *p) { return (__force snd_pcm_subformat_t)snd_mask_min(hw_param_mask_c(p, SNDRV_PCM_HW_PARAM_SUBFORMAT)); } /** * params_period_bytes - get the period size (in bytes) from the hw params * @p: hw params */ static inline unsigned int params_period_bytes(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_PERIOD_BYTES)->min; } /** * params_width - get the number of bits of the sample format from the hw params * @p: hw params * * This function returns the number of bits per sample that the selected sample * format of the hw params has. */ static inline int params_width(const struct snd_pcm_hw_params *p) { return snd_pcm_format_width(params_format(p)); } /* * params_physical_width - get the storage size of the sample format from the hw params * @p: hw params * * This functions returns the number of bits per sample that the selected sample * format of the hw params takes up in memory. This will be equal or larger than * params_width(). */ static inline int params_physical_width(const struct snd_pcm_hw_params *p) { return snd_pcm_format_physical_width(params_format(p)); } int snd_pcm_hw_params_bits(const struct snd_pcm_hw_params *p); static inline void params_set_format(struct snd_pcm_hw_params *p, snd_pcm_format_t fmt) { snd_mask_set_format(hw_param_mask(p, SNDRV_PCM_HW_PARAM_FORMAT), fmt); } #endif /* __SOUND_PCM_PARAMS_H */
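/*
 * Illustrative sketch (not part of pcm_params.h): the snd_mask helpers above
 * keep a 64-bit bitmap in SNDRV_MASK_SIZE (= 2) 32-bit words and report
 * refinement results as -EINVAL (empty), 1 (narrowed) or 0 (unchanged). The
 * user-space mock below re-implements just enough of them to demonstrate
 * those return-value semantics; struct demo_mask and the demo_* helpers are
 * simplified stand-ins, not the real ALSA types.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_MASK_BITS	64
#define DEMO_MASK_SIZE	(DEMO_MASK_BITS / 32)
#define DEMO_OFS(i)	((i) >> 5)
#define DEMO_BIT(i)	(1U << ((i) & 31))

struct demo_mask { uint32_t bits[DEMO_MASK_SIZE]; };

static void demo_mask_set(struct demo_mask *m, unsigned int v)
{
	m->bits[DEMO_OFS(v)] |= DEMO_BIT(v);
}

static int demo_mask_empty(const struct demo_mask *m)
{
	return !(m->bits[0] | m->bits[1]);
}

/* Same contract as snd_mask_refine(): intersect, then report the outcome. */
static int demo_mask_refine(struct demo_mask *m, const struct demo_mask *v)
{
	struct demo_mask old = *m;
	int i;

	for (i = 0; i < DEMO_MASK_SIZE; i++)
		m->bits[i] &= v->bits[i];
	if (demo_mask_empty(m))
		return -EINVAL;
	return memcmp(m, &old, sizeof(old)) != 0;
}

int main(void)
{
	struct demo_mask hw = { { 0 } }, constraint = { { 0 } };

	demo_mask_set(&hw, 2);		/* two candidate format bits */
	demo_mask_set(&hw, 10);
	demo_mask_set(&constraint, 10);	/* constraint allows only one */

	printf("refine -> %d\n", demo_mask_refine(&hw, &constraint)); /* 1 */
	printf("refine -> %d\n", demo_mask_refine(&hw, &constraint)); /* 0 */
	return 0;
}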
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * TTY core internal functions
 */

#ifndef _TTY_INTERNAL_H
#define _TTY_INTERNAL_H

#define tty_msg(fn, tty, f, ...) \
	fn("%s %s: " f, tty_driver_name(tty), tty_name(tty), ##__VA_ARGS__)

#define tty_debug(tty, f, ...)	tty_msg(pr_debug, tty, f, ##__VA_ARGS__)
#define tty_notice(tty, f, ...)	tty_msg(pr_notice, tty, f, ##__VA_ARGS__)
#define tty_warn(tty, f, ...)	tty_msg(pr_warn, tty, f, ##__VA_ARGS__)
#define tty_err(tty, f, ...)	tty_msg(pr_err, tty, f, ##__VA_ARGS__)

#define tty_info_ratelimited(tty, f, ...) \
		tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__)

/*
 * Lock subclasses for tty locks
 *
 * TTY_LOCK_NORMAL is for normal ttys and master ptys.
 * TTY_LOCK_SLAVE is for slave ptys only.
 *
 * Lock subclasses are necessary for handling nested locking with pty pairs.
 * tty locks which use nested locking:
 *
 * legacy_mutex - Nested tty locks are necessary for releasing pty pairs.
 *		  The stable lock order is master pty first, then slave pty.
 * termios_rwsem - The stable lock order is tty_buffer lock->termios_rwsem.
 *		   Subclassing this lock enables the slave pty to hold its
 *		   termios_rwsem when claiming the master tty_buffer lock.
 * tty_buffer lock - slave ptys can claim nested buffer lock when handling
 *		     signal chars. The stable lock order is slave pty, then
 *		     master.
 */
enum {
	TTY_LOCK_NORMAL = 0,
	TTY_LOCK_SLAVE,
};

/* Values for tty->flow_change */
enum tty_flow_change {
	TTY_FLOW_NO_CHANGE,
	TTY_THROTTLE_SAFE,
	TTY_UNTHROTTLE_SAFE,
};

static inline void __tty_set_flow_change(struct tty_struct *tty,
					 enum tty_flow_change val)
{
	tty->flow_change = val;
}

static inline void tty_set_flow_change(struct tty_struct *tty,
				       enum tty_flow_change val)
{
	tty->flow_change = val;
	smp_mb();
}

int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout);
void tty_ldisc_unlock(struct tty_struct *tty);

int __tty_check_change(struct tty_struct *tty, int sig);
int tty_check_change(struct tty_struct *tty);
void __stop_tty(struct tty_struct *tty);
void __start_tty(struct tty_struct *tty);
void tty_write_unlock(struct tty_struct *tty);
int tty_write_lock(struct tty_struct *tty, bool ndelay);
void tty_vhangup_session(struct tty_struct *tty);
void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty);
int tty_signal_session_leader(struct tty_struct *tty, int exit_session);
void session_clear_tty(struct pid *session);

void tty_buffer_free_all(struct tty_port *port);
void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld);
void tty_buffer_init(struct tty_port *port);
void tty_buffer_set_lock_subclass(struct tty_port *port);
bool tty_buffer_restart_work(struct tty_port *port);
bool tty_buffer_cancel_work(struct tty_port *port);
void tty_buffer_flush_work(struct tty_port *port);

speed_t tty_termios_input_baud_rate(const struct ktermios *termios);

void tty_ldisc_hangup(struct tty_struct *tty, bool reset);
int tty_ldisc_reinit(struct tty_struct *tty, int disc);
long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty,
		       struct file *file, unsigned int cmd, unsigned long arg);
void
tty_default_fops(struct file_operations *fops); struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); int tty_alloc_file(struct file *file); void tty_add_file(struct tty_struct *tty, struct file *file); void tty_free_file(struct file *file); int tty_release(struct inode *inode, struct file *filp); #define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); void tty_ldisc_release(struct tty_struct *tty); int __must_check tty_ldisc_init(struct tty_struct *tty); void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_autoload; /* tty_audit.c */ #ifdef CONFIG_AUDIT void tty_audit_add_data(const struct tty_struct *tty, const void *data, size_t size); void tty_audit_tiocsti(const struct tty_struct *tty, u8 ch); #else static inline void tty_audit_add_data(const struct tty_struct *tty, const void *data, size_t size) { } static inline void tty_audit_tiocsti(const struct tty_struct *tty, u8 ch) { } #endif ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); int tty_insert_flip_string_and_push_buffer(struct tty_port *port, const u8 *chars, size_t cnt); #endif
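/*
 * Illustrative sketch (not part of this header): the lock-subclass comment
 * above explains why pty pairs need nested locking annotations. The fragment
 * below shows the general lockdep pattern such code relies on - taking the
 * master's lock with the default subclass and the slave's lock with a
 * distinct subclass via mutex_lock_nested() - so the fixed master-then-slave
 * order does not trigger a false deadlock report. It is a simplified,
 * hypothetical example, not the actual implementation in tty_mutex.c.
 */
#include <linux/mutex.h>
#include <linux/tty.h>

static void demo_lock_pty_pair(struct tty_struct *master,
			       struct tty_struct *slave)
{
	/* Master pty first, with the ordinary subclass... */
	mutex_lock_nested(&master->legacy_mutex, TTY_LOCK_NORMAL);
	/* ...then the slave, annotated with its own subclass so lockdep
	 * accepts the nesting described in the comment block above.
	 */
	mutex_lock_nested(&slave->legacy_mutex, TTY_LOCK_SLAVE);
}

static void demo_unlock_pty_pair(struct tty_struct *master,
				 struct tty_struct *slave)
{
	/* Release in reverse order of acquisition. */
	mutex_unlock(&slave->legacy_mutex);
	mutex_unlock(&master->legacy_mutex);
}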
// SPDX-License-Identifier: GPL-2.0
/*
 * Componentized device handling.
*/ #include <linux/component.h> #include <linux/device.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/debugfs.h> /** * DOC: overview * * The component helper allows drivers to collect a pile of sub-devices, * including their bound drivers, into an aggregate driver. Various subsystems * already provide functions to get hold of such components, e.g. * of_clk_get_by_name(). The component helper can be used when such a * subsystem-specific way to find a device is not available: The component * helper fills the niche of aggregate drivers for specific hardware, where * further standardization into a subsystem would not be practical. The common * example is when a logical device (e.g. a DRM display driver) is spread around * the SoC on various components (scanout engines, blending blocks, transcoders * for various outputs and so on). * * The component helper also doesn't solve runtime dependencies, e.g. for system * suspend and resume operations. See also :ref:`device links<device_link>`. * * Components are registered using component_add() and unregistered with * component_del(), usually from the driver's probe and disconnect functions. * * Aggregate drivers first assemble a component match list of what they need * using component_match_add(). This is then registered as an aggregate driver * using component_master_add_with_match(), and unregistered using * component_master_del(). */ struct component; struct component_match_array { void *data; int (*compare)(struct device *, void *); int (*compare_typed)(struct device *, int, void *); void (*release)(struct device *, void *); struct component *component; bool duplicate; }; struct component_match { size_t alloc; size_t num; struct component_match_array *compare; }; struct aggregate_device { struct list_head node; bool bound; const struct component_master_ops *ops; struct device *parent; struct component_match *match; }; struct component { struct list_head node; struct aggregate_device *adev; bool bound; const struct component_ops *ops; int subcomponent; struct device *dev; }; static DEFINE_MUTEX(component_mutex); static LIST_HEAD(component_list); static LIST_HEAD(aggregate_devices); #ifdef CONFIG_DEBUG_FS static struct dentry *component_debugfs_dir; static int component_devices_show(struct seq_file *s, void *data) { struct aggregate_device *m = s->private; struct component_match *match = m->match; size_t i; mutex_lock(&component_mutex); seq_printf(s, "%-50s %20s\n", "aggregate_device name", "status"); seq_puts(s, "-----------------------------------------------------------------------\n"); seq_printf(s, "%-50s %20s\n\n", dev_name(m->parent), m->bound ? "bound" : "not bound"); seq_printf(s, "%-50s %20s\n", "device name", "status"); seq_puts(s, "-----------------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { struct component *component = match->compare[i].component; seq_printf(s, "%-50s %20s\n", component ? dev_name(component->dev) : "(unknown)", component ? (component->bound ? 
"bound" : "not bound") : "not registered"); } mutex_unlock(&component_mutex); return 0; } DEFINE_SHOW_ATTRIBUTE(component_devices); static int __init component_debug_init(void) { component_debugfs_dir = debugfs_create_dir("device_component", NULL); return 0; } core_initcall(component_debug_init); static void component_debugfs_add(struct aggregate_device *m) { debugfs_create_file(dev_name(m->parent), 0444, component_debugfs_dir, m, &component_devices_fops); } static void component_debugfs_del(struct aggregate_device *m) { debugfs_lookup_and_remove(dev_name(m->parent), component_debugfs_dir); } #else static void component_debugfs_add(struct aggregate_device *m) { } static void component_debugfs_del(struct aggregate_device *m) { } #endif static struct aggregate_device *__aggregate_find(struct device *parent, const struct component_master_ops *ops) { struct aggregate_device *m; list_for_each_entry(m, &aggregate_devices, node) if (m->parent == parent && (!ops || m->ops == ops)) return m; return NULL; } static struct component *find_component(struct aggregate_device *adev, struct component_match_array *mc) { struct component *c; list_for_each_entry(c, &component_list, node) { if (c->adev && c->adev != adev) continue; if (mc->compare && mc->compare(c->dev, mc->data)) return c; if (mc->compare_typed && mc->compare_typed(c->dev, c->subcomponent, mc->data)) return c; } return NULL; } static int find_components(struct aggregate_device *adev) { struct component_match *match = adev->match; size_t i; int ret = 0; /* * Scan the array of match functions and attach * any components which are found to this adev. */ for (i = 0; i < match->num; i++) { struct component_match_array *mc = &match->compare[i]; struct component *c; dev_dbg(adev->parent, "Looking for component %zu\n", i); if (match->compare[i].component) continue; c = find_component(adev, mc); if (!c) { ret = -ENXIO; break; } dev_dbg(adev->parent, "found component %s, duplicate %u\n", dev_name(c->dev), !!c->adev); /* Attach this component to the adev */ match->compare[i].duplicate = !!c->adev; match->compare[i].component = c; c->adev = adev; } return ret; } /* Detach component from associated aggregate_device */ static void remove_component(struct aggregate_device *adev, struct component *c) { size_t i; /* Detach the component from this adev. */ for (i = 0; i < adev->match->num; i++) if (adev->match->compare[i].component == c) adev->match->compare[i].component = NULL; } /* * Try to bring up an aggregate device. If component is NULL, we're interested * in this aggregate device, otherwise it's a component which must be present * to try and bring up the aggregate device. * * Returns 1 for successful bringup, 0 if not ready, or -ve errno. 
*/ static int try_to_bring_up_aggregate_device(struct aggregate_device *adev, struct component *component) { int ret; dev_dbg(adev->parent, "trying to bring up adev\n"); if (find_components(adev)) { dev_dbg(adev->parent, "master has incomplete components\n"); return 0; } if (component && component->adev != adev) { dev_dbg(adev->parent, "master is not for this component (%s)\n", dev_name(component->dev)); return 0; } if (!devres_open_group(adev->parent, adev, GFP_KERNEL)) return -ENOMEM; /* Found all components */ ret = adev->ops->bind(adev->parent); if (ret < 0) { devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) dev_info(adev->parent, "adev bind failed: %d\n", ret); return ret; } devres_close_group(adev->parent, NULL); adev->bound = true; return 1; } static int try_to_bring_up_masters(struct component *component) { struct aggregate_device *adev; int ret = 0; list_for_each_entry(adev, &aggregate_devices, node) { if (!adev->bound) { ret = try_to_bring_up_aggregate_device(adev, component); if (ret != 0) break; } } return ret; } static void take_down_aggregate_device(struct aggregate_device *adev) { if (adev->bound) { adev->ops->unbind(adev->parent); devres_release_group(adev->parent, adev); adev->bound = false; } } /** * component_compare_of - A common component compare function for of_node * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is device of_node. e.g. * component_match_add_release(masterdev, &match, component_release_of, * component_compare_of, component_dev_of_node) */ int component_compare_of(struct device *dev, void *data) { return device_match_of_node(dev, data); } EXPORT_SYMBOL_GPL(component_compare_of); /** * component_release_of - A common component release function for of_node * @dev: component device * @data: @compare_data from component_match_add_release() * * About the example, Please see component_compare_of(). */ void component_release_of(struct device *dev, void *data) { of_node_put(data); } EXPORT_SYMBOL_GPL(component_release_of); /** * component_compare_dev - A common component compare function for dev * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is struce device. e.g. * component_match_add(masterdev, &match, component_compare_dev, component_dev) */ int component_compare_dev(struct device *dev, void *data) { return dev == data; } EXPORT_SYMBOL_GPL(component_compare_dev); /** * component_compare_dev_name - A common component compare function for device name * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is device name string. e.g. 
* component_match_add(masterdev, &match, component_compare_dev_name, * "component_dev_name") */ int component_compare_dev_name(struct device *dev, void *data) { return device_match_name(dev, data); } EXPORT_SYMBOL_GPL(component_compare_dev_name); static void devm_component_match_release(struct device *parent, void *res) { struct component_match *match = res; unsigned int i; for (i = 0; i < match->num; i++) { struct component_match_array *mc = &match->compare[i]; if (mc->release) mc->release(parent, mc->data); } kfree(match->compare); } static int component_match_realloc(struct component_match *match, size_t num) { struct component_match_array *new; if (match->alloc == num) return 0; new = kmalloc_array(num, sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; if (match->compare) { memcpy(new, match->compare, sizeof(*new) * min(match->num, num)); kfree(match->compare); } match->compare = new; match->alloc = num; return 0; } static void __component_match_add(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), int (*compare_typed)(struct device *, int, void *), void *compare_data) { struct component_match *match = *matchptr; if (IS_ERR(match)) return; if (!match) { match = devres_alloc(devm_component_match_release, sizeof(*match), GFP_KERNEL); if (!match) { *matchptr = ERR_PTR(-ENOMEM); return; } devres_add(parent, match); *matchptr = match; } if (match->num == match->alloc) { size_t new_size = match->alloc + 16; int ret; ret = component_match_realloc(match, new_size); if (ret) { *matchptr = ERR_PTR(ret); return; } } match->compare[match->num].compare = compare; match->compare[match->num].compare_typed = compare_typed; match->compare[match->num].release = release; match->compare[match->num].data = compare_data; match->compare[match->num].component = NULL; match->num++; } /** * component_match_add_release - add a component match entry with release callback * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @release: release function for @compare_data * @compare: compare function to match against all components * @compare_data: opaque pointer passed to the @compare function * * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add(). * * The allocated match list in @matchptr is automatically released using devm * actions, where upon @release will be called to free any references held by * @compare_data, e.g. when @compare_data is a &device_node that must be * released with of_node_put(). * * See also component_match_add() and component_match_add_typed(). 
*/ void component_match_add_release(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), void *compare_data) { __component_match_add(parent, matchptr, release, compare, NULL, compare_data); } EXPORT_SYMBOL(component_match_add_release); /** * component_match_add_typed - add a component match entry for a typed component * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @compare_typed: compare function to match against all typed components * @compare_data: opaque pointer passed to the @compare function * * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add_typed(). * * The allocated match list in @matchptr is automatically released using devm * actions. * * See also component_match_add_release() and component_match_add_typed(). */ void component_match_add_typed(struct device *parent, struct component_match **matchptr, int (*compare_typed)(struct device *, int, void *), void *compare_data) { __component_match_add(parent, matchptr, NULL, NULL, compare_typed, compare_data); } EXPORT_SYMBOL(component_match_add_typed); static void free_aggregate_device(struct aggregate_device *adev) { struct component_match *match = adev->match; int i; component_debugfs_del(adev); list_del(&adev->node); if (match) { for (i = 0; i < match->num; i++) { struct component *c = match->compare[i].component; if (c) c->adev = NULL; } } kfree(adev); } /** * component_master_add_with_match - register an aggregate driver * @parent: parent device of the aggregate driver * @ops: callbacks for the aggregate driver * @match: component match list for the aggregate driver * * Registers a new aggregate driver consisting of the components added to @match * by calling one of the component_match_add() functions. Once all components in * @match are available, it will be assembled by calling * &component_master_ops.bind from @ops. Must be unregistered by calling * component_master_del(). */ int component_master_add_with_match(struct device *parent, const struct component_master_ops *ops, struct component_match *match) { struct aggregate_device *adev; int ret; /* Reallocate the match array for its true size */ ret = component_match_realloc(match, match->num); if (ret) return ret; adev = kzalloc(sizeof(*adev), GFP_KERNEL); if (!adev) return -ENOMEM; adev->parent = parent; adev->ops = ops; adev->match = match; component_debugfs_add(adev); /* Add to the list of available aggregate devices. */ mutex_lock(&component_mutex); list_add(&adev->node, &aggregate_devices); ret = try_to_bring_up_aggregate_device(adev, NULL); if (ret < 0) free_aggregate_device(adev); mutex_unlock(&component_mutex); return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(component_master_add_with_match); /** * component_master_del - unregister an aggregate driver * @parent: parent device of the aggregate driver * @ops: callbacks for the aggregate driver * * Unregisters an aggregate driver registered with * component_master_add_with_match(). If necessary the aggregate driver is first * disassembled by calling &component_master_ops.unbind from @ops. 
*/ void component_master_del(struct device *parent, const struct component_master_ops *ops) { struct aggregate_device *adev; mutex_lock(&component_mutex); adev = __aggregate_find(parent, ops); if (adev) { take_down_aggregate_device(adev); free_aggregate_device(adev); } mutex_unlock(&component_mutex); } EXPORT_SYMBOL_GPL(component_master_del); bool component_master_is_bound(struct device *parent, const struct component_master_ops *ops) { struct aggregate_device *adev; guard(mutex)(&component_mutex); adev = __aggregate_find(parent, ops); if (!adev) return 0; return adev->bound; } EXPORT_SYMBOL_GPL(component_master_is_bound); static void component_unbind(struct component *component, struct aggregate_device *adev, void *data) { WARN_ON(!component->bound); dev_dbg(adev->parent, "unbinding %s component %p (ops %ps)\n", dev_name(component->dev), component, component->ops); if (component->ops && component->ops->unbind) component->ops->unbind(component->dev, adev->parent, data); component->bound = false; /* Release all resources claimed in the binding of this component */ devres_release_group(component->dev, component); } /** * component_unbind_all - unbind all components of an aggregate driver * @parent: parent device of the aggregate driver * @data: opaque pointer, passed to all components * * Unbinds all components of the aggregate device by passing @data to their * &component_ops.unbind functions. Should be called from * &component_master_ops.unbind. */ void component_unbind_all(struct device *parent, void *data) { struct aggregate_device *adev; struct component *c; size_t i; WARN_ON(!mutex_is_locked(&component_mutex)); adev = __aggregate_find(parent, NULL); if (!adev) return; /* Unbind components in reverse order */ for (i = adev->match->num; i--; ) if (!adev->match->compare[i].duplicate) { c = adev->match->compare[i].component; component_unbind(c, adev, data); } } EXPORT_SYMBOL_GPL(component_unbind_all); static int component_bind(struct component *component, struct aggregate_device *adev, void *data) { int ret; /* * Each component initialises inside its own devres group. * This allows us to roll-back a failed component without * affecting anything else. */ if (!devres_open_group(adev->parent, NULL, GFP_KERNEL)) return -ENOMEM; /* * Also open a group for the device itself: this allows us * to release the resources claimed against the sub-device * at the appropriate moment. */ if (!devres_open_group(component->dev, component, GFP_KERNEL)) { devres_release_group(adev->parent, NULL); return -ENOMEM; } dev_dbg(adev->parent, "binding %s (ops %ps)\n", dev_name(component->dev), component->ops); ret = component->ops->bind(component->dev, adev->parent, data); if (!ret) { component->bound = true; /* * Close the component device's group so that resources * allocated in the binding are encapsulated for removal * at unbind. Remove the group on the DRM device as we * can clean those resources up independently. 
*/ devres_close_group(component->dev, NULL); devres_remove_group(adev->parent, NULL); dev_info(adev->parent, "bound %s (ops %ps)\n", dev_name(component->dev), component->ops); } else { devres_release_group(component->dev, NULL); devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) dev_err(adev->parent, "failed to bind %s (ops %ps): %d\n", dev_name(component->dev), component->ops, ret); } return ret; } /** * component_bind_all - bind all components of an aggregate driver * @parent: parent device of the aggregate driver * @data: opaque pointer, passed to all components * * Binds all components of the aggregate @dev by passing @data to their * &component_ops.bind functions. Should be called from * &component_master_ops.bind. */ int component_bind_all(struct device *parent, void *data) { struct aggregate_device *adev; struct component *c; size_t i; int ret = 0; WARN_ON(!mutex_is_locked(&component_mutex)); adev = __aggregate_find(parent, NULL); if (!adev) return -EINVAL; /* Bind components in match order */ for (i = 0; i < adev->match->num; i++) if (!adev->match->compare[i].duplicate) { c = adev->match->compare[i].component; ret = component_bind(c, adev, data); if (ret) break; } if (ret != 0) { for (; i > 0; i--) if (!adev->match->compare[i - 1].duplicate) { c = adev->match->compare[i - 1].component; component_unbind(c, adev, data); } } return ret; } EXPORT_SYMBOL_GPL(component_bind_all); static int __component_add(struct device *dev, const struct component_ops *ops, int subcomponent) { struct component *component; int ret; component = kzalloc(sizeof(*component), GFP_KERNEL); if (!component) return -ENOMEM; component->ops = ops; component->dev = dev; component->subcomponent = subcomponent; dev_dbg(dev, "adding component (ops %ps)\n", ops); mutex_lock(&component_mutex); list_add_tail(&component->node, &component_list); ret = try_to_bring_up_masters(component); if (ret < 0) { if (component->adev) remove_component(component->adev, component); list_del(&component->node); kfree(component); } mutex_unlock(&component_mutex); return ret < 0 ? ret : 0; } /** * component_add_typed - register a component * @dev: component device * @ops: component callbacks * @subcomponent: nonzero identifier for subcomponents * * Register a new component for @dev. Functions in @ops will be call when the * aggregate driver is ready to bind the overall driver by calling * component_bind_all(). See also &struct component_ops. * * @subcomponent must be nonzero and is used to differentiate between multiple * components registered on the same device @dev. These components are match * using component_match_add_typed(). * * The component needs to be unregistered at driver unload/disconnect by * calling component_del(). * * See also component_add(). */ int component_add_typed(struct device *dev, const struct component_ops *ops, int subcomponent) { if (WARN_ON(subcomponent == 0)) return -EINVAL; return __component_add(dev, ops, subcomponent); } EXPORT_SYMBOL_GPL(component_add_typed); /** * component_add - register a component * @dev: component device * @ops: component callbacks * * Register a new component for @dev. Functions in @ops will be called when the * aggregate driver is ready to bind the overall driver by calling * component_bind_all(). See also &struct component_ops. * * The component needs to be unregistered at driver unload/disconnect by * calling component_del(). * * See also component_add_typed() for a variant that allows multiple different * components on the same device. 
*/ int component_add(struct device *dev, const struct component_ops *ops) { return __component_add(dev, ops, 0); } EXPORT_SYMBOL_GPL(component_add); /** * component_del - unregister a component * @dev: component device * @ops: component callbacks * * Unregister a component added with component_add(). If the component is bound * into an aggregate driver, this will force the entire aggregate driver, including * all its components, to be unbound. */ void component_del(struct device *dev, const struct component_ops *ops) { struct component *c, *component = NULL; mutex_lock(&component_mutex); list_for_each_entry(c, &component_list, node) if (c->dev == dev && c->ops == ops) { list_del(&c->node); component = c; break; } if (component && component->adev) { take_down_aggregate_device(component->adev); remove_component(component->adev, component); } mutex_unlock(&component_mutex); WARN_ON(!component); kfree(component); } EXPORT_SYMBOL_GPL(component_del);
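/*
 * Illustrative sketch (not part of component.c): the kernel-doc above
 * describes the expected call flow - components register with
 * component_add(), the aggregate driver builds a match list and registers it
 * with component_master_add_with_match(), and the framework invokes the
 * master's bind op once every match is satisfied. The fragment below shows
 * that flow for a hypothetical platform device; the "demo_*" names are made
 * up for the example, and the probe functions would normally be hooked into
 * ordinary platform_driver definitions.
 */
#include <linux/component.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int demo_master_bind(struct device *dev)
{
	/* Bind every matched component; their component_ops.bind runs here. */
	return component_bind_all(dev, NULL);
}

static void demo_master_unbind(struct device *dev)
{
	component_unbind_all(dev, NULL);
}

static const struct component_master_ops demo_master_ops = {
	.bind	= demo_master_bind,
	.unbind	= demo_master_unbind,
};

static int demo_master_probe(struct platform_device *pdev)
{
	struct device_node *child;
	struct component_match *match = NULL;

	/* One match entry per child node; component_release_of() drops the
	 * of_node reference when the devres-managed match list goes away.
	 */
	for_each_available_child_of_node(pdev->dev.of_node, child)
		component_match_add_release(&pdev->dev, &match,
					    component_release_of,
					    component_compare_of,
					    of_node_get(child));

	return component_master_add_with_match(&pdev->dev, &demo_master_ops,
					       match);
}

/* A sub-device driver registers itself as a component from its own probe. */
static int demo_component_bind(struct device *dev, struct device *master,
			       void *data)
{
	return 0;	/* claim resources against the aggregate here */
}

static void demo_component_unbind(struct device *dev, struct device *master,
				  void *data)
{
}

static const struct component_ops demo_component_ops = {
	.bind	= demo_component_bind,
	.unbind	= demo_component_unbind,
};

static int demo_component_probe(struct platform_device *pdev)
{
	return component_add(&pdev->dev, &demo_component_ops);
}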
/*
 * ipheth.c - Apple iPhone USB Ethernet driver
 *
 * Copyright (c) 2009 Diego Giagio <diego@giagio.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of GIAGIO.COM nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
* * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * * Attention: iPhone device must be paired, otherwise it won't respond to our * driver. For more info: http://giagio.com/wiki/moin.cgi/iPhoneEthernetDriver * */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/usb.h> #include <linux/workqueue.h> #include <linux/usb/cdc.h> #define USB_VENDOR_APPLE 0x05ac #define IPHETH_USBINTF_CLASS 255 #define IPHETH_USBINTF_SUBCLASS 253 #define IPHETH_USBINTF_PROTO 1 #define IPHETH_IP_ALIGN 2 /* padding at front of URB */ /* On iOS devices, NCM headers in RX have a fixed size regardless of DPE count: * - NTH16 (NCMH): 12 bytes, as per CDC NCM 1.0 spec * - NDP16 (NCM0): 96 bytes, of which * - NDP16 fixed header: 8 bytes * - maximum of 22 DPEs (21 datagrams + trailer), 4 bytes each */ #define IPHETH_NDP16_MAX_DPE 22 #define IPHETH_NDP16_HEADER_SIZE (sizeof(struct usb_cdc_ncm_ndp16) + \ IPHETH_NDP16_MAX_DPE * \ sizeof(struct usb_cdc_ncm_dpe16)) #define IPHETH_NCM_HEADER_SIZE (sizeof(struct usb_cdc_ncm_nth16) + \ IPHETH_NDP16_HEADER_SIZE) #define IPHETH_TX_BUF_SIZE ETH_FRAME_LEN #define IPHETH_RX_BUF_SIZE_LEGACY (IPHETH_IP_ALIGN + ETH_FRAME_LEN) #define IPHETH_RX_BUF_SIZE_NCM 65536 #define IPHETH_TX_TIMEOUT (5 * HZ) #define IPHETH_INTFNUM 2 #define IPHETH_ALT_INTFNUM 1 #define IPHETH_CTRL_ENDP 0x00 #define IPHETH_CTRL_BUF_SIZE 0x40 #define IPHETH_CTRL_TIMEOUT (5 * HZ) #define IPHETH_CMD_GET_MACADDR 0x00 #define IPHETH_CMD_ENABLE_NCM 0x04 #define IPHETH_CMD_CARRIER_CHECK 0x45 #define IPHETH_CARRIER_CHECK_TIMEOUT round_jiffies_relative(1 * HZ) #define IPHETH_CARRIER_ON 0x04 static const struct usb_device_id ipheth_table[] = { { USB_VENDOR_AND_INTERFACE_INFO(USB_VENDOR_APPLE, IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, IPHETH_USBINTF_PROTO) }, { } }; MODULE_DEVICE_TABLE(usb, ipheth_table); struct ipheth_device { struct usb_device *udev; struct usb_interface *intf; struct net_device *net; struct urb *tx_urb; struct urb *rx_urb; unsigned char *tx_buf; unsigned char *rx_buf; unsigned char *ctrl_buf; u8 bulk_in; u8 bulk_out; struct delayed_work carrier_work; bool confirmed_pairing; int (*rcvbulk_callback)(struct urb *urb); size_t rx_buf_len; }; static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags); static int ipheth_alloc_urbs(struct 
ipheth_device *iphone) { struct urb *tx_urb = NULL; struct urb *rx_urb = NULL; u8 *tx_buf = NULL; u8 *rx_buf = NULL; tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (tx_urb == NULL) goto error_nomem; rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (rx_urb == NULL) goto free_tx_urb; tx_buf = usb_alloc_coherent(iphone->udev, IPHETH_TX_BUF_SIZE, GFP_KERNEL, &tx_urb->transfer_dma); if (tx_buf == NULL) goto free_rx_urb; rx_buf = usb_alloc_coherent(iphone->udev, iphone->rx_buf_len, GFP_KERNEL, &rx_urb->transfer_dma); if (rx_buf == NULL) goto free_tx_buf; iphone->tx_urb = tx_urb; iphone->rx_urb = rx_urb; iphone->tx_buf = tx_buf; iphone->rx_buf = rx_buf; return 0; free_tx_buf: usb_free_coherent(iphone->udev, IPHETH_TX_BUF_SIZE, tx_buf, tx_urb->transfer_dma); free_rx_urb: usb_free_urb(rx_urb); free_tx_urb: usb_free_urb(tx_urb); error_nomem: return -ENOMEM; } static void ipheth_free_urbs(struct ipheth_device *iphone) { usb_free_coherent(iphone->udev, iphone->rx_buf_len, iphone->rx_buf, iphone->rx_urb->transfer_dma); usb_free_coherent(iphone->udev, IPHETH_TX_BUF_SIZE, iphone->tx_buf, iphone->tx_urb->transfer_dma); usb_free_urb(iphone->rx_urb); usb_free_urb(iphone->tx_urb); } static void ipheth_kill_urbs(struct ipheth_device *dev) { usb_kill_urb(dev->tx_urb); usb_kill_urb(dev->rx_urb); } static int ipheth_consume_skb(char *buf, int len, struct ipheth_device *dev) { struct sk_buff *skb; skb = dev_alloc_skb(len); if (!skb) { dev->net->stats.rx_dropped++; return -ENOMEM; } skb_put_data(skb, buf, len); skb->dev = dev->net; skb->protocol = eth_type_trans(skb, dev->net); dev->net->stats.rx_packets++; dev->net->stats.rx_bytes += len; netif_rx(skb); return 0; } static int ipheth_rcvbulk_callback_legacy(struct urb *urb) { struct ipheth_device *dev; char *buf; int len; dev = urb->context; if (urb->actual_length <= IPHETH_IP_ALIGN) { dev->net->stats.rx_length_errors++; return -EINVAL; } len = urb->actual_length - IPHETH_IP_ALIGN; buf = urb->transfer_buffer + IPHETH_IP_ALIGN; return ipheth_consume_skb(buf, len, dev); } /* In "NCM mode", the iOS device encapsulates RX (phone->computer) traffic * in NCM Transfer Blocks (similarly to CDC NCM). However, unlike reverse * tethering (handled by the `cdc_ncm` driver), regular tethering is not * compliant with the CDC NCM spec, as the device is missing the necessary * descriptors, and TX (computer->phone) traffic is not encapsulated * at all. Thus `ipheth` implements a very limited subset of the spec with * the sole purpose of parsing RX URBs. 
*/ static int ipheth_rcvbulk_callback_ncm(struct urb *urb) { struct usb_cdc_ncm_nth16 *ncmh; struct usb_cdc_ncm_ndp16 *ncm0; struct usb_cdc_ncm_dpe16 *dpe; struct ipheth_device *dev; u16 dg_idx, dg_len; int retval = -EINVAL; char *buf; dev = urb->context; if (urb->actual_length < IPHETH_NCM_HEADER_SIZE) { dev->net->stats.rx_length_errors++; return retval; } ncmh = urb->transfer_buffer; if (ncmh->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN) || /* On iOS, NDP16 directly follows NTH16 */ ncmh->wNdpIndex != cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16))) goto rx_error; ncm0 = urb->transfer_buffer + sizeof(struct usb_cdc_ncm_nth16); if (ncm0->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)) goto rx_error; dpe = ncm0->dpe16; for (int dpe_i = 0; dpe_i < IPHETH_NDP16_MAX_DPE; ++dpe_i, ++dpe) { dg_idx = le16_to_cpu(dpe->wDatagramIndex); dg_len = le16_to_cpu(dpe->wDatagramLength); /* Null DPE must be present after last datagram pointer entry * (3.3.1 USB CDC NCM spec v1.0) */ if (dg_idx == 0 && dg_len == 0) return 0; if (dg_idx < IPHETH_NCM_HEADER_SIZE || dg_idx >= urb->actual_length || dg_len > urb->actual_length - dg_idx) { dev->net->stats.rx_length_errors++; return retval; } buf = urb->transfer_buffer + dg_idx; retval = ipheth_consume_skb(buf, dg_len, dev); if (retval != 0) return retval; } rx_error: dev->net->stats.rx_errors++; return retval; } static void ipheth_rcvbulk_callback(struct urb *urb) { struct ipheth_device *dev; int retval, status; dev = urb->context; if (dev == NULL) return; status = urb->status; switch (status) { case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: case -EPROTO: return; case 0: break; default: dev_err(&dev->intf->dev, "%s: urb status: %d\n", __func__, status); return; } /* iPhone may periodically send URBs with no payload * on the "bulk in" endpoint. It is safe to ignore them. */ if (urb->actual_length == 0) goto rx_submit; /* RX URBs starting with 0x00 0x01 do not encapsulate Ethernet frames, * but rather are control frames. Their purpose is not documented, and * they don't affect driver functionality, okay to drop them. * There is usually just one 4-byte control frame as the very first * URB received from the bulk IN endpoint. 
*/ if (unlikely (urb->actual_length == 4 && ((char *)urb->transfer_buffer)[0] == 0 && ((char *)urb->transfer_buffer)[1] == 1)) goto rx_submit; retval = dev->rcvbulk_callback(urb); if (retval != 0) { dev_err(&dev->intf->dev, "%s: callback retval: %d\n", __func__, retval); } rx_submit: dev->confirmed_pairing = true; ipheth_rx_submit(dev, GFP_ATOMIC); } static void ipheth_sndbulk_callback(struct urb *urb) { struct ipheth_device *dev; int status = urb->status; dev = urb->context; if (dev == NULL) return; if (status != 0 && status != -ENOENT && status != -ECONNRESET && status != -ESHUTDOWN) dev_err(&dev->intf->dev, "%s: urb status: %d\n", __func__, status); if (status == 0) netif_wake_queue(dev->net); else // on URB error, trigger immediate poll schedule_delayed_work(&dev->carrier_work, 0); } static int ipheth_carrier_set(struct ipheth_device *dev) { struct usb_device *udev; int retval; if (!dev->confirmed_pairing) return 0; udev = dev->udev; retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, IPHETH_CTRL_ENDP), IPHETH_CMD_CARRIER_CHECK, /* request */ 0xc0, /* request type */ 0x00, /* value */ 0x02, /* index */ dev->ctrl_buf, IPHETH_CTRL_BUF_SIZE, IPHETH_CTRL_TIMEOUT); if (retval <= 0) { dev_err(&dev->intf->dev, "%s: usb_control_msg: %d\n", __func__, retval); return retval; } if ((retval == 1 && dev->ctrl_buf[0] == IPHETH_CARRIER_ON) || (retval >= 2 && dev->ctrl_buf[1] == IPHETH_CARRIER_ON)) { netif_carrier_on(dev->net); if (dev->tx_urb->status != -EINPROGRESS) netif_wake_queue(dev->net); } else { netif_carrier_off(dev->net); netif_stop_queue(dev->net); } return 0; } static void ipheth_carrier_check_work(struct work_struct *work) { struct ipheth_device *dev = container_of(work, struct ipheth_device, carrier_work.work); ipheth_carrier_set(dev); schedule_delayed_work(&dev->carrier_work, IPHETH_CARRIER_CHECK_TIMEOUT); } static int ipheth_get_macaddr(struct ipheth_device *dev) { struct usb_device *udev = dev->udev; struct net_device *net = dev->net; int retval; retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, IPHETH_CTRL_ENDP), IPHETH_CMD_GET_MACADDR, /* request */ 0xc0, /* request type */ 0x00, /* value */ 0x02, /* index */ dev->ctrl_buf, IPHETH_CTRL_BUF_SIZE, IPHETH_CTRL_TIMEOUT); if (retval < 0) { dev_err(&dev->intf->dev, "%s: usb_control_msg: %d\n", __func__, retval); } else if (retval < ETH_ALEN) { dev_err(&dev->intf->dev, "%s: usb_control_msg: short packet: %d bytes\n", __func__, retval); retval = -EINVAL; } else { eth_hw_addr_set(net, dev->ctrl_buf); retval = 0; } return retval; } static int ipheth_enable_ncm(struct ipheth_device *dev) { struct usb_device *udev = dev->udev; int retval; retval = usb_control_msg(udev, usb_sndctrlpipe(udev, IPHETH_CTRL_ENDP), IPHETH_CMD_ENABLE_NCM, /* request */ 0x41, /* request type */ 0x00, /* value */ 0x02, /* index */ NULL, 0, IPHETH_CTRL_TIMEOUT); dev_info(&dev->intf->dev, "%s: usb_control_msg: %d\n", __func__, retval); return retval; } static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags) { struct usb_device *udev = dev->udev; int retval; usb_fill_bulk_urb(dev->rx_urb, udev, usb_rcvbulkpipe(udev, dev->bulk_in), dev->rx_buf, dev->rx_buf_len, ipheth_rcvbulk_callback, dev); dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; retval = usb_submit_urb(dev->rx_urb, mem_flags); if (retval) dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", __func__, retval); return retval; } static int ipheth_open(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); struct usb_device *udev = dev->udev; int retval = 0; 
usb_set_interface(udev, IPHETH_INTFNUM, IPHETH_ALT_INTFNUM); retval = ipheth_carrier_set(dev); if (retval) return retval; retval = ipheth_rx_submit(dev, GFP_KERNEL); if (retval) return retval; schedule_delayed_work(&dev->carrier_work, IPHETH_CARRIER_CHECK_TIMEOUT); return retval; } static int ipheth_close(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); netif_stop_queue(net); cancel_delayed_work_sync(&dev->carrier_work); return 0; } static netdev_tx_t ipheth_tx(struct sk_buff *skb, struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); struct usb_device *udev = dev->udev; int retval; /* Paranoid */ if (skb->len > IPHETH_TX_BUF_SIZE) { WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len); dev->net->stats.tx_dropped++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; } memcpy(dev->tx_buf, skb->data, skb->len); usb_fill_bulk_urb(dev->tx_urb, udev, usb_sndbulkpipe(udev, dev->bulk_out), dev->tx_buf, skb->len, ipheth_sndbulk_callback, dev); dev->tx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; netif_stop_queue(net); retval = usb_submit_urb(dev->tx_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", __func__, retval); dev->net->stats.tx_errors++; dev_kfree_skb_any(skb); netif_wake_queue(net); } else { dev->net->stats.tx_packets++; dev->net->stats.tx_bytes += skb->len; dev_consume_skb_any(skb); } return NETDEV_TX_OK; } static void ipheth_tx_timeout(struct net_device *net, unsigned int txqueue) { struct ipheth_device *dev = netdev_priv(net); dev_err(&dev->intf->dev, "%s: TX timeout\n", __func__); dev->net->stats.tx_errors++; usb_unlink_urb(dev->tx_urb); } static u32 ipheth_ethtool_op_get_link(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); return netif_carrier_ok(dev->net); } static const struct ethtool_ops ops = { .get_link = ipheth_ethtool_op_get_link }; static const struct net_device_ops ipheth_netdev_ops = { .ndo_open = ipheth_open, .ndo_stop = ipheth_close, .ndo_start_xmit = ipheth_tx, .ndo_tx_timeout = ipheth_tx_timeout, }; static int ipheth_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_interface *hintf; struct usb_endpoint_descriptor *endp; struct ipheth_device *dev; struct net_device *netdev; int i; int retval; netdev = alloc_etherdev(sizeof(struct ipheth_device)); if (!netdev) return -ENOMEM; netdev->netdev_ops = &ipheth_netdev_ops; netdev->watchdog_timeo = IPHETH_TX_TIMEOUT; strscpy(netdev->name, "eth%d", sizeof(netdev->name)); dev = netdev_priv(netdev); dev->udev = udev; dev->net = netdev; dev->intf = intf; dev->confirmed_pairing = false; dev->rx_buf_len = IPHETH_RX_BUF_SIZE_LEGACY; dev->rcvbulk_callback = ipheth_rcvbulk_callback_legacy; /* Set up endpoints */ hintf = usb_altnum_to_altsetting(intf, IPHETH_ALT_INTFNUM); if (hintf == NULL) { retval = -ENODEV; dev_err(&intf->dev, "Unable to find alternate settings interface\n"); goto err_endpoints; } for (i = 0; i < hintf->desc.bNumEndpoints; i++) { endp = &hintf->endpoint[i].desc; if (usb_endpoint_is_bulk_in(endp)) dev->bulk_in = endp->bEndpointAddress; else if (usb_endpoint_is_bulk_out(endp)) dev->bulk_out = endp->bEndpointAddress; } if (!(dev->bulk_in && dev->bulk_out)) { retval = -ENODEV; dev_err(&intf->dev, "Unable to find endpoints\n"); goto err_endpoints; } dev->ctrl_buf = kmalloc(IPHETH_CTRL_BUF_SIZE, GFP_KERNEL); if (dev->ctrl_buf == NULL) { retval = -ENOMEM; goto err_alloc_ctrl_buf; } retval = ipheth_get_macaddr(dev); if (retval) goto 
err_get_macaddr; retval = ipheth_enable_ncm(dev); if (!retval) { dev->rx_buf_len = IPHETH_RX_BUF_SIZE_NCM; dev->rcvbulk_callback = ipheth_rcvbulk_callback_ncm; } INIT_DELAYED_WORK(&dev->carrier_work, ipheth_carrier_check_work); retval = ipheth_alloc_urbs(dev); if (retval) { dev_err(&intf->dev, "error allocating urbs: %d\n", retval); goto err_alloc_urbs; } usb_set_intfdata(intf, dev); SET_NETDEV_DEV(netdev, &intf->dev); netdev->ethtool_ops = &ops; retval = register_netdev(netdev); if (retval) { dev_err(&intf->dev, "error registering netdev: %d\n", retval); retval = -EIO; goto err_register_netdev; } // carrier down and transmit queues stopped until packet from device netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); dev_info(&intf->dev, "Apple iPhone USB Ethernet device attached\n"); return 0; err_register_netdev: ipheth_free_urbs(dev); err_alloc_urbs: err_get_macaddr: kfree(dev->ctrl_buf); err_alloc_ctrl_buf: err_endpoints: free_netdev(netdev); return retval; } static void ipheth_disconnect(struct usb_interface *intf) { struct ipheth_device *dev; dev = usb_get_intfdata(intf); if (dev != NULL) { unregister_netdev(dev->net); ipheth_kill_urbs(dev); ipheth_free_urbs(dev); kfree(dev->ctrl_buf); free_netdev(dev->net); } usb_set_intfdata(intf, NULL); dev_info(&intf->dev, "Apple iPhone USB Ethernet now disconnected\n"); } static struct usb_driver ipheth_driver = { .name = "ipheth", .probe = ipheth_probe, .disconnect = ipheth_disconnect, .id_table = ipheth_table, .disable_hub_initiated_lpm = 1, }; module_usb_driver(ipheth_driver); MODULE_AUTHOR("Diego Giagio <diego@giagio.com>"); MODULE_DESCRIPTION("Apple iPhone USB Ethernet driver"); MODULE_LICENSE("Dual BSD/GPL");
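/*
 * Illustrative sanity check, not part of the original driver: spell out the
 * arithmetic behind the fixed-size NCM RX header described near the top of
 * this file. With the packed layouts from <linux/usb/cdc.h> this works out to
 * 12 (NTH16) + 8 (NDP16 fixed part) + 22 * 4 (DPEs) = 108 bytes.
 * static_assert() assumes <linux/build_bug.h> is available here.
 */
static_assert(sizeof(struct usb_cdc_ncm_nth16) == 12);
static_assert(sizeof(struct usb_cdc_ncm_dpe16) == 4);
static_assert(IPHETH_NDP16_HEADER_SIZE == 8 + IPHETH_NDP16_MAX_DPE * 4);
static_assert(IPHETH_NCM_HEADER_SIZE == 108);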
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * internal.h - printk internal definitions */ #include <linux/console.h> #include <linux/percpu.h> #include <linux/types.h> #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) struct ctl_table; void __init printk_sysctl_init(void); int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #else #define printk_sysctl_init() do { } while (0) #endif #define con_printk(lvl, con, fmt, ...) \ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ (con->flags & CON_NBCON) ? "" : "legacy ", \ (con->flags & CON_BOOT) ? "boot" : "", \ con->name, con->index, ##__VA_ARGS__) /* * Identify if legacy printing is forced in a dedicated kthread. If * true, all printing via console lock occurs within a dedicated * legacy printer thread. The only exception is on panic, after the * nbcon consoles have had their chance to print the panic messages * first. */ #ifdef CONFIG_PREEMPT_RT # define force_legacy_kthread() (true) #else # define force_legacy_kthread() (false) #endif #ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK_CALLER #define PRINTK_PREFIX_MAX 48 #else #define PRINTK_PREFIX_MAX 32 #endif /* * the maximum size of a formatted record (i.e. with prefix added * per line and dropped messages or in extended message format) */ #define PRINTK_MESSAGE_MAX 2048 /* the maximum size allowed to be reserved for a record */ #define PRINTKRB_RECORD_MAX 1024 /* Flags for a single printk record.
*/ enum printk_info_flags { /* always show on console, ignore console_loglevel */ LOG_FORCE_CON = 1, LOG_NEWLINE = 2, /* text ended with a newline */ LOG_CONT = 8, /* text is a fragment of a continuation line */ }; struct printk_ringbuffer; struct dev_printk_info; extern struct printk_ringbuffer *prb; extern bool printk_kthreads_running; extern bool debug_non_panic_cpus; __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args); __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); void __printk_safe_enter(void); void __printk_safe_exit(void); bool printk_percpu_data_ready(void); #define printk_safe_enter_irqsave(flags) \ do { \ local_irq_save(flags); \ __printk_safe_enter(); \ } while (0) #define printk_safe_exit_irqrestore(flags) \ do { \ __printk_safe_exit(); \ local_irq_restore(flags); \ } while (0) void defer_console_output(void); bool is_printk_legacy_deferred(void); bool is_printk_force_console(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); void console_lock_spinning_enable(void); int console_lock_spinning_disable_and_check(int cookie); u64 nbcon_seq_read(struct console *con); void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); void nbcon_free(struct console *con); enum nbcon_prio nbcon_get_default_prio(void); void nbcon_atomic_flush_pending(void); bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie, bool use_atomic); bool nbcon_kthread_create(struct console *con); void nbcon_kthread_stop(struct console *con); void nbcon_kthreads_wake(void); /* * Check if the given console is currently capable and allowed to print * records. Note that this function does not consider the current context, * which can also play a role in deciding if @con can be used to print * records. */ static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) { if (!(flags & CON_ENABLED)) return false; if ((flags & CON_SUSPENDED)) return false; if (flags & CON_NBCON) { /* The write_atomic() callback is optional. */ if (use_atomic && !con->write_atomic) return false; /* * For the !use_atomic case, @printk_kthreads_running is not * checked because the write_thread() callback is also used * via the legacy loop when the printer threads are not * available. */ } else { if (!con->write) return false; } /* * Console drivers may assume that per-cpu resources have been * allocated. So unless they're explicitly marked as being able to * cope (CON_ANYTIME) don't call them until this CPU is officially up. */ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) return false; return true; } /** * nbcon_kthread_wake - Wake up a console printing thread * @con: Console to operate on */ static inline void nbcon_kthread_wake(struct console *con) { /* * Guarantee any new records can be seen by tasks preparing to wait * before this context checks if the rcuwait is empty. * * The full memory barrier in rcuwait_wake_up() pairs with the full * memory barrier within set_current_state() of * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait() * adds the waiter but before it has checked the wait condition. * * This pairs with nbcon_kthread_func:A. 
*/ rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */ } #else #define PRINTK_PREFIX_MAX 0 #define PRINTK_MESSAGE_MAX 0 #define PRINTKRB_RECORD_MAX 0 #define printk_kthreads_running (false) /* * In !PRINTK builds we still export console_sem * semaphore and some of console functions (console_unlock()/etc.), so * printk-safe must preserve the existing local IRQ guarantees. */ #define printk_safe_enter_irqsave(flags) local_irq_save(flags) #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } static inline void defer_console_output(void) { } static inline bool is_printk_legacy_deferred(void) { return false; } static inline u64 nbcon_seq_read(struct console *con) { return 0; } static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_free(struct console *con) { } static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } static inline void nbcon_atomic_flush_pending(void) { } static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie, bool use_atomic) { return false; } static inline void nbcon_kthread_wake(struct console *con) { } static inline void nbcon_kthreads_wake(void) { } static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) { return false; } #endif /* CONFIG_PRINTK */ extern bool have_boot_console; extern bool have_nbcon_console; extern bool have_legacy_console; extern bool legacy_allow_panic_sync; /** * struct console_flush_type - Define available console flush methods * @nbcon_atomic: Flush directly using nbcon_atomic() callback * @nbcon_offload: Offload flush to printer thread * @legacy_direct: Call the legacy loop in this context * @legacy_offload: Offload the legacy loop into IRQ or legacy thread * * Note that the legacy loop also flushes the nbcon consoles. */ struct console_flush_type { bool nbcon_atomic; bool nbcon_offload; bool legacy_direct; bool legacy_offload; }; /* * Identify which console flushing methods should be used in the context of * the caller. */ static inline void printk_get_console_flush_type(struct console_flush_type *ft) { memset(ft, 0, sizeof(*ft)); switch (nbcon_get_default_prio()) { case NBCON_PRIO_NORMAL: if (have_nbcon_console && !have_boot_console) { if (printk_kthreads_running) ft->nbcon_offload = true; else ft->nbcon_atomic = true; } /* Legacy consoles are flushed directly when possible. */ if (have_legacy_console || have_boot_console) { if (!is_printk_legacy_deferred()) ft->legacy_direct = true; else ft->legacy_offload = true; } break; case NBCON_PRIO_EMERGENCY: if (have_nbcon_console && !have_boot_console) ft->nbcon_atomic = true; /* Legacy consoles are flushed directly when possible. */ if (have_legacy_console || have_boot_console) { if (!is_printk_legacy_deferred()) ft->legacy_direct = true; else ft->legacy_offload = true; } break; case NBCON_PRIO_PANIC: /* * In panic, the nbcon consoles will directly print. But * only allowed if there are no boot consoles. */ if (have_nbcon_console && !have_boot_console) ft->nbcon_atomic = true; if (have_legacy_console || have_boot_console) { /* * This is the same decision as NBCON_PRIO_NORMAL * except that offloading never occurs in panic. * * Note that console_flush_on_panic() will flush * legacy consoles anyway, even if unsafe. 
*/ if (!is_printk_legacy_deferred()) ft->legacy_direct = true; /* * In panic, if nbcon atomic printing occurs, * the legacy consoles must remain silent until * explicitly allowed. */ if (ft->nbcon_atomic && !legacy_allow_panic_sync) ft->legacy_direct = false; } break; default: WARN_ON_ONCE(1); break; } } extern struct printk_buffers printk_shared_pbufs; /** * struct printk_buffers - Buffers to read/format/output printk messages. * @outbuf: After formatting, contains text to output. * @scratchbuf: Used as temporary ringbuffer reading and string-print space. */ struct printk_buffers { char outbuf[PRINTK_MESSAGE_MAX]; char scratchbuf[PRINTKRB_RECORD_MAX]; }; /** * struct printk_message - Container for a prepared printk message. * @pbufs: printk buffers used to prepare the message. * @outbuf_len: The length of prepared text in @pbufs->outbuf to output. This * does not count the terminator. A value of 0 means there is * nothing to output and this record should be skipped. * @seq: The sequence number of the record used for @pbufs->outbuf. * @dropped: The number of dropped records from reading @seq. */ struct printk_message { struct printk_buffers *pbufs; unsigned int outbuf_len; u64 seq; unsigned long dropped; }; bool other_cpu_in_panic(void); bool printk_get_next_message(struct printk_message *pmsg, u64 seq, bool is_extended, bool may_supress); #ifdef CONFIG_PRINTK void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); void console_prepend_replay(struct printk_message *pmsg); #endif #ifdef CONFIG_SMP bool is_printk_cpu_sync_owner(void); #else static inline bool is_printk_cpu_sync_owner(void) { return false; } #endif
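/*
 * Illustrative sketch, not taken from the printk sources: how a caller is
 * expected to branch on the struct console_flush_type filled in by
 * printk_get_console_flush_type() above. The legacy_direct branch is left as
 * a comment because the legacy console_lock()/console_unlock() loop lives in
 * printk.c, not in this header.
 */
static inline void example_flush_consoles(void)
{
	struct console_flush_type ft;

	printk_get_console_flush_type(&ft);

	if (ft.nbcon_atomic)
		nbcon_atomic_flush_pending();	/* print nbcon consoles in this context */
	if (ft.nbcon_offload)
		nbcon_kthreads_wake();		/* let the per-console kthreads print */
	if (ft.legacy_direct) {
		/* run the legacy console_lock()/console_unlock() loop here */
	}
	if (ft.legacy_offload)
		defer_console_output();		/* defer to irq_work / the legacy kthread */
}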
#ifndef _LINUX_GENERIC_RADIX_TREE_H #define _LINUX_GENERIC_RADIX_TREE_H /** * DOC: Generic radix trees/sparse arrays * * Very simple and minimalistic, supporting arbitrary size entries up to * GENRADIX_NODE_SIZE. * * A genradix is defined with the type it will store, like so: * * static GENRADIX(struct foo) foo_genradix; * * The main operations are: * * - genradix_init(radix) - initialize an empty genradix * * - genradix_free(radix) - free all memory owned by the genradix and * reinitialize it * * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning * NULL if that entry does not exist * * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry, * allocating it if necessary * * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix * * The radix tree allocates one page of entries at a time, so entries may exist * that were never explicitly allocated - they will be initialized to all * zeroes. * * Internally, a genradix is just a radix tree of pages, and indexing works in * terms of byte offsets. The wrappers in this header file use sizeof on the * type the radix contains to calculate a byte offset from the index - see * __idx_to_offset.
*/ #include <asm/page.h> #include <linux/bug.h> #include <linux/limits.h> #include <linux/log2.h> #include <linux/math.h> #include <linux/slab.h> #include <linux/types.h> struct genradix_root; #define GENRADIX_NODE_SHIFT 9 #define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) #define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *)) #define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) /* depth that's needed for a genradix that can address up to ULONG_MAX: */ #define GENRADIX_MAX_DEPTH \ DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT) #define GENRADIX_DEPTH_MASK \ ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) static inline int genradix_depth_shift(unsigned depth) { return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth; } /* * Returns size (of data, in bytes) that a tree of a given depth holds: */ static inline size_t genradix_depth_size(unsigned depth) { return 1UL << genradix_depth_shift(depth); } static inline unsigned genradix_root_to_depth(struct genradix_root *r) { return (unsigned long) r & GENRADIX_DEPTH_MASK; } static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r) { return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); } struct __genradix { struct genradix_root *root; }; struct genradix_node { union { /* Interior node: */ struct genradix_node *children[GENRADIX_ARY]; /* Leaf: */ u8 data[GENRADIX_NODE_SIZE]; }; }; static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) { return kzalloc(GENRADIX_NODE_SIZE, gfp_mask); } static inline void genradix_free_node(struct genradix_node *node) { kfree(node); } /* * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: */ #define __GENRADIX_INITIALIZER \ { \ .tree = { \ .root = NULL, \ } \ } /* * We use a 0 size array to stash the type we're storing without taking any * space at runtime - then the various accessor macros can use typeof() to get * to it for casts/sizeof - we also force the alignment so that storing a type * with a ridiculous alignment doesn't blow up the alignment or size of the * genradix. 
*/ #define GENRADIX(_type) \ struct { \ struct __genradix tree; \ _type type[0] __aligned(1); \ } #define DEFINE_GENRADIX(_name, _type) \ GENRADIX(_type) _name = __GENRADIX_INITIALIZER /** * genradix_init - initialize a genradix * @_radix: genradix to initialize * * Does not fail */ #define genradix_init(_radix) \ do { \ *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER; \ } while (0) void __genradix_free(struct __genradix *); /** * genradix_free: free all memory owned by a genradix * @_radix: the genradix to free * * After freeing, @_radix will be reinitialized and empty */ #define genradix_free(_radix) __genradix_free(&(_radix)->tree) static inline size_t __idx_to_offset(size_t idx, size_t obj_size) { if (__builtin_constant_p(obj_size)) BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE); else BUG_ON(obj_size > GENRADIX_NODE_SIZE); if (!is_power_of_2(obj_size)) { size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size; return (idx / objs_per_page) * GENRADIX_NODE_SIZE + (idx % objs_per_page) * obj_size; } else { return idx * obj_size; } } #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) #define __genradix_objs_per_page(_radix) \ (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0])) #define __genradix_page_remainder(_radix) \ (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0])) #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) static inline void *__genradix_ptr_inlined(struct __genradix *radix, size_t offset) { struct genradix_root *r = READ_ONCE(radix->root); struct genradix_node *n = genradix_root_to_node(r); unsigned level = genradix_root_to_depth(r); unsigned shift = genradix_depth_shift(level); if (unlikely(ilog2(offset) >= genradix_depth_shift(level))) return NULL; while (n && shift > GENRADIX_NODE_SHIFT) { shift -= GENRADIX_ARY_SHIFT; n = n->children[offset >> shift]; offset &= (1UL << shift) - 1; } return n ? &n->data[offset] : NULL; } #define genradix_ptr_inlined(_radix, _idx) \ (__genradix_cast(_radix) \ __genradix_ptr_inlined(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx))) void *__genradix_ptr(struct __genradix *, size_t); /** * genradix_ptr - get a pointer to a genradix entry * @_radix: genradix to access * @_idx: index to fetch * * Returns a pointer to entry at @_idx, or NULL if that entry does not exist. 
*/ #define genradix_ptr(_radix, _idx) \ (__genradix_cast(_radix) \ __genradix_ptr(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx))) void *__genradix_ptr_alloc(struct __genradix *, size_t, struct genradix_node **, gfp_t); #define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \ (__genradix_cast(_radix) \ (__genradix_ptr_inlined(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx)) ?: \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ NULL, _gfp))) #define genradix_ptr_alloc_preallocated_inlined(_radix, _idx, _new_node, _gfp)\ (__genradix_cast(_radix) \ (__genradix_ptr_inlined(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx)) ?: \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ _new_node, _gfp))) /** * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it * if necessary * @_radix: genradix to access * @_idx: index to fetch * @_gfp: gfp mask * * Returns a pointer to entry at @_idx, or NULL on allocation failure */ #define genradix_ptr_alloc(_radix, _idx, _gfp) \ (__genradix_cast(_radix) \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ NULL, _gfp)) #define genradix_ptr_alloc_preallocated(_radix, _idx, _new_node, _gfp)\ (__genradix_cast(_radix) \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ _new_node, _gfp)) struct genradix_iter { size_t offset; size_t pos; }; /** * genradix_iter_init - initialize a genradix_iter * @_radix: genradix that will be iterated over * @_idx: index to start iterating from */ #define genradix_iter_init(_radix, _idx) \ ((struct genradix_iter) { \ .pos = (_idx), \ .offset = __genradix_idx_to_offset((_radix), (_idx)),\ }) void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); /** * genradix_iter_peek - get first entry at or above iterator's current * position * @_iter: a genradix_iter * @_radix: genradix being iterated over * * If no more entries exist at or above @_iter's current position, returns NULL */ #define genradix_iter_peek(_iter, _radix) \ (__genradix_cast(_radix) \ __genradix_iter_peek(_iter, &(_radix)->tree, \ __genradix_objs_per_page(_radix))) void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *, size_t, size_t); /** * genradix_iter_peek_prev - get first entry at or below iterator's current * position * @_iter: a genradix_iter * @_radix: genradix being iterated over * * If no more entries exist at or below @_iter's current position, returns NULL */ #define genradix_iter_peek_prev(_iter, _radix) \ (__genradix_cast(_radix) \ __genradix_iter_peek_prev(_iter, &(_radix)->tree, \ __genradix_objs_per_page(_radix), \ __genradix_obj_size(_radix) + \ __genradix_page_remainder(_radix))) static inline void __genradix_iter_advance(struct genradix_iter *iter, size_t obj_size) { if (iter->offset + obj_size < iter->offset) { iter->offset = SIZE_MAX; iter->pos = SIZE_MAX; return; } iter->offset += obj_size; if (!is_power_of_2(obj_size) && (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE) iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE); iter->pos++; } #define genradix_iter_advance(_iter, _radix) \ __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) static inline void __genradix_iter_rewind(struct genradix_iter *iter, size_t obj_size) { if (iter->offset == 0 || iter->offset == SIZE_MAX) { iter->offset = SIZE_MAX; return; } if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0) iter->offset -= GENRADIX_NODE_SIZE % 
obj_size; iter->offset -= obj_size; iter->pos--; } #define genradix_iter_rewind(_iter, _radix) \ __genradix_iter_rewind(_iter, __genradix_obj_size(_radix)) #define genradix_for_each_from(_radix, _iter, _p, _start) \ for (_iter = genradix_iter_init(_radix, _start); \ (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ genradix_iter_advance(&_iter, _radix)) /** * genradix_for_each - iterate over entry in a genradix * @_radix: genradix to iterate over * @_iter: a genradix_iter to track current position * @_p: pointer to genradix entry type * * On every iteration, @_p will point to the current entry, and @_iter.pos * will be the current entry's index. */ #define genradix_for_each(_radix, _iter, _p) \ genradix_for_each_from(_radix, _iter, _p, 0) #define genradix_last_pos(_radix) \ (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1) /** * genradix_for_each_reverse - iterate over entry in a genradix, reverse order * @_radix: genradix to iterate over * @_iter: a genradix_iter to track current position * @_p: pointer to genradix entry type * * On every iteration, @_p will point to the current entry, and @_iter.pos * will be the current entry's index. */ #define genradix_for_each_reverse(_radix, _iter, _p) \ for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\ (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\ genradix_iter_rewind(&_iter, _radix)) int __genradix_prealloc(struct __genradix *, size_t, gfp_t); /** * genradix_prealloc - preallocate entries in a generic radix tree * @_radix: genradix to preallocate * @_nr: number of entries to preallocate * @_gfp: gfp mask * * Returns 0 on success, -ENOMEM on failure */ #define genradix_prealloc(_radix, _nr, _gfp) \ __genradix_prealloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _nr + 1),\ _gfp) #endif /* _LINUX_GENERIC_RADIX_TREE_H */
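/*
 * Illustrative sketch, not part of this header: the operations listed in the
 * DOC comment above, exercised on a made-up "struct foo" genradix.
 * pr_info() assumes <linux/printk.h>; GFP_KERNEL assumes <linux/gfp.h>.
 */
struct foo {
	u64 key;
	u32 value;
};

static GENRADIX(struct foo) foo_genradix;

static int example_genradix_usage(void)
{
	struct genradix_iter iter;
	struct foo *p;

	genradix_init(&foo_genradix);

	/* Allocate (if needed) and fill the entry at index 42: */
	p = genradix_ptr_alloc(&foo_genradix, 42, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->key = 42;
	p->value = 1;

	/* Index 41 lives in the same node, so it already reads back as zeroes: */
	p = genradix_ptr(&foo_genradix, 41);

	/* Walk every entry in the nodes allocated so far: */
	genradix_for_each(&foo_genradix, iter, p)
		pr_info("idx %zu key %llu\n", iter.pos, p->key);

	genradix_free(&foo_genradix);
	return 0;
}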
/* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_ENCODER_H__ #define __DRM_ENCODER_H__ #include <linux/list.h> #include <linux/ctype.h> #include <drm/drm_crtc.h> #include <drm/drm_mode.h> #include <drm/drm_mode_object.h> #include <drm/drm_util.h> struct drm_encoder; /** * struct drm_encoder_funcs - encoder controls * * Encoders sit between CRTCs and connectors. */ struct drm_encoder_funcs { /** * @reset: * * Reset encoder hardware and software state to off. This function isn't * called by the core directly, only through drm_mode_config_reset(). * It's not a helper hook only for historical reasons. */ void (*reset)(struct drm_encoder *encoder); /** * @destroy: * * Clean up encoder resources. This is only called at driver unload time * through drm_mode_config_cleanup() since an encoder cannot be * hotplugged in DRM. */ void (*destroy)(struct drm_encoder *encoder); /** * @late_register: * * This optional hook can be used to register additional userspace * interfaces attached to the encoder. * It is called late in the driver load sequence from drm_dev_register().
* Everything added from this callback should be unregistered in * the early_unregister callback. * * Returns: * * 0 on success, or a negative error code on failure. */ int (*late_register)(struct drm_encoder *encoder); /** * @early_unregister: * * This optional hook should be used to unregister the additional * userspace interfaces attached to the encoder from * @late_register. It is called from drm_dev_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torn down. */ void (*early_unregister)(struct drm_encoder *encoder); /** * @debugfs_init: * * Allows encoders to create encoder-specific debugfs files. */ void (*debugfs_init)(struct drm_encoder *encoder, struct dentry *root); }; /** * struct drm_encoder - central DRM encoder structure * @dev: parent DRM device * @head: list management * @base: base KMS object * @name: human readable name, can be overwritten by the driver * @funcs: control functions, can be NULL for simple managed encoders * @helper_private: mid-layer private data * * CRTCs drive pixels to encoders, which convert them into signals * appropriate for a given connector or set of connectors. */ struct drm_encoder { struct drm_device *dev; struct list_head head; struct drm_mode_object base; char *name; /** * @encoder_type: * * One of the DRM_MODE_ENCODER_<foo> types in drm_mode.h. The following * encoder types are defined thus far: * * - DRM_MODE_ENCODER_DAC for VGA and analog on DVI-I/DVI-A. * * - DRM_MODE_ENCODER_TMDS for DVI, HDMI and (embedded) DisplayPort. * * - DRM_MODE_ENCODER_LVDS for display panels, or in general any panel * with a proprietary parallel connector. * * - DRM_MODE_ENCODER_TVDAC for TV output (Composite, S-Video, * Component, SCART). * * - DRM_MODE_ENCODER_VIRTUAL for virtual machine displays * * - DRM_MODE_ENCODER_DSI for panels connected using the DSI serial bus. * * - DRM_MODE_ENCODER_DPI for panels connected using the DPI parallel * bus. * * - DRM_MODE_ENCODER_DPMST for special fake encoders used to allow * multiple DP MST streams to share one physical encoder. */ int encoder_type; /** * @index: Position inside the mode_config.list, can be used as an array * index. It is invariant over the lifetime of the encoder. */ unsigned index; /** * @possible_crtcs: Bitmask of potential CRTC bindings, using * drm_crtc_index() as the index into the bitfield. The driver must set * the bits for all &drm_crtc objects this encoder can be connected to * before calling drm_dev_register(). * * You will get a WARN if you get this wrong in the driver. * * Note that since CRTC objects can't be hotplugged the assigned indices * are stable and hence known before registering all objects. */ uint32_t possible_crtcs; /** * @possible_clones: Bitmask of potential sibling encoders for cloning, * using drm_encoder_index() as the index into the bitfield. The driver * must set the bits for all &drm_encoder objects which can clone a * &drm_crtc together with this encoder before calling * drm_dev_register(). Drivers should set the bit representing the * encoder itself, too. Cloning bits should be set such that when two * encoders can be used in a cloned configuration, they both should have * each other's bit set. * * As an exception to the above rule, if the driver doesn't implement * any cloning it can leave @possible_clones set to 0. The core will * automagically fix this up by setting the bit for the encoder itself. * * You will get a WARN if you get this wrong in the driver.
* * Note that since encoder objects can't be hotplugged the assigned indices * are stable and hence known before registering all objects. */ uint32_t possible_clones; /** * @crtc: Currently bound CRTC, only really meaningful for non-atomic * drivers. Atomic drivers should instead check * &drm_connector_state.crtc. */ struct drm_crtc *crtc; /** * @bridge_chain: Bridges attached to this encoder. Drivers shall not * access this field directly. */ struct list_head bridge_chain; const struct drm_encoder_funcs *funcs; const struct drm_encoder_helper_funcs *helper_private; /** * @debugfs_entry: * * Debugfs directory for this CRTC. */ struct dentry *debugfs_entry; }; #define obj_to_encoder(x) container_of(x, struct drm_encoder, base) __printf(5, 6) int drm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); __printf(5, 6) int drmm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); __printf(6, 7) void *__drmm_encoder_alloc(struct drm_device *dev, size_t size, size_t offset, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); /** * drmm_encoder_alloc - Allocate and initialize an encoder * @dev: drm device * @type: the type of the struct which contains struct &drm_encoder * @member: the name of the &drm_encoder within @type * @funcs: callbacks for this encoder (optional) * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * Allocates and initializes an encoder. Encoder should be subclassed as part of * driver encoder objects. Cleanup is automatically handled through registering * drm_encoder_cleanup() with drmm_add_action(). * * The @drm_encoder_funcs.destroy hook must be NULL. * * Returns: * Pointer to new encoder, or ERR_PTR on failure. */ #define drmm_encoder_alloc(dev, type, member, funcs, encoder_type, name, ...) \ ((type *)__drmm_encoder_alloc(dev, sizeof(type), \ offsetof(type, member), funcs, \ encoder_type, name, ##__VA_ARGS__)) /** * drmm_plain_encoder_alloc - Allocate and initialize an encoder * @dev: drm device * @funcs: callbacks for this encoder (optional) * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * This is a simplified version of drmm_encoder_alloc(), which only allocates * and returns a struct drm_encoder instance, with no subclassing. * * Returns: * Pointer to the new drm_encoder struct, or ERR_PTR on failure. */ #define drmm_plain_encoder_alloc(dev, funcs, encoder_type, name, ...) \ ((struct drm_encoder *) \ __drmm_encoder_alloc(dev, sizeof(struct drm_encoder), \ 0, funcs, encoder_type, name, ##__VA_ARGS__)) /** * drm_encoder_index - find the index of a registered encoder * @encoder: encoder to find index for * * Given a registered encoder, return the index of that encoder within a DRM * device's list of encoders. */ static inline unsigned int drm_encoder_index(const struct drm_encoder *encoder) { return encoder->index; } /** * drm_encoder_mask - find the mask of a registered encoder * @encoder: encoder to find mask for * * Given a registered encoder, return the mask bit of that encoder for an * encoder's possible_clones field. 
*/ static inline u32 drm_encoder_mask(const struct drm_encoder *encoder) { return 1 << drm_encoder_index(encoder); } /** * drm_encoder_crtc_ok - can a given crtc drive a given encoder? * @encoder: encoder to test * @crtc: crtc to test * * Returns false if @encoder can't be driven by @crtc, true otherwise. */ static inline bool drm_encoder_crtc_ok(struct drm_encoder *encoder, struct drm_crtc *crtc) { return !!(encoder->possible_crtcs & drm_crtc_mask(crtc)); } /** * drm_encoder_find - find a &drm_encoder * @dev: DRM device * @file_priv: drm file to check for lease against. * @id: encoder id * * Returns the encoder with @id, NULL if it doesn't exist. Simple wrapper around * drm_mode_object_find(). */ static inline struct drm_encoder *drm_encoder_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id) { struct drm_mode_object *mo; mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_ENCODER); return mo ? obj_to_encoder(mo) : NULL; } void drm_encoder_cleanup(struct drm_encoder *encoder); /** * drm_for_each_encoder_mask - iterate over encoders specified by bitmask * @encoder: the loop cursor * @dev: the DRM device * @encoder_mask: bitmask of encoder indices * * Iterate over all encoders specified by bitmask. */ #define drm_for_each_encoder_mask(encoder, dev, encoder_mask) \ list_for_each_entry((encoder), &(dev)->mode_config.encoder_list, head) \ for_each_if ((encoder_mask) & drm_encoder_mask(encoder)) /** * drm_for_each_encoder - iterate over all encoders * @encoder: the loop cursor * @dev: the DRM device * * Iterate over all encoders of @dev. */ #define drm_for_each_encoder(encoder, dev) \ list_for_each_entry(encoder, &(dev)->mode_config.encoder_list, head) #endif
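/*
 * Illustrative sketch, not part of this header: subclassing struct drm_encoder
 * via drmm_encoder_alloc(), as described in the kernel-doc above. The
 * example_encoder struct and the "example-enc-%d" name are made up;
 * IS_ERR()/PTR_ERR() assume <linux/err.h>.
 */
struct example_encoder {
	struct drm_encoder base;
	int private_state;
};

static int example_encoder_create(struct drm_device *dev, struct drm_crtc *crtc)
{
	struct example_encoder *enc;

	/* @funcs may be NULL for a purely drmm-managed encoder. */
	enc = drmm_encoder_alloc(dev, struct example_encoder, base, NULL,
				 DRM_MODE_ENCODER_TMDS, "example-enc-%d", 0);
	if (IS_ERR(enc))
		return PTR_ERR(enc);

	/* Must be filled in before drm_dev_register(), see @possible_crtcs. */
	enc->base.possible_crtcs = drm_crtc_mask(crtc);
	return 0;
}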
/* * Copyright (C) 2011-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef DRM_RECT_H #define DRM_RECT_H #include <linux/types.h> /** * DOC: rect utils * * Utility functions to help manage rectangular areas for * clipping, scaling, etc. calculations. */ /** * struct drm_rect - two dimensional rectangle * @x1: horizontal starting coordinate (inclusive) * @x2: horizontal ending coordinate (exclusive) * @y1: vertical starting coordinate (inclusive) * @y2: vertical ending coordinate (exclusive) * * Note that this must match the layout of struct drm_mode_rect or the damage * helpers like drm_atomic_helper_damage_iter_init() break. */ struct drm_rect { int x1, y1, x2, y2; }; /** * DRM_RECT_INIT - initialize a rectangle from x/y/w/h * @x: x coordinate * @y: y coordinate * @w: width * @h: height * * RETURNS: * A new rectangle of the specified size.
*/ #define DRM_RECT_INIT(x, y, w, h) ((struct drm_rect){ \ .x1 = (x), \ .y1 = (y), \ .x2 = (x) + (w), \ .y2 = (y) + (h) }) /** * DRM_RECT_FMT - printf string for &struct drm_rect */ #define DRM_RECT_FMT "%dx%d%+d%+d" /** * DRM_RECT_ARG - printf arguments for &struct drm_rect * @r: rectangle struct */ #define DRM_RECT_ARG(r) drm_rect_width(r), drm_rect_height(r), (r)->x1, (r)->y1 /** * DRM_RECT_FP_FMT - printf string for &struct drm_rect in 16.16 fixed point */ #define DRM_RECT_FP_FMT "%d.%06ux%d.%06u%+d.%06u%+d.%06u" /** * DRM_RECT_FP_ARG - printf arguments for &struct drm_rect in 16.16 fixed point * @r: rectangle struct * * This is useful for e.g. printing plane source rectangles, which are in 16.16 * fixed point. */ #define DRM_RECT_FP_ARG(r) \ drm_rect_width(r) >> 16, ((drm_rect_width(r) & 0xffff) * 15625) >> 10, \ drm_rect_height(r) >> 16, ((drm_rect_height(r) & 0xffff) * 15625) >> 10, \ (r)->x1 >> 16, (((r)->x1 & 0xffff) * 15625) >> 10, \ (r)->y1 >> 16, (((r)->y1 & 0xffff) * 15625) >> 10 /** * drm_rect_init - initialize the rectangle from x/y/w/h * @r: rectangle * @x: x coordinate * @y: y coordinate * @width: width * @height: height */ static inline void drm_rect_init(struct drm_rect *r, int x, int y, int width, int height) { r->x1 = x; r->y1 = y; r->x2 = x + width; r->y2 = y + height; } /** * drm_rect_adjust_size - adjust the size of the rectangle * @r: rectangle to be adjusted * @dw: horizontal adjustment * @dh: vertical adjustment * * Change the size of rectangle @r by @dw in the horizontal direction, * and by @dh in the vertical direction, while keeping the center * of @r stationary. * * Positive @dw and @dh increase the size, negative values decrease it. */ static inline void drm_rect_adjust_size(struct drm_rect *r, int dw, int dh) { r->x1 -= dw >> 1; r->y1 -= dh >> 1; r->x2 += (dw + 1) >> 1; r->y2 += (dh + 1) >> 1; } /** * drm_rect_translate - translate the rectangle * @r: rectangle to be translated * @dx: horizontal translation * @dy: vertical translation * * Move rectangle @r by @dx in the horizontal direction, * and by @dy in the vertical direction. */ static inline void drm_rect_translate(struct drm_rect *r, int dx, int dy) { r->x1 += dx; r->y1 += dy; r->x2 += dx; r->y2 += dy; } /** * drm_rect_translate_to - translate the rectangle to an absolute position * @r: rectangle to be translated * @x: horizontal position * @y: vertical position * * Move rectangle @r to @x in the horizontal direction, * and to @y in the vertical direction. */ static inline void drm_rect_translate_to(struct drm_rect *r, int x, int y) { drm_rect_translate(r, x - r->x1, y - r->y1); } /** * drm_rect_downscale - downscale a rectangle * @r: rectangle to be downscaled * @horz: horizontal downscale factor * @vert: vertical downscale factor * * Divide the coordinates of rectangle @r by @horz and @vert. */ static inline void drm_rect_downscale(struct drm_rect *r, int horz, int vert) { r->x1 /= horz; r->y1 /= vert; r->x2 /= horz; r->y2 /= vert; } /** * drm_rect_width - determine the rectangle width * @r: rectangle whose width is returned * * RETURNS: * The width of the rectangle. */ static inline int drm_rect_width(const struct drm_rect *r) { return r->x2 - r->x1; } /** * drm_rect_height - determine the rectangle height * @r: rectangle whose height is returned * * RETURNS: * The height of the rectangle. 
 */
static inline int drm_rect_height(const struct drm_rect *r)
{
	return r->y2 - r->y1;
}

/**
 * drm_rect_visible - determine if the rectangle is visible
 * @r: rectangle whose visibility is returned
 *
 * RETURNS:
 * %true if the rectangle is visible, %false otherwise.
 */
static inline bool drm_rect_visible(const struct drm_rect *r)
{
	return drm_rect_width(r) > 0 && drm_rect_height(r) > 0;
}

/**
 * drm_rect_equals - determine if two rectangles are equal
 * @r1: first rectangle
 * @r2: second rectangle
 *
 * RETURNS:
 * %true if the rectangles are equal, %false otherwise.
 */
static inline bool drm_rect_equals(const struct drm_rect *r1,
				   const struct drm_rect *r2)
{
	return r1->x1 == r2->x1 && r1->x2 == r2->x2 &&
		r1->y1 == r2->y1 && r1->y2 == r2->y2;
}

/**
 * drm_rect_fp_to_int - Convert a rect in 16.16 fixed point form to int form.
 * @dst: rect to store the converted value into
 * @src: rect in 16.16 fixed point form
 */
static inline void drm_rect_fp_to_int(struct drm_rect *dst,
				      const struct drm_rect *src)
{
	drm_rect_init(dst, src->x1 >> 16, src->y1 >> 16,
		      drm_rect_width(src) >> 16,
		      drm_rect_height(src) >> 16);
}

/**
 * drm_rect_overlap - Check if two rectangles overlap
 * @a: first rectangle
 * @b: second rectangle
 *
 * RETURNS:
 * %true if the rectangles overlap, %false otherwise.
 */
static inline bool drm_rect_overlap(const struct drm_rect *a,
				    const struct drm_rect *b)
{
	return (a->x2 > b->x1 && b->x2 > a->x1 &&
		a->y2 > b->y1 && b->y2 > a->y1);
}

bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip);
bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst,
			  const struct drm_rect *clip);
int drm_rect_calc_hscale(const struct drm_rect *src,
			 const struct drm_rect *dst,
			 int min_hscale, int max_hscale);
int drm_rect_calc_vscale(const struct drm_rect *src,
			 const struct drm_rect *dst,
			 int min_vscale, int max_vscale);
void drm_rect_debug_print(const char *prefix,
			  const struct drm_rect *r, bool fixed_point);
void drm_rect_rotate(struct drm_rect *r,
		     int width, int height,
		     unsigned int rotation);
void drm_rect_rotate_inv(struct drm_rect *r,
			 int width, int height,
			 unsigned int rotation);

#endif
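The drm_rect helpers above are easiest to understand from a short usage sketch. The snippet below is illustrative only and is not part of the header: the function name example_clip_log() is made up, but the macros and helpers it uses are the ones defined above.

	#include <drm/drm_rect.h>
	#include <linux/printk.h>

	static void example_clip_log(void)
	{
		/* 640x480 rectangle whose top-left corner starts at (16, 32) */
		struct drm_rect dst = DRM_RECT_INIT(16, 32, 640, 480);

		/* Shift the whole rectangle by 8 pixels in both directions */
		drm_rect_translate(&dst, 8, 8);

		/* DRM_RECT_FMT is "WxH+x1+y1", so this prints "640x480+24+40" */
		pr_info("dst: " DRM_RECT_FMT "\n", DRM_RECT_ARG(&dst));
	}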
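One non-obvious detail in DRM_RECT_FP_ARG is the ((v & 0xffff) * 15625) >> 10 expression: it converts the low 16 fractional bits of a 16.16 value into microunits for the %06u conversion, because 15625/1024 equals 1000000/65536 exactly. A standalone sanity check (plain userspace C, not part of the header, values chosen for illustration):

	#include <assert.h>

	int main(void)
	{
		unsigned int x1 = (1u << 16) | 0x8000; /* 1.5 in 16.16 fixed point */
		unsigned int micro = ((x1 & 0xffff) * 15625) >> 10;

		/* 0x8000/65536 == 0.5, so the fractional part prints as "500000" */
		assert(micro == 500000);
		return 0;
	}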
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The iopt_pages is the center of the storage and motion of PFNs. Each
 * iopt_pages represents a logical linear array of full PFNs. The array is 0
 * based and has npages in it. Accessors use 'index' to refer to the entry in
 * this logical array, regardless of its storage location.
 *
 * PFNs are stored in a tiered scheme:
 *  1) iopt_pages::pinned_pfns xarray
 *  2) An iommu_domain
 *  3) The origin of the PFNs, i.e. the userspace pointer
 *
 * PFNs have to be copied between all combinations of tiers, depending on the
 * configuration.
 *
 * When a PFN is taken out of the userspace pointer it is pinned exactly once.
 * The storage locations of the PFN's index are tracked in the two interval
 * trees. If no interval includes the index then it is not pinned.
 *
 * If access_itree includes the PFN's index then an in-kernel access has
 * requested the page. The PFN is stored in the xarray so other requestors can
 * continue to find it.
 *
 * If the domains_itree includes the PFN's index then an iommu_domain is storing
 * the PFN and it can be read back using iommu_iova_to_phys(). To avoid
 * duplicating storage the xarray is not used if only iommu_domains are using
 * the PFN's index.
 *
 * As a general principle this is designed so that destroy never fails. This
 * means removing an iommu_domain or releasing an in-kernel access will not fail
 * due to insufficient memory. In practice this means some cases have to hold
 * PFNs in the xarray even though they are also being stored in an iommu_domain.
 *
 * While the iopt_pages can use an iommu_domain as storage, it does not have an
 * IOVA itself. Instead the iopt_area represents a range of IOVA and uses the
 * iopt_pages as the PFN provider. Multiple iopt_areas can share the iopt_pages
 * and reference their own slice of the PFN array, with sub page granularity.
 *
 * In this file the term 'last' indicates an inclusive and closed interval, eg
 * [0,0] refers to a single PFN. 'end' means an open range, eg [0,0) refers to
 * no PFNs.
 *
 * Be cautious of overflow. An IOVA can go all the way up to U64_MAX, so
 * last_iova + 1 can overflow. An iopt_pages index will always be much less than
 * ULONG_MAX so last_index + 1 cannot overflow.
 */
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/kthread.h>
#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>

#include "double_span.h"
#include "io_pagetable.h"

#ifndef CONFIG_IOMMUFD_TEST
#define TEMP_MEMORY_LIMIT 65536
#else
#define TEMP_MEMORY_LIMIT iommufd_test_memory_limit
#endif

#define BATCH_BACKUP_SIZE 32

/*
 * More memory makes pin_user_pages() and the batching more efficient, but as
 * this is only a performance optimization don't try too hard to get it. A 64k
 * allocation can hold about 26M of 4k pages and 13G of 2M pages in a
 * pfn_batch. Various destroy paths cannot fail and provide a small amount of
 * stack memory as a backup contingency. If backup_len is given this cannot
 * fail.
 */
static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len)
{
	void *res;

	if (WARN_ON(*size == 0))
		return NULL;

	if (*size < backup_len)
		return backup;

	if (!backup && iommufd_should_fail())
		return NULL;

	*size = min_t(size_t, *size, TEMP_MEMORY_LIMIT);
	res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (res)
		return res;
	*size = PAGE_SIZE;
	if (backup_len) {
		res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
		if (res)
			return res;
		*size = backup_len;
		return backup;
	}
	return kmalloc(*size, GFP_KERNEL);
}

void interval_tree_double_span_iter_update(
	struct interval_tree_double_span_iter *iter)
{
	unsigned long last_hole = ULONG_MAX;
	unsigned int i;

	for (i = 0; i != ARRAY_SIZE(iter->spans); i++) {
		if (interval_tree_span_iter_done(&iter->spans[i])) {
			iter->is_used = -1;
			return;
		}

		if (iter->spans[i].is_hole) {
			last_hole = min(last_hole, iter->spans[i].last_hole);
			continue;
		}

		iter->is_used = i + 1;
		iter->start_used = iter->spans[i].start_used;
		iter->last_used = min(iter->spans[i].last_used, last_hole);
		return;
	}

	iter->is_used = 0;
	iter->start_hole = iter->spans[0].start_hole;
	iter->last_hole =
		min(iter->spans[0].last_hole, iter->spans[1].last_hole);
}

void interval_tree_double_span_iter_first(
	struct interval_tree_double_span_iter *iter,
	struct rb_root_cached *itree1, struct rb_root_cached *itree2,
	unsigned long first_index, unsigned long last_index)
{
	unsigned int i;

	iter->itrees[0] = itree1;
	iter->itrees[1] = itree2;
	for (i = 0; i != ARRAY_SIZE(iter->spans); i++)
		interval_tree_span_iter_first(&iter->spans[i], iter->itrees[i],
					      first_index, last_index);
	interval_tree_double_span_iter_update(iter);
}

void interval_tree_double_span_iter_next(
	struct interval_tree_double_span_iter *iter)
{
	unsigned int i;

	if (iter->is_used == -1 ||
	    iter->last_hole == iter->spans[0].last_index) {
		iter->is_used = -1;
		return;
	}

	for (i = 0; i != ARRAY_SIZE(iter->spans); i++)
		interval_tree_span_iter_advance(
			&iter->spans[i], iter->itrees[i], iter->last_hole + 1);
	interval_tree_double_span_iter_update(iter);
}

static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages)
{
	int rc;

	rc = check_add_overflow(pages->npinned, npages, &pages->npinned);
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(rc || pages->npinned > pages->npages);
}

static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages)
{
	int rc;

	rc = check_sub_overflow(pages->npinned, npages, &pages->npinned);
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(rc || pages->npinned > pages->npages);
}

static void iopt_pages_err_unpin(struct iopt_pages *pages,
				 unsigned long start_index,
				 unsigned long last_index,
				 struct page **page_list)
{
	unsigned long npages = last_index - start_index + 1;

	unpin_user_pages(page_list, npages);
	iopt_pages_sub_npinned(pages, npages);
}

/*
 * index is the number of PAGE_SIZE units from the start of the area's
 * iopt_pages. If the iova is sub page-size then the area has an iova that
 * covers a portion of the first and last pages in the range.
 */
static unsigned long iopt_area_index_to_iova(struct iopt_area *area,
					     unsigned long index)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(index < iopt_area_index(area) ||
			index > iopt_area_last_index(area));
	index -= iopt_area_index(area);
	if (index == 0)
		return iopt_area_iova(area);
	return iopt_area_iova(area) - area->page_offset + index * PAGE_SIZE;
}

static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area,
						  unsigned long index)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(index < iopt_area_index(area) ||
			index > iopt_area_last_index(area));
	if (index == iopt_area_last_index(area))
		return iopt_area_last_iova(area);
	return iopt_area_iova(area) - area->page_offset +
	       (index - iopt_area_index(area) + 1) * PAGE_SIZE - 1;
}

static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova,
			       size_t size)
{
	size_t ret;

	ret = iommu_unmap(domain, iova, size);
	/*
	 * It is a logic error in this code or a driver bug if the IOMMU unmaps
	 * something other than exactly as requested. This implies that the
	 * iommu driver may not fail unmap for reasons beyond bad arguments.
	 * Particularly, the iommu driver may not do a memory allocation on the
	 * unmap path.
	 */
	WARN_ON(ret != size);
}

static void iopt_area_unmap_domain_range(struct iopt_area *area,
					 struct iommu_domain *domain,
					 unsigned long start_index,
					 unsigned long last_index)
{
	unsigned long start_iova = iopt_area_index_to_iova(area, start_index);

	iommu_unmap_nofail(domain, start_iova,
			   iopt_area_index_to_iova_last(area, last_index) -
				   start_iova + 1);
}

static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages,
						     unsigned long index)
{
	struct interval_tree_node *node;

	node = interval_tree_iter_first(&pages->domains_itree, index, index);
	if (!node)
		return NULL;
	return container_of(node, struct iopt_area, pages_node);
}

/*
 * A simple datastructure to hold a vector of PFNs, optimized for contiguous
 * PFNs. This is used as a temporary holding memory for shuttling pfns from one
 * place to another. Generally everything is made more efficient if operations
 * work on the largest possible grouping of pfns.
eg fewer lock/unlock cycles, * better cache locality, etc */ struct pfn_batch { unsigned long *pfns; u32 *npfns; unsigned int array_size; unsigned int end; unsigned int total_pfns; }; static void batch_clear(struct pfn_batch *batch) { batch->total_pfns = 0; batch->end = 0; batch->pfns[0] = 0; batch->npfns[0] = 0; } /* * Carry means we carry a portion of the final hugepage over to the front of the * batch */ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) { if (!keep_pfns) return batch_clear(batch); if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(!batch->end || batch->npfns[batch->end - 1] < keep_pfns); batch->total_pfns = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + (batch->npfns[batch->end - 1] - keep_pfns); batch->npfns[0] = keep_pfns; batch->end = 1; } static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns) { if (!batch->total_pfns) return; if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(batch->total_pfns != batch->npfns[0]); skip_pfns = min(batch->total_pfns, skip_pfns); batch->pfns[0] += skip_pfns; batch->npfns[0] -= skip_pfns; batch->total_pfns -= skip_pfns; } static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup, size_t backup_len) { const size_t elmsz = sizeof(*batch->pfns) + sizeof(*batch->npfns); size_t size = max_pages * elmsz; batch->pfns = temp_kmalloc(&size, backup, backup_len); if (!batch->pfns) return -ENOMEM; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(size < elmsz)) return -EINVAL; batch->array_size = size / elmsz; batch->npfns = (u32 *)(batch->pfns + batch->array_size); batch_clear(batch); return 0; } static int batch_init(struct pfn_batch *batch, size_t max_pages) { return __batch_init(batch, max_pages, NULL, 0); } static void batch_init_backup(struct pfn_batch *batch, size_t max_pages, void *backup, size_t backup_len) { __batch_init(batch, max_pages, backup, backup_len); } static void batch_destroy(struct pfn_batch *batch, void *backup) { if (batch->pfns != backup) kfree(batch->pfns); } static bool batch_add_pfn_num(struct pfn_batch *batch, unsigned long pfn, u32 nr) { const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns)); unsigned int end = batch->end; if (end && pfn == batch->pfns[end - 1] + batch->npfns[end - 1] && nr <= MAX_NPFNS - batch->npfns[end - 1]) { batch->npfns[end - 1] += nr; } else if (end < batch->array_size) { batch->pfns[end] = pfn; batch->npfns[end] = nr; batch->end++; } else { return false; } batch->total_pfns += nr; return true; } static void batch_remove_pfn_num(struct pfn_batch *batch, unsigned long nr) { batch->npfns[batch->end - 1] -= nr; if (batch->npfns[batch->end - 1] == 0) batch->end--; batch->total_pfns -= nr; } /* true if the pfn was added, false otherwise */ static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn) { return batch_add_pfn_num(batch, pfn, 1); } /* * Fill the batch with pfns from the domain. When the batch is full, or it * reaches last_index, the function will return. The caller should use * batch->total_pfns to determine the starting point for the next iteration. 
*/ static void batch_from_domain(struct pfn_batch *batch, struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index, unsigned long last_index) { unsigned int page_offset = 0; unsigned long iova; phys_addr_t phys; iova = iopt_area_index_to_iova(area, start_index); if (start_index == iopt_area_index(area)) page_offset = area->page_offset; while (start_index <= last_index) { /* * This is pretty slow, it would be nice to get the page size * back from the driver, or have the driver directly fill the * batch. */ phys = iommu_iova_to_phys(domain, iova) - page_offset; if (!batch_add_pfn(batch, PHYS_PFN(phys))) return; iova += PAGE_SIZE - page_offset; page_offset = 0; start_index++; } } static struct page **raw_pages_from_domain(struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index, unsigned long last_index, struct page **out_pages) { unsigned int page_offset = 0; unsigned long iova; phys_addr_t phys; iova = iopt_area_index_to_iova(area, start_index); if (start_index == iopt_area_index(area)) page_offset = area->page_offset; while (start_index <= last_index) { phys = iommu_iova_to_phys(domain, iova) - page_offset; *(out_pages++) = pfn_to_page(PHYS_PFN(phys)); iova += PAGE_SIZE - page_offset; page_offset = 0; start_index++; } return out_pages; } /* Continues reading a domain until we reach a discontinuity in the pfns. */ static void batch_from_domain_continue(struct pfn_batch *batch, struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index, unsigned long last_index) { unsigned int array_size = batch->array_size; batch->array_size = batch->end; batch_from_domain(batch, domain, area, start_index, last_index); batch->array_size = array_size; } /* * This is part of the VFIO compatibility support for VFIO_TYPE1_IOMMU. That * mode permits splitting a mapped area up, and then one of the splits is * unmapped. Doing this normally would cause us to violate our invariant of * pairing map/unmap. Thus, to support old VFIO compatibility disable support * for batching consecutive PFNs. All PFNs mapped into the iommu are done in * PAGE_SIZE units, not larger or smaller. 
*/ static int batch_iommu_map_small(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { unsigned long start_iova = iova; int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(paddr % PAGE_SIZE || iova % PAGE_SIZE || size % PAGE_SIZE); while (size) { rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot, GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova += PAGE_SIZE; paddr += PAGE_SIZE; size -= PAGE_SIZE; } return 0; err_unmap: if (start_iova != iova) iommu_unmap_nofail(domain, start_iova, iova - start_iova); return rc; } static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index) { bool disable_large_pages = area->iopt->disable_large_pages; unsigned long last_iova = iopt_area_last_iova(area); unsigned int page_offset = 0; unsigned long start_iova; unsigned long next_iova; unsigned int cur = 0; unsigned long iova; int rc; /* The first index might be a partial page */ if (start_index == iopt_area_index(area)) page_offset = area->page_offset; next_iova = iova = start_iova = iopt_area_index_to_iova(area, start_index); while (cur < batch->end) { next_iova = min(last_iova + 1, next_iova + batch->npfns[cur] * PAGE_SIZE - page_offset); if (disable_large_pages) rc = batch_iommu_map_small( domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, next_iova - iova, area->iommu_prot); else rc = iommu_map(domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, next_iova - iova, area->iommu_prot, GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova = next_iova; page_offset = 0; cur++; } return 0; err_unmap: if (start_iova != iova) iommu_unmap_nofail(domain, start_iova, iova - start_iova); return rc; } static void batch_from_xarray(struct pfn_batch *batch, struct xarray *xa, unsigned long start_index, unsigned long last_index) { XA_STATE(xas, xa, start_index); void *entry; rcu_read_lock(); while (true) { entry = xas_next(&xas); if (xas_retry(&xas, entry)) continue; WARN_ON(!xa_is_value(entry)); if (!batch_add_pfn(batch, xa_to_value(entry)) || start_index == last_index) break; start_index++; } rcu_read_unlock(); } static void batch_from_xarray_clear(struct pfn_batch *batch, struct xarray *xa, unsigned long start_index, unsigned long last_index) { XA_STATE(xas, xa, start_index); void *entry; xas_lock(&xas); while (true) { entry = xas_next(&xas); if (xas_retry(&xas, entry)) continue; WARN_ON(!xa_is_value(entry)); if (!batch_add_pfn(batch, xa_to_value(entry))) break; xas_store(&xas, NULL); if (start_index == last_index) break; start_index++; } xas_unlock(&xas); } static void clear_xarray(struct xarray *xa, unsigned long start_index, unsigned long last_index) { XA_STATE(xas, xa, start_index); void *entry; xas_lock(&xas); xas_for_each(&xas, entry, last_index) xas_store(&xas, NULL); xas_unlock(&xas); } static int pages_to_xarray(struct xarray *xa, unsigned long start_index, unsigned long last_index, struct page **pages) { struct page **end_pages = pages + (last_index - start_index) + 1; struct page **half_pages = pages + (end_pages - pages) / 2; XA_STATE(xas, xa, start_index); do { void *old; xas_lock(&xas); while (pages != end_pages) { /* xarray does not participate in fault injection */ if (pages == half_pages && iommufd_should_fail()) { xas_set_err(&xas, -EINVAL); xas_unlock(&xas); /* aka xas_destroy() */ xas_nomem(&xas, GFP_KERNEL); goto err_clear; } old = xas_store(&xas, xa_mk_value(page_to_pfn(*pages))); if (xas_error(&xas)) break; WARN_ON(old); pages++; xas_next(&xas); } xas_unlock(&xas); } while 
(xas_nomem(&xas, GFP_KERNEL)); err_clear: if (xas_error(&xas)) { if (xas.xa_index != start_index) clear_xarray(xa, start_index, xas.xa_index - 1); return xas_error(&xas); } return 0; } static void batch_from_pages(struct pfn_batch *batch, struct page **pages, size_t npages) { struct page **end = pages + npages; for (; pages != end; pages++) if (!batch_add_pfn(batch, page_to_pfn(*pages))) break; } static int batch_from_folios(struct pfn_batch *batch, struct folio ***folios_p, unsigned long *offset_p, unsigned long npages) { int rc = 0; struct folio **folios = *folios_p; unsigned long offset = *offset_p; while (npages) { struct folio *folio = *folios; unsigned long nr = folio_nr_pages(folio) - offset; unsigned long pfn = page_to_pfn(folio_page(folio, offset)); nr = min(nr, npages); npages -= nr; if (!batch_add_pfn_num(batch, pfn, nr)) break; if (nr > 1) { rc = folio_add_pins(folio, nr - 1); if (rc) { batch_remove_pfn_num(batch, nr); goto out; } } folios++; offset = 0; } out: *folios_p = folios; *offset_p = offset; return rc; } static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages, unsigned int first_page_off, size_t npages) { unsigned int cur = 0; while (first_page_off) { if (batch->npfns[cur] > first_page_off) break; first_page_off -= batch->npfns[cur]; cur++; } while (npages) { size_t to_unpin = min_t(size_t, npages, batch->npfns[cur] - first_page_off); unpin_user_page_range_dirty_lock( pfn_to_page(batch->pfns[cur] + first_page_off), to_unpin, pages->writable); iopt_pages_sub_npinned(pages, to_unpin); cur++; first_page_off = 0; npages -= to_unpin; } } static void copy_data_page(struct page *page, void *data, unsigned long offset, size_t length, unsigned int flags) { void *mem; mem = kmap_local_page(page); if (flags & IOMMUFD_ACCESS_RW_WRITE) { memcpy(mem + offset, data, length); set_page_dirty_lock(page); } else { memcpy(data, mem + offset, length); } kunmap_local(mem); } static unsigned long batch_rw(struct pfn_batch *batch, void *data, unsigned long offset, unsigned long length, unsigned int flags) { unsigned long copied = 0; unsigned int npage = 0; unsigned int cur = 0; while (cur < batch->end) { unsigned long bytes = min(length, PAGE_SIZE - offset); copy_data_page(pfn_to_page(batch->pfns[cur] + npage), data, offset, bytes, flags); offset = 0; length -= bytes; data += bytes; copied += bytes; npage++; if (npage == batch->npfns[cur]) { npage = 0; cur++; } if (!length) break; } return copied; } /* pfn_reader_user is just the pin_user_pages() path */ struct pfn_reader_user { struct page **upages; size_t upages_len; unsigned long upages_start; unsigned long upages_end; unsigned int gup_flags; /* * 1 means mmget() and mmap_read_lock(), 0 means only mmget(), -1 is * neither */ int locked; /* The following are only valid if file != NULL. */ struct file *file; struct folio **ufolios; size_t ufolios_len; unsigned long ufolios_offset; struct folio **ufolios_next; }; static void pfn_reader_user_init(struct pfn_reader_user *user, struct iopt_pages *pages) { user->upages = NULL; user->upages_len = 0; user->upages_start = 0; user->upages_end = 0; user->locked = -1; user->gup_flags = FOLL_LONGTERM; if (pages->writable) user->gup_flags |= FOLL_WRITE; user->file = (pages->type == IOPT_ADDRESS_FILE) ? 
pages->file : NULL; user->ufolios = NULL; user->ufolios_len = 0; user->ufolios_next = NULL; user->ufolios_offset = 0; } static void pfn_reader_user_destroy(struct pfn_reader_user *user, struct iopt_pages *pages) { if (user->locked != -1) { if (user->locked) mmap_read_unlock(pages->source_mm); if (!user->file && pages->source_mm != current->mm) mmput(pages->source_mm); user->locked = -1; } kfree(user->upages); user->upages = NULL; kfree(user->ufolios); user->ufolios = NULL; } static long pin_memfd_pages(struct pfn_reader_user *user, unsigned long start, unsigned long npages) { unsigned long i; unsigned long offset; unsigned long npages_out = 0; struct page **upages = user->upages; unsigned long end = start + (npages << PAGE_SHIFT) - 1; long nfolios = user->ufolios_len / sizeof(*user->ufolios); /* * todo: memfd_pin_folios should return the last pinned offset so * we can compute npages pinned, and avoid looping over folios here * if upages == NULL. */ nfolios = memfd_pin_folios(user->file, start, end, user->ufolios, nfolios, &offset); if (nfolios <= 0) return nfolios; offset >>= PAGE_SHIFT; user->ufolios_next = user->ufolios; user->ufolios_offset = offset; for (i = 0; i < nfolios; i++) { struct folio *folio = user->ufolios[i]; unsigned long nr = folio_nr_pages(folio); unsigned long npin = min(nr - offset, npages); npages -= npin; npages_out += npin; if (upages) { if (npin == 1) { *upages++ = folio_page(folio, offset); } else { int rc = folio_add_pins(folio, npin - 1); if (rc) return rc; while (npin--) *upages++ = folio_page(folio, offset++); } } offset = 0; } return npages_out; } static int pfn_reader_user_pin(struct pfn_reader_user *user, struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { bool remote_mm = pages->source_mm != current->mm; unsigned long npages = last_index - start_index + 1; unsigned long start; unsigned long unum; uintptr_t uptr; long rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(last_index < start_index)) return -EINVAL; if (!user->file && !user->upages) { /* All undone in pfn_reader_destroy() */ user->upages_len = npages * sizeof(*user->upages); user->upages = temp_kmalloc(&user->upages_len, NULL, 0); if (!user->upages) return -ENOMEM; } if (user->file && !user->ufolios) { user->ufolios_len = npages * sizeof(*user->ufolios); user->ufolios = temp_kmalloc(&user->ufolios_len, NULL, 0); if (!user->ufolios) return -ENOMEM; } if (user->locked == -1) { /* * The majority of usages will run the map task within the mm * providing the pages, so we can optimize into * get_user_pages_fast() */ if (!user->file && remote_mm) { if (!mmget_not_zero(pages->source_mm)) return -EFAULT; } user->locked = 0; } unum = user->file ? 
user->ufolios_len / sizeof(*user->ufolios) : user->upages_len / sizeof(*user->upages); npages = min_t(unsigned long, npages, unum); if (iommufd_should_fail()) return -EFAULT; if (user->file) { start = pages->start + (start_index * PAGE_SIZE); rc = pin_memfd_pages(user, start, npages); } else if (!remote_mm) { uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE); rc = pin_user_pages_fast(uptr, npages, user->gup_flags, user->upages); } else { uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE); if (!user->locked) { mmap_read_lock(pages->source_mm); user->locked = 1; } rc = pin_user_pages_remote(pages->source_mm, uptr, npages, user->gup_flags, user->upages, &user->locked); } if (rc <= 0) { if (WARN_ON(!rc)) return -EFAULT; return rc; } iopt_pages_add_npinned(pages, rc); user->upages_start = start_index; user->upages_end = start_index + rc; return 0; } /* This is the "modern" and faster accounting method used by io_uring */ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) { unsigned long lock_limit; unsigned long cur_pages; unsigned long new_pages; lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; cur_pages = atomic_long_read(&pages->source_user->locked_vm); do { new_pages = cur_pages + npages; if (new_pages > lock_limit) return -ENOMEM; } while (!atomic_long_try_cmpxchg(&pages->source_user->locked_vm, &cur_pages, new_pages)); return 0; } static void decr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) { if (WARN_ON(atomic_long_read(&pages->source_user->locked_vm) < npages)) return; atomic_long_sub(npages, &pages->source_user->locked_vm); } /* This is the accounting method used for compatibility with VFIO */ static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages, bool inc, struct pfn_reader_user *user) { bool do_put = false; int rc; if (user && user->locked) { mmap_read_unlock(pages->source_mm); user->locked = 0; /* If we had the lock then we also have a get */ } else if ((!user || (!user->upages && !user->ufolios)) && pages->source_mm != current->mm) { if (!mmget_not_zero(pages->source_mm)) return -EINVAL; do_put = true; } mmap_write_lock(pages->source_mm); rc = __account_locked_vm(pages->source_mm, npages, inc, pages->source_task, false); mmap_write_unlock(pages->source_mm); if (do_put) mmput(pages->source_mm); return rc; } int iopt_pages_update_pinned(struct iopt_pages *pages, unsigned long npages, bool inc, struct pfn_reader_user *user) { int rc = 0; switch (pages->account_mode) { case IOPT_PAGES_ACCOUNT_NONE: break; case IOPT_PAGES_ACCOUNT_USER: if (inc) rc = incr_user_locked_vm(pages, npages); else decr_user_locked_vm(pages, npages); break; case IOPT_PAGES_ACCOUNT_MM: rc = update_mm_locked_vm(pages, npages, inc, user); break; } if (rc) return rc; pages->last_npinned = pages->npinned; if (inc) atomic64_add(npages, &pages->source_mm->pinned_vm); else atomic64_sub(npages, &pages->source_mm->pinned_vm); return 0; } static void update_unpinned(struct iopt_pages *pages) { if (WARN_ON(pages->npinned > pages->last_npinned)) return; if (pages->npinned == pages->last_npinned) return; iopt_pages_update_pinned(pages, pages->last_npinned - pages->npinned, false, NULL); } /* * Changes in the number of pages pinned is done after the pages have been read * and processed. If the user lacked the limit then the error unwind will unpin * everything that was just pinned. 
This is because it is expensive to calculate * how many pages we have already pinned within a range to generate an accurate * prediction in advance of doing the work to actually pin them. */ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user, struct iopt_pages *pages) { unsigned long npages; bool inc; lockdep_assert_held(&pages->mutex); if (pages->npinned == pages->last_npinned) return 0; if (pages->npinned < pages->last_npinned) { npages = pages->last_npinned - pages->npinned; inc = false; } else { if (iommufd_should_fail()) return -ENOMEM; npages = pages->npinned - pages->last_npinned; inc = true; } return iopt_pages_update_pinned(pages, npages, inc, user); } /* * PFNs are stored in three places, in order of preference: * - The iopt_pages xarray. This is only populated if there is a * iopt_pages_access * - The iommu_domain under an area * - The original PFN source, ie pages->source_mm * * This iterator reads the pfns optimizing to load according to the * above order. */ struct pfn_reader { struct iopt_pages *pages; struct interval_tree_double_span_iter span; struct pfn_batch batch; unsigned long batch_start_index; unsigned long batch_end_index; unsigned long last_index; struct pfn_reader_user user; }; static int pfn_reader_update_pinned(struct pfn_reader *pfns) { return pfn_reader_user_update_pinned(&pfns->user, pfns->pages); } /* * The batch can contain a mixture of pages that are still in use and pages that * need to be unpinned. Unpin only pages that are not held anywhere else. */ static void pfn_reader_unpin(struct pfn_reader *pfns) { unsigned long last = pfns->batch_end_index - 1; unsigned long start = pfns->batch_start_index; struct interval_tree_double_span_iter span; struct iopt_pages *pages = pfns->pages; lockdep_assert_held(&pages->mutex); interval_tree_for_each_double_span(&span, &pages->access_itree, &pages->domains_itree, start, last) { if (span.is_used) continue; batch_unpin(&pfns->batch, pages, span.start_hole - start, span.last_hole - span.start_hole + 1); } } /* Process a single span to load it from the proper storage */ static int pfn_reader_fill_span(struct pfn_reader *pfns) { struct interval_tree_double_span_iter *span = &pfns->span; unsigned long start_index = pfns->batch_end_index; struct pfn_reader_user *user = &pfns->user; unsigned long npages; struct iopt_area *area; int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(span->last_used < start_index)) return -EINVAL; if (span->is_used == 1) { batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns, start_index, span->last_used); return 0; } if (span->is_used == 2) { /* * Pull as many pages from the first domain we find in the * target span. If it is too small then we will be called again * and we'll find another area. 
*/ area = iopt_pages_find_domain_area(pfns->pages, start_index); if (WARN_ON(!area)) return -EINVAL; /* The storage_domain cannot change without the pages mutex */ batch_from_domain( &pfns->batch, area->storage_domain, area, start_index, min(iopt_area_last_index(area), span->last_used)); return 0; } if (start_index >= pfns->user.upages_end) { rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index, span->last_hole); if (rc) return rc; } npages = user->upages_end - start_index; start_index -= user->upages_start; rc = 0; if (!user->file) batch_from_pages(&pfns->batch, user->upages + start_index, npages); else rc = batch_from_folios(&pfns->batch, &user->ufolios_next, &user->ufolios_offset, npages); return rc; } static bool pfn_reader_done(struct pfn_reader *pfns) { return pfns->batch_start_index == pfns->last_index + 1; } static int pfn_reader_next(struct pfn_reader *pfns) { int rc; batch_clear(&pfns->batch); pfns->batch_start_index = pfns->batch_end_index; while (pfns->batch_end_index != pfns->last_index + 1) { unsigned int npfns = pfns->batch.total_pfns; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(interval_tree_double_span_iter_done(&pfns->span))) return -EINVAL; rc = pfn_reader_fill_span(pfns); if (rc) return rc; if (WARN_ON(!pfns->batch.total_pfns)) return -EINVAL; pfns->batch_end_index = pfns->batch_start_index + pfns->batch.total_pfns; if (pfns->batch_end_index == pfns->span.last_used + 1) interval_tree_double_span_iter_next(&pfns->span); /* Batch is full */ if (npfns == pfns->batch.total_pfns) return 0; } return 0; } static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { int rc; lockdep_assert_held(&pages->mutex); pfns->pages = pages; pfns->batch_start_index = start_index; pfns->batch_end_index = start_index; pfns->last_index = last_index; pfn_reader_user_init(&pfns->user, pages); rc = batch_init(&pfns->batch, last_index - start_index + 1); if (rc) return rc; interval_tree_double_span_iter_first(&pfns->span, &pages->access_itree, &pages->domains_itree, start_index, last_index); return 0; } /* * There are many assertions regarding the state of pages->npinned vs * pages->last_pinned, for instance something like unmapping a domain must only * decrement the npinned, and pfn_reader_destroy() must be called only after all * the pins are updated. This is fine for success flows, but error flows * sometimes need to release the pins held inside the pfn_reader before going on * to complete unmapping and releasing pins held in domains. 
*/ static void pfn_reader_release_pins(struct pfn_reader *pfns) { struct iopt_pages *pages = pfns->pages; struct pfn_reader_user *user = &pfns->user; if (user->upages_end > pfns->batch_end_index) { /* Any pages not transferred to the batch are just unpinned */ unsigned long npages = user->upages_end - pfns->batch_end_index; unsigned long start_index = pfns->batch_end_index - user->upages_start; if (!user->file) { unpin_user_pages(user->upages + start_index, npages); } else { long n = user->ufolios_len / sizeof(*user->ufolios); unpin_folios(user->ufolios_next, user->ufolios + n - user->ufolios_next); } iopt_pages_sub_npinned(pages, npages); user->upages_end = pfns->batch_end_index; } if (pfns->batch_start_index != pfns->batch_end_index) { pfn_reader_unpin(pfns); pfns->batch_start_index = pfns->batch_end_index; } } static void pfn_reader_destroy(struct pfn_reader *pfns) { struct iopt_pages *pages = pfns->pages; pfn_reader_release_pins(pfns); pfn_reader_user_destroy(&pfns->user, pfns->pages); batch_destroy(&pfns->batch, NULL); WARN_ON(pages->last_npinned != pages->npinned); } static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(last_index < start_index)) return -EINVAL; rc = pfn_reader_init(pfns, pages, start_index, last_index); if (rc) return rc; rc = pfn_reader_next(pfns); if (rc) { pfn_reader_destroy(pfns); return rc; } return 0; } static struct iopt_pages *iopt_alloc_pages(unsigned long start_byte, unsigned long length, bool writable) { struct iopt_pages *pages; /* * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP * below from overflow */ if (length > SIZE_MAX - PAGE_SIZE || length == 0) return ERR_PTR(-EINVAL); pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT); if (!pages) return ERR_PTR(-ENOMEM); kref_init(&pages->kref); xa_init_flags(&pages->pinned_pfns, XA_FLAGS_ACCOUNT); mutex_init(&pages->mutex); pages->source_mm = current->mm; mmgrab(pages->source_mm); pages->npages = DIV_ROUND_UP(length + start_byte, PAGE_SIZE); pages->access_itree = RB_ROOT_CACHED; pages->domains_itree = RB_ROOT_CACHED; pages->writable = writable; if (capable(CAP_IPC_LOCK)) pages->account_mode = IOPT_PAGES_ACCOUNT_NONE; else pages->account_mode = IOPT_PAGES_ACCOUNT_USER; pages->source_task = current->group_leader; get_task_struct(current->group_leader); pages->source_user = get_uid(current_user()); return pages; } struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, unsigned long length, bool writable) { struct iopt_pages *pages; unsigned long end; void __user *uptr_down = (void __user *) ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); if (check_add_overflow((unsigned long)uptr, length, &end)) return ERR_PTR(-EOVERFLOW); pages = iopt_alloc_pages(uptr - uptr_down, length, writable); if (IS_ERR(pages)) return pages; pages->uptr = uptr_down; pages->type = IOPT_ADDRESS_USER; return pages; } struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start, unsigned long length, bool writable) { struct iopt_pages *pages; unsigned long start_down = ALIGN_DOWN(start, PAGE_SIZE); unsigned long end; if (length && check_add_overflow(start, length - 1, &end)) return ERR_PTR(-EOVERFLOW); pages = iopt_alloc_pages(start - start_down, length, writable); if (IS_ERR(pages)) return pages; pages->file = get_file(file); pages->start = start_down; pages->type = IOPT_ADDRESS_FILE; return pages; } void iopt_release_pages(struct kref *kref) { struct iopt_pages *pages 
= container_of(kref, struct iopt_pages, kref); WARN_ON(!RB_EMPTY_ROOT(&pages->access_itree.rb_root)); WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root)); WARN_ON(pages->npinned); WARN_ON(!xa_empty(&pages->pinned_pfns)); mmdrop(pages->source_mm); mutex_destroy(&pages->mutex); put_task_struct(pages->source_task); free_uid(pages->source_user); if (pages->type == IOPT_ADDRESS_FILE) fput(pages->file); kfree(pages); } static void iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain, unsigned long start_index, unsigned long last_index, unsigned long *unmapped_end_index, unsigned long real_last_index) { while (start_index <= last_index) { unsigned long batch_last_index; if (*unmapped_end_index <= last_index) { unsigned long start = max(start_index, *unmapped_end_index); if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && batch->total_pfns) WARN_ON(*unmapped_end_index - batch->total_pfns != start_index); batch_from_domain(batch, domain, area, start, last_index); batch_last_index = start_index + batch->total_pfns - 1; } else { batch_last_index = last_index; } if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(batch_last_index > real_last_index); /* * unmaps must always 'cut' at a place where the pfns are not * contiguous to pair with the maps that always install * contiguous pages. Thus, if we have to stop unpinning in the * middle of the domains we need to keep reading pfns until we * find a cut point to do the unmap. The pfns we read are * carried over and either skipped or integrated into the next * batch. */ if (batch_last_index == last_index && last_index != real_last_index) batch_from_domain_continue(batch, domain, area, last_index + 1, real_last_index); if (*unmapped_end_index <= batch_last_index) { iopt_area_unmap_domain_range( area, domain, *unmapped_end_index, start_index + batch->total_pfns - 1); *unmapped_end_index = start_index + batch->total_pfns; } /* unpin must follow unmap */ batch_unpin(batch, pages, 0, batch_last_index - start_index + 1); start_index = batch_last_index + 1; batch_clear_carry(batch, *unmapped_end_index - batch_last_index - 1); } } static void __iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain, unsigned long last_index) { struct interval_tree_double_span_iter span; unsigned long start_index = iopt_area_index(area); unsigned long unmapped_end_index = start_index; u64 backup[BATCH_BACKUP_SIZE]; struct pfn_batch batch; lockdep_assert_held(&pages->mutex); /* * For security we must not unpin something that is still DMA mapped, * so this must unmap any IOVA before we go ahead and unpin the pages. * This creates a complexity where we need to skip over unpinning pages * held in the xarray, but continue to unmap from the domain. * * The domain unmap cannot stop in the middle of a contiguous range of * PFNs. To solve this problem the unpinning step will read ahead to the * end of any contiguous span, unmap that whole span, and then only * unpin the leading part that does not have any accesses. The residual * PFNs that were unmapped but not unpinned are called a "carry" in the * batch as they are moved to the front of the PFN list and continue on * to the next iteration(s). 
*/ batch_init_backup(&batch, last_index + 1, backup, sizeof(backup)); interval_tree_for_each_double_span(&span, &pages->domains_itree, &pages->access_itree, start_index, last_index) { if (span.is_used) { batch_skip_carry(&batch, span.last_used - span.start_used + 1); continue; } iopt_area_unpin_domain(&batch, area, pages, domain, span.start_hole, span.last_hole, &unmapped_end_index, last_index); } /* * If the range ends in a access then we do the residual unmap without * any unpins. */ if (unmapped_end_index != last_index + 1) iopt_area_unmap_domain_range(area, domain, unmapped_end_index, last_index); WARN_ON(batch.total_pfns); batch_destroy(&batch, backup); update_unpinned(pages); } static void iopt_area_unfill_partial_domain(struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain, unsigned long end_index) { if (end_index != iopt_area_index(area)) __iopt_area_unfill_domain(area, pages, domain, end_index - 1); } /** * iopt_area_unmap_domain() - Unmap without unpinning PFNs in a domain * @area: The IOVA range to unmap * @domain: The domain to unmap * * The caller must know that unpinning is not required, usually because there * are other domains in the iopt. */ void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain) { iommu_unmap_nofail(domain, iopt_area_iova(area), iopt_area_length(area)); } /** * iopt_area_unfill_domain() - Unmap and unpin PFNs in a domain * @area: IOVA area to use * @pages: page supplier for the area (area->pages is NULL) * @domain: Domain to unmap from * * The domain should be removed from the domains_itree before calling. The * domain will always be unmapped, but the PFNs may not be unpinned if there are * still accesses. */ void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain) { __iopt_area_unfill_domain(area, pages, domain, iopt_area_last_index(area)); } /** * iopt_area_fill_domain() - Map PFNs from the area into a domain * @area: IOVA area to use * @domain: Domain to load PFNs into * * Read the pfns from the area's underlying iopt_pages and map them into the * given domain. Called when attaching a new domain to an io_pagetable. */ int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain) { unsigned long done_end_index; struct pfn_reader pfns; int rc; lockdep_assert_held(&area->pages->mutex); rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area), iopt_area_last_index(area)); if (rc) return rc; while (!pfn_reader_done(&pfns)) { done_end_index = pfns.batch_start_index; rc = batch_to_domain(&pfns.batch, domain, area, pfns.batch_start_index); if (rc) goto out_unmap; done_end_index = pfns.batch_end_index; rc = pfn_reader_next(&pfns); if (rc) goto out_unmap; } rc = pfn_reader_update_pinned(&pfns); if (rc) goto out_unmap; goto out_destroy; out_unmap: pfn_reader_release_pins(&pfns); iopt_area_unfill_partial_domain(area, area->pages, domain, done_end_index); out_destroy: pfn_reader_destroy(&pfns); return rc; } /** * iopt_area_fill_domains() - Install PFNs into the area's domains * @area: The area to act on * @pages: The pages associated with the area (area->pages is NULL) * * Called during area creation. The area is freshly created and not inserted in * the domains_itree yet. PFNs are read and loaded into every domain held in the * area's io_pagetable and the area is installed in the domains_itree. * * On failure all domains are left unchanged. 
*/ int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages) { unsigned long done_first_end_index; unsigned long done_all_end_index; struct iommu_domain *domain; unsigned long unmap_index; struct pfn_reader pfns; unsigned long index; int rc; lockdep_assert_held(&area->iopt->domains_rwsem); if (xa_empty(&area->iopt->domains)) return 0; mutex_lock(&pages->mutex); rc = pfn_reader_first(&pfns, pages, iopt_area_index(area), iopt_area_last_index(area)); if (rc) goto out_unlock; while (!pfn_reader_done(&pfns)) { done_first_end_index = pfns.batch_end_index; done_all_end_index = pfns.batch_start_index; xa_for_each(&area->iopt->domains, index, domain) { rc = batch_to_domain(&pfns.batch, domain, area, pfns.batch_start_index); if (rc) goto out_unmap; } done_all_end_index = done_first_end_index; rc = pfn_reader_next(&pfns); if (rc) goto out_unmap; } rc = pfn_reader_update_pinned(&pfns); if (rc) goto out_unmap; area->storage_domain = xa_load(&area->iopt->domains, 0); interval_tree_insert(&area->pages_node, &pages->domains_itree); goto out_destroy; out_unmap: pfn_reader_release_pins(&pfns); xa_for_each(&area->iopt->domains, unmap_index, domain) { unsigned long end_index; if (unmap_index < index) end_index = done_first_end_index; else end_index = done_all_end_index; /* * The area is not yet part of the domains_itree so we have to * manage the unpinning specially. The last domain does the * unpin, every other domain is just unmapped. */ if (unmap_index != area->iopt->next_domain_id - 1) { if (end_index != iopt_area_index(area)) iopt_area_unmap_domain_range( area, domain, iopt_area_index(area), end_index - 1); } else { iopt_area_unfill_partial_domain(area, pages, domain, end_index); } } out_destroy: pfn_reader_destroy(&pfns); out_unlock: mutex_unlock(&pages->mutex); return rc; } /** * iopt_area_unfill_domains() - unmap PFNs from the area's domains * @area: The area to act on * @pages: The pages associated with the area (area->pages is NULL) * * Called during area destruction. This unmaps the iova's covered by all the * area's domains and releases the PFNs. */ void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages) { struct io_pagetable *iopt = area->iopt; struct iommu_domain *domain; unsigned long index; lockdep_assert_held(&iopt->domains_rwsem); mutex_lock(&pages->mutex); if (!area->storage_domain) goto out_unlock; xa_for_each(&iopt->domains, index, domain) if (domain != area->storage_domain) iopt_area_unmap_domain_range( area, domain, iopt_area_index(area), iopt_area_last_index(area)); if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(RB_EMPTY_NODE(&area->pages_node.rb)); interval_tree_remove(&area->pages_node, &pages->domains_itree); iopt_area_unfill_domain(area, pages, area->storage_domain); area->storage_domain = NULL; out_unlock: mutex_unlock(&pages->mutex); } static void iopt_pages_unpin_xarray(struct pfn_batch *batch, struct iopt_pages *pages, unsigned long start_index, unsigned long end_index) { while (start_index <= end_index) { batch_from_xarray_clear(batch, &pages->pinned_pfns, start_index, end_index); batch_unpin(batch, pages, 0, batch->total_pfns); start_index += batch->total_pfns; batch_clear(batch); } } /** * iopt_pages_unfill_xarray() - Update the xarry after removing an access * @pages: The pages to act on * @start_index: Starting PFN index * @last_index: Last PFN index * * Called when an iopt_pages_access is removed, removes pages from the itree. * The access should already be removed from the access_itree. 
*/ void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { struct interval_tree_double_span_iter span; u64 backup[BATCH_BACKUP_SIZE]; struct pfn_batch batch; bool batch_inited = false; lockdep_assert_held(&pages->mutex); interval_tree_for_each_double_span(&span, &pages->access_itree, &pages->domains_itree, start_index, last_index) { if (!span.is_used) { if (!batch_inited) { batch_init_backup(&batch, last_index - start_index + 1, backup, sizeof(backup)); batch_inited = true; } iopt_pages_unpin_xarray(&batch, pages, span.start_hole, span.last_hole); } else if (span.is_used == 2) { /* Covered by a domain */ clear_xarray(&pages->pinned_pfns, span.start_used, span.last_used); } /* Otherwise covered by an existing access */ } if (batch_inited) batch_destroy(&batch, backup); update_unpinned(pages); } /** * iopt_pages_fill_from_xarray() - Fast path for reading PFNs * @pages: The pages to act on * @start_index: The first page index in the range * @last_index: The last page index in the range * @out_pages: The output array to return the pages * * This can be called if the caller is holding a refcount on an * iopt_pages_access that is known to have already been filled. It quickly reads * the pages directly from the xarray. * * This is part of the SW iommu interface to read pages for in-kernel use. */ void iopt_pages_fill_from_xarray(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, struct page **out_pages) { XA_STATE(xas, &pages->pinned_pfns, start_index); void *entry; rcu_read_lock(); while (start_index <= last_index) { entry = xas_next(&xas); if (xas_retry(&xas, entry)) continue; WARN_ON(!xa_is_value(entry)); *(out_pages++) = pfn_to_page(xa_to_value(entry)); start_index++; } rcu_read_unlock(); } static int iopt_pages_fill_from_domain(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, struct page **out_pages) { while (start_index != last_index + 1) { unsigned long domain_last; struct iopt_area *area; area = iopt_pages_find_domain_area(pages, start_index); if (WARN_ON(!area)) return -EINVAL; domain_last = min(iopt_area_last_index(area), last_index); out_pages = raw_pages_from_domain(area->storage_domain, area, start_index, domain_last, out_pages); start_index = domain_last + 1; } return 0; } static int iopt_pages_fill(struct iopt_pages *pages, struct pfn_reader_user *user, unsigned long start_index, unsigned long last_index, struct page **out_pages) { unsigned long cur_index = start_index; int rc; while (cur_index != last_index + 1) { user->upages = out_pages + (cur_index - start_index); rc = pfn_reader_user_pin(user, pages, cur_index, last_index); if (rc) goto out_unpin; cur_index = user->upages_end; } return 0; out_unpin: if (start_index != cur_index) iopt_pages_err_unpin(pages, start_index, cur_index - 1, out_pages); return rc; } /** * iopt_pages_fill_xarray() - Read PFNs * @pages: The pages to act on * @start_index: The first page index in the range * @last_index: The last page index in the range * @out_pages: The output array to return the pages, may be NULL * * This populates the xarray and returns the pages in out_pages. As the slow * path this is able to copy pages from other storage tiers into the xarray. * * On failure the xarray is left unchanged. * * This is part of the SW iommu interface to read pages for in-kernel use. 
*/ int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, struct page **out_pages) { struct interval_tree_double_span_iter span; unsigned long xa_end = start_index; struct pfn_reader_user user; int rc; lockdep_assert_held(&pages->mutex); pfn_reader_user_init(&user, pages); user.upages_len = (last_index - start_index + 1) * sizeof(*out_pages); interval_tree_for_each_double_span(&span, &pages->access_itree, &pages->domains_itree, start_index, last_index) { struct page **cur_pages; if (span.is_used == 1) { cur_pages = out_pages + (span.start_used - start_index); iopt_pages_fill_from_xarray(pages, span.start_used, span.last_used, cur_pages); continue; } if (span.is_used == 2) { cur_pages = out_pages + (span.start_used - start_index); iopt_pages_fill_from_domain(pages, span.start_used, span.last_used, cur_pages); rc = pages_to_xarray(&pages->pinned_pfns, span.start_used, span.last_used, cur_pages); if (rc) goto out_clean_xa; xa_end = span.last_used + 1; continue; } /* hole */ cur_pages = out_pages + (span.start_hole - start_index); rc = iopt_pages_fill(pages, &user, span.start_hole, span.last_hole, cur_pages); if (rc) goto out_clean_xa; rc = pages_to_xarray(&pages->pinned_pfns, span.start_hole, span.last_hole, cur_pages); if (rc) { iopt_pages_err_unpin(pages, span.start_hole, span.last_hole, cur_pages); goto out_clean_xa; } xa_end = span.last_hole + 1; } rc = pfn_reader_user_update_pinned(&user, pages); if (rc) goto out_clean_xa; user.upages = NULL; pfn_reader_user_destroy(&user, pages); return 0; out_clean_xa: if (start_index != xa_end) iopt_pages_unfill_xarray(pages, start_index, xa_end - 1); user.upages = NULL; pfn_reader_user_destroy(&user, pages); return rc; } /* * This uses the pfn_reader instead of taking a shortcut by using the mm. It can * do every scenario and is fully consistent with what an iommu_domain would * see. */ static int iopt_pages_rw_slow(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, unsigned long offset, void *data, unsigned long length, unsigned int flags) { struct pfn_reader pfns; int rc; mutex_lock(&pages->mutex); rc = pfn_reader_first(&pfns, pages, start_index, last_index); if (rc) goto out_unlock; while (!pfn_reader_done(&pfns)) { unsigned long done; done = batch_rw(&pfns.batch, data, offset, length, flags); data += done; length -= done; offset = 0; pfn_reader_unpin(&pfns); rc = pfn_reader_next(&pfns); if (rc) goto out_destroy; } if (WARN_ON(length != 0)) rc = -EINVAL; out_destroy: pfn_reader_destroy(&pfns); out_unlock: mutex_unlock(&pages->mutex); return rc; } /* * A medium speed path that still allows DMA inconsistencies, but doesn't do any * memory allocations or interval tree searches. */ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index, unsigned long offset, void *data, unsigned long length, unsigned int flags) { struct page *page = NULL; int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(pages->type != IOPT_ADDRESS_USER)) return -EINVAL; if (!mmget_not_zero(pages->source_mm)) return iopt_pages_rw_slow(pages, index, index, offset, data, length, flags); if (iommufd_should_fail()) { rc = -EINVAL; goto out_mmput; } mmap_read_lock(pages->source_mm); rc = pin_user_pages_remote( pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE), 1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? 
			     FOLL_WRITE : 0, &page, NULL);
	mmap_read_unlock(pages->source_mm);
	if (rc != 1) {
		if (WARN_ON(rc >= 0))
			rc = -EINVAL;
		goto out_mmput;
	}
	copy_data_page(page, data, offset, length, flags);
	unpin_user_page(page);
	rc = 0;

out_mmput:
	mmput(pages->source_mm);
	return rc;
}

/**
 * iopt_pages_rw_access - Copy to/from a linear slice of the pages
 * @pages: pages to act on
 * @start_byte: First byte of pages to copy to/from
 * @data: Kernel buffer to get/put the data
 * @length: Number of bytes to copy
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * This will find each page in the range, kmap it and then memcpy to/from
 * the given kernel buffer.
 */
int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
			 void *data, unsigned long length, unsigned int flags)
{
	unsigned long start_index = start_byte / PAGE_SIZE;
	unsigned long last_index = (start_byte + length - 1) / PAGE_SIZE;
	bool change_mm = current->mm != pages->source_mm;
	int rc = 0;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    (flags & __IOMMUFD_ACCESS_RW_SLOW_PATH))
		change_mm = true;

	if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
		return -EPERM;

	if (pages->type == IOPT_ADDRESS_FILE)
		return iopt_pages_rw_slow(pages, start_index, last_index,
					  start_byte % PAGE_SIZE, data, length,
					  flags);

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(pages->type != IOPT_ADDRESS_USER))
		return -EINVAL;

	if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) {
		if (start_index == last_index)
			return iopt_pages_rw_page(pages, start_index,
						  start_byte % PAGE_SIZE, data,
						  length, flags);
		return iopt_pages_rw_slow(pages, start_index, last_index,
					  start_byte % PAGE_SIZE, data, length,
					  flags);
	}

	/*
	 * Try to copy using copy_to_user(). We do this as a fast path and
	 * ignore any pinning inconsistencies, unlike a real DMA path.
	 */
	if (change_mm) {
		if (!mmget_not_zero(pages->source_mm))
			return iopt_pages_rw_slow(pages, start_index,
						  last_index,
						  start_byte % PAGE_SIZE, data,
						  length, flags);
		kthread_use_mm(pages->source_mm);
	}

	if (flags & IOMMUFD_ACCESS_RW_WRITE) {
		if (copy_to_user(pages->uptr + start_byte, data, length))
			rc = -EFAULT;
	} else {
		if (copy_from_user(data, pages->uptr + start_byte, length))
			rc = -EFAULT;
	}

	if (change_mm) {
		kthread_unuse_mm(pages->source_mm);
		mmput(pages->source_mm);
	}
	return rc;
}

static struct iopt_pages_access *
iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
			    unsigned long last)
{
	struct interval_tree_node *node;

	lockdep_assert_held(&pages->mutex);

	/* There can be overlapping ranges in this interval tree */
	for (node = interval_tree_iter_first(&pages->access_itree, index, last);
	     node; node = interval_tree_iter_next(node, index, last))
		if (node->start == index && node->last == last)
			return container_of(node, struct iopt_pages_access,
					    node);
	return NULL;
}

/**
 * iopt_area_add_access() - Record an in-kernel access for PFNs
 * @area: The source of PFNs
 * @start_index: First page index
 * @last_index: Inclusive last page index
 * @out_pages: Output list of struct page's representing the PFNs
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Record that an in-kernel access will be accessing the pages, ensure they are
 * pinned, and return the PFNs as a simple list of 'struct page *'.
* * This should be undone through a matching call to iopt_area_remove_access() */ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, unsigned long last_index, struct page **out_pages, unsigned int flags) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; int rc; if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) return -EPERM; mutex_lock(&pages->mutex); access = iopt_pages_get_exact_access(pages, start_index, last_index); if (access) { area->num_accesses++; access->users++; iopt_pages_fill_from_xarray(pages, start_index, last_index, out_pages); mutex_unlock(&pages->mutex); return 0; } access = kzalloc(sizeof(*access), GFP_KERNEL_ACCOUNT); if (!access) { rc = -ENOMEM; goto err_unlock; } rc = iopt_pages_fill_xarray(pages, start_index, last_index, out_pages); if (rc) goto err_free; access->node.start = start_index; access->node.last = last_index; access->users = 1; area->num_accesses++; interval_tree_insert(&access->node, &pages->access_itree); mutex_unlock(&pages->mutex); return 0; err_free: kfree(access); err_unlock: mutex_unlock(&pages->mutex); return rc; } /** * iopt_area_remove_access() - Release an in-kernel access for PFNs * @area: The source of PFNs * @start_index: First page index * @last_index: Inclusive last page index * * Undo iopt_area_add_access() and unpin the pages if necessary. The caller * must stop using the PFNs before calling this. */ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, unsigned long last_index) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; mutex_lock(&pages->mutex); access = iopt_pages_get_exact_access(pages, start_index, last_index); if (WARN_ON(!access)) goto out_unlock; WARN_ON(area->num_accesses == 0 || access->users == 0); area->num_accesses--; access->users--; if (access->users) goto out_unlock; interval_tree_remove(&access->node, &pages->access_itree); iopt_pages_unfill_xarray(pages, start_index, last_index); kfree(access); out_unlock: mutex_unlock(&pages->mutex); }
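/*
 * Illustrative sketch (not part of the file above): how an in-kernel consumer
 * would pair iopt_area_add_access() with iopt_area_remove_access(), following
 * the kernel-doc above. The function name example_read_first_page() is
 * hypothetical, it assumes the iommufd-internal declarations (io_pagetable.h)
 * are in scope, and the io_pagetable locking that produces the iopt_area is
 * omitted.
 */
static int example_read_first_page(struct iopt_area *area,
				   unsigned long start_index,
				   unsigned long last_index)
{
	unsigned long npages = last_index - start_index + 1;
	struct page **out_pages;
	void *vaddr;
	int rc;

	out_pages = kcalloc(npages, sizeof(*out_pages), GFP_KERNEL);
	if (!out_pages)
		return -ENOMEM;

	/*
	 * Pin the range; flags == 0 asks for read-only access, while
	 * IOMMUFD_ACCESS_RW_WRITE would also require pages->writable.
	 */
	rc = iopt_area_add_access(area, start_index, last_index, out_pages, 0);
	if (rc)
		goto out_free;

	/* The PFNs stay pinned until the matching remove below */
	vaddr = kmap_local_page(out_pages[0]);
	/* ... consume vaddr ... */
	kunmap_local(vaddr);

	/* Stop using the PFNs, then undo the access */
	iopt_area_remove_access(area, start_index, last_index);
out_free:
	kfree(out_pages);
	return rc;
}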
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM tcp

#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_TCP_H

#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/tracepoint.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <linux/sock_diag.h>
#include <net/rstreason.h>

/*
 * tcp event with arguments sk and skb
 *
 * Note: this class requires a valid sk pointer; while skb pointer could
 * be NULL.
*/ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", __entry->skbaddr, __entry->skaddr, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->state)) ); DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); #undef FN #define FN(reason) TRACE_DEFINE_ENUM(SK_RST_REASON_##reason); DEFINE_RST_REASON(FN, FN) #undef FN #undef FNe #define FN(reason) { SK_RST_REASON_##reason, #reason }, #define FNe(reason) { SK_RST_REASON_##reason, #reason } /* * skb of trace_tcp_send_reset is the skb that caused RST. In case of * active reset, skb should be NULL */ TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, const struct sk_buff *skb__nullable, const enum sk_rst_reason reason), TP_ARGS(sk, skb__nullable, reason), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(enum sk_rst_reason, reason) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->skbaddr = skb__nullable; __entry->skaddr = sk; /* Zero means unknown state. */ __entry->state = sk ? sk->sk_state : 0; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); if (sk && sk_fullsock(sk)) { const struct inet_sock *inet = inet_sk(sk); TP_STORE_ADDR_PORTS(__entry, inet, sk); } else if (skb__nullable) { const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data; /* * We should reverse the 4-tuple of skb, so later * it can print the right flow direction of rst. */ TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr); } __entry->reason = reason; ), TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s", __entry->skbaddr, __entry->skaddr, __entry->saddr, __entry->daddr, __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN", __print_symbolic(__entry->reason, DEFINE_RST_REASON(FN, FNe))) ); #undef FN #undef FNe /* * tcp event with arguments sk * * Note: this class requires a valid sk pointer. 
*/ DECLARE_EVENT_CLASS(tcp_event_sk, TP_PROTO(struct sock *sk), TP_ARGS(sk), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u64, sock_cookie) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->sock_cookie = sock_gen_cookie(sk); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); TRACE_EVENT(tcp_retransmit_synack, TP_PROTO(const struct sock *sk, const struct request_sock *req), TP_ARGS(sk, req), TP_STRUCT__entry( __field(const void *, skaddr) __field(const void *, req) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); __be32 *p32; __entry->skaddr = sk; __entry->req = req; __entry->sport = ireq->ir_num; __entry->dport = ntohs(ireq->ir_rmt_port); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = ireq->ir_loc_addr; p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6) ); DECLARE_TRACE(tcp_cwnd_reduction_tp, TP_PROTO(const struct sock *sk, int newly_acked_sacked, int newly_lost, int flag), TP_ARGS(sk, newly_acked_sacked, newly_lost, flag) ); #include <trace/events/net_probe_common.h> TRACE_EVENT(tcp_probe, TP_PROTO(struct sock *sk, struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, mark) __field(__u16, data_len) __field(__u32, snd_nxt) __field(__u32, snd_una) __field(__u32, snd_cwnd) __field(__u32, ssthresh) __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) __field(__u64, sock_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; const struct inet_sock *inet = inet_sk(sk); const struct tcp_sock *tp = tcp_sk(sk); memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For 
filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; __entry->family = sk->sk_family; __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; __entry->snd_cwnd = tcp_snd_cwnd(tp); __entry->snd_wnd = tp->snd_wnd; __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; __entry->sock_cookie = sock_gen_cookie(sk); __entry->skbaddr = skb; __entry->skaddr = sk; ), TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx skbaddr=%p skaddr=%p", show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->mark, __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->skbaddr, __entry->skaddr) ); /* * tcp event with only skb */ DECLARE_EVENT_CLASS(tcp_event_skb, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __field(const void *, skbaddr) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->skbaddr = skb; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); ), TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", __entry->skbaddr, __entry->saddr, __entry->daddr) ); DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); TRACE_EVENT(tcp_cong_state_set, TP_PROTO(struct sock *sk, const u8 ca_state), TP_ARGS(sk, ca_state), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u8, cong_state) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->cong_state = ca_state; ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->cong_state) ); DECLARE_EVENT_CLASS(tcp_hash_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; 
__entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' : ' ') ); DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DECLARE_EVENT_CLASS(tcp_ao_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) __field(__u8, keyid) __field(__u8, rnext) __field(__u8, maclen) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; __entry->keyid = keyid; __entry->rnext = rnext; __entry->maclen = maclen; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' 
: ' ', __entry->keyid, __entry->rnext, __entry->maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DECLARE_EVENT_CLASS(tcp_ao_event_sk, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u8, keyid) __field(__u8, rnext) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->keyid = keyid; __entry->rnext = rnext; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->keyid, __entry->rnext) ); DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext) ); DECLARE_EVENT_CLASS(tcp_ao_event_sne, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, new_sne) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->new_sne = new_sne; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), 
show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
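/*
 * Illustrative note (not part of the header above): new TCP tracepoints reuse
 * the event classes declared in this file by stamping them out with
 * DEFINE_EVENT() inside the guarded section, exactly as tcp_receive_reset and
 * friends do. The event name tcp_example_event below is hypothetical:
 *
 *	DEFINE_EVENT(tcp_event_sk, tcp_example_event,
 *
 *		TP_PROTO(struct sock *sk),
 *
 *		TP_ARGS(sk)
 *	);
 *
 * TCP code then emits the event through the generated helpers, typically
 * guarded with the _enabled() check so argument setup is skipped while the
 * tracepoint is disabled:
 *
 *	if (trace_tcp_example_event_enabled())
 *		trace_tcp_example_event(sk);
 */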
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TTY_DRIVER_H
#define _LINUX_TTY_DRIVER_H

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/termios.h>
#include <linux/seq_file.h>

struct tty_struct;
struct tty_driver;
struct serial_icounter_struct;
struct serial_struct;

/**
 * enum tty_driver_flag -- TTY Driver Flags
 *
 * These are flags passed to tty_alloc_driver().
 *
 * @TTY_DRIVER_INSTALLED:
 *	Whether this driver was successfully installed. This is a tty internal
 *	flag. Do not touch.
 *
 * @TTY_DRIVER_RESET_TERMIOS:
 *	Requests the tty layer to reset the termios setting when the last
 *	process has closed the device. Used for PTYs, in particular.
 *
 * @TTY_DRIVER_REAL_RAW:
 *	Indicates that the driver will guarantee not to set any special
 *	character handling flags if this is set for the tty:
 *
 *	``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)``
 *
 *	That is, if there is no reason for the driver to
 *	send notifications of parity and break characters up to the line
 *	driver, it won't do so. This allows the line driver to optimize for
 *	this case if this flag is set.
(Note that there is also a promise, if * the above case is true, not to signal overruns, either.) * * @TTY_DRIVER_DYNAMIC_DEV: * The individual tty devices need to be registered with a call to * tty_register_device() when the device is found in the system and * unregistered with a call to tty_unregister_device() so the devices will * be show up properly in sysfs. If not set, all &tty_driver.num entries * will be created by the tty core in sysfs when tty_register_driver() is * called. This is to be used by drivers that have tty devices that can * appear and disappear while the main tty driver is registered with the * tty core. * * @TTY_DRIVER_DEVPTS_MEM: * Don't use the standard arrays (&tty_driver.ttys and * &tty_driver.termios), instead use dynamic memory keyed through the * devpts filesystem. This is only applicable to the PTY driver. * * @TTY_DRIVER_HARDWARE_BREAK: * Hardware handles break signals. Pass the requested timeout to the * &tty_operations.break_ctl instead of using a simple on/off interface. * * @TTY_DRIVER_DYNAMIC_ALLOC: * Do not allocate structures which are needed per line for this driver * (&tty_driver.ports) as it would waste memory. The driver will take * care. This is only applicable to the PTY driver. * * @TTY_DRIVER_UNNUMBERED_NODE: * Do not create numbered ``/dev`` nodes. For example, create * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a * driver for a single tty device is being allocated. */ enum tty_driver_flag { TTY_DRIVER_INSTALLED = BIT(0), TTY_DRIVER_RESET_TERMIOS = BIT(1), TTY_DRIVER_REAL_RAW = BIT(2), TTY_DRIVER_DYNAMIC_DEV = BIT(3), TTY_DRIVER_DEVPTS_MEM = BIT(4), TTY_DRIVER_HARDWARE_BREAK = BIT(5), TTY_DRIVER_DYNAMIC_ALLOC = BIT(6), TTY_DRIVER_UNNUMBERED_NODE = BIT(7), }; enum tty_driver_type { TTY_DRIVER_TYPE_SYSTEM, TTY_DRIVER_TYPE_CONSOLE, TTY_DRIVER_TYPE_SERIAL, TTY_DRIVER_TYPE_PTY, TTY_DRIVER_TYPE_SCC, TTY_DRIVER_TYPE_SYSCONS, }; enum tty_driver_subtype { SYSTEM_TYPE_TTY = 1, SYSTEM_TYPE_CONSOLE, SYSTEM_TYPE_SYSCONS, SYSTEM_TYPE_SYSPTMX, PTY_TYPE_MASTER = 1, PTY_TYPE_SLAVE, SERIAL_TYPE_NORMAL = 1, }; /** * struct tty_operations -- interface between driver and tty * * @lookup: ``struct tty_struct *()(struct tty_driver *self, struct file *, * int idx)`` * * Return the tty device corresponding to @idx, %NULL if there is not * one currently in use and an %ERR_PTR value on error. Called under * %tty_mutex (for now!) * * Optional method. Default behaviour is to use the @self->ttys array. * * @install: ``int ()(struct tty_driver *self, struct tty_struct *tty)`` * * Install a new @tty into the @self's internal tables. Used in * conjunction with @lookup and @remove methods. * * Optional method. Default behaviour is to use the @self->ttys array. * * @remove: ``void ()(struct tty_driver *self, struct tty_struct *tty)`` * * Remove a closed @tty from the @self's internal tables. Used in * conjunction with @lookup and @remove methods. * * Optional method. Default behaviour is to use the @self->ttys array. * * @open: ``int ()(struct tty_struct *tty, struct file *)`` * * This routine is called when a particular @tty device is opened. This * routine is mandatory; if this routine is not filled in, the attempted * open will fail with %ENODEV. * * Required method. Called with tty lock held. May sleep. * * @close: ``void ()(struct tty_struct *tty, struct file *)`` * * This routine is called when a particular @tty device is closed. At the * point of return from this call the driver must make no further ldisc * calls of any kind. 
* * Remark: called even if the corresponding @open() failed. * * Required method. Called with tty lock held. May sleep. * * @shutdown: ``void ()(struct tty_struct *tty)`` * * This routine is called under the tty lock when a particular @tty device * is closed for the last time. It executes before the @tty resources * are freed so may execute while another function holds a @tty kref. * * @cleanup: ``void ()(struct tty_struct *tty)`` * * This routine is called asynchronously when a particular @tty device * is closed for the last time freeing up the resources. This is * actually the second part of shutdown for routines that might sleep. * * @write: ``ssize_t ()(struct tty_struct *tty, const u8 *buf, size_t count)`` * * This routine is called by the kernel to write a series (@count) of * characters (@buf) to the @tty device. The characters may come from * user space or kernel space. This routine will return the * number of characters actually accepted for writing. * * May occur in parallel in special cases. Because this includes panic * paths drivers generally shouldn't try and do clever locking here. * * Optional: Required for writable devices. May not sleep. * * @put_char: ``int ()(struct tty_struct *tty, u8 ch)`` * * This routine is called by the kernel to write a single character @ch to * the @tty device. If the kernel uses this routine, it must call the * @flush_chars() routine (if defined) when it is done stuffing characters * into the driver. If there is no room in the queue, the character is * ignored. * * Optional: Kernel will use the @write method if not provided. Do not * call this function directly, call tty_put_char(). * * @flush_chars: ``void ()(struct tty_struct *tty)`` * * This routine is called by the kernel after it has written a * series of characters to the tty device using @put_char(). * * Optional. Do not call this function directly, call * tty_driver_flush_chars(). * * @write_room: ``unsigned int ()(struct tty_struct *tty)`` * * This routine returns the numbers of characters the @tty driver * will accept for queuing to be written. This number is subject * to change as output buffers get emptied, or if the output flow * control is acted. * * The ldisc is responsible for being intelligent about multi-threading of * write_room/write calls * * Required if @write method is provided else not needed. Do not call this * function directly, call tty_write_room() * * @chars_in_buffer: ``unsigned int ()(struct tty_struct *tty)`` * * This routine returns the number of characters in the device private * output queue. Used in tty_wait_until_sent() and for poll() * implementation. * * Optional: if not provided, it is assumed there is no queue on the * device. Do not call this function directly, call tty_chars_in_buffer(). * * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` * * This routine allows the @tty driver to implement device-specific * ioctls. If the ioctl number passed in @cmd is not recognized by the * driver, it should return %ENOIOCTLCMD. * * Optional. * * @compat_ioctl: ``long ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` * * Implement ioctl processing for 32 bit process on 64 bit system. * * Optional. * * @set_termios: ``void ()(struct tty_struct *tty, const struct ktermios *old)`` * * This routine allows the @tty driver to be notified when device's * termios settings have changed. New settings are in @tty->termios. * Previous settings are passed in the @old argument. 
* * The API is defined such that the driver should return the actual modes * selected. This means that the driver is responsible for modifying any * bits in @tty->termios it cannot fulfill to indicate the actual modes * being used. * * Optional. Called under the @tty->termios_rwsem. May sleep. * * @ldisc_ok: ``int ()(struct tty_struct *tty, int ldisc)`` * * This routine allows the @tty driver to decide if it can deal * with a particular @ldisc. * * Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem. * * @set_ldisc: ``void ()(struct tty_struct *tty)`` * * This routine allows the @tty driver to be notified when the device's * line discipline is being changed. At the point this is done the * discipline is not yet usable. * * Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem. * * @throttle: ``void ()(struct tty_struct *tty)`` * * This routine notifies the @tty driver that input buffers for the line * discipline are close to full, and it should somehow signal that no more * characters should be sent to the @tty. * * Serialization including with @unthrottle() is the job of the ldisc * layer. * * Optional: Always invoke via tty_throttle_safe(). Called under the * @tty->termios_rwsem. * * @unthrottle: ``void ()(struct tty_struct *tty)`` * * This routine notifies the @tty driver that it should signal that * characters can now be sent to the @tty without fear of overrunning the * input buffers of the line disciplines. * * Optional. Always invoke via tty_unthrottle(). Called under the * @tty->termios_rwsem. * * @stop: ``void ()(struct tty_struct *tty)`` * * This routine notifies the @tty driver that it should stop outputting * characters to the tty device. * * Called with @tty->flow.lock held. Serialized with @start() method. * * Optional. Always invoke via stop_tty(). * * @start: ``void ()(struct tty_struct *tty)`` * * This routine notifies the @tty driver that it resumed sending * characters to the @tty device. * * Called with @tty->flow.lock held. Serialized with stop() method. * * Optional. Always invoke via start_tty(). * * @hangup: ``void ()(struct tty_struct *tty)`` * * This routine notifies the @tty driver that it should hang up the @tty * device. * * Optional. Called with tty lock held. * * @break_ctl: ``int ()(struct tty_struct *tty, int state)`` * * This optional routine requests the @tty driver to turn on or off BREAK * status on the RS-232 port. If @state is -1, then the BREAK status * should be turned on; if @state is 0, then BREAK should be turned off. * * If this routine is implemented, the high-level tty driver will handle * the following ioctls: %TCSBRK, %TCSBRKP, %TIOCSBRK, %TIOCCBRK. * * If the driver sets %TTY_DRIVER_HARDWARE_BREAK in tty_alloc_driver(), * then the interface will also be called with actual times and the * hardware is expected to do the delay work itself. 0 and -1 are still * used for on/off. * * Optional: Required for %TCSBRK/%BRKP/etc. handling. May sleep. * * @flush_buffer: ``void ()(struct tty_struct *tty)`` * * This routine discards device private output buffer. Invoked on close, * hangup, to implement %TCOFLUSH ioctl and similar. * * Optional: if not provided, it is assumed there is no queue on the * device. Do not call this function directly, call * tty_driver_flush_buffer(). * * @wait_until_sent: ``void ()(struct tty_struct *tty, int timeout)`` * * This routine waits until the device has written out all of the * characters in its transmitter FIFO. Or until @timeout (in jiffies) is * reached. 
* * Optional: If not provided, the device is assumed to have no FIFO. * Usually correct to invoke via tty_wait_until_sent(). May sleep. * * @send_xchar: ``void ()(struct tty_struct *tty, u8 ch)`` * * This routine is used to send a high-priority XON/XOFF character (@ch) * to the @tty device. * * Optional: If not provided, then the @write method is called under * the @tty->atomic_write_lock to keep it serialized with the ldisc. * * @tiocmget: ``int ()(struct tty_struct *tty)`` * * This routine is used to obtain the modem status bits from the @tty * driver. * * Optional: If not provided, then %ENOTTY is returned from the %TIOCMGET * ioctl. Do not call this function directly, call tty_tiocmget(). * * @tiocmset: ``int ()(struct tty_struct *tty, * unsigned int set, unsigned int clear)`` * * This routine is used to set the modem status bits to the @tty driver. * First, @clear bits should be cleared, then @set bits set. * * Optional: If not provided, then %ENOTTY is returned from the %TIOCMSET * ioctl. Do not call this function directly, call tty_tiocmset(). * * @resize: ``int ()(struct tty_struct *tty, struct winsize *ws)`` * * Called when a termios request is issued which changes the requested * terminal geometry to @ws. * * Optional: the default action is to update the termios structure * without error. This is usually the correct behaviour. Drivers should * not force errors here if they are not resizable objects (e.g. a serial * line). See tty_do_resize() if you need to wrap the standard method * in your own logic -- the usual case. * * @get_icount: ``int ()(struct tty_struct *tty, * struct serial_icounter *icount)`` * * Called when the @tty device receives a %TIOCGICOUNT ioctl. Passed a * kernel structure @icount to complete. * * Optional: called only if provided, otherwise %ENOTTY will be returned. * * @get_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)`` * * Called when the @tty device receives a %TIOCGSERIAL ioctl. Passed a * kernel structure @p (&struct serial_struct) to complete. * * Optional: called only if provided, otherwise %ENOTTY will be returned. * Do not call this function directly, call tty_tiocgserial(). * * @set_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)`` * * Called when the @tty device receives a %TIOCSSERIAL ioctl. Passed a * kernel structure @p (&struct serial_struct) to set the values from. * * Optional: called only if provided, otherwise %ENOTTY will be returned. * Do not call this function directly, call tty_tiocsserial(). * * @show_fdinfo: ``void ()(struct tty_struct *tty, struct seq_file *m)`` * * Called when the @tty device file descriptor receives a fdinfo request * from VFS (to show in /proc/<pid>/fdinfo/). @m should be filled with * information. * * Optional: called only if provided, otherwise nothing is written to @m. * Do not call this function directly, call tty_show_fdinfo(). * * @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)`` * * kgdboc support (Documentation/process/debugging/kgdb.rst). This routine is * called to initialize the HW for later use by calling @poll_get_char or * @poll_put_char. * * Optional: called only if provided, otherwise skipped as a non-polling * driver. * * @poll_get_char: ``int ()(struct tty_driver *driver, int line)`` * * kgdboc support (see @poll_init). @driver should read a character from a * tty identified by @line and return it. * * Optional: called only if @poll_init provided. 
* * @poll_put_char: ``void ()(struct tty_driver *driver, int line, char ch)`` * * kgdboc support (see @poll_init). @driver should write character @ch to * a tty identified by @line. * * Optional: called only if @poll_init provided. * * @proc_show: ``int ()(struct seq_file *m, void *driver)`` * * Driver @driver (cast to &struct tty_driver) can show additional info in * /proc/tty/driver/<driver_name>. It is enough to fill in the information * into @m. * * Optional: called only if provided, otherwise no /proc entry created. * * This structure defines the interface between the low-level tty driver and * the tty routines. These routines can be defined. Unless noted otherwise, * they are optional, and can be filled in with a %NULL pointer. */ struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, struct file *filp, int idx); int (*install)(struct tty_driver *driver, struct tty_struct *tty); void (*remove)(struct tty_driver *driver, struct tty_struct *tty); int (*open)(struct tty_struct * tty, struct file * filp); void (*close)(struct tty_struct * tty, struct file * filp); void (*shutdown)(struct tty_struct *tty); void (*cleanup)(struct tty_struct *tty); ssize_t (*write)(struct tty_struct *tty, const u8 *buf, size_t count); int (*put_char)(struct tty_struct *tty, u8 ch); void (*flush_chars)(struct tty_struct *tty); unsigned int (*write_room)(struct tty_struct *tty); unsigned int (*chars_in_buffer)(struct tty_struct *tty); int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); long (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); void (*set_termios)(struct tty_struct *tty, const struct ktermios *old); void (*throttle)(struct tty_struct * tty); void (*unthrottle)(struct tty_struct * tty); void (*stop)(struct tty_struct *tty); void (*start)(struct tty_struct *tty); void (*hangup)(struct tty_struct *tty); int (*break_ctl)(struct tty_struct *tty, int state); void (*flush_buffer)(struct tty_struct *tty); int (*ldisc_ok)(struct tty_struct *tty, int ldisc); void (*set_ldisc)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, int timeout); void (*send_xchar)(struct tty_struct *tty, u8 ch); int (*tiocmget)(struct tty_struct *tty); int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); int (*resize)(struct tty_struct *tty, struct winsize *ws); int (*get_icount)(struct tty_struct *tty, struct serial_icounter_struct *icount); int (*get_serial)(struct tty_struct *tty, struct serial_struct *p); int (*set_serial)(struct tty_struct *tty, struct serial_struct *p); void (*show_fdinfo)(struct tty_struct *tty, struct seq_file *m); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct tty_driver *driver, int line, char *options); int (*poll_get_char)(struct tty_driver *driver, int line); void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif int (*proc_show)(struct seq_file *m, void *driver); } __randomize_layout; /** * struct tty_driver -- driver for TTY devices * * @kref: reference counting. Reaching zero frees all the internals and the * driver. * @cdevs: allocated/registered character /dev devices * @owner: modules owning this driver. Used drivers cannot be rmmod'ed. * Automatically set by tty_alloc_driver(). 
* @driver_name: name of the driver used in /proc/tty * @name: used for constructing /dev node name * @name_base: used as a number base for constructing /dev node name * @major: major /dev device number (zero for autoassignment) * @minor_start: the first minor /dev device number * @num: number of devices allocated * @type: type of tty driver (enum tty_driver_type) * @subtype: subtype of tty driver (enum tty_driver_subtype) * @init_termios: termios to set to each tty initially (e.g. %tty_std_termios) * @flags: tty driver flags (%TTY_DRIVER_) * @proc_entry: proc fs entry, used internally * @other: driver of the linked tty; only used for the PTY driver * @ttys: array of active &struct tty_struct, set by tty_standard_install() * @ports: array of &struct tty_port; can be set during initialization by * tty_port_link_device() and similar * @termios: storage for termios at each TTY close for the next open * @driver_state: pointer to driver's arbitrary data * @ops: driver hooks for TTYs. Set them using tty_set_operations(). Use &struct * tty_port helpers in them as much as possible. * @tty_drivers: used internally to link tty_drivers together * * The usual handling of &struct tty_driver is to allocate it by * tty_alloc_driver(), set up all the necessary members, and register it by * tty_register_driver(). At last, the driver is torn down by calling * tty_unregister_driver() followed by tty_driver_kref_put(). * * The fields required to be set before calling tty_register_driver() include * @driver_name, @name, @type, @subtype, @init_termios, and @ops. */ struct tty_driver { struct kref kref; struct cdev **cdevs; struct module *owner; const char *driver_name; const char *name; int name_base; int major; int minor_start; unsigned int num; enum tty_driver_type type; enum tty_driver_subtype subtype; struct ktermios init_termios; unsigned long flags; struct proc_dir_entry *proc_entry; struct tty_driver *other; /* * Pointer to the tty data structures */ struct tty_struct **ttys; struct tty_port **ports; struct ktermios **termios; void *driver_state; /* * Driver methods */ const struct tty_operations *ops; struct list_head tty_drivers; } __randomize_layout; extern struct list_head tty_drivers; struct tty_driver *__tty_alloc_driver(unsigned int lines, struct module *owner, unsigned long flags); struct tty_driver *tty_find_polling_driver(char *name, int *line); void tty_driver_kref_put(struct tty_driver *driver); /** * tty_alloc_driver - allocate tty driver * @lines: count of lines this driver can handle at most * @flags: some of enum tty_driver_flag, will be set in driver->flags * * Returns: struct tty_driver or a PTR-encoded error (use IS_ERR() and friends). 
*/ #define tty_alloc_driver(lines, flags) \ __tty_alloc_driver(lines, THIS_MODULE, flags) static inline struct tty_driver *tty_driver_kref_get(struct tty_driver *d) { kref_get(&d->kref); return d; } static inline void tty_set_operations(struct tty_driver *driver, const struct tty_operations *op) { driver->ops = op; } int tty_register_driver(struct tty_driver *driver); void tty_unregister_driver(struct tty_driver *driver); struct device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *dev); struct device *tty_register_device_attr(struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); void tty_unregister_device(struct tty_driver *driver, unsigned index); #ifdef CONFIG_PROC_FS void proc_tty_register_driver(struct tty_driver *); void proc_tty_unregister_driver(struct tty_driver *); #else static inline void proc_tty_register_driver(struct tty_driver *d) {} static inline void proc_tty_unregister_driver(struct tty_driver *d) {} #endif #endif /* #ifdef _LINUX_TTY_DRIVER_H */
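/*
 * Illustrative sketch (not part of the header above) of the registration flow
 * described in the struct tty_driver kernel-doc: allocate with
 * tty_alloc_driver(), fill in the required fields, set the operations, then
 * tty_register_driver(); tear down with tty_unregister_driver() followed by
 * tty_driver_kref_put(). The "example" driver, its names and its empty
 * tty_operations are hypothetical; a real driver also wires up tty_port
 * instances and implements at least open/close/write.
 */
#include <linux/module.h>
#include <linux/tty.h>

static const struct tty_operations example_ops = {
	/* .open, .close, .write, ... go here */
};

static struct tty_driver *example_driver;

static int __init example_tty_init(void)
{
	int ret;

	example_driver = tty_alloc_driver(1, TTY_DRIVER_REAL_RAW |
					     TTY_DRIVER_DYNAMIC_DEV);
	if (IS_ERR(example_driver))
		return PTR_ERR(example_driver);

	example_driver->driver_name = "example";
	example_driver->name = "ttyEX";
	example_driver->major = 0;	/* 0 requests auto-assignment */
	example_driver->type = TTY_DRIVER_TYPE_SERIAL;
	example_driver->subtype = SERIAL_TYPE_NORMAL;
	example_driver->init_termios = tty_std_termios;
	tty_set_operations(example_driver, &example_ops);

	ret = tty_register_driver(example_driver);
	if (ret) {
		tty_driver_kref_put(example_driver);
		return ret;
	}

	/*
	 * With TTY_DRIVER_DYNAMIC_DEV set, each line is later published with
	 * tty_register_device() when the hardware is found.
	 */
	return 0;
}

static void __exit example_tty_exit(void)
{
	tty_unregister_driver(example_driver);
	tty_driver_kref_put(example_driver);
}

module_init(example_tty_init);
module_exit(example_tty_exit);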
/* SPDX-License-Identifier: GPL-2.0-only */ /* * fence-chain: chain fences together in a timeline * * Copyright (C) 2018 Advanced Micro Devices, Inc. * Authors: * Christian König <christian.koenig@amd.com> */ #ifndef __LINUX_DMA_FENCE_CHAIN_H #define __LINUX_DMA_FENCE_CHAIN_H #include <linux/dma-fence.h> #include <linux/irq_work.h> #include <linux/slab.h> /** * struct dma_fence_chain - fence to represent a node of a fence chain * @base: fence base class * @prev: previous fence of the chain * @prev_seqno: original previous seqno before garbage collection * @fence: encapsulated fence * @lock: spinlock for fence handling */ struct dma_fence_chain { struct dma_fence base; struct dma_fence __rcu *prev; u64 prev_seqno; struct dma_fence *fence; union { /** * @cb: callback for signaling * * This is used to add the callback for signaling the * completion of the fence chain. Never used at the same time * as the irq work. */ struct dma_fence_cb cb; /** * @work: irq work item for signaling * * Irq work structure to allow us to add the callback without * running into lock inversion. Never used at the same time as * the callback. */ struct irq_work work; }; spinlock_t lock; }; /** * to_dma_fence_chain - cast a fence to a dma_fence_chain * @fence: fence to cast to a dma_fence_chain * * Returns NULL if the fence is not a dma_fence_chain, * or the dma_fence_chain otherwise. */ static inline struct dma_fence_chain * to_dma_fence_chain(struct dma_fence *fence) { if (!fence || !dma_fence_is_chain(fence)) return NULL; return container_of(fence, struct dma_fence_chain, base); } /** * dma_fence_chain_contained - return the contained fence * @fence: the fence to test * * If the fence is a dma_fence_chain the function returns the fence contained * inside the chain object, otherwise it returns the fence itself. */ static inline struct dma_fence * dma_fence_chain_contained(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); return chain ? chain->fence : fence; } /** * dma_fence_chain_alloc * * Returns a new struct dma_fence_chain object or NULL on failure. * * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate alloc_tag). The typecast is * intentional to enforce typesafety. */ #define dma_fence_chain_alloc() \ ((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL)) /** * dma_fence_chain_free * @chain: chain node to free * * Frees up an allocated but not used struct dma_fence_chain object. This * doesn't need an RCU grace period since the fence was never initialized nor * published. After dma_fence_chain_init() has been called the fence must be * released by calling dma_fence_put(), and not through this function. */ static inline void dma_fence_chain_free(struct dma_fence_chain *chain) { kfree(chain); }; /** * dma_fence_chain_for_each - iterate over all fences in chain * @iter: current fence * @head: starting point * * Iterate over all fences in the chain. We keep a reference to the current * fence while inside the loop which must be dropped when breaking out.
* * For a deep dive iterator see dma_fence_unwrap_for_each(). */ #define dma_fence_chain_for_each(iter, head) \ for (iter = dma_fence_get(head); iter; \ iter = dma_fence_chain_walk(iter)) struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence); int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno); void dma_fence_chain_init(struct dma_fence_chain *chain, struct dma_fence *prev, struct dma_fence *fence, uint64_t seqno); #endif /* __LINUX_DMA_FENCE_CHAIN_H */
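Because dma_fence_chain_for_each() holds a reference on the iterator, a caller that leaves the loop early must drop it, exactly as the macro's comment requires. A small usage sketch under that assumption follows; chain_fully_signaled() is an illustrative helper, not part of the header.

#include <linux/dma-fence.h>
#include <linux/dma-fence-chain.h>

/* Walk a chain and report whether every contained fence has signaled. */
static bool chain_fully_signaled(struct dma_fence *head)
{
	struct dma_fence *iter;

	dma_fence_chain_for_each(iter, head) {
		if (!dma_fence_is_signaled(dma_fence_chain_contained(iter))) {
			/* breaking out early: drop the iterator's reference */
			dma_fence_put(iter);
			return false;
		}
	}
	/* loop terminated naturally, iter is already NULL here */
	return true;
}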
// SPDX-License-Identifier: GPL-2.0-only /* * Access kernel or user memory without faulting. */ #include <linux/export.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/tlb.h> bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return true; } /* * The below only uses kmsan_check_memory() to ensure uninitialized kernel * memory isn't leaked. */ #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ kmsan_check_memory(src, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); if (!(align & 7)) copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ instrument_write(dst, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; pagefault_disable(); if (!(align & 7)) copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_to_kernel_nofault); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) { const void *src = unsafe_addr; if (unlikely(count <= 0)) return 0; if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); do { __get_kernel_nofault(dst, src, u8, Efault); dst++; src++; } while (dst[-1] && src - unsafe_addr < count);
pagefault_enable(); dst[-1] = '\0'; return src - unsafe_addr; Efault: pagefault_enable(); dst[0] = '\0'; return -EFAULT; } /** * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk * * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; if (!__access_ok(src, size)) return ret; if (!nmi_uaccess_okay()) return ret; pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, size); pagefault_enable(); if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk * * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; if (access_ok(dst, size)) { pagefault_disable(); ret = __copy_to_user_inatomic(dst, src, size); pagefault_enable(); } if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user * address. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. * @unsafe_addr: Unsafe user address. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from unsafe user address to kernel buffer. * * On success, returns the length of the string INCLUDING the trailing NUL. * * If access fails, returns -EFAULT (some data may have been copied * and the trailing NUL added). * * If @count is smaller than the length of the string, copies @count-1 bytes, * sets the last byte of @dst buffer to NUL and returns @count. */ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count) { long ret; if (unlikely(count <= 0)) return 0; pagefault_disable(); ret = strncpy_from_user(dst, unsafe_addr, count); pagefault_enable(); if (ret >= count) { ret = count; dst[ret - 1] = '\0'; } else if (ret > 0) { ret++; } return ret; } /** * strnlen_user_nofault: - Get the size of a user string INCLUDING final NUL. * @unsafe_addr: The string to measure. * @count: Maximum count (including NUL) * * Get the size of a NUL-terminated string in user space without pagefault. * * Returns the size of the string INCLUDING the terminating NUL. * * If the string is too long, returns a number larger than @count. User * has to check the return value against "> count". * On exception (or invalid count), returns 0. * * Unlike strnlen_user, this can be used from IRQ handler etc. because * it disables pagefaults. */ long strnlen_user_nofault(const void __user *unsafe_addr, long count) { int ret; pagefault_disable(); ret = strnlen_user(unsafe_addr, count); pagefault_enable(); return ret; } void __copy_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } EXPORT_SYMBOL(__copy_overflow);
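A brief usage sketch for the nofault helpers above: probing an arbitrary kernel address from a context that must never fault (for example a debug dump). dump_word_at() is a hypothetical caller; only copy_from_kernel_nofault() itself comes from the file above.

#include <linux/uaccess.h>
#include <linux/printk.h>

static void dump_word_at(const void *addr)
{
	unsigned long word;

	/* returns 0 on success, -ERANGE if disallowed, -EFAULT on fault */
	if (copy_from_kernel_nofault(&word, addr, sizeof(word)))
		pr_info("%px: <unreadable>\n", addr);
	else
		pr_info("%px: %lx\n", addr, word);
}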
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2002, 2004 * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * Copyright (c) 2002-2003 Intel Corp. * * This file is part of the SCTP kernel implementation * * SCTP over IPv6. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Le Yanqun <yanqun.le@nokia.com> * Hui Huang <hui.huang@nokia.com> * La Monte H.P.
Yarroll <piggy@acm.org> * Sridhar Samudrala <sri@us.ibm.com> * Jon Grimm <jgrimm@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * * Based on: * linux/net/ipv6/tcp_ipv6.c */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/init.h> #include <linux/ipsec.h> #include <linux/slab.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/seq_file.h> #include <net/protocol.h> #include <net/ndisc.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/transp_v6.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/inet_common.h> #include <net/inet_ecn.h> #include <net/sctp/sctp.h> #include <net/udp_tunnel.h> #include <linux/uaccess.h> static inline int sctp_v6_addr_match_len(union sctp_addr *s1, union sctp_addr *s2); static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, __be16 port); static int sctp_v6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2); /* Event handler for inet6 address addition/deletion events. * The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same * time and thus corrupt the list. * The reader side is protected with RCU. */ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; struct net *net = dev_net(ifa->idev->dev); int found = 0; switch (ev) { case NETDEV_UP: addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; spin_lock_bh(&net->sctp.local_addr_lock); list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr) && addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { found = 1; addr->valid = 0; list_del_rcu(&addr->list); sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); break; } } spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; } return NOTIFY_DONE; } static struct notifier_block sctp_inet6addr_notifier = { .notifier_call = sctp_inet6addr_event, }; static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, __u8 type, __u8 code, __u32 info) { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; int err = 0; switch (type) { case ICMPV6_PKT_TOOBIG: if (ip6_sk_accept_pmtu(sk)) sctp_icmp_frag_needed(sk, asoc, t, info); return; case ICMPV6_PARAMPROB: if (ICMPV6_UNK_NEXTHDR == code) { sctp_icmp_proto_unreachable(sk, asoc, t); return; } break; case NDISC_REDIRECT: sctp_icmp_redirect(sk, t, skb); return; default: break; } icmpv6_err_convert(type, code, &err); if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); } } /* ICMP error handler. 
*/ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct sctp_transport *transport; struct sctp_association *asoc; __u16 saveip, savesctp; struct sock *sk; /* Fix up skb to look at the embedded net header. */ saveip = skb->network_header; savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, offset); sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original pointers. */ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } sctp_v6_err_handle(transport, skb, type, code, ntohl(info)); sctp_err_finish(sk, transport); return 0; } int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct sctp_association *asoc; struct sctp_transport *t; struct icmp6hdr *hdr; __u32 info = 0; skb->transport_header += sizeof(struct udphdr); sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &t); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } skb->transport_header -= sizeof(struct udphdr); hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr)); if (hdr->icmp6_type == NDISC_REDIRECT) { /* can't be handled without outer ip6hdr known, leave it to udpv6_err */ sctp_err_finish(sk, t); return 0; } if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG) info = ntohl(hdr->icmp6_mtu); sctp_v6_err_handle(t, skb, hdr->icmp6_type, hdr->icmp6_code, info); sctp_err_finish(sk, t); return 1; } static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) { struct dst_entry *dst = dst_clone(t->dst); struct flowi6 *fl6 = &t->fl.u.ip6; struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); __u8 tclass = np->tclass; __be32 label; pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl6->saddr, &fl6->daddr); if (t->dscp & SCTP_DSCP_SET_MASK) tclass = t->dscp & SCTP_DSCP_VAL_MASK; if (INET_ECN_is_capable(tclass)) IP6_ECN_flow_xmit(sk, fl6->flowlabel); if (!(t->param_flags & SPP_PMTUD_ENABLE)) skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); if (!t->encap_port || !sctp_sk(sk)->udp_port) { int res; skb_dst_set(skb, dst); rcu_read_lock(); res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } if (skb_is_gso(skb)) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; skb->encapsulation = 1; skb_reset_inner_mac_header(skb); skb_reset_inner_transport_header(skb); skb_set_inner_ipproto(skb, IPPROTO_SCTP); label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6); return udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr, tclass, ip6_dst_hoplimit(dst), label, sctp_sk(sk)->udp_port, t->encap_port, false); } /* Returns the dst cache entry for the given source and destination ip * addresses. 
*/ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct flowi *fl, struct sock *sk) { struct sctp_association *asoc = t->asoc; struct dst_entry *dst = NULL; struct flowi _fl; struct flowi6 *fl6 = &_fl.u.ip6; struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; enum sctp_scope scope; __u8 matchlen = 0; memset(&_fl, 0, sizeof(_fl)); fl6->daddr = daddr->v6.sin6_addr; fl6->fl6_dport = daddr->v6.sin6_port; fl6->flowi6_proto = IPPROTO_SCTP; if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) fl6->flowi6_oif = daddr->v6.sin6_scope_id; else if (asoc) fl6->flowi6_oif = asoc->base.sk->sk_bound_dev_if; if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK) fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK); if (inet6_test_bit(SNDFLOW, sk) && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) goto out; fl6_sock_release(flowlabel); } pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr); if (asoc) fl6->fl6_sport = htons(asoc->base.bind_addr.port); if (saddr) { fl6->saddr = saddr->v6.sin6_addr; if (!fl6->fl6_sport) fl6->fl6_sport = saddr->v6.sin6_port; pr_debug("src=%pI6 - ", &fl6->saddr); } rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (!asoc || saddr) { t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); goto out; } bp = &asoc->base.bind_addr; scope = sctp_scope(daddr); /* ip6_dst_lookup has filled in the fl6->saddr for us. Check * to see if we can use it. */ if (!IS_ERR(dst)) { /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid || laddr->state == SCTP_ADDR_DEL || (laddr->state != SCTP_ADDR_SRC && !asoc->src_out_of_asoc_ok)) continue; /* Do not compare against v4 addrs */ if ((laddr->a.sa.sa_family == AF_INET6) && (sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) { rcu_read_unlock(); t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); goto out; } } rcu_read_unlock(); /* None of the bound addresses match the source address of the * dst. So release it. */ dst_release(dst); dst = NULL; } /* Walk through the bind address list and try to get the * best source address for a given destination. 
*/ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { struct dst_entry *bdst; __u8 bmatchlen; if (!laddr->valid || laddr->state != SCTP_ADDR_SRC || laddr->a.sa.sa_family != AF_INET6 || scope > sctp_scope(&laddr->a)) continue; fl6->saddr = laddr->a.v6.sin6_addr; fl6->fl6_sport = laddr->a.v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); bdst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(bdst)) continue; if (ipv6_chk_addr(dev_net(bdst->dev), &laddr->a.v6.sin6_addr, bdst->dev, 1)) { if (!IS_ERR_OR_NULL(dst)) dst_release(dst); dst = bdst; t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); break; } bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); if (matchlen > bmatchlen) { dst_release(bdst); continue; } if (!IS_ERR_OR_NULL(dst)) dst_release(dst); dst = bdst; matchlen = bmatchlen; t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); } rcu_read_unlock(); out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; rt = dst_rt6_info(dst); t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, &fl->u.ip6.saddr); } else { t->dst = NULL; pr_debug("no route\n"); } } /* Returns the number of consecutive initial bits that match in the 2 ipv6 * addresses. */ static inline int sctp_v6_addr_match_len(union sctp_addr *s1, union sctp_addr *s2) { return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr); } /* Fills in the source address(saddr) based on the destination address(daddr) * and asoc's bind address list. */ static void sctp_v6_get_saddr(struct sctp_sock *sk, struct sctp_transport *t, struct flowi *fl) { struct flowi6 *fl6 = &fl->u.ip6; union sctp_addr *saddr = &t->saddr; pr_debug("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); if (t->dst) { saddr->v6.sin6_family = AF_INET6; saddr->v6.sin6_addr = fl6->saddr; } } /* Make a copy of all potential local addresses. */ static void sctp_v6_copy_addrlist(struct list_head *addrlist, struct net_device *dev) { struct inet6_dev *in6_dev; struct inet6_ifaddr *ifp; struct sctp_sockaddr_entry *addr; rcu_read_lock(); if ((in6_dev = __in6_dev_get(dev)) == NULL) { rcu_read_unlock(); return; } read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; INIT_LIST_HEAD(&addr->list); list_add_tail(&addr->list, addrlist); } } read_unlock_bh(&in6_dev->lock); rcu_read_unlock(); } /* Copy over any ip options */ static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk) { struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct ipv6_txoptions *opt; newnp = inet6_sk(newsk); rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); if (!opt) pr_err("%s: Failed to copy ip options\n", __func__); } RCU_INIT_POINTER(newnp->opt, opt); rcu_read_unlock(); } /* Account for the IP options */ static int sctp_v6_ip_options_len(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_txoptions *opt; int len = 0; rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) len = opt->opt_flen + opt->opt_nflen; rcu_read_unlock(); return len; } /* Initialize a sockaddr_storage from in incoming skb. 
*/ static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr) { /* Always called on head skb, so this is safe */ struct sctphdr *sh = sctp_hdr(skb); struct sockaddr_in6 *sa = &addr->v6; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; /* FIXME */ addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif; if (is_saddr) { sa->sin6_port = sh->source; sa->sin6_addr = ipv6_hdr(skb)->saddr; } else { sa->sin6_port = sh->dest; sa->sin6_addr = ipv6_hdr(skb)->daddr; } } /* Initialize an sctp_addr from a socket. */ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; addr->v6.sin6_addr = sk->sk_v6_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET) { sk->sk_v6_rcv_saddr.s6_addr32[0] = 0; sk->sk_v6_rcv_saddr.s6_addr32[1] = 0; sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); sk->sk_v6_rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { sk->sk_v6_rcv_saddr = addr->v6.sin6_addr; } } /* Initialize sk->sk_daddr from sctp_addr. */ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET) { sk->sk_v6_daddr.s6_addr32[0] = 0; sk->sk_v6_daddr.s6_addr32[1] = 0; sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff); sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { sk->sk_v6_daddr = addr->v6.sin6_addr; } } /* Initialize a sctp_addr from an address parameter. */ static bool sctp_v6_from_addr_param(union sctp_addr *addr, union sctp_addr_param *param, __be16 port, int iif) { if (ntohs(param->v6.param_hdr.length) < sizeof(struct sctp_ipv6addr_param)) return false; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = port; addr->v6.sin6_flowinfo = 0; /* BUG */ addr->v6.sin6_addr = param->v6.addr; addr->v6.sin6_scope_id = iif; return true; } /* Initialize an address parameter from a sctp_addr and return the length * of the address parameter. */ static int sctp_v6_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { int length = sizeof(struct sctp_ipv6addr_param); param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; param->v6.param_hdr.length = htons(length); param->v6.addr = addr->v6.sin6_addr; return length; } /* Initialize a sctp_addr from struct in6_addr. */ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, __be16 port) { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = *saddr; addr->v6.sin6_scope_id = 0; } static int __sctp_v6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2) { if (addr1->sa.sa_family != addr2->sa.sa_family) { if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET6 && ipv6_addr_v4mapped(&addr2->v6.sin6_addr) && addr2->v6.sin6_addr.s6_addr32[3] == addr1->v4.sin_addr.s_addr) return 1; if (addr2->sa.sa_family == AF_INET && addr1->sa.sa_family == AF_INET6 && ipv6_addr_v4mapped(&addr1->v6.sin6_addr) && addr1->v6.sin6_addr.s6_addr32[3] == addr2->v4.sin_addr.s_addr) return 1; return 0; } if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) return 0; /* If this is a linklocal address, compare the scope_id. */ if ((ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) && addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id && addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id) return 0; return 1; } /* Compare addresses exactly. 
* v4-mapped-v6 is also in consideration. */ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2) { return __sctp_v6_cmp_addr(addr1, addr2) && addr1->v6.sin6_port == addr2->v6.sin6_port; } /* Initialize addr struct to INADDR_ANY. */ static void sctp_v6_inaddr_any(union sctp_addr *addr, __be16 port) { memset(addr, 0x00, sizeof(union sctp_addr)); addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = port; } /* Is this a wildcard address? */ static int sctp_v6_is_any(const union sctp_addr *addr) { return ipv6_addr_any(&addr->v6.sin6_addr); } /* Should this be available for binding? */ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) { const struct in6_addr *in6 = (const struct in6_addr *)&addr->v6.sin6_addr; struct sock *sk = &sp->inet.sk; struct net *net = sock_net(sk); struct net_device *dev = NULL; int type, res, bound_dev_if; type = ipv6_addr_type(in6); if (IPV6_ADDR_ANY == type) return 1; if (type == IPV6_ADDR_MAPPED) { if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0; sctp_v6_map_v4(addr); return sctp_get_af_specific(AF_INET)->available(addr, sp); } if (!(type & IPV6_ADDR_UNICAST)) return 0; rcu_read_lock(); bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); if (bound_dev_if) { res = 0; dev = dev_get_by_index_rcu(net, bound_dev_if); if (!dev) goto out; } res = ipv6_can_nonlocal_bind(net, &sp->inet) || ipv6_chk_addr(net, in6, dev, 0); out: rcu_read_unlock(); return res; } /* This function checks if the address is a valid address to be used for * SCTP. * * Output: * Return 0 - If the address is a non-unicast or an illegal address. * Return 1 - If the address is a unicast. */ static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_sock *sp, const struct sk_buff *skb) { int ret = ipv6_addr_type(&addr->v6.sin6_addr); /* Support v4-mapped-v6 address. */ if (ret == IPV6_ADDR_MAPPED) { /* Note: This routine is used in input, so v4-mapped-v6 * are disallowed here when there is no sctp_sock. */ if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0; sctp_v6_map_v4(addr); return sctp_get_af_specific(AF_INET)->addr_valid(addr, sp, skb); } /* Is this a non-unicast address */ if (!(ret & IPV6_ADDR_UNICAST)) return 0; return 1; } /* What is the scope of 'addr'? */ static enum sctp_scope sctp_v6_scope(union sctp_addr *addr) { enum sctp_scope retval; int v6scope; /* The IPv6 scope is really a set of bit fields. * See IFA_* in <net/if_inet6.h>. Map to a generic SCTP scope. */ v6scope = ipv6_addr_scope(&addr->v6.sin6_addr); switch (v6scope) { case IFA_HOST: retval = SCTP_SCOPE_LOOPBACK; break; case IFA_LINK: retval = SCTP_SCOPE_LINK; break; case IFA_SITE: retval = SCTP_SCOPE_PRIVATE; break; default: retval = SCTP_SCOPE_GLOBAL; break; } return retval; } /* Create and initialize a new sk for the socket to be returned by accept(). 
*/ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct sctp_association *asoc, bool kern) { struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); if (!newsk) goto out; sock_init_data(NULL, newsk); sctp_copy_sock(newsk, sk, asoc); sock_reset_flag(sk, SOCK_ZAPPED); newsctp6sk = (struct sctp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newsctp6sk->inet6; sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; sctp_v6_copy_ip_options(sk, newsk); /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() * and getpeername(). */ sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk); newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr; if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); newsk = NULL; } out: return newsk; } /* Format a sockaddr for return to user space. This makes sure the return is * AF_INET or AF_INET6 depending on the SCTP_I_WANT_MAPPED_V4_ADDR option. */ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { if (sp->v4mapped) { if (addr->sa.sa_family == AF_INET) sctp_v4_map_v6(addr); } else { if (addr->sa.sa_family == AF_INET6 && ipv6_addr_v4mapped(&addr->v6.sin6_addr)) sctp_v6_map_v4(addr); } if (addr->sa.sa_family == AF_INET) { memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } return sizeof(struct sockaddr_in6); } /* Where did this skb come from? */ static int sctp_v6_skb_iif(const struct sk_buff *skb) { return inet6_iif(skb); } static int sctp_v6_skb_sdif(const struct sk_buff *skb) { return inet6_sdif(skb); } /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v6_is_ce(const struct sk_buff *skb) { return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20); } /* Dump the v6 addr to the seq file. */ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) { seq_printf(seq, "%pI6 ", &addr->v6.sin6_addr); } static void sctp_v6_ecn_capable(struct sock *sk) { inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } /* Initialize a PF_INET msgname from a ulpevent. */ static void sctp_inet6_event_msgname(struct sctp_ulpevent *event, char *msgname, int *addrlen) { union sctp_addr *addr; struct sctp_association *asoc; union sctp_addr *paddr; if (!msgname) return; addr = (union sctp_addr *)msgname; asoc = event->asoc; paddr = &asoc->peer.primary_addr; if (paddr->sa.sa_family == AF_INET) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = htons(asoc->peer.port); addr->v4.sin_addr = paddr->v4.sin_addr; } else { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; if (ipv6_addr_type(&paddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) addr->v6.sin6_scope_id = paddr->v6.sin6_scope_id; else addr->v6.sin6_scope_id = 0; addr->v6.sin6_port = htons(asoc->peer.port); addr->v6.sin6_addr = paddr->v6.sin6_addr; } *addrlen = sctp_v6_addr_to_user(sctp_sk(asoc->base.sk), addr); } /* Initialize a msg_name from an inbound skb. 
*/ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, int *addr_len) { union sctp_addr *addr; struct sctphdr *sh; if (!msgname) return; addr = (union sctp_addr *)msgname; sh = sctp_hdr(skb); if (ip_hdr(skb)->version == 4) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = sh->source; addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr; } else { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_port = sh->source; addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb); else addr->v6.sin6_scope_id = 0; } *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr); } /* Do we support this AF? */ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) { switch (family) { case AF_INET6: return 1; /* v4-mapped-v6 addresses */ case AF_INET: if (!ipv6_only_sock(sctp_opt2sk(sp))) return 1; fallthrough; default: return 0; } } /* Address matching with wildcards allowed. This extra level * of indirection lets us choose whether a PF_INET6 should * disallow any v4 addresses if we so choose. */ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2, struct sctp_sock *opt) { struct sock *sk = sctp_opt2sk(opt); struct sctp_af *af1, *af2; af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); if (!af1 || !af2) return 0; /* If the socket is IPv6 only, v4 addrs will not match */ if (ipv6_only_sock(sk) && af1 != af2) return 0; /* Today, wildcard AF_INET/AF_INET6. */ if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET) return addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr; return __sctp_v6_cmp_addr(addr1, addr2); } /* Verify that the provided sockaddr looks bindable. Common verification, * has already been taken care of. */ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) { struct sctp_af *af; /* ASSERT: address family has already been verified. */ if (addr->sa.sa_family != AF_INET6) af = sctp_get_af_specific(addr->sa.sa_family); else { int type = ipv6_addr_type(&addr->v6.sin6_addr); struct net_device *dev; if (type & IPV6_ADDR_LINKLOCAL) { struct net *net; if (!addr->v6.sin6_scope_id) return 0; net = sock_net(&opt->inet.sk); rcu_read_lock(); dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); if (!dev || !(ipv6_can_nonlocal_bind(net, &opt->inet) || ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0))) { rcu_read_unlock(); return 0; } rcu_read_unlock(); } af = opt->pf->af; } return af->available(addr, opt); } /* Verify that the provided sockaddr looks sendable. Common verification, * has already been taken care of. */ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) { struct sctp_af *af = NULL; /* ASSERT: address family has already been verified. */ if (addr->sa.sa_family != AF_INET6) af = sctp_get_af_specific(addr->sa.sa_family); else { int type = ipv6_addr_type(&addr->v6.sin6_addr); struct net_device *dev; if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(&opt->inet.sk), addr->v6.sin6_scope_id); rcu_read_unlock(); if (!dev) return 0; } af = opt->pf->af; } return af != NULL; } /* Fill in Supported Address Type information for INIT and INIT-ACK * chunks. 
Note: In the future, we may want to look at sock options * to determine whether a PF_INET6 socket really wants to have IPV4 * addresses. * Returns number of addresses supported. */ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, __be16 *types) { types[0] = SCTP_PARAM_IPV6_ADDRESS; if (!opt || !ipv6_only_sock(sctp_opt2sk(opt))) { types[1] = SCTP_PARAM_IPV4_ADDRESS; return 2; } return 1; } /* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */ static int sctp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { int rc; rc = inet6_getname(sock, uaddr, peer); if (rc < 0) return rc; rc = sctp_v6_addr_to_user(sctp_sk(sock->sk), (union sctp_addr *)uaddr); return rc; } static const struct proto_ops inet6_seqpacket_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, .poll = sctp_poll, .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, .listen = sctp_inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static struct inet_protosw sctpv6_seqpacket_protosw = { .type = SOCK_SEQPACKET, .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctpv6_stream_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, .flags = SCTP_PROTOSW_FLAG, }; static int sctp6_rcv(struct sk_buff *skb) { SCTP_INPUT_CB(skb)->encap_port = 0; return sctp_rcv(skb) ? -1 : 0; } static const struct inet6_protocol sctpv6_protocol = { .handler = sctp6_rcv, .err_handler = sctp_v6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; static struct sctp_af sctp_af_inet6 = { .sa_family = AF_INET6, .sctp_xmit = sctp_v6_xmit, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .get_dst = sctp_v6_get_dst, .get_saddr = sctp_v6_get_saddr, .copy_addrlist = sctp_v6_copy_addrlist, .from_skb = sctp_v6_from_skb, .from_sk = sctp_v6_from_sk, .from_addr_param = sctp_v6_from_addr_param, .to_addr_param = sctp_v6_to_addr_param, .cmp_addr = sctp_v6_cmp_addr, .scope = sctp_v6_scope, .addr_valid = sctp_v6_addr_valid, .inaddr_any = sctp_v6_inaddr_any, .is_any = sctp_v6_is_any, .available = sctp_v6_available, .skb_iif = sctp_v6_skb_iif, .skb_sdif = sctp_v6_skb_sdif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, .ecn_capable = sctp_v6_ecn_capable, .net_header_len = sizeof(struct ipv6hdr), .sockaddr_len = sizeof(struct sockaddr_in6), .ip_options_len = sctp_v6_ip_options_len, }; static struct sctp_pf sctp_pf_inet6 = { .event_msgname = sctp_inet6_event_msgname, .skb_msgname = sctp_inet6_skb_msgname, .af_supported = sctp_inet6_af_supported, .cmp_addr = sctp_inet6_cmp_addr, .bind_verify = sctp_inet6_bind_verify, .send_verify = sctp_inet6_send_verify, .supported_addrs = sctp_inet6_supported_addrs, .create_accept_sk = sctp_v6_create_accept_sk, .addr_to_user = sctp_v6_addr_to_user, .to_sk_saddr = sctp_v6_to_sk_saddr, .to_sk_daddr = sctp_v6_to_sk_daddr, .copy_ip_options = sctp_v6_copy_ip_options, .af = &sctp_af_inet6, }; /* Initialize IPv6 support and register with socket layer. */ void sctp_v6_pf_init(void) { /* Register the SCTP specific PF_INET6 functions. 
*/ sctp_register_pf(&sctp_pf_inet6, PF_INET6); /* Register the SCTP specific AF_INET6 functions. */ sctp_register_af(&sctp_af_inet6); } void sctp_v6_pf_exit(void) { list_del(&sctp_af_inet6.list); } /* Initialize IPv6 support and register with socket layer. */ int sctp_v6_protosw_init(void) { int rc; rc = proto_register(&sctpv6_prot, 1); if (rc) return rc; /* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */ inet6_register_protosw(&sctpv6_seqpacket_protosw); inet6_register_protosw(&sctpv6_stream_protosw); return 0; } void sctp_v6_protosw_exit(void) { inet6_unregister_protosw(&sctpv6_seqpacket_protosw); inet6_unregister_protosw(&sctpv6_stream_protosw); proto_unregister(&sctpv6_prot); } /* Register with inet6 layer. */ int sctp_v6_add_protocol(void) { /* Register notifier for inet6 address additions/deletions. */ register_inet6addr_notifier(&sctp_inet6addr_notifier); if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) return -EAGAIN; return 0; } /* Unregister with inet6 layer. */ void sctp_v6_del_protocol(void) { inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP); unregister_inet6addr_notifier(&sctp_inet6addr_notifier); }
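How the SCTP core calls the registration entry points above is not shown in this file. The sketch below is only an assumed ordering consistent with the comments (PF/AF glue first, then the protosw entries, then the inet6 protocol hook), with prototypes assumed to come from net/sctp/sctp.h and error handling reduced to the minimum.

#include <net/sctp/sctp.h>

static int __init sctp_ipv6_bringup_sketch(void)
{
	int err;

	sctp_v6_pf_init();		/* register sctp_pf_inet6 / sctp_af_inet6 */

	err = sctp_v6_protosw_init();	/* SOCK_SEQPACKET + SOCK_STREAM protosw */
	if (err)
		return err;

	err = sctp_v6_add_protocol();	/* inet6 protocol handler + addr notifier */
	if (err)
		sctp_v6_protosw_exit();
	return err;
}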
/* * Linux Security Module interfaces * * Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com> * Copyright (C) 2001 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com> * Copyright (C) 2001 James Morris <jmorris@intercode.com.au> * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) * Copyright (C) 2015 Intel Corporation. * Copyright (C) 2015 Casey Schaufler <casey@schaufler-ca.com> * Copyright (C) 2016 Mellanox Technologies * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Due to this file being licensed under the GPL there is controversy over * whether this permits you to write a module that #includes this file * without placing your module under the GPL. Please consult a lawyer for * advice before doing this. * */ #ifndef __LINUX_LSM_HOOKS_H #define __LINUX_LSM_HOOKS_H #include <uapi/linux/lsm.h> #include <linux/security.h> #include <linux/init.h> #include <linux/rculist.h> #include <linux/xattr.h> #include <linux/static_call.h> #include <linux/unroll.h> #include <linux/jump_label.h> #include <linux/lsm_count.h> union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); #include "lsm_hook_defs.h" #undef LSM_HOOK void *lsm_func_addr; }; /* * @key: static call key as defined by STATIC_CALL_KEY * @trampoline: static call trampoline as defined by STATIC_CALL_TRAMP * @hl: The security_hook_list as initialized by the owning LSM. * @active: Enabled when the static call has an LSM hook associated. */ struct lsm_static_call { struct static_call_key *key; void *trampoline; struct security_hook_list *hl; /* this needs to be true or false based on what the key defaults to */ struct static_key_false *active; } __randomize_layout; /* * Table of the static calls for each LSM hook. * Once the LSMs are initialized, their callbacks will be copied to these * tables such that the calls are filled backwards (from last to first). * This way, we can jump directly to the first used static call, and execute * all of them after. This essentially makes the entry point * dynamic to adapt the number of static calls to the number of callbacks. */ struct lsm_static_calls_table { #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ struct lsm_static_call NAME[MAX_LSM_COUNT]; #include <linux/lsm_hook_defs.h> #undef LSM_HOOK } __packed __randomize_layout; /** * struct lsm_id - Identify a Linux Security Module. * @name: name of the LSM, must be approved by the LSM maintainers * @id: LSM ID number from uapi/linux/lsm.h * * Contains the information that identifies the LSM.
*/ struct lsm_id { const char *name; u64 id; }; /* * Security module hook list structure. * For use with generic list macros for common operations. * * struct security_hook_list - Contents of a cacheable, mappable object. * @scalls: The beginning of the array of static calls assigned to this hook. * @hook: The callback for the hook. * @lsm: The name of the lsm that owns this hook. */ struct security_hook_list { struct lsm_static_call *scalls; union security_list_options hook; const struct lsm_id *lsmid; } __randomize_layout; /* * Security blob size or offset data. */ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_ib; int lbs_inode; int lbs_sock; int lbs_superblock; int lbs_ipc; int lbs_key; int lbs_msg_msg; int lbs_perf_event; int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; int lbs_bdev; }; /* * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void * LSM hooks (in include/linux/lsm_hook_defs.h). */ #define LSM_RET_VOID ((void) 0) /* * Initializing a security_hook_list structure takes * up a lot of space in a source file. This macro takes * care of the common case and reduces the amount of * text involved. */ #define LSM_HOOK_INIT(NAME, HOOK) \ { \ .scalls = static_calls_table.NAME, \ .hook = { .NAME = HOOK } \ } extern void security_add_hooks(struct security_hook_list *hooks, int count, const struct lsm_id *lsmid); #define LSM_FLAG_LEGACY_MAJOR BIT(0) #define LSM_FLAG_EXCLUSIVE BIT(1) enum lsm_order { LSM_ORDER_FIRST = -1, /* This is only for capabilities. */ LSM_ORDER_MUTABLE = 0, LSM_ORDER_LAST = 1, /* This is only for integrity. */ }; struct lsm_info { const char *name; /* Required. */ enum lsm_order order; /* Optional: default is LSM_ORDER_MUTABLE */ unsigned long flags; /* Optional: flags describing LSM */ int *enabled; /* Optional: controlled by CONFIG_LSM */ int (*init)(void); /* Required. */ struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */ }; #define DEFINE_LSM(lsm) \ static struct lsm_info __lsm_##lsm \ __used __section(".lsm_info.init") \ __aligned(sizeof(unsigned long)) #define DEFINE_EARLY_LSM(lsm) \ static struct lsm_info __early_lsm_##lsm \ __used __section(".early_lsm_info.init") \ __aligned(sizeof(unsigned long)) /* DO NOT tamper with these variables outside of the LSM framework */ extern char *lsm_names; extern struct lsm_static_calls_table static_calls_table __ro_after_init; extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; /** * lsm_get_xattr_slot - Return the next available slot and increment the index * @xattrs: array storing LSM-provided xattrs * @xattr_count: number of already stored xattrs (updated) * * Retrieve the first available slot in the @xattrs array to fill with an xattr, * and increment @xattr_count. * * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. */ static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, int *xattr_count) { if (unlikely(!xattrs)) return NULL; return &xattrs[(*xattr_count)++]; } #endif /* ! __LINUX_LSM_HOOKS_H */
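A minimal, hedged sketch of how an LSM wires itself up with the pieces above (LSM_HOOK_INIT(), security_add_hooks(), DEFINE_LSM()). The "example" name, the single file_open hook and the LSM_ID_UNDEF id are placeholders modeled loosely on the smaller in-tree LSMs, not a definitive implementation.

#include <linux/lsm_hooks.h>
#include <uapi/linux/lsm.h>

static int example_file_open(struct file *file)
{
	return 0;	/* allow everything; a real LSM applies policy here */
}

static const struct lsm_id example_lsmid = {
	.name = "example",
	.id = LSM_ID_UNDEF,	/* a real LSM uses its assigned LSM_ID_* value */
};

static struct security_hook_list example_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(file_open, example_file_open),
};

static int __init example_lsm_init(void)
{
	security_add_hooks(example_hooks, ARRAY_SIZE(example_hooks),
			   &example_lsmid);
	return 0;
}

DEFINE_LSM(example) = {
	.name = "example",
	.init = example_lsm_init,
};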
// SPDX-License-Identifier: GPL-2.0 #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/exportfs.h> #include <linux/fs_struct.h> #include <linux/fsnotify.h> #include <linux/personality.h> #include <linux/uaccess.h> #include <linux/compat.h> #include "internal.h" #include "mount.h" static long do_sys_name_to_handle(const struct path *path, struct file_handle __user *ufh, void __user *mnt_id, bool unique_mntid, int fh_flags) { long retval; struct file_handle f_handle; int handle_dwords, handle_bytes; struct file_handle *handle = NULL; /* * We need to make sure the file system supports decoding of * the file handle if a decodeable file handle was requested. */ if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags)) return -EOPNOTSUPP; /* * A request to encode a connectable handle for a disconnected dentry * is unexpected since AT_EMPTY_PATH is not allowed.
*/ if (fh_flags & EXPORT_FH_CONNECTABLE && WARN_ON(path->dentry->d_flags & DCACHE_DISCONNECTED)) return -EINVAL; if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) return -EFAULT; if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) return -ENOMEM; /* convert handle size to multiple of sizeof(u32) */ handle_dwords = f_handle.handle_bytes >> 2; /* Encode a possibly decodeable/connectable file handle */ retval = exportfs_encode_fh(path->dentry, (struct fid *)handle->f_handle, &handle_dwords, fh_flags); handle->handle_type = retval; /* convert handle size to bytes */ handle_bytes = handle_dwords * sizeof(u32); handle->handle_bytes = handle_bytes; if ((handle->handle_bytes > f_handle.handle_bytes) || (retval == FILEID_INVALID) || (retval < 0)) { /* As per old exportfs_encode_fh documentation * we could return ENOSPC to indicate overflow * But file system returned 255 always. So handle * both the values */ if (retval == FILEID_INVALID || retval == -ENOSPC) retval = -EOVERFLOW; /* * set the handle size to zero so we copy only * non variable part of the file_handle */ handle_bytes = 0; } else { /* * When asked to encode a connectable file handle, encode this * property in the file handle itself, so that we later know * how to decode it. * For sanity, also encode in the file handle if the encoded * object is a directory and verify this during decode, because * decoding directory file handles is quite different than * decoding connectable non-directory file handles. */ if (fh_flags & EXPORT_FH_CONNECTABLE) { handle->handle_type |= FILEID_IS_CONNECTABLE; if (d_is_dir(path->dentry)) fh_flags |= FILEID_IS_DIR; } retval = 0; } /* copy the mount id */ if (unique_mntid) { if (put_user(real_mount(path->mnt)->mnt_id_unique, (u64 __user *) mnt_id)) retval = -EFAULT; } else { if (put_user(real_mount(path->mnt)->mnt_id, (int __user *) mnt_id)) retval = -EFAULT; } /* copy the handle */ if (retval != -EFAULT && copy_to_user(ufh, handle, struct_size(handle, f_handle, handle_bytes))) retval = -EFAULT; kfree(handle); return retval; } /** * sys_name_to_handle_at: convert name to handle * @dfd: directory relative to which name is interpreted if not absolute * @name: name that should be converted to handle. * @handle: resulting file handle * @mnt_id: mount id of the file system containing the file * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int) * @flag: flag value to indicate whether to follow symlink or not * and whether a decodable file handle is required. * * @handle->handle_size indicate the space available to store the * variable part of the file handle in bytes. If there is not * enough space, the field is updated to return the minimum * value required. */ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, void __user *, mnt_id, int, flag) { struct path path; int lookup_flags; int fh_flags = 0; int err; if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID | AT_HANDLE_MNT_ID_UNIQUE | AT_HANDLE_CONNECTABLE)) return -EINVAL; /* * AT_HANDLE_FID means there is no intention to decode file handle * AT_HANDLE_CONNECTABLE means there is an intention to decode a * connected fd (with known path), so these flags are conflicting. * AT_EMPTY_PATH could be used along with a dfd that refers to a * disconnected non-directory, which cannot be used to encode a * connectable file handle, because its parent is unknown. 
*/ if (flag & AT_HANDLE_CONNECTABLE && flag & (AT_HANDLE_FID | AT_EMPTY_PATH)) return -EINVAL; else if (flag & AT_HANDLE_FID) fh_flags |= EXPORT_FH_FID; else if (flag & AT_HANDLE_CONNECTABLE) fh_flags |= EXPORT_FH_CONNECTABLE; lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; err = user_path_at(dfd, name, lookup_flags, &path); if (!err) { err = do_sys_name_to_handle(&path, handle, mnt_id, flag & AT_HANDLE_MNT_ID_UNIQUE, fh_flags); path_put(&path); } return err; } static int get_path_from_fd(int fd, struct path *root) { if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); *root = fs->pwd; path_get(root); spin_unlock(&fs->lock); } else { CLASS(fd, f)(fd); if (fd_empty(f)) return -EBADF; *root = fd_file(f)->f_path; path_get(root); } return 0; } static int vfs_dentry_acceptable(void *context, struct dentry *dentry) { struct handle_to_path_ctx *ctx = context; struct user_namespace *user_ns = current_user_ns(); struct dentry *d, *root = ctx->root.dentry; struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt); int retval = 0; if (!root) return 1; /* Old permission model with global CAP_DAC_READ_SEARCH. */ if (!ctx->flags) return 1; /* * It's racy as we're not taking rename_lock but we're able to ignore * permissions and we just need an approximation whether we were able * to follow a path to the file. * * It's also potentially expensive on some filesystems especially if * there is a deep path. */ d = dget(dentry); while (d != root && !IS_ROOT(d)) { struct dentry *parent = dget_parent(d); /* * We know that we have the ability to override DAC permissions * as we've verified this earlier via CAP_DAC_READ_SEARCH. But * we also need to make sure that there aren't any unmapped * inodes in the path that would prevent us from reaching the * file. */ if (!privileged_wrt_inode_uidgid(user_ns, idmap, d_inode(parent))) { dput(d); dput(parent); return retval; } dput(d); d = parent; } if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root) retval = 1; /* * exportfs_decode_fh_raw() does not call acceptable() callback with * a disconnected directory dentry, so we should have reached either * mount fd directory or sb root. */ if (ctx->fh_flags & EXPORT_FH_DIR_ONLY) WARN_ON_ONCE(d != root && d != root->d_sb->s_root); dput(d); return retval; } static int do_handle_to_path(struct file_handle *handle, struct path *path, struct handle_to_path_ctx *ctx) { int handle_dwords; struct vfsmount *mnt = ctx->root.mnt; struct dentry *dentry; /* change the handle size to multiple of sizeof(u32) */ handle_dwords = handle->handle_bytes >> 2; dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle, handle_dwords, handle->handle_type, ctx->fh_flags, vfs_dentry_acceptable, ctx); if (IS_ERR_OR_NULL(dentry)) { if (dentry == ERR_PTR(-ENOMEM)) return -ENOMEM; return -ESTALE; } path->dentry = dentry; path->mnt = mntget(mnt); return 0; } static inline int may_decode_fh(struct handle_to_path_ctx *ctx, unsigned int o_flags) { struct path *root = &ctx->root; if (capable(CAP_DAC_READ_SEARCH)) return 0; /* * Allow relaxed permissions of file handles if the caller has * the ability to mount the filesystem or create a bind-mount of * the provided @mountdirfd. * * In both cases the caller may be able to get an unobstructed * way to the encoded file handle. If the caller is only able to * create a bind-mount we need to verify that there are no * locked mounts on top of it that could prevent us from getting * to the encoded file. 
* * In principle, locked mounts can prevent the caller from * mounting the filesystem but that only applies to procfs and * sysfs neither of which support decoding file handles. * * Restrict to O_DIRECTORY to provide a deterministic API that * avoids a confusing api in the face of disconnected non-dir * dentries. * * There's only one dentry for each directory inode (VFS rule)... */ if (!(o_flags & O_DIRECTORY)) return -EPERM; if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) ctx->flags = HANDLE_CHECK_PERMS; else if (is_mounted(root->mnt) && ns_capable(real_mount(root->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN) && !has_locked_children(real_mount(root->mnt), root->dentry)) ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; else return -EPERM; /* Are we able to override DAC permissions? */ if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) return -EPERM; ctx->fh_flags = EXPORT_FH_DIR_ONLY; return 0; } static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, struct path *path, unsigned int o_flags) { int retval = 0; struct file_handle f_handle; struct file_handle *handle = NULL; struct handle_to_path_ctx ctx = {}; const struct export_operations *eops; retval = get_path_from_fd(mountdirfd, &ctx.root); if (retval) goto out_err; eops = ctx.root.mnt->mnt_sb->s_export_op; if (eops && eops->permission) retval = eops->permission(&ctx, o_flags); else retval = may_decode_fh(&ctx, o_flags); if (retval) goto out_path; if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { retval = -EFAULT; goto out_path; } if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || (f_handle.handle_bytes == 0)) { retval = -EINVAL; goto out_path; } if (f_handle.handle_type < 0 || FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) { retval = -EINVAL; goto out_path; } handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { retval = -ENOMEM; goto out_path; } /* copy the full handle */ *handle = f_handle; if (copy_from_user(&handle->f_handle, &ufh->f_handle, f_handle.handle_bytes)) { retval = -EFAULT; goto out_handle; } /* * If handle was encoded with AT_HANDLE_CONNECTABLE, verify that we * are decoding an fd with connected path, which is accessible from * the mount fd path. */ if (f_handle.handle_type & FILEID_IS_CONNECTABLE) { ctx.fh_flags |= EXPORT_FH_CONNECTABLE; ctx.flags |= HANDLE_CHECK_SUBTREE; } if (f_handle.handle_type & FILEID_IS_DIR) ctx.fh_flags |= EXPORT_FH_DIR_ONLY; /* Filesystem code should not be exposed to user flags */ handle->handle_type &= ~FILEID_USER_FLAGS_MASK; retval = do_handle_to_path(handle, path, &ctx); out_handle: kfree(handle); out_path: path_put(&ctx.root); out_err: return retval; } static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag) { long retval = 0; struct path path __free(path_put) = {}; struct file *file; const struct export_operations *eops; retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) return retval; CLASS(get_unused_fd, fd)(O_CLOEXEC); if (fd < 0) return fd; eops = path.mnt->mnt_sb->s_export_op; if (eops->open) file = eops->open(&path, open_flag); else file = file_open_root(&path, "", open_flag, 0); if (IS_ERR(file)) return PTR_ERR(file); fd_install(fd, file); return take_fd(fd); } /** * sys_open_by_handle_at: Open the file handle * @mountdirfd: directory file descriptor * @handle: file handle to be opened * @flags: open flags. * * @mountdirfd indicate the directory file descriptor * of the mount point. 
 * The file handle is decoded relative to the vfsmount pointed to by
 * @mountdirfd. The @flags value is the same as the open(2) flags.
 */
SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
		struct file_handle __user *, handle,
		int, flags)
{
	long ret;

	if (force_o_largefile())
		flags |= O_LARGEFILE;

	ret = do_handle_open(mountdirfd, handle, flags);
	return ret;
}

#ifdef CONFIG_COMPAT
/*
 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
 * doesn't set the O_LARGEFILE flag.
 */
COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
			     struct file_handle __user *, handle, int, flags)
{
	return do_handle_open(mountdirfd, handle, flags);
}
#endif
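/*
 * Userspace usage sketch (not part of the kernel sources above): encode a
 * path into an opaque handle with name_to_handle_at(2) and reopen it later
 * with open_by_handle_at(2). Assumes the glibc wrappers (glibc >= 2.14) and
 * a caller that passes the permission checks in may_decode_fh(), e.g. one
 * holding CAP_DAC_READ_SEARCH. Minimal error handling only.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	struct file_handle *fh;
	int mount_id, mount_fd, fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <path> <mount-point>\n", argv[0]);
		return 1;
	}

	/*
	 * Probe with handle_bytes == 0: the kernel reports the needed size
	 * back through fh->handle_bytes and fails with EOVERFLOW.
	 */
	fh = malloc(sizeof(*fh));
	if (!fh)
		return 1;
	fh->handle_bytes = 0;
	if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) != -1 ||
	    errno != EOVERFLOW) {
		fprintf(stderr, "unexpected result from size probe\n");
		return 1;
	}

	/* Allocate the reported size and encode the handle for real. */
	fh = realloc(fh, sizeof(*fh) + fh->handle_bytes);
	if (!fh)
		return 1;
	if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) == -1) {
		perror("name_to_handle_at");
		return 1;
	}
	printf("handle_type=%d handle_bytes=%u mount_id=%d\n",
	       fh->handle_type, fh->handle_bytes, mount_id);

	/* Decode: the mount point fd plays the role of @mountdirfd above. */
	mount_fd = open(argv[2], O_RDONLY | O_DIRECTORY);
	if (mount_fd == -1) {
		perror("open mount point");
		return 1;
	}
	fd = open_by_handle_at(mount_fd, fh, O_RDONLY);
	if (fd == -1) {
		perror("open_by_handle_at");
		return 1;
	}
	/* fd now refers to the same inode as argv[1]. */
	close(fd);
	close(mount_fd);
	free(fh);
	return 0;
}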
// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 */

#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/nls.h>

#include "debug.h"
#include "ntfs.h"
#include "ntfs_fs.h"

// clang-format off
const struct cpu_str NAME_MFT = {
	4, 0, { '$', 'M', 'F', 'T' },
};
const struct cpu_str NAME_MIRROR = {
	8, 0, { '$', 'M', 'F', 'T', 'M', 'i', 'r', 'r' },
};
const struct cpu_str NAME_LOGFILE = {
	8, 0, { '$', 'L', 'o', 'g', 'F', 'i', 'l', 'e' },
};
const struct cpu_str NAME_VOLUME = {
	7, 0, { '$', 'V', 'o', 'l', 'u', 'm', 'e' },
};
const struct cpu_str NAME_ATTRDEF = {
	8, 0, { '$', 'A', 't', 't', 'r', 'D', 'e', 'f' },
};
const struct cpu_str NAME_ROOT = {
	1, 0, { '.'
}, }; const struct cpu_str NAME_BITMAP = { 7, 0, { '$', 'B', 'i', 't', 'm', 'a', 'p' }, }; const struct cpu_str NAME_BOOT = { 5, 0, { '$', 'B', 'o', 'o', 't' }, }; const struct cpu_str NAME_BADCLUS = { 8, 0, { '$', 'B', 'a', 'd', 'C', 'l', 'u', 's' }, }; const struct cpu_str NAME_QUOTA = { 6, 0, { '$', 'Q', 'u', 'o', 't', 'a' }, }; const struct cpu_str NAME_SECURE = { 7, 0, { '$', 'S', 'e', 'c', 'u', 'r', 'e' }, }; const struct cpu_str NAME_UPCASE = { 7, 0, { '$', 'U', 'p', 'C', 'a', 's', 'e' }, }; const struct cpu_str NAME_EXTEND = { 7, 0, { '$', 'E', 'x', 't', 'e', 'n', 'd' }, }; const struct cpu_str NAME_OBJID = { 6, 0, { '$', 'O', 'b', 'j', 'I', 'd' }, }; const struct cpu_str NAME_REPARSE = { 8, 0, { '$', 'R', 'e', 'p', 'a', 'r', 's', 'e' }, }; const struct cpu_str NAME_USNJRNL = { 8, 0, { '$', 'U', 's', 'n', 'J', 'r', 'n', 'l' }, }; const __le16 BAD_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('B'), cpu_to_le16('a'), cpu_to_le16('d'), }; const __le16 I30_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('I'), cpu_to_le16('3'), cpu_to_le16('0'), }; const __le16 SII_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('I'), cpu_to_le16('I'), }; const __le16 SDH_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('H'), }; const __le16 SDS_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('S'), }; const __le16 SO_NAME[2] = { cpu_to_le16('$'), cpu_to_le16('O'), }; const __le16 SQ_NAME[2] = { cpu_to_le16('$'), cpu_to_le16('Q'), }; const __le16 SR_NAME[2] = { cpu_to_le16('$'), cpu_to_le16('R'), }; #ifdef CONFIG_NTFS3_LZX_XPRESS const __le16 WOF_NAME[17] = { cpu_to_le16('W'), cpu_to_le16('o'), cpu_to_le16('f'), cpu_to_le16('C'), cpu_to_le16('o'), cpu_to_le16('m'), cpu_to_le16('p'), cpu_to_le16('r'), cpu_to_le16('e'), cpu_to_le16('s'), cpu_to_le16('s'), cpu_to_le16('e'), cpu_to_le16('d'), cpu_to_le16('D'), cpu_to_le16('a'), cpu_to_le16('t'), cpu_to_le16('a'), }; #endif static const __le16 CON_NAME[3] = { cpu_to_le16('C'), cpu_to_le16('O'), cpu_to_le16('N'), }; static const __le16 NUL_NAME[3] = { cpu_to_le16('N'), cpu_to_le16('U'), cpu_to_le16('L'), }; static const __le16 AUX_NAME[3] = { cpu_to_le16('A'), cpu_to_le16('U'), cpu_to_le16('X'), }; static const __le16 PRN_NAME[3] = { cpu_to_le16('P'), cpu_to_le16('R'), cpu_to_le16('N'), }; static const __le16 COM_NAME[3] = { cpu_to_le16('C'), cpu_to_le16('O'), cpu_to_le16('M'), }; static const __le16 LPT_NAME[3] = { cpu_to_le16('L'), cpu_to_le16('P'), cpu_to_le16('T'), }; // clang-format on /* * ntfs_fix_pre_write - Insert fixups into @rhdr before writing to disk. */ bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes) { u16 *fixup, *ptr; u16 sample; u16 fo = le16_to_cpu(rhdr->fix_off); u16 fn = le16_to_cpu(rhdr->fix_num); if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || fn * SECTOR_SIZE > bytes) { return false; } /* Get fixup pointer. */ fixup = Add2Ptr(rhdr, fo); if (*fixup >= 0x7FFF) *fixup = 1; else *fixup += 1; sample = *fixup; ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short)); while (fn--) { *++fixup = *ptr; *ptr = sample; ptr += SECTOR_SIZE / sizeof(short); } return true; } /* * ntfs_fix_post_read - Remove fixups after reading from disk. * * Return: < 0 if error, 0 if ok, 1 if need to update fixups. */ int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple) { int ret; u16 *fixup, *ptr; u16 sample, fo, fn; fo = le16_to_cpu(rhdr->fix_off); fn = simple ? ((bytes >> SECTOR_SHIFT) + 1) : le16_to_cpu(rhdr->fix_num); /* Check errors. 
*/ if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || fn * SECTOR_SIZE > bytes) { return -E_NTFS_CORRUPT; } /* Get fixup pointer. */ fixup = Add2Ptr(rhdr, fo); sample = *fixup; ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short)); ret = 0; while (fn--) { /* Test current word. */ if (*ptr != sample) { /* Fixup does not match! Is it serious error? */ ret = -E_NTFS_FIXUP; } /* Replace fixup. */ *ptr = *++fixup; ptr += SECTOR_SIZE / sizeof(short); } return ret; } /* * ntfs_extend_init - Load $Extend file. */ int ntfs_extend_init(struct ntfs_sb_info *sbi) { int err; struct super_block *sb = sbi->sb; struct inode *inode, *inode2; struct MFT_REF ref; if (sbi->volume.major_ver < 3) { ntfs_notice(sb, "Skip $Extend 'cause NTFS version"); return 0; } ref.low = cpu_to_le32(MFT_REC_EXTEND); ref.high = 0; ref.seq = cpu_to_le16(MFT_REC_EXTEND); inode = ntfs_iget5(sb, &ref, &NAME_EXTEND); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $Extend (%d).", err); inode = NULL; goto out; } /* If ntfs_iget5() reads from disk it never returns bad inode. */ if (!S_ISDIR(inode->i_mode)) { err = -EINVAL; goto out; } /* Try to find $ObjId */ inode2 = dir_search_u(inode, &NAME_OBJID, NULL); if (inode2 && !IS_ERR(inode2)) { if (is_bad_inode(inode2)) { iput(inode2); } else { sbi->objid.ni = ntfs_i(inode2); sbi->objid_no = inode2->i_ino; } } /* Try to find $Quota */ inode2 = dir_search_u(inode, &NAME_QUOTA, NULL); if (inode2 && !IS_ERR(inode2)) { sbi->quota_no = inode2->i_ino; iput(inode2); } /* Try to find $Reparse */ inode2 = dir_search_u(inode, &NAME_REPARSE, NULL); if (inode2 && !IS_ERR(inode2)) { sbi->reparse.ni = ntfs_i(inode2); sbi->reparse_no = inode2->i_ino; } /* Try to find $UsnJrnl */ inode2 = dir_search_u(inode, &NAME_USNJRNL, NULL); if (inode2 && !IS_ERR(inode2)) { sbi->usn_jrnl_no = inode2->i_ino; iput(inode2); } err = 0; out: iput(inode); return err; } int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi) { int err = 0; struct super_block *sb = sbi->sb; bool initialized = false; struct MFT_REF ref; struct inode *inode; /* Check for 4GB. */ if (ni->vfs_inode.i_size >= 0x100000000ull) { ntfs_err(sb, "\x24LogFile is large than 4G."); err = -EINVAL; goto out; } sbi->flags |= NTFS_FLAGS_LOG_REPLAYING; ref.low = cpu_to_le32(MFT_REC_MFT); ref.high = 0; ref.seq = cpu_to_le16(1); inode = ntfs_iget5(sb, &ref, NULL); if (IS_ERR(inode)) inode = NULL; if (!inode) { /* Try to use MFT copy. */ u64 t64 = sbi->mft.lbo; sbi->mft.lbo = sbi->mft.lbo2; inode = ntfs_iget5(sb, &ref, NULL); sbi->mft.lbo = t64; if (IS_ERR(inode)) inode = NULL; } if (!inode) { err = -EINVAL; ntfs_err(sb, "Failed to load $MFT."); goto out; } sbi->mft.ni = ntfs_i(inode); /* LogFile should not contains attribute list. */ err = ni_load_all_mi(sbi->mft.ni); if (!err) err = log_replay(ni, &initialized); iput(inode); sbi->mft.ni = NULL; sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) { err = 0; goto out; } if (sb_rdonly(sb) || !initialized) goto out; /* Fill LogFile by '-1' if it is initialized. */ err = ntfs_bio_fill_1(sbi, &ni->file.run); out: sbi->flags &= ~NTFS_FLAGS_LOG_REPLAYING; return err; } /* * ntfs_look_for_free_space - Look for a free space in bitmap. 
*/ int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, CLST *new_lcn, CLST *new_len, enum ALLOCATE_OPT opt) { int err; CLST alen; struct super_block *sb = sbi->sb; size_t alcn, zlen, zeroes, zlcn, zlen2, ztrim, new_zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); if (opt & ALLOCATE_MFT) { zlen = wnd_zone_len(wnd); if (!zlen) { err = ntfs_refresh_zone(sbi); if (err) goto up_write; zlen = wnd_zone_len(wnd); } if (!zlen) { ntfs_err(sbi->sb, "no free space to extend mft"); err = -ENOSPC; goto up_write; } lcn = wnd_zone_bit(wnd); alen = min_t(CLST, len, zlen); wnd_zone_set(wnd, lcn + alen, zlen - alen); err = wnd_set_used(wnd, lcn, alen); if (err) goto up_write; alcn = lcn; goto space_found; } /* * 'Cause cluster 0 is always used this value means that we should use * cached value of 'next_free_lcn' to improve performance. */ if (!lcn) lcn = sbi->used.next_free_lcn; if (lcn >= wnd->nbits) lcn = 0; alen = wnd_find(wnd, len, lcn, BITMAP_FIND_MARK_AS_USED, &alcn); if (alen) goto space_found; /* Try to use clusters from MftZone. */ zlen = wnd_zone_len(wnd); zeroes = wnd_zeroes(wnd); /* Check too big request */ if (len > zeroes + zlen || zlen <= NTFS_MIN_MFT_ZONE) { err = -ENOSPC; goto up_write; } /* How many clusters to cat from zone. */ zlcn = wnd_zone_bit(wnd); zlen2 = zlen >> 1; ztrim = clamp_val(len, zlen2, zlen); new_zlen = max_t(size_t, zlen - ztrim, NTFS_MIN_MFT_ZONE); wnd_zone_set(wnd, zlcn, new_zlen); /* Allocate continues clusters. */ alen = wnd_find(wnd, len, 0, BITMAP_FIND_MARK_AS_USED | BITMAP_FIND_FULL, &alcn); if (!alen) { err = -ENOSPC; goto up_write; } space_found: err = 0; *new_len = alen; *new_lcn = alcn; ntfs_unmap_meta(sb, alcn, alen); /* Set hint for next requests. */ if (!(opt & ALLOCATE_MFT)) sbi->used.next_free_lcn = alcn + alen; up_write: up_write(&wnd->rw_lock); return err; } /* * ntfs_check_for_free_space * * Check if it is possible to allocate 'clen' clusters and 'mlen' Mft records */ bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen) { size_t free, zlen, avail; struct wnd_bitmap *wnd; wnd = &sbi->used.bitmap; down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); free = wnd_zeroes(wnd); zlen = min_t(size_t, NTFS_MIN_MFT_ZONE, wnd_zone_len(wnd)); up_read(&wnd->rw_lock); if (free < zlen + clen) return false; avail = free - (zlen + clen); wnd = &sbi->mft.bitmap; down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); free = wnd_zeroes(wnd); zlen = wnd_zone_len(wnd); up_read(&wnd->rw_lock); if (free >= zlen + mlen) return true; return avail >= bytes_to_cluster(sbi, mlen << sbi->record_bits); } /* * ntfs_extend_mft - Allocate additional MFT records. * * sbi->mft.bitmap is locked for write. * * NOTE: recursive: * ntfs_look_free_mft -> * ntfs_extend_mft -> * attr_set_size -> * ni_insert_nonresident -> * ni_insert_attr -> * ni_ins_attr_ext -> * ntfs_look_free_mft -> * ntfs_extend_mft * * To avoid recursive always allocate space for two new MFT records * see attrib.c: "at least two MFT to avoid recursive loop". */ static int ntfs_extend_mft(struct ntfs_sb_info *sbi) { int err; struct ntfs_inode *ni = sbi->mft.ni; size_t new_mft_total; u64 new_mft_bytes, new_bitmap_bytes; struct ATTRIB *attr; struct wnd_bitmap *wnd = &sbi->mft.bitmap; new_mft_total = ALIGN(wnd->nbits + NTFS_MFT_INCREASE_STEP, 128); new_mft_bytes = (u64)new_mft_total << sbi->record_bits; /* Step 1: Resize $MFT::DATA. 
*/ down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_mft_bytes, NULL, false, &attr); if (err) { up_write(&ni->file.run_lock); goto out; } attr->nres.valid_size = attr->nres.data_size; new_mft_total = le64_to_cpu(attr->nres.alloc_size) >> sbi->record_bits; ni->mi.dirty = true; /* Step 2: Resize $MFT::BITMAP. */ new_bitmap_bytes = ntfs3_bitmap_size(new_mft_total); err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run, new_bitmap_bytes, &new_bitmap_bytes, true, NULL); /* Refresh MFT Zone if necessary. */ down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS); ntfs_refresh_zone(sbi); up_write(&sbi->used.bitmap.rw_lock); up_write(&ni->file.run_lock); if (err) goto out; err = wnd_extend(wnd, new_mft_total); if (err) goto out; ntfs_clear_mft_tail(sbi, sbi->mft.used, new_mft_total); err = _ni_write_inode(&ni->vfs_inode, 0); out: return err; } /* * ntfs_look_free_mft - Look for a free MFT record. */ int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, struct ntfs_inode *ni, struct mft_inode **mi) { int err = 0; size_t zbit, zlen, from, to, fr; size_t mft_total; struct MFT_REF ref; struct super_block *sb = sbi->sb; struct wnd_bitmap *wnd = &sbi->mft.bitmap; u32 ir; static_assert(sizeof(sbi->mft.reserved_bitmap) * 8 >= MFT_REC_FREE - MFT_REC_RESERVED); if (!mft) down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); zlen = wnd_zone_len(wnd); /* Always reserve space for MFT. */ if (zlen) { if (mft) { zbit = wnd_zone_bit(wnd); *rno = zbit; wnd_zone_set(wnd, zbit + 1, zlen - 1); } goto found; } /* No MFT zone. Find the nearest to '0' free MFT. */ if (!wnd_find(wnd, 1, MFT_REC_FREE, 0, &zbit)) { /* Resize MFT */ mft_total = wnd->nbits; err = ntfs_extend_mft(sbi); if (!err) { zbit = mft_total; goto reserve_mft; } if (!mft || MFT_REC_FREE == sbi->mft.next_reserved) goto out; err = 0; /* * Look for free record reserved area [11-16) == * [MFT_REC_RESERVED, MFT_REC_FREE ) MFT bitmap always * marks it as used. */ if (!sbi->mft.reserved_bitmap) { /* Once per session create internal bitmap for 5 bits. */ sbi->mft.reserved_bitmap = 0xFF; ref.high = 0; for (ir = MFT_REC_RESERVED; ir < MFT_REC_FREE; ir++) { struct inode *i; struct ntfs_inode *ni; struct MFT_REC *mrec; ref.low = cpu_to_le32(ir); ref.seq = cpu_to_le16(ir); i = ntfs_iget5(sb, &ref, NULL); if (IS_ERR(i)) { next: ntfs_notice( sb, "Invalid reserved record %x", ref.low); continue; } if (is_bad_inode(i)) { iput(i); goto next; } ni = ntfs_i(i); mrec = ni->mi.mrec; if (!is_rec_base(mrec)) goto next; if (mrec->hard_links) goto next; if (!ni_std(ni)) goto next; if (ni_find_attr(ni, NULL, NULL, ATTR_NAME, NULL, 0, NULL, NULL)) goto next; __clear_bit(ir - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); } } /* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED */ zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap, MFT_REC_FREE, MFT_REC_RESERVED); if (zbit >= MFT_REC_FREE) { sbi->mft.next_reserved = MFT_REC_FREE; goto out; } zlen = 1; sbi->mft.next_reserved = zbit; } else { reserve_mft: zlen = zbit == MFT_REC_FREE ? (MFT_REC_USER - MFT_REC_FREE) : 4; if (zbit + zlen > wnd->nbits) zlen = wnd->nbits - zbit; while (zlen > 1 && !wnd_is_free(wnd, zbit, zlen)) zlen -= 1; /* [zbit, zbit + zlen) will be used for MFT itself. 
*/ from = sbi->mft.used; if (from < zbit) from = zbit; to = zbit + zlen; if (from < to) { ntfs_clear_mft_tail(sbi, from, to); sbi->mft.used = to; } } if (mft) { *rno = zbit; zbit += 1; zlen -= 1; } wnd_zone_set(wnd, zbit, zlen); found: if (!mft) { /* The request to get record for general purpose. */ if (sbi->mft.next_free < MFT_REC_USER) sbi->mft.next_free = MFT_REC_USER; for (;;) { if (sbi->mft.next_free >= sbi->mft.bitmap.nbits) { } else if (!wnd_find(wnd, 1, MFT_REC_USER, 0, &fr)) { sbi->mft.next_free = sbi->mft.bitmap.nbits; } else { *rno = fr; sbi->mft.next_free = *rno + 1; break; } err = ntfs_extend_mft(sbi); if (err) goto out; } } if (ni && !ni_add_subrecord(ni, *rno, mi)) { err = -ENOMEM; goto out; } /* We have found a record that are not reserved for next MFT. */ if (*rno >= MFT_REC_FREE) wnd_set_used(wnd, *rno, 1); else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) __set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); out: if (!mft) up_write(&wnd->rw_lock); return err; } /* * ntfs_mark_rec_free - Mark record as free. * is_mft - true if we are changing MFT */ void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft) { struct wnd_bitmap *wnd = &sbi->mft.bitmap; if (!is_mft) down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); if (rno >= wnd->nbits) goto out; if (rno >= MFT_REC_FREE) { if (!wnd_is_used(wnd, rno, 1)) ntfs_set_state(sbi, NTFS_DIRTY_ERROR); else wnd_set_free(wnd, rno, 1); } else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) { __clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); } if (rno < wnd_zone_bit(wnd)) wnd_zone_set(wnd, rno, 1); else if (rno < sbi->mft.next_free && rno >= MFT_REC_USER) sbi->mft.next_free = rno; out: if (!is_mft) up_write(&wnd->rw_lock); } /* * ntfs_clear_mft_tail - Format empty records [from, to). * * sbi->mft.bitmap is locked for write. */ int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to) { int err; u32 rs; u64 vbo; struct runs_tree *run; struct ntfs_inode *ni; if (from >= to) return 0; rs = sbi->record_size; ni = sbi->mft.ni; run = &ni->file.run; down_read(&ni->file.run_lock); vbo = (u64)from * rs; for (; from < to; from++, vbo += rs) { struct ntfs_buffers nb; err = ntfs_get_bh(sbi, run, vbo, rs, &nb); if (err) goto out; err = ntfs_write_bh(sbi, &sbi->new_rec->rhdr, &nb, 0); nb_put(&nb); if (err) goto out; } out: sbi->mft.used = from; up_read(&ni->file.run_lock); return err; } /* * ntfs_refresh_zone - Refresh MFT zone. * * sbi->used.bitmap is locked for rw. * sbi->mft.bitmap is locked for write. * sbi->mft.ni->file.run_lock for write. */ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) { CLST lcn, vcn, len; size_t lcn_s, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; struct ntfs_inode *ni = sbi->mft.ni; /* Do not change anything unless we have non empty MFT zone. */ if (wnd_zone_len(wnd)) return 0; vcn = bytes_to_cluster(sbi, (u64)sbi->mft.bitmap.nbits << sbi->record_bits); if (!run_lookup_entry(&ni->file.run, vcn - 1, &lcn, &len, NULL)) lcn = SPARSE_LCN; /* We should always find Last Lcn for MFT. */ if (lcn == SPARSE_LCN) return -EINVAL; lcn_s = lcn + 1; /* Try to allocate clusters after last MFT run. */ zlen = wnd_find(wnd, sbi->zone_max, lcn_s, 0, &lcn_s); wnd_zone_set(wnd, lcn_s, zlen); return 0; } /* * ntfs_update_mftmirr - Update $MFTMirr data. */ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) { int err; struct super_block *sb = sbi->sb; u32 blocksize, bytes; sector_t block1, block2; /* * sb can be NULL here. 
In this case sbi->flags should be 0 too. */ if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR) || unlikely(ntfs3_forced_shutdown(sb))) return; blocksize = sb->s_blocksize; bytes = sbi->mft.recs_mirr << sbi->record_bits; block1 = sbi->mft.lbo >> sb->s_blocksize_bits; block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits; for (; bytes >= blocksize; bytes -= blocksize) { struct buffer_head *bh1, *bh2; bh1 = sb_bread(sb, block1++); if (!bh1) return; bh2 = sb_getblk(sb, block2++); if (!bh2) { put_bh(bh1); return; } if (buffer_locked(bh2)) __wait_on_buffer(bh2); lock_buffer(bh2); memcpy(bh2->b_data, bh1->b_data, blocksize); set_buffer_uptodate(bh2); mark_buffer_dirty(bh2); unlock_buffer(bh2); put_bh(bh1); bh1 = NULL; err = wait ? sync_dirty_buffer(bh2) : 0; put_bh(bh2); if (err) return; } sbi->flags &= ~NTFS_FLAGS_MFTMIRR; } /* * ntfs_bad_inode * * Marks inode as bad and marks fs as 'dirty' */ void ntfs_bad_inode(struct inode *inode, const char *hint) { struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; ntfs_inode_err(inode, "%s", hint); make_bad_inode(inode); /* Avoid recursion if bad inode is $Volume. */ if (inode->i_ino != MFT_REC_VOL && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING)) { ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } } /* * ntfs_set_state * * Mount: ntfs_set_state(NTFS_DIRTY_DIRTY) * Umount: ntfs_set_state(NTFS_DIRTY_CLEAR) * NTFS error: ntfs_set_state(NTFS_DIRTY_ERROR) */ int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty) { int err; struct ATTRIB *attr; struct VOLUME_INFO *info; struct mft_inode *mi; struct ntfs_inode *ni; __le16 info_flags; /* * Do not change state if fs was real_dirty. * Do not change state if fs already dirty(clear). * Do not change any thing if mounted read only. */ if (sbi->volume.real_dirty || sb_rdonly(sbi->sb)) return 0; /* Check cached value. */ if ((dirty == NTFS_DIRTY_CLEAR ? 0 : VOLUME_FLAG_DIRTY) == (sbi->volume.flags & VOLUME_FLAG_DIRTY)) return 0; ni = sbi->volume.ni; if (!ni) return -EINVAL; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_DIRTY); attr = ni_find_attr(ni, NULL, NULL, ATTR_VOL_INFO, NULL, 0, NULL, &mi); if (!attr) { err = -EINVAL; goto out; } info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO); if (!info) { err = -EINVAL; goto out; } info_flags = info->flags; switch (dirty) { case NTFS_DIRTY_ERROR: ntfs_notice(sbi->sb, "Mark volume as dirty due to NTFS errors"); sbi->volume.real_dirty = true; fallthrough; case NTFS_DIRTY_DIRTY: info->flags |= VOLUME_FLAG_DIRTY; break; case NTFS_DIRTY_CLEAR: info->flags &= ~VOLUME_FLAG_DIRTY; break; } /* Cache current volume flags. */ if (info_flags != info->flags) { sbi->volume.flags = info->flags; mi->dirty = true; } err = 0; out: ni_unlock(ni); if (err) return err; mark_inode_dirty_sync(&ni->vfs_inode); /* verify(!ntfs_update_mftmirr()); */ /* write mft record on disk. */ err = _ni_write_inode(&ni->vfs_inode, 1); return err; } /* * security_hash - Calculates a hash of security descriptor. */ static inline __le32 security_hash(const void *sd, size_t bytes) { u32 hash = 0; const __le32 *ptr = sd; bytes >>= 2; while (bytes--) hash = ((hash >> 0x1D) | (hash << 3)) + le32_to_cpu(*ptr++); return cpu_to_le32(hash); } /* * simple wrapper for sb_bread_unmovable. 
*/ struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) { struct ntfs_sb_info *sbi = sb->s_fs_info; struct buffer_head *bh; if (unlikely(block >= sbi->volume.blocks)) { /* prevent generic message "attempt to access beyond end of device" */ ntfs_err(sb, "try to read out of volume at offset 0x%llx", (u64)block << sb->s_blocksize_bits); return NULL; } bh = sb_bread_unmovable(sb, block); if (bh) return bh; ntfs_err(sb, "failed to read volume at offset 0x%llx", (u64)block << sb->s_blocksize_bits); return NULL; } int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, const void *buf, int wait) { u32 blocksize = sb->s_blocksize; struct block_device *bdev = sb->s_bdev; sector_t block = lbo >> sb->s_blocksize_bits; u32 off = lbo & (blocksize - 1); u32 op = blocksize - off; struct buffer_head *bh; if (!wait && (sb->s_flags & SB_SYNCHRONOUS)) wait = 1; for (; bytes; block += 1, off = 0, op = blocksize) { if (op > bytes) op = bytes; if (op < blocksize) { bh = __bread(bdev, block, blocksize); if (!bh) { ntfs_err(sb, "failed to read block %llx", (u64)block); return -EIO; } } else { bh = __getblk(bdev, block, blocksize); if (!bh) return -ENOMEM; } if (buffer_locked(bh)) __wait_on_buffer(bh); lock_buffer(bh); if (buf) { memcpy(bh->b_data + off, buf, op); buf = Add2Ptr(buf, op); } else { memset(bh->b_data + off, -1, op); } set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); if (wait) { int err = sync_dirty_buffer(bh); if (err) { ntfs_err( sb, "failed to sync buffer at block %llx, error %d", (u64)block, err); put_bh(bh); return err; } } put_bh(bh); bytes -= op; } return 0; } int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, const void *buf, size_t bytes, int sync) { struct super_block *sb = sbi->sb; u8 cluster_bits = sbi->cluster_bits; u32 off = vbo & sbi->cluster_mask; CLST lcn, clen, vcn = vbo >> cluster_bits, vcn_next; u64 lbo, len; size_t idx; if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) return -ENOENT; if (lcn == SPARSE_LCN) return -EINVAL; lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; for (;;) { u32 op = min_t(u64, len, bytes); int err = ntfs_sb_write(sb, lbo, op, buf, sync); if (err) return err; bytes -= op; if (!bytes) break; vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) return -ENOENT; if (lcn == SPARSE_LCN) return -EINVAL; if (buf) buf = Add2Ptr(buf, op); lbo = ((u64)lcn << cluster_bits); len = ((u64)clen << cluster_bits); } return 0; } struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo) { struct super_block *sb = sbi->sb; u8 cluster_bits = sbi->cluster_bits; CLST lcn; u64 lbo; if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, NULL, NULL)) return ERR_PTR(-ENOENT); lbo = ((u64)lcn << cluster_bits) + (vbo & sbi->cluster_mask); return ntfs_bread(sb, lbo >> sb->s_blocksize_bits); } int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb) { int err; struct super_block *sb = sbi->sb; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; u32 off = vbo & sbi->cluster_mask; u32 nbh = 0; CLST vcn_next, vcn = vbo >> cluster_bits; CLST lcn, clen; u64 lbo, len; size_t idx; struct buffer_head *bh; if (!run) { /* First reading of $Volume + $MFTMirr + $LogFile goes here. */ if (vbo > MFT_REC_VOL * sbi->record_size) { err = -ENOENT; goto out; } /* Use absolute boot's 'MFTCluster' to read record. 
*/ lbo = vbo + sbi->mft.lbo; len = sbi->record_size; } else if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { err = -ENOENT; goto out; } else { if (lcn == SPARSE_LCN) { err = -EINVAL; goto out; } lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; } off = lbo & (blocksize - 1); if (nb) { nb->off = off; nb->bytes = bytes; } for (;;) { u32 len32 = len >= bytes ? bytes : len; sector_t block = lbo >> sb->s_blocksize_bits; do { u32 op = blocksize - off; if (op > len32) op = len32; bh = ntfs_bread(sb, block); if (!bh) { err = -EIO; goto out; } if (buf) { memcpy(buf, bh->b_data + off, op); buf = Add2Ptr(buf, op); } if (!nb) { put_bh(bh); } else if (nbh >= ARRAY_SIZE(nb->bh)) { err = -EINVAL; goto out; } else { nb->bh[nbh++] = bh; nb->nbufs = nbh; } bytes -= op; if (!bytes) return 0; len32 -= op; block += 1; off = 0; } while (len32); vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) { err = -ENOENT; goto out; } if (lcn == SPARSE_LCN) { err = -EINVAL; goto out; } lbo = ((u64)lcn << cluster_bits); len = ((u64)clen << cluster_bits); } out: if (!nbh) return err; while (nbh) { put_bh(nb->bh[--nbh]); nb->bh[nbh] = NULL; } nb->nbufs = 0; return err; } /* * ntfs_read_bh * * Return: < 0 if error, 0 if ok, -E_NTFS_FIXUP if need to update fixups. */ int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, struct NTFS_RECORD_HEADER *rhdr, u32 bytes, struct ntfs_buffers *nb) { int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb); if (err) return err; return ntfs_fix_post_read(rhdr, nb->bytes, true); } int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u32 bytes, struct ntfs_buffers *nb) { int err = 0; struct super_block *sb = sbi->sb; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; CLST vcn_next, vcn = vbo >> cluster_bits; u32 off; u32 nbh = 0; CLST lcn, clen; u64 lbo, len; size_t idx; nb->bytes = bytes; if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { err = -ENOENT; goto out; } off = vbo & sbi->cluster_mask; lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; nb->off = off = lbo & (blocksize - 1); for (;;) { u32 len32 = min_t(u64, len, bytes); sector_t block = lbo >> sb->s_blocksize_bits; do { u32 op; struct buffer_head *bh; if (nbh >= ARRAY_SIZE(nb->bh)) { err = -EINVAL; goto out; } op = blocksize - off; if (op > len32) op = len32; if (op == blocksize) { bh = sb_getblk(sb, block); if (!bh) { err = -ENOMEM; goto out; } if (buffer_locked(bh)) __wait_on_buffer(bh); set_buffer_uptodate(bh); } else { bh = ntfs_bread(sb, block); if (!bh) { err = -EIO; goto out; } } nb->bh[nbh++] = bh; bytes -= op; if (!bytes) { nb->nbufs = nbh; return 0; } block += 1; len32 -= op; off = 0; } while (len32); vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) { err = -ENOENT; goto out; } lbo = ((u64)lcn << cluster_bits); len = ((u64)clen << cluster_bits); } out: while (nbh) { put_bh(nb->bh[--nbh]); nb->bh[nbh] = NULL; } nb->nbufs = 0; return err; } int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, struct ntfs_buffers *nb, int sync) { int err = 0; struct super_block *sb = sbi->sb; u32 block_size = sb->s_blocksize; u32 bytes = nb->bytes; u32 off = nb->off; u16 fo = le16_to_cpu(rhdr->fix_off); u16 fn = le16_to_cpu(rhdr->fix_num); u32 idx; __le16 *fixup; __le16 sample; if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || fn * SECTOR_SIZE > bytes) { return -EINVAL; } for 
(idx = 0; bytes && idx < nb->nbufs; idx += 1, off = 0) { u32 op = block_size - off; char *bh_data; struct buffer_head *bh = nb->bh[idx]; __le16 *ptr, *end_data; if (op > bytes) op = bytes; if (buffer_locked(bh)) __wait_on_buffer(bh); lock_buffer(bh); bh_data = bh->b_data + off; end_data = Add2Ptr(bh_data, op); memcpy(bh_data, rhdr, op); if (!idx) { u16 t16; fixup = Add2Ptr(bh_data, fo); sample = *fixup; t16 = le16_to_cpu(sample); if (t16 >= 0x7FFF) { sample = *fixup = cpu_to_le16(1); } else { sample = cpu_to_le16(t16 + 1); *fixup = sample; } *(__le16 *)Add2Ptr(rhdr, fo) = sample; } ptr = Add2Ptr(bh_data, SECTOR_SIZE - sizeof(short)); do { *++fixup = *ptr; *ptr = sample; ptr += SECTOR_SIZE / sizeof(short); } while (ptr < end_data); set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); if (sync) { int err2 = sync_dirty_buffer(bh); if (!err && err2) err = err2; } bytes -= op; rhdr = Add2Ptr(rhdr, op); } return err; } /* * ntfs_bio_pages - Read/write pages from/to disk. */ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, struct page **pages, u32 nr_pages, u64 vbo, u32 bytes, enum req_op op) { int err = 0; struct bio *new, *bio = NULL; struct super_block *sb = sbi->sb; struct block_device *bdev = sb->s_bdev; struct page *page; u8 cluster_bits = sbi->cluster_bits; CLST lcn, clen, vcn, vcn_next; u32 add, off, page_idx; u64 lbo, len; size_t run_idx; struct blk_plug plug; if (!bytes) return 0; blk_start_plug(&plug); /* Align vbo and bytes to be 512 bytes aligned. */ lbo = (vbo + bytes + 511) & ~511ull; vbo = vbo & ~511ull; bytes = lbo - vbo; vcn = vbo >> cluster_bits; if (!run_lookup_entry(run, vcn, &lcn, &clen, &run_idx)) { err = -ENOENT; goto out; } off = vbo & sbi->cluster_mask; page_idx = 0; page = pages[0]; for (;;) { lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; new_bio: new = bio_alloc(bdev, nr_pages - page_idx, op, GFP_NOFS); if (bio) { bio_chain(bio, new); submit_bio(bio); } bio = new; bio->bi_iter.bi_sector = lbo >> 9; while (len) { off = vbo & (PAGE_SIZE - 1); add = off + len > PAGE_SIZE ? (PAGE_SIZE - off) : len; if (bio_add_page(bio, page, add, off) < add) goto new_bio; if (bytes <= add) goto out; bytes -= add; vbo += add; if (add + off == PAGE_SIZE) { page_idx += 1; if (WARN_ON(page_idx >= nr_pages)) { err = -EINVAL; goto out; } page = pages[page_idx]; } if (len <= add) break; len -= add; lbo += add; } vcn_next = vcn + clen; if (!run_get_entry(run, ++run_idx, &vcn, &lcn, &clen) || vcn != vcn_next) { err = -ENOENT; goto out; } off = 0; } out: if (bio) { if (!err) err = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); return err; } /* * ntfs_bio_fill_1 - Helper for ntfs_loadlog_and_replay(). * * Fill on-disk logfile range by (-1) * this means empty logfile. */ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run) { int err = 0; struct super_block *sb = sbi->sb; struct block_device *bdev = sb->s_bdev; u8 cluster_bits = sbi->cluster_bits; struct bio *new, *bio = NULL; CLST lcn, clen; u64 lbo, len; size_t run_idx; struct page *fill; void *kaddr; struct blk_plug plug; fill = alloc_page(GFP_KERNEL); if (!fill) return -ENOMEM; kaddr = kmap_atomic(fill); memset(kaddr, -1, PAGE_SIZE); kunmap_atomic(kaddr); flush_dcache_page(fill); lock_page(fill); if (!run_lookup_entry(run, 0, &lcn, &clen, &run_idx)) { err = -ENOENT; goto out; } /* * TODO: Try blkdev_issue_write_same. 
*/ blk_start_plug(&plug); do { lbo = (u64)lcn << cluster_bits; len = (u64)clen << cluster_bits; new_bio: new = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS); if (bio) { bio_chain(bio, new); submit_bio(bio); } bio = new; bio->bi_iter.bi_sector = lbo >> 9; for (;;) { u32 add = len > PAGE_SIZE ? PAGE_SIZE : len; if (bio_add_page(bio, fill, add, 0) < add) goto new_bio; lbo += add; if (len <= add) break; len -= add; } } while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen)); if (!err) err = submit_bio_wait(bio); bio_put(bio); blk_finish_plug(&plug); out: unlock_page(fill); put_page(fill); return err; } int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u64 *lbo, u64 *bytes) { u32 off; CLST lcn, len; u8 cluster_bits = sbi->cluster_bits; if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, &len, NULL)) return -ENOENT; off = vbo & sbi->cluster_mask; *lbo = lcn == SPARSE_LCN ? -1 : (((u64)lcn << cluster_bits) + off); *bytes = ((u64)len << cluster_bits) - off; return 0; } struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, enum RECORD_FLAG flag) { int err = 0; struct super_block *sb = sbi->sb; struct inode *inode = new_inode(sb); struct ntfs_inode *ni; if (!inode) return ERR_PTR(-ENOMEM); ni = ntfs_i(inode); err = mi_format_new(&ni->mi, sbi, rno, flag, false); if (err) goto out; inode->i_ino = rno; if (insert_inode_locked(inode) < 0) { err = -EIO; goto out; } out: if (err) { make_bad_inode(inode); iput(inode); ni = ERR_PTR(err); } return ni; } /* * O:BAG:BAD:(A;OICI;FA;;;WD) * Owner S-1-5-32-544 (Administrators) * Group S-1-5-32-544 (Administrators) * ACE: allow S-1-1-0 (Everyone) with FILE_ALL_ACCESS */ const u8 s_default_security[] __aligned(8) = { 0x01, 0x00, 0x04, 0x80, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1C, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0xFF, 0x01, 0x1F, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, }; static_assert(sizeof(s_default_security) == 0x50); static inline u32 sid_length(const struct SID *sid) { return struct_size(sid, SubAuthority, sid->SubAuthorityCount); } /* * is_acl_valid * * Thanks Mark Harmstone for idea. */ static bool is_acl_valid(const struct ACL *acl, u32 len) { const struct ACE_HEADER *ace; u32 i; u16 ace_count, ace_size; if (acl->AclRevision != ACL_REVISION && acl->AclRevision != ACL_REVISION_DS) { /* * This value should be ACL_REVISION, unless the ACL contains an * object-specific ACE, in which case this value must be ACL_REVISION_DS. * All ACEs in an ACL must be at the same revision level. 
*/ return false; } if (acl->Sbz1) return false; if (le16_to_cpu(acl->AclSize) > len) return false; if (acl->Sbz2) return false; len -= sizeof(struct ACL); ace = (struct ACE_HEADER *)&acl[1]; ace_count = le16_to_cpu(acl->AceCount); for (i = 0; i < ace_count; i++) { if (len < sizeof(struct ACE_HEADER)) return false; ace_size = le16_to_cpu(ace->AceSize); if (len < ace_size) return false; len -= ace_size; ace = Add2Ptr(ace, ace_size); } return true; } bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len) { u32 sd_owner, sd_group, sd_sacl, sd_dacl; if (len < sizeof(struct SECURITY_DESCRIPTOR_RELATIVE)) return false; if (sd->Revision != 1) return false; if (sd->Sbz1) return false; if (!(sd->Control & SE_SELF_RELATIVE)) return false; sd_owner = le32_to_cpu(sd->Owner); if (sd_owner) { const struct SID *owner = Add2Ptr(sd, sd_owner); if (sd_owner + offsetof(struct SID, SubAuthority) > len) return false; if (owner->Revision != 1) return false; if (sd_owner + sid_length(owner) > len) return false; } sd_group = le32_to_cpu(sd->Group); if (sd_group) { const struct SID *group = Add2Ptr(sd, sd_group); if (sd_group + offsetof(struct SID, SubAuthority) > len) return false; if (group->Revision != 1) return false; if (sd_group + sid_length(group) > len) return false; } sd_sacl = le32_to_cpu(sd->Sacl); if (sd_sacl) { const struct ACL *sacl = Add2Ptr(sd, sd_sacl); if (sd_sacl + sizeof(struct ACL) > len) return false; if (!is_acl_valid(sacl, len - sd_sacl)) return false; } sd_dacl = le32_to_cpu(sd->Dacl); if (sd_dacl) { const struct ACL *dacl = Add2Ptr(sd, sd_dacl); if (sd_dacl + sizeof(struct ACL) > len) return false; if (!is_acl_valid(dacl, len - sd_dacl)) return false; } return true; } /* * ntfs_security_init - Load and parse $Secure. */ int ntfs_security_init(struct ntfs_sb_info *sbi) { int err; struct super_block *sb = sbi->sb; struct inode *inode; struct ntfs_inode *ni; struct MFT_REF ref; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; u64 sds_size; size_t off; struct NTFS_DE *ne; struct NTFS_DE_SII *sii_e; struct ntfs_fnd *fnd_sii = NULL; const struct INDEX_ROOT *root_sii; const struct INDEX_ROOT *root_sdh; struct ntfs_index *indx_sdh = &sbi->security.index_sdh; struct ntfs_index *indx_sii = &sbi->security.index_sii; ref.low = cpu_to_le32(MFT_REC_SECURE); ref.high = 0; ref.seq = cpu_to_le16(MFT_REC_SECURE); inode = ntfs_iget5(sb, &ref, &NAME_SECURE); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $Secure (%d).", err); inode = NULL; goto out; } ni = ntfs_i(inode); le = NULL; attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SDH_NAME, ARRAY_SIZE(SDH_NAME), NULL, NULL); if (!attr || !(root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || root_sdh->type != ATTR_ZERO || root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root_sdh->ihdr.used) > le32_to_cpu(attr->res.data_size)) { ntfs_err(sb, "$Secure::$SDH is corrupted."); err = -EINVAL; goto out; } err = indx_init(indx_sdh, sbi, attr, INDEX_MUTEX_SDH); if (err) { ntfs_err(sb, "Failed to initialize $Secure::$SDH (%d).", err); goto out; } attr = ni_find_attr(ni, attr, &le, ATTR_ROOT, SII_NAME, ARRAY_SIZE(SII_NAME), NULL, NULL); if (!attr || !(root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || root_sii->type != ATTR_ZERO || root_sii->rule != NTFS_COLLATION_TYPE_UINT || offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root_sii->ihdr.used) > le32_to_cpu(attr->res.data_size)) { ntfs_err(sb, "$Secure::$SII is corrupted."); err = -EINVAL; goto out; } 
err = indx_init(indx_sii, sbi, attr, INDEX_MUTEX_SII); if (err) { ntfs_err(sb, "Failed to initialize $Secure::$SII (%d).", err); goto out; } fnd_sii = fnd_get(); if (!fnd_sii) { err = -ENOMEM; goto out; } sds_size = inode->i_size; /* Find the last valid Id. */ sbi->security.next_id = SECURITY_ID_FIRST; /* Always write new security at the end of bucket. */ sbi->security.next_off = ALIGN(sds_size - SecurityDescriptorsBlockSize, 16); off = 0; ne = NULL; for (;;) { u32 next_id; err = indx_find_raw(indx_sii, ni, root_sii, &ne, &off, fnd_sii); if (err || !ne) break; sii_e = (struct NTFS_DE_SII *)ne; if (le16_to_cpu(ne->view.data_size) < sizeof(sii_e->sec_hdr)) continue; next_id = le32_to_cpu(sii_e->sec_id) + 1; if (next_id >= sbi->security.next_id) sbi->security.next_id = next_id; } sbi->security.ni = ni; inode = NULL; out: iput(inode); fnd_put(fnd_sii); return err; } /* * ntfs_get_security_by_id - Read security descriptor by id. */ int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id, struct SECURITY_DESCRIPTOR_RELATIVE **sd, size_t *size) { int err; int diff; struct ntfs_inode *ni = sbi->security.ni; struct ntfs_index *indx = &sbi->security.index_sii; void *p = NULL; struct NTFS_DE_SII *sii_e; struct ntfs_fnd *fnd_sii; struct SECURITY_HDR d_security; const struct INDEX_ROOT *root_sii; u32 t32; *sd = NULL; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY); fnd_sii = fnd_get(); if (!fnd_sii) { err = -ENOMEM; goto out; } root_sii = indx_get_root(indx, ni, NULL, NULL); if (!root_sii) { err = -EINVAL; goto out; } /* Try to find this SECURITY descriptor in SII indexes. */ err = indx_find(indx, ni, root_sii, &security_id, sizeof(security_id), NULL, &diff, (struct NTFS_DE **)&sii_e, fnd_sii); if (err) goto out; if (diff) goto out; t32 = le32_to_cpu(sii_e->sec_hdr.size); if (t32 < sizeof(struct SECURITY_HDR)) { err = -EINVAL; goto out; } if (t32 > sizeof(struct SECURITY_HDR) + 0x10000) { /* Looks like too big security. 0x10000 - is arbitrary big number. */ err = -EFBIG; goto out; } *size = t32 - sizeof(struct SECURITY_HDR); p = kmalloc(*size, GFP_NOFS); if (!p) { err = -ENOMEM; goto out; } err = ntfs_read_run_nb(sbi, &ni->file.run, le64_to_cpu(sii_e->sec_hdr.off), &d_security, sizeof(d_security), NULL); if (err) goto out; if (memcmp(&d_security, &sii_e->sec_hdr, sizeof(d_security))) { err = -EINVAL; goto out; } err = ntfs_read_run_nb(sbi, &ni->file.run, le64_to_cpu(sii_e->sec_hdr.off) + sizeof(struct SECURITY_HDR), p, *size, NULL); if (err) goto out; *sd = p; p = NULL; out: kfree(p); fnd_put(fnd_sii); ni_unlock(ni); return err; } /* * ntfs_insert_security - Insert security descriptor into $Secure::SDS. * * SECURITY Descriptor Stream data is organized into chunks of 256K bytes * and it contains a mirror copy of each security descriptor. When writing * to a security descriptor at location X, another copy will be written at * location (X+256K). * When writing a security descriptor that will cross the 256K boundary, * the pointer will be advanced by 256K to skip * over the mirror portion. 
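 *
 * For example (illustrative offsets): with SecurityDescriptorsBlockSize ==
 * 0x40000 (256K), a descriptor whose main copy is written at SDS offset
 * 0x40440 gets its mirror written at 0x40440 + 0x40000 = 0x80440, and the
 * stream is extended, if needed, so that both copies fit.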
*/ int ntfs_insert_security(struct ntfs_sb_info *sbi, const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 size_sd, __le32 *security_id, bool *inserted) { int err, diff; struct ntfs_inode *ni = sbi->security.ni; struct ntfs_index *indx_sdh = &sbi->security.index_sdh; struct ntfs_index *indx_sii = &sbi->security.index_sii; struct NTFS_DE_SDH *e; struct NTFS_DE_SDH sdh_e; struct NTFS_DE_SII sii_e; struct SECURITY_HDR *d_security; u32 new_sec_size = size_sd + sizeof(struct SECURITY_HDR); u32 aligned_sec_size = ALIGN(new_sec_size, 16); struct SECURITY_KEY hash_key; struct ntfs_fnd *fnd_sdh = NULL; const struct INDEX_ROOT *root_sdh; const struct INDEX_ROOT *root_sii; u64 mirr_off, new_sds_size; u32 next, left; static_assert((1 << Log2OfSecurityDescriptorsBlockSize) == SecurityDescriptorsBlockSize); hash_key.hash = security_hash(sd, size_sd); hash_key.sec_id = SECURITY_ID_INVALID; if (inserted) *inserted = false; *security_id = SECURITY_ID_INVALID; /* Allocate a temporal buffer. */ d_security = kzalloc(aligned_sec_size, GFP_NOFS); if (!d_security) return -ENOMEM; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY); fnd_sdh = fnd_get(); if (!fnd_sdh) { err = -ENOMEM; goto out; } root_sdh = indx_get_root(indx_sdh, ni, NULL, NULL); if (!root_sdh) { err = -EINVAL; goto out; } root_sii = indx_get_root(indx_sii, ni, NULL, NULL); if (!root_sii) { err = -EINVAL; goto out; } /* * Check if such security already exists. * Use "SDH" and hash -> to get the offset in "SDS". */ err = indx_find(indx_sdh, ni, root_sdh, &hash_key, sizeof(hash_key), &d_security->key.sec_id, &diff, (struct NTFS_DE **)&e, fnd_sdh); if (err) goto out; while (e) { if (le32_to_cpu(e->sec_hdr.size) == new_sec_size) { err = ntfs_read_run_nb(sbi, &ni->file.run, le64_to_cpu(e->sec_hdr.off), d_security, new_sec_size, NULL); if (err) goto out; if (le32_to_cpu(d_security->size) == new_sec_size && d_security->key.hash == hash_key.hash && !memcmp(d_security + 1, sd, size_sd)) { /* Such security already exists. */ *security_id = d_security->key.sec_id; err = 0; goto out; } } err = indx_find_sort(indx_sdh, ni, root_sdh, (struct NTFS_DE **)&e, fnd_sdh); if (err) goto out; if (!e || e->key.hash != hash_key.hash) break; } /* Zero unused space. */ next = sbi->security.next_off & (SecurityDescriptorsBlockSize - 1); left = SecurityDescriptorsBlockSize - next; /* Zero gap until SecurityDescriptorsBlockSize. */ if (left < new_sec_size) { /* Zero "left" bytes from sbi->security.next_off. */ sbi->security.next_off += SecurityDescriptorsBlockSize + left; } /* Zero tail of previous security. */ //used = ni->vfs_inode.i_size & (SecurityDescriptorsBlockSize - 1); /* * Example: * 0x40438 == ni->vfs_inode.i_size * 0x00440 == sbi->security.next_off * need to zero [0x438-0x440) * if (next > used) { * u32 tozero = next - used; * zero "tozero" bytes from sbi->security.next_off - tozero */ /* Format new security descriptor. */ d_security->key.hash = hash_key.hash; d_security->key.sec_id = cpu_to_le32(sbi->security.next_id); d_security->off = cpu_to_le64(sbi->security.next_off); d_security->size = cpu_to_le32(new_sec_size); memcpy(d_security + 1, sd, size_sd); /* Write main SDS bucket. 
*/ err = ntfs_sb_write_run(sbi, &ni->file.run, sbi->security.next_off, d_security, aligned_sec_size, 0); if (err) goto out; mirr_off = sbi->security.next_off + SecurityDescriptorsBlockSize; new_sds_size = mirr_off + aligned_sec_size; if (new_sds_size > ni->vfs_inode.i_size) { err = attr_set_size(ni, ATTR_DATA, SDS_NAME, ARRAY_SIZE(SDS_NAME), &ni->file.run, new_sds_size, &new_sds_size, false, NULL); if (err) goto out; } /* Write copy SDS bucket. */ err = ntfs_sb_write_run(sbi, &ni->file.run, mirr_off, d_security, aligned_sec_size, 0); if (err) goto out; /* Fill SII entry. */ sii_e.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr)); sii_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR)); sii_e.de.view.res = 0; sii_e.de.size = cpu_to_le16(sizeof(struct NTFS_DE_SII)); sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id)); sii_e.de.flags = 0; sii_e.de.res = 0; sii_e.sec_id = d_security->key.sec_id; memcpy(&sii_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR)); err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL, 0); if (err) goto out; /* Fill SDH entry. */ sdh_e.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr)); sdh_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR)); sdh_e.de.view.res = 0; sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY); sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key)); sdh_e.de.flags = 0; sdh_e.de.res = 0; sdh_e.key.hash = d_security->key.hash; sdh_e.key.sec_id = d_security->key.sec_id; memcpy(&sdh_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR)); sdh_e.magic[0] = cpu_to_le16('I'); sdh_e.magic[1] = cpu_to_le16('I'); fnd_clear(fnd_sdh); err = indx_insert_entry(indx_sdh, ni, &sdh_e.de, (void *)(size_t)1, fnd_sdh, 0); if (err) goto out; *security_id = d_security->key.sec_id; if (inserted) *inserted = true; /* Update Id and offset for next descriptor. */ sbi->security.next_id += 1; sbi->security.next_off += aligned_sec_size; out: fnd_put(fnd_sdh); mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); kfree(d_security); return err; } /* * ntfs_reparse_init - Load and parse $Extend/$Reparse. */ int ntfs_reparse_init(struct ntfs_sb_info *sbi) { int err; struct ntfs_inode *ni = sbi->reparse.ni; struct ntfs_index *indx = &sbi->reparse.index_r; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; const struct INDEX_ROOT *root_r; if (!ni) return 0; le = NULL; attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SR_NAME, ARRAY_SIZE(SR_NAME), NULL, NULL); if (!attr) { err = -EINVAL; goto out; } root_r = resident_data(attr); if (root_r->type != ATTR_ZERO || root_r->rule != NTFS_COLLATION_TYPE_UINTS) { err = -EINVAL; goto out; } err = indx_init(indx, sbi, attr, INDEX_MUTEX_SR); if (err) goto out; out: return err; } /* * ntfs_objid_init - Load and parse $Extend/$ObjId. 
*/ int ntfs_objid_init(struct ntfs_sb_info *sbi) { int err; struct ntfs_inode *ni = sbi->objid.ni; struct ntfs_index *indx = &sbi->objid.index_o; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; const struct INDEX_ROOT *root; if (!ni) return 0; le = NULL; attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SO_NAME, ARRAY_SIZE(SO_NAME), NULL, NULL); if (!attr) { err = -EINVAL; goto out; } root = resident_data(attr); if (root->type != ATTR_ZERO || root->rule != NTFS_COLLATION_TYPE_UINTS) { err = -EINVAL; goto out; } err = indx_init(indx, sbi, attr, INDEX_MUTEX_SO); if (err) goto out; out: return err; } int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid) { int err; struct ntfs_inode *ni = sbi->objid.ni; struct ntfs_index *indx = &sbi->objid.index_o; if (!ni) return -EINVAL; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_OBJID); err = indx_delete_entry(indx, ni, guid, sizeof(*guid), NULL); mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); return err; } int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag, const struct MFT_REF *ref) { int err; struct ntfs_inode *ni = sbi->reparse.ni; struct ntfs_index *indx = &sbi->reparse.index_r; struct NTFS_DE_R re; if (!ni) return -EINVAL; memset(&re, 0, sizeof(re)); re.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_R, zero)); re.de.size = cpu_to_le16(sizeof(struct NTFS_DE_R)); re.de.key_size = cpu_to_le16(sizeof(re.key)); re.key.ReparseTag = rtag; memcpy(&re.key.ref, ref, sizeof(*ref)); mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE); err = indx_insert_entry(indx, ni, &re.de, NULL, NULL, 0); mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); return err; } int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag, const struct MFT_REF *ref) { int err, diff; struct ntfs_inode *ni = sbi->reparse.ni; struct ntfs_index *indx = &sbi->reparse.index_r; struct ntfs_fnd *fnd = NULL; struct REPARSE_KEY rkey; struct NTFS_DE_R *re; struct INDEX_ROOT *root_r; if (!ni) return -EINVAL; rkey.ReparseTag = rtag; rkey.ref = *ref; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE); if (rtag) { err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL); goto out1; } fnd = fnd_get(); if (!fnd) { err = -ENOMEM; goto out1; } root_r = indx_get_root(indx, ni, NULL, NULL); if (!root_r) { err = -EINVAL; goto out; } /* 1 - forces to ignore rkey.ReparseTag when comparing keys. */ err = indx_find(indx, ni, root_r, &rkey, sizeof(rkey), (void *)1, &diff, (struct NTFS_DE **)&re, fnd); if (err) goto out; if (memcmp(&re->key.ref, ref, sizeof(*ref))) { /* Impossible. Looks like volume corrupt? 
*/ goto out; } memcpy(&rkey, &re->key, sizeof(rkey)); fnd_put(fnd); fnd = NULL; err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL); if (err) goto out; out: fnd_put(fnd); out1: mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); return err; } static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len) { ntfs_unmap_meta(sbi->sb, lcn, len); ntfs_discard(sbi, lcn, len); } void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) { CLST end, i, zone_len, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; bool dirty = false; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); if (!wnd_is_used(wnd, lcn, len)) { /* mark volume as dirty out of wnd->rw_lock */ dirty = true; end = lcn + len; len = 0; for (i = lcn; i < end; i++) { if (wnd_is_used(wnd, i, 1)) { if (!len) lcn = i; len += 1; continue; } if (!len) continue; if (trim) ntfs_unmap_and_discard(sbi, lcn, len); wnd_set_free(wnd, lcn, len); len = 0; } if (!len) goto out; } if (trim) ntfs_unmap_and_discard(sbi, lcn, len); wnd_set_free(wnd, lcn, len); /* append to MFT zone, if possible. */ zone_len = wnd_zone_len(wnd); zlen = min(zone_len + len, sbi->zone_max); if (zlen == zone_len) { /* MFT zone already has maximum size. */ } else if (!zone_len) { /* Create MFT zone only if 'zlen' is large enough. */ if (zlen == sbi->zone_max) wnd_zone_set(wnd, lcn, zlen); } else { CLST zone_lcn = wnd_zone_bit(wnd); if (lcn + len == zone_lcn) { /* Append into head MFT zone. */ wnd_zone_set(wnd, lcn, zlen); } else if (zone_lcn + zone_len == lcn) { /* Append into tail MFT zone. */ wnd_zone_set(wnd, zone_lcn, zlen); } } out: up_write(&wnd->rw_lock); if (dirty) ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } /* * run_deallocate - Deallocate clusters. */ int run_deallocate(struct ntfs_sb_info *sbi, const struct runs_tree *run, bool trim) { CLST lcn, len; size_t idx = 0; while (run_get_entry(run, idx++, NULL, &lcn, &len)) { if (lcn == SPARSE_LCN) continue; mark_as_free_ex(sbi, lcn, len, trim); } return 0; } static inline bool name_has_forbidden_chars(const struct le_str *fname) { int i, ch; /* check for forbidden chars */ for (i = 0; i < fname->len; ++i) { ch = le16_to_cpu(fname->name[i]); /* control chars */ if (ch < 0x20) return true; switch (ch) { /* disallowed by Windows */ case '\\': case '/': case ':': case '*': case '?': case '<': case '>': case '|': case '\"': return true; default: /* allowed char */ break; } } /* file names cannot end with space or . 
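 * (for example, "readme." and "notes " are rejected by the check below,
 * while "readme.txt" passes it)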
*/ if (fname->len > 0) { ch = le16_to_cpu(fname->name[fname->len - 1]); if (ch == ' ' || ch == '.') return true; } return false; } static inline bool is_reserved_name(const struct ntfs_sb_info *sbi, const struct le_str *fname) { int port_digit; const __le16 *name = fname->name; int len = fname->len; const u16 *upcase = sbi->upcase; /* check for 3 chars reserved names (device names) */ /* name by itself or with any extension is forbidden */ if (len == 3 || (len > 3 && le16_to_cpu(name[3]) == '.')) if (!ntfs_cmp_names(name, 3, CON_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, NUL_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, AUX_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, PRN_NAME, 3, upcase, false)) return true; /* check for 4 chars reserved names (port name followed by 1..9) */ /* name by itself or with any extension is forbidden */ if (len == 4 || (len > 4 && le16_to_cpu(name[4]) == '.')) { port_digit = le16_to_cpu(name[3]); if (port_digit >= '1' && port_digit <= '9') if (!ntfs_cmp_names(name, 3, COM_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, LPT_NAME, 3, upcase, false)) return true; } return false; } /* * valid_windows_name - Check if a file name is valid in Windows. */ bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *fname) { return !name_has_forbidden_chars(fname) && !is_reserved_name(sbi, fname); } /* * ntfs_set_label - updates current ntfs label. */ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) { int err; struct ATTRIB *attr; u32 uni_bytes; struct ntfs_inode *ni = sbi->volume.ni; /* Allocate PATH_MAX bytes. */ struct cpu_str *uni = __getname(); if (!uni) return -ENOMEM; err = ntfs_nls_to_utf16(sbi, label, len, uni, (PATH_MAX - 2) / 2, UTF16_LITTLE_ENDIAN); if (err < 0) goto out; uni_bytes = uni->len * sizeof(u16); if (uni_bytes > NTFS_LABEL_MAX_LENGTH * sizeof(u16)) { ntfs_warn(sbi->sb, "new label is too long"); err = -EFBIG; goto out; } ni_lock(ni); /* Ignore any errors. */ ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL); err = ni_insert_resident(ni, uni_bytes, ATTR_LABEL, NULL, 0, &attr, NULL, NULL); if (err < 0) goto unlock_out; /* write new label in on-disk struct. */ memcpy(resident_data(attr), uni->name, uni_bytes); /* update cached value of current label. */ if (len >= ARRAY_SIZE(sbi->volume.label)) len = ARRAY_SIZE(sbi->volume.label) - 1; memcpy(sbi->volume.label, label, len); sbi->volume.label[len] = 0; mark_inode_dirty_sync(&ni->vfs_inode); unlock_out: ni_unlock(ni); if (!err) err = _ni_write_inode(&ni->vfs_inode, 0); out: __putname(uni); return err; }
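/*
 * A minimal sketch (not part of the driver above): a simplified, ASCII,
 * user-space rendition of the reserved-name rule that is_reserved_name()
 * implements -- a 3-letter device name (CON, PRN, AUX, NUL) or COM1..COM9 /
 * LPT1..LPT9, either bare or followed by any extension, is rejected. The
 * function name is made up, and the kernel code itself compares UTF-16LE
 * names through the volume upcase table rather than with strncasecmp().
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

static bool is_reserved_name_ascii(const char *name)
{
	static const char *const three[] = { "CON", "PRN", "AUX", "NUL" };
	size_t base = strcspn(name, ".");	/* chars before any extension */
	size_t i;

	if (base == 3) {
		for (i = 0; i < 4; i++)
			if (!strncasecmp(name, three[i], 3))
				return true;
	} else if (base == 4 && name[3] >= '1' && name[3] <= '9') {
		if (!strncasecmp(name, "COM", 3) || !strncasecmp(name, "LPT", 3))
			return true;
	}
	return false;
}

int main(void)
{
	/* expected output: 1 1 0 0 */
	printf("%d %d %d %d\n",
	       is_reserved_name_ascii("con"),
	       is_reserved_name_ascii("LPT1.txt"),
	       is_reserved_name_ascii("CONSOLE"),
	       is_reserved_name_ascii("COM0"));
	return 0;
}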
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux I2C core SMBus and SMBus emulation code * * This file contains the SMBus functions which are always included in the I2C * core because they can be emulated via I2C. SMBus specific extensions * (e.g. smbalert) are handled in a separate i2c-smbus module.
* * All SMBus-related things are written by Frodo Looijaard <frodol@dds.nl> * SMBus 2.0 support by Mark Studebaker <mdsxyz123@yahoo.com> and * Jean Delvare <jdelvare@suse.de> */ #include <linux/device.h> #include <linux/err.h> #include <linux/i2c.h> #include <linux/i2c-smbus.h> #include <linux/property.h> #include <linux/slab.h> #include "i2c-core.h" #define CREATE_TRACE_POINTS #include <trace/events/smbus.h> /* The SMBus parts */ #define POLY (0x1070U << 3) static u8 crc8(u16 data) { int i; for (i = 0; i < 8; i++) { if (data & 0x8000) data = data ^ POLY; data = data << 1; } return (u8)(data >> 8); } /** * i2c_smbus_pec - Incremental CRC8 over the given input data array * @crc: previous return crc8 value * @p: pointer to data buffer. * @count: number of bytes in data buffer. * * Incremental CRC8 over count bytes in the array pointed to by p */ u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count) { int i; for (i = 0; i < count; i++) crc = crc8((crc ^ p[i]) << 8); return crc; } EXPORT_SYMBOL(i2c_smbus_pec); /* Assume a 7-bit address, which is reasonable for SMBus */ static u8 i2c_smbus_msg_pec(u8 pec, struct i2c_msg *msg) { /* The address will be sent first */ u8 addr = i2c_8bit_addr_from_msg(msg); pec = i2c_smbus_pec(pec, &addr, 1); /* The data buffer follows */ return i2c_smbus_pec(pec, msg->buf, msg->len); } /* Used for write only transactions */ static inline void i2c_smbus_add_pec(struct i2c_msg *msg) { msg->buf[msg->len] = i2c_smbus_msg_pec(0, msg); msg->len++; } /* Return <0 on CRC error If there was a write before this read (most cases) we need to take the partial CRC from the write part into account. Note that this function does modify the message (we need to decrease the message length to hide the CRC byte from the caller). */ static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg) { u8 rpec = msg->buf[--msg->len]; cpec = i2c_smbus_msg_pec(cpec, msg); if (rpec != cpec) { pr_debug("Bad PEC 0x%02x vs. 0x%02x\n", rpec, cpec); return -EBADMSG; } return 0; } /** * i2c_smbus_read_byte - SMBus "receive byte" protocol * @client: Handle to slave device * * This executes the SMBus "receive byte" protocol, returning negative errno * else the byte received from the device. */ s32 i2c_smbus_read_byte(const struct i2c_client *client) { union i2c_smbus_data data; int status; status = i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE, &data); return (status < 0) ? status : data.byte; } EXPORT_SYMBOL(i2c_smbus_read_byte); /** * i2c_smbus_write_byte - SMBus "send byte" protocol * @client: Handle to slave device * @value: Byte to be sent * * This executes the SMBus "send byte" protocol, returning negative errno * else zero on success. */ s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value) { return i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_WRITE, value, I2C_SMBUS_BYTE, NULL); } EXPORT_SYMBOL(i2c_smbus_write_byte); /** * i2c_smbus_read_byte_data - SMBus "read byte" protocol * @client: Handle to slave device * @command: Byte interpreted by slave * * This executes the SMBus "read byte" protocol, returning negative errno * else a data byte received from the device. */ s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command) { union i2c_smbus_data data; int status; status = i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_READ, command, I2C_SMBUS_BYTE_DATA, &data); return (status < 0) ? 
status : data.byte; } EXPORT_SYMBOL(i2c_smbus_read_byte_data); /** * i2c_smbus_write_byte_data - SMBus "write byte" protocol * @client: Handle to slave device * @command: Byte interpreted by slave * @value: Byte being written * * This executes the SMBus "write byte" protocol, returning negative errno * else zero on success. */ s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command, u8 value) { union i2c_smbus_data data; data.byte = value; return i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_WRITE, command, I2C_SMBUS_BYTE_DATA, &data); } EXPORT_SYMBOL(i2c_smbus_write_byte_data); /** * i2c_smbus_read_word_data - SMBus "read word" protocol * @client: Handle to slave device * @command: Byte interpreted by slave * * This executes the SMBus "read word" protocol, returning negative errno * else a 16-bit unsigned "word" received from the device. */ s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command) { union i2c_smbus_data data; int status; status = i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_READ, command, I2C_SMBUS_WORD_DATA, &data); return (status < 0) ? status : data.word; } EXPORT_SYMBOL(i2c_smbus_read_word_data); /** * i2c_smbus_write_word_data - SMBus "write word" protocol * @client: Handle to slave device * @command: Byte interpreted by slave * @value: 16-bit "word" being written * * This executes the SMBus "write word" protocol, returning negative errno * else zero on success. */ s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command, u16 value) { union i2c_smbus_data data; data.word = value; return i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_WRITE, command, I2C_SMBUS_WORD_DATA, &data); } EXPORT_SYMBOL(i2c_smbus_write_word_data); /** * i2c_smbus_read_block_data - SMBus "block read" protocol * @client: Handle to slave device * @command: Byte interpreted by slave * @values: Byte array into which data will be read; big enough to hold * the data returned by the slave. SMBus allows at most 32 bytes. * * This executes the SMBus "block read" protocol, returning negative errno * else the number of data bytes in the slave's response. * * Note that using this function requires that the client's adapter support * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality. Not all adapter drivers * support this; its emulation through I2C messaging relies on a specific * mechanism (I2C_M_RECV_LEN) which may not be implemented. */ s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command, u8 *values) { union i2c_smbus_data data; int status; status = i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_READ, command, I2C_SMBUS_BLOCK_DATA, &data); if (status) return status; memcpy(values, &data.block[1], data.block[0]); return data.block[0]; } EXPORT_SYMBOL(i2c_smbus_read_block_data); /** * i2c_smbus_write_block_data - SMBus "block write" protocol * @client: Handle to slave device * @command: Byte interpreted by slave * @length: Size of data block; SMBus allows at most 32 bytes * @values: Byte array which will be written. * * This executes the SMBus "block write" protocol, returning negative errno * else zero on success. 
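 *
 * If @length exceeds I2C_SMBUS_BLOCK_MAX it is clamped to
 * I2C_SMBUS_BLOCK_MAX rather than rejected (see the length check below).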
*/ s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values) { union i2c_smbus_data data; if (length > I2C_SMBUS_BLOCK_MAX) length = I2C_SMBUS_BLOCK_MAX; data.block[0] = length; memcpy(&data.block[1], values, length); return i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_WRITE, command, I2C_SMBUS_BLOCK_DATA, &data); } EXPORT_SYMBOL(i2c_smbus_write_block_data); /* Returns the number of read bytes */ s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, u8 *values) { union i2c_smbus_data data; int status; if (length > I2C_SMBUS_BLOCK_MAX) length = I2C_SMBUS_BLOCK_MAX; data.block[0] = length; status = i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_READ, command, I2C_SMBUS_I2C_BLOCK_DATA, &data); if (status < 0) return status; memcpy(values, &data.block[1], data.block[0]); return data.block[0]; } EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data); s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values) { union i2c_smbus_data data; if (length > I2C_SMBUS_BLOCK_MAX) length = I2C_SMBUS_BLOCK_MAX; data.block[0] = length; memcpy(data.block + 1, values, length); return i2c_smbus_xfer(client->adapter, client->addr, client->flags, I2C_SMBUS_WRITE, command, I2C_SMBUS_I2C_BLOCK_DATA, &data); } EXPORT_SYMBOL(i2c_smbus_write_i2c_block_data); static void i2c_smbus_try_get_dmabuf(struct i2c_msg *msg, u8 init_val) { bool is_read = msg->flags & I2C_M_RD; unsigned char *dma_buf; dma_buf = kzalloc(I2C_SMBUS_BLOCK_MAX + (is_read ? 2 : 3), GFP_KERNEL); if (!dma_buf) return; msg->buf = dma_buf; msg->flags |= I2C_M_DMA_SAFE; if (init_val) msg->buf[0] = init_val; } /* * Simulate a SMBus command using the I2C protocol. * No checking of parameters is done! */ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data) { /* * So we need to generate a series of msgs. In the case of writing, we * need to use only one message; when reading, we need two. We * initialize most things with sane defaults, to keep the code below * somewhat simpler. */ unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3]; unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2]; int nmsgs = read_write == I2C_SMBUS_READ ? 2 : 1; u8 partial_pec = 0; int status; struct i2c_msg msg[2] = { { .addr = addr, .flags = flags, .len = 1, .buf = msgbuf0, }, { .addr = addr, .flags = flags | I2C_M_RD, .len = 0, .buf = msgbuf1, }, }; bool wants_pec = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK && size != I2C_SMBUS_I2C_BLOCK_DATA); msgbuf0[0] = command; switch (size) { case I2C_SMBUS_QUICK: msg[0].len = 0; /* Special case: The read/write field is used as data */ msg[0].flags = flags | (read_write == I2C_SMBUS_READ ? I2C_M_RD : 0); nmsgs = 1; break; case I2C_SMBUS_BYTE: if (read_write == I2C_SMBUS_READ) { /* Special case: only a read! 
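 * An SMBus "receive byte" carries no command byte at all, so the single
 * message set up above is simply flipped to a 1-byte read and the result
 * is later taken from msgbuf0[0].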
*/ msg[0].flags = I2C_M_RD | flags; nmsgs = 1; } break; case I2C_SMBUS_BYTE_DATA: if (read_write == I2C_SMBUS_READ) msg[1].len = 1; else { msg[0].len = 2; msgbuf0[1] = data->byte; } break; case I2C_SMBUS_WORD_DATA: if (read_write == I2C_SMBUS_READ) msg[1].len = 2; else { msg[0].len = 3; msgbuf0[1] = data->word & 0xff; msgbuf0[2] = data->word >> 8; } break; case I2C_SMBUS_PROC_CALL: nmsgs = 2; /* Special case */ read_write = I2C_SMBUS_READ; msg[0].len = 3; msg[1].len = 2; msgbuf0[1] = data->word & 0xff; msgbuf0[2] = data->word >> 8; break; case I2C_SMBUS_BLOCK_DATA: if (read_write == I2C_SMBUS_READ) { msg[1].flags |= I2C_M_RECV_LEN; msg[1].len = 1; /* block length will be added by the underlying bus driver */ i2c_smbus_try_get_dmabuf(&msg[1], 0); } else { msg[0].len = data->block[0] + 2; if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) { dev_err(&adapter->dev, "Invalid block write size %d\n", data->block[0]); return -EINVAL; } i2c_smbus_try_get_dmabuf(&msg[0], command); memcpy(msg[0].buf + 1, data->block, msg[0].len - 1); } break; case I2C_SMBUS_BLOCK_PROC_CALL: nmsgs = 2; /* Another special case */ read_write = I2C_SMBUS_READ; if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { dev_err(&adapter->dev, "Invalid block write size %d\n", data->block[0]); return -EINVAL; } msg[0].len = data->block[0] + 2; i2c_smbus_try_get_dmabuf(&msg[0], command); memcpy(msg[0].buf + 1, data->block, msg[0].len - 1); msg[1].flags |= I2C_M_RECV_LEN; msg[1].len = 1; /* block length will be added by the underlying bus driver */ i2c_smbus_try_get_dmabuf(&msg[1], 0); break; case I2C_SMBUS_I2C_BLOCK_DATA: if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { dev_err(&adapter->dev, "Invalid block %s size %d\n", read_write == I2C_SMBUS_READ ? "read" : "write", data->block[0]); return -EINVAL; } if (read_write == I2C_SMBUS_READ) { msg[1].len = data->block[0]; i2c_smbus_try_get_dmabuf(&msg[1], 0); } else { msg[0].len = data->block[0] + 1; i2c_smbus_try_get_dmabuf(&msg[0], command); memcpy(msg[0].buf + 1, data->block + 1, data->block[0]); } break; default: dev_err(&adapter->dev, "Unsupported transaction %d\n", size); return -EOPNOTSUPP; } if (wants_pec) { /* Compute PEC if first message is a write */ if (!(msg[0].flags & I2C_M_RD)) { if (nmsgs == 1) /* Write only */ i2c_smbus_add_pec(&msg[0]); else /* Write followed by read */ partial_pec = i2c_smbus_msg_pec(0, &msg[0]); } /* Ask for PEC if last message is a read */ if (msg[nmsgs - 1].flags & I2C_M_RD) msg[nmsgs - 1].len++; } status = __i2c_transfer(adapter, msg, nmsgs); if (status < 0) goto cleanup; if (status != nmsgs) { status = -EIO; goto cleanup; } status = 0; /* Check PEC if last message is a read */ if (wants_pec && (msg[nmsgs - 1].flags & I2C_M_RD)) { status = i2c_smbus_check_pec(partial_pec, &msg[nmsgs - 1]); if (status < 0) goto cleanup; } if (read_write == I2C_SMBUS_READ) switch (size) { case I2C_SMBUS_BYTE: data->byte = msgbuf0[0]; break; case I2C_SMBUS_BYTE_DATA: data->byte = msgbuf1[0]; break; case I2C_SMBUS_WORD_DATA: case I2C_SMBUS_PROC_CALL: data->word = msgbuf1[0] | (msgbuf1[1] << 8); break; case I2C_SMBUS_I2C_BLOCK_DATA: memcpy(data->block + 1, msg[1].buf, data->block[0]); break; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: if (msg[1].buf[0] > I2C_SMBUS_BLOCK_MAX) { dev_err(&adapter->dev, "Invalid block size returned: %d\n", msg[1].buf[0]); status = -EPROTO; goto cleanup; } memcpy(data->block, msg[1].buf, msg[1].buf[0] + 1); break; } cleanup: if (msg[0].flags & I2C_M_DMA_SAFE) kfree(msg[0].buf); if (msg[1].flags & I2C_M_DMA_SAFE) kfree(msg[1].buf); return status; } 
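/*
 * A minimal sketch (not part of this file) of what the emulated
 * I2C_SMBUS_BYTE_DATA read above builds: one write message carrying the
 * command byte, followed by a 1-byte read message. The function name is
 * made up; 7-bit addressing and no PEC are assumed, and i2c_transfer() is
 * used so the bus lock is taken for us (the core itself runs under its own
 * locking and calls __i2c_transfer()).
 */
static int example_read_byte_data(struct i2c_client *client, u8 command)
{
	u8 out = command, in;
	struct i2c_msg msgs[2] = {
		{ .addr = client->addr, .flags = 0,        .len = 1, .buf = &out },
		{ .addr = client->addr, .flags = I2C_M_RD, .len = 1, .buf = &in  },
	};
	int ret = i2c_transfer(client->adapter, msgs, 2);

	if (ret < 0)
		return ret;
	return ret == 2 ? in : -EIO;
}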
/** * i2c_smbus_xfer - execute SMBus protocol operations * @adapter: Handle to I2C bus * @addr: Address of SMBus slave on that bus * @flags: I2C_CLIENT_* flags (usually zero or I2C_CLIENT_PEC) * @read_write: I2C_SMBUS_READ or I2C_SMBUS_WRITE * @command: Byte interpreted by slave, for protocols which use such bytes * @protocol: SMBus protocol operation to execute, such as I2C_SMBUS_PROC_CALL * @data: Data to be read or written * * This executes an SMBus protocol operation, and returns a negative * errno code else zero on success. */ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data) { s32 res; res = __i2c_lock_bus_helper(adapter); if (res) return res; res = __i2c_smbus_xfer(adapter, addr, flags, read_write, command, protocol, data); i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT); return res; } EXPORT_SYMBOL(i2c_smbus_xfer); s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data) { int (*xfer_func)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); unsigned long orig_jiffies; int try; s32 res; res = __i2c_check_suspended(adapter); if (res) return res; /* If enabled, the following two tracepoints are conditional on * read_write and protocol. */ trace_smbus_write(adapter, addr, flags, read_write, command, protocol, data); trace_smbus_read(adapter, addr, flags, read_write, command, protocol); flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB; xfer_func = adapter->algo->smbus_xfer; if (i2c_in_atomic_xfer_mode()) { if (adapter->algo->smbus_xfer_atomic) xfer_func = adapter->algo->smbus_xfer_atomic; else if (adapter->algo->master_xfer_atomic) xfer_func = NULL; /* fallback to I2C emulation */ } if (xfer_func) { /* Retry automatically on arbitration loss */ orig_jiffies = jiffies; for (res = 0, try = 0; try <= adapter->retries; try++) { res = xfer_func(adapter, addr, flags, read_write, command, protocol, data); if (res != -EAGAIN) break; if (time_after(jiffies, orig_jiffies + adapter->timeout)) break; } if (res != -EOPNOTSUPP || !adapter->algo->master_xfer) goto trace; /* * Fall back to i2c_smbus_xfer_emulated if the adapter doesn't * implement native support for the SMBus operation. */ } res = i2c_smbus_xfer_emulated(adapter, addr, flags, read_write, command, protocol, data); trace: /* If enabled, the reply tracepoint is conditional on read_write. */ trace_smbus_reply(adapter, addr, flags, read_write, command, protocol, data, res); trace_smbus_result(adapter, addr, flags, read_write, command, protocol, res); return res; } EXPORT_SYMBOL(__i2c_smbus_xfer); /** * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate * @client: Handle to slave device * @command: Byte interpreted by slave * @length: Size of data block; SMBus allows at most I2C_SMBUS_BLOCK_MAX bytes * @values: Byte array into which data will be read; big enough to hold * the data returned by the slave. SMBus allows at most * I2C_SMBUS_BLOCK_MAX bytes. * * This executes the SMBus "block read" protocol if supported by the adapter. * If block read is not supported, it emulates it using either word or byte * read protocols depending on availability. * * The addresses of the I2C slave device that are accessed with this function * must be mapped to a linear region, so that a block read will have the same * effect as a byte read. 
Before using this function you must double-check * if the I2C slave does support exchanging a block transfer with a byte * transfer. */ s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, u8 command, u8 length, u8 *values) { u8 i = 0; int status; if (length > I2C_SMBUS_BLOCK_MAX) length = I2C_SMBUS_BLOCK_MAX; if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) return i2c_smbus_read_i2c_block_data(client, command, length, values); if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) return -EOPNOTSUPP; if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)) { while ((i + 2) <= length) { status = i2c_smbus_read_word_data(client, command + i); if (status < 0) return status; values[i] = status & 0xff; values[i + 1] = status >> 8; i += 2; } } while (i < length) { status = i2c_smbus_read_byte_data(client, command + i); if (status < 0) return status; values[i] = status; i++; } return i; } EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data_or_emulated); /** * i2c_new_smbus_alert_device - get ara client for SMBus alert support * @adapter: the target adapter * @setup: setup data for the SMBus alert handler * Context: can sleep * * Setup handling of the SMBus alert protocol on a given I2C bus segment. * * Handling can be done either through our IRQ handler, or by the * adapter (from its handler, periodic polling, or whatever). * * This returns the ara client, which should be saved for later use with * i2c_handle_smbus_alert() and ultimately i2c_unregister_device(); or an * ERRPTR to indicate an error. */ struct i2c_client *i2c_new_smbus_alert_device(struct i2c_adapter *adapter, struct i2c_smbus_alert_setup *setup) { struct i2c_board_info ara_board_info = { I2C_BOARD_INFO("smbus_alert", 0x0c), .platform_data = setup, }; return i2c_new_client_device(adapter, &ara_board_info); } EXPORT_SYMBOL_GPL(i2c_new_smbus_alert_device); #if IS_ENABLED(CONFIG_I2C_SMBUS) int i2c_setup_smbus_alert(struct i2c_adapter *adapter) { struct device *parent = adapter->dev.parent; int irq; /* Adapter instantiated without parent, skip the SMBus alert setup */ if (!parent) return 0; /* Report serious errors */ irq = device_property_match_string(parent, "interrupt-names", "smbus_alert"); if (irq < 0 && irq != -EINVAL && irq != -ENODATA) return irq; /* Skip setup when no irq was found */ if (irq < 0 && !device_property_present(parent, "smbalert-gpios")) return 0; return PTR_ERR_OR_ZERO(i2c_new_smbus_alert_device(adapter, NULL)); } #endif
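/*
 * A minimal sketch (not part of this file) of the PEC appended by
 * i2c_smbus_add_pec() for an SMBus "write byte data" transaction: the CRC-8
 * covers every byte on the wire, starting with the 8-bit address (7-bit
 * address shifted left, write bit 0), then the command and data bytes --
 * exactly what i2c_smbus_msg_pec() does by feeding the address first.
 * The function name and example values are made up.
 */
static u8 example_write_byte_data_pec(u8 addr7, u8 command, u8 value)
{
	u8 wire[] = { addr7 << 1, command, value };	/* address + W bit (0) */

	return i2c_smbus_pec(0, wire, sizeof(wire));
}

/* e.g. example_write_byte_data_pec(0x50, 0x2a, 0x7f) yields the PEC byte the
 * host sends right after the data byte and the device recomputes to verify. */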
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HIGHMEM_H #define _LINUX_HIGHMEM_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/bug.h> #include <linux/cacheflush.h> #include <linux/kmsan.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include "highmem-internal.h" /** * kmap - Map a page for long term usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Can only be invoked from preemptible task context because on 32bit * systems with CONFIG_HIGHMEM enabled this function might sleep. * * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area * this returns the virtual address of the direct kernel mapping. * * The returned virtual address is globally visible and valid up to the * point where it is unmapped via kunmap(). The pointer can be handed to * other contexts.
* * For highmem pages on 32bit systems this can be slow as the mapping space * is limited and protected by a global lock. In case that there is no * mapping slot available the function blocks until a slot is released via * kunmap(). */ static inline void *kmap(struct page *page); /** * kunmap - Unmap the virtual address mapped by kmap() * @page: Pointer to the page which was mapped by kmap() * * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of * pages in the low memory area. */ static inline void kunmap(struct page *page); /** * kmap_to_page - Get the page for a kmap'ed address * @addr: The address to look up * * Returns: The page which is mapped to @addr. */ static inline struct page *kmap_to_page(void *addr); /** * kmap_flush_unused - Flush all unused kmap mappings in order to * remove stray mappings */ static inline void kmap_flush_unused(void); /** * kmap_local_page - Map a page for temporary usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Can be invoked from any context, including interrupts. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation: * * addr1 = kmap_local_page(page1); * addr2 = kmap_local_page(page2); * ... * kunmap_local(addr2); * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * * Contrary to kmap() mappings the mapping is only valid in the context of * the caller and cannot be handed to other contexts. * * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * * While kmap_local_page() is significantly faster than kmap() for the highmem * case it comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_page() can rely on this side effect. */ static inline void *kmap_local_page(struct page *page); /** * kmap_local_folio - Map a page in this folio for temporary usage * @folio: The folio containing the page. * @offset: The byte offset within the folio which identifies the page. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation:: * * addr1 = kmap_local_folio(folio1, offset1); * addr2 = kmap_local_folio(folio2, offset2); * ... * kunmap_local(addr2); * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * * Contrary to kmap() mappings the mapping is only valid in the context of * the caller and cannot be handed to other contexts. * * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * * While it is significantly faster than kmap() for the highmem case it * comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_folio() can rely on this side effect. * * Context: Can be invoked from any context. * Return: The virtual address of @offset. 
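 *
 * Illustrative use for a range that fits within one page of the folio
 * (memcpy_to_folio() later in this header shows the general,
 * highmem-safe loop)::
 *
 *	to = kmap_local_folio(folio, offset);
 *	memcpy(to, src, len);
 *	kunmap_local(to);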
*/ static inline void *kmap_local_folio(struct folio *folio, size_t offset); /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * In fact a wrapper around kmap_local_page() which also disables pagefaults * and, depending on PREEMPT_RT configuration, also CPU migration and * preemption. Therefore users should not count on the latter two side effects. * * Mappings should always be released by kunmap_atomic(). * * Do not use in new code. Use kmap_local_page() instead. * * It is used in atomic context when code wants to access the contents of a * page that might be allocated from high memory (see __GFP_HIGHMEM), for * example a page in the pagecache. The API has two functions, and they * can be used in a manner similar to the following:: * * // Find the page of interest. * struct page *page = find_get_page(mapping, offset); * * // Gain access to the contents of that page. * void *vaddr = kmap_atomic(page); * * // Do something to the contents of that page. * memset(vaddr, 0, PAGE_SIZE); * * // Unmap that page. * kunmap_atomic(vaddr); * * Note that the kunmap_atomic() call takes the result of the kmap_atomic() * call, not the argument. * * If you need to map two pages because you want to copy from one page to * another you need to keep the kmap_atomic calls strictly nested, like: * * vaddr1 = kmap_atomic(page1); * vaddr2 = kmap_atomic(page2); * * memcpy(vaddr1, vaddr2, PAGE_SIZE); * * kunmap_atomic(vaddr2); * kunmap_atomic(vaddr1); */ static inline void *kmap_atomic(struct page *page); /* Highmem related interfaces for management code */ static inline unsigned long nr_free_highpages(void); static inline unsigned long totalhigh_pages(void); #ifndef ARCH_HAS_FLUSH_ANON_PAGE static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } #endif #ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE static inline void flush_kernel_vmap_range(void *vaddr, int size) { } static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { } #endif /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_local_page(page); clear_user_page(addr, vaddr, page); kunmap_local(addr); } #endif #ifndef vma_alloc_zeroed_movable_folio /** * vma_alloc_zeroed_movable_folio - Allocate a zeroed page for a VMA. * @vma: The VMA the page is to be allocated for. * @vaddr: The virtual address the page will be inserted into. * * This function will allocate a page suitable for inserting into this * VMA at this virtual address. It may be allocated from highmem or * the movable zone. An architecture may provide its own implementation. * * Return: A folio containing one allocated and zeroed page or NULL if * we are out of memory. 
*/ static inline struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr) { struct folio *folio; folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr); if (folio && user_alloc_needs_zeroing()) clear_user_highpage(&folio->page, vaddr); return folio; } #endif static inline void clear_highpage(struct page *page) { void *kaddr = kmap_local_page(page); clear_page(kaddr); kunmap_local(kaddr); } static inline void clear_highpage_kasan_tagged(struct page *page) { void *kaddr = kmap_local_page(page); clear_page(kasan_reset_tag(kaddr)); kunmap_local(kaddr); } #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE static inline void tag_clear_highpage(struct page *page) { } #endif /* * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. */ #ifdef CONFIG_HIGHMEM void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2); #else static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { void *kaddr = kmap_local_page(page); unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); kunmap_local(kaddr); for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } #endif static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) { zero_user_segments(page, start, end, 0, 0); } static inline void zero_user(struct page *page, unsigned start, unsigned size) { zero_user_segments(page, start, start + size, 0, 0); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); } #endif #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_page(vto, vfrom); kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); } #endif #ifdef copy_mc_to_kernel /* * If architecture supports machine check exception handling, define the * #MC versions of copy_user_highpage and copy_highpage. They copy a memory * page with #MC in source page (@from) handled, and return the number * of bytes not copied if there was a #MC, otherwise 0 for success. 
*/ static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { unsigned long ret; char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); if (!ret) kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); if (ret) memory_failure_queue(page_to_pfn(from), 0); return ret; } static inline int copy_mc_highpage(struct page *to, struct page *from) { unsigned long ret; char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); if (!ret) kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); if (ret) memory_failure_queue(page_to_pfn(from), 0); return ret; } #else static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { copy_user_highpage(to, from, vaddr, vma); return 0; } static inline int copy_mc_highpage(struct page *to, struct page *from) { copy_highpage(to, from); return 0; } #endif static inline void memcpy_page(struct page *dst_page, size_t dst_off, struct page *src_page, size_t src_off, size_t len) { char *dst = kmap_local_page(dst_page); char *src = kmap_local_page(src_page); VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); memcpy(dst + dst_off, src + src_off, len); kunmap_local(src); kunmap_local(dst); } static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { char *addr = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, val, len); kunmap_local(addr); } static inline void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) { char *from = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to, from + offset, len); kunmap_local(from); } static inline void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) { char *to = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); flush_dcache_page(page); kunmap_local(to); } static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, 0, len); flush_dcache_page(page); kunmap_local(addr); } /** * memcpy_from_folio - Copy a range of bytes from a folio. * @to: The memory to copy to. * @folio: The folio to read from. * @offset: The first byte in the folio to read. * @len: The number of bytes to copy. */ static inline void memcpy_from_folio(char *to, struct folio *folio, size_t offset, size_t len) { VM_BUG_ON(offset + len > folio_size(folio)); do { const char *from = kmap_local_folio(folio, offset); size_t chunk = len; if (folio_test_highmem(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); kunmap_local(from); to += chunk; offset += chunk; len -= chunk; } while (len > 0); } /** * memcpy_to_folio - Copy a range of bytes to a folio. * @folio: The folio to write to. * @offset: The first byte in the folio to store to. * @from: The memory to copy from. * @len: The number of bytes to copy. 
*/ static inline void memcpy_to_folio(struct folio *folio, size_t offset, const char *from, size_t len) { VM_BUG_ON(offset + len > folio_size(folio)); do { char *to = kmap_local_folio(folio, offset); size_t chunk = len; if (folio_test_highmem(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); kunmap_local(to); from += chunk; offset += chunk; len -= chunk; } while (len > 0); flush_dcache_folio(folio); } /** * folio_zero_tail - Zero the tail of a folio. * @folio: The folio to zero. * @offset: The byte offset in the folio to start zeroing at. * @kaddr: The address the folio is currently mapped to. * * If you have already used kmap_local_folio() to map a folio, written * some data to it and now need to zero the end of the folio (and flush * the dcache), you can use this function. If you do not have the * folio kmapped (eg the folio has been partially populated by DMA), * use folio_zero_range() or folio_zero_segment() instead. * * Return: An address which can be passed to kunmap_local(). */ static inline __must_check void *folio_zero_tail(struct folio *folio, size_t offset, void *kaddr) { size_t len = folio_size(folio) - offset; if (folio_test_highmem(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { memset(kaddr, 0, max); kunmap_local(kaddr); len -= max; offset += max; max = PAGE_SIZE; kaddr = kmap_local_folio(folio, offset); } } memset(kaddr, 0, len); flush_dcache_folio(folio); return kaddr; } /** * folio_fill_tail - Copy some data to a folio and pad with zeroes. * @folio: The destination folio. * @offset: The offset into @folio at which to start copying. * @from: The data to copy. * @len: How many bytes of data to copy. * * This function is most useful for filesystems which support inline data. * When they want to copy data from the inode into the page cache, this * function does everything for them. It supports large folios even on * HIGHMEM configurations. */ static inline void folio_fill_tail(struct folio *folio, size_t offset, const char *from, size_t len) { char *to = kmap_local_folio(folio, offset); VM_BUG_ON(offset + len > folio_size(folio)); if (folio_test_highmem(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { memcpy(to, from, max); kunmap_local(to); len -= max; from += max; offset += max; max = PAGE_SIZE; to = kmap_local_folio(folio, offset); } } memcpy(to, from, len); to = folio_zero_tail(folio, offset + len, to + len); kunmap_local(to); } /** * memcpy_from_file_folio - Copy some bytes from a file folio. * @to: The destination buffer. * @folio: The folio to copy from. * @pos: The position in the file. * @len: The maximum number of bytes to copy. * * Copy up to @len bytes from this folio. This may be limited by PAGE_SIZE * if the folio comes from HIGHMEM, and by the size of the folio. * * Return: The number of bytes copied from the folio. */ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, loff_t pos, size_t len) { size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); if (folio_test_highmem(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else len = min(len, folio_size(folio) - offset); memcpy(to, from, len); kunmap_local(from); return len; } /** * folio_zero_segments() - Zero two byte ranges in a folio. * @folio: The folio to write to. * @start1: The first byte to zero. * @xend1: One more than the last byte in the first range. 
* @start2: The first byte to zero in the second range. * @xend2: One more than the last byte in the second range. */ static inline void folio_zero_segments(struct folio *folio, size_t start1, size_t xend1, size_t start2, size_t xend2) { zero_user_segments(&folio->page, start1, xend1, start2, xend2); } /** * folio_zero_segment() - Zero a byte range in a folio. * @folio: The folio to write to. * @start: The first byte to zero. * @xend: One more than the last byte to zero. */ static inline void folio_zero_segment(struct folio *folio, size_t start, size_t xend) { zero_user_segments(&folio->page, start, xend, 0, 0); } /** * folio_zero_range() - Zero a byte range in a folio. * @folio: The folio to write to. * @start: The first byte to zero. * @length: The number of bytes to zero. */ static inline void folio_zero_range(struct folio *folio, size_t start, size_t length) { zero_user_segments(&folio->page, start, start + length, 0, 0); } /** * folio_release_kmap - Unmap a folio and drop a refcount. * @folio: The folio to release. * @addr: The address previously returned by a call to kmap_local_folio(). * * It is common, e.g. in directory handling, to kmap a folio. This function * unmaps the folio and drops the refcount that was being held to keep the * folio alive while we accessed it. */ static inline void folio_release_kmap(struct folio *folio, void *addr) { kunmap_local(addr); folio_put(folio); } static inline void unmap_and_put_page(struct page *page, void *addr) { folio_release_kmap(page_folio(page), addr); } #endif /* _LINUX_HIGHMEM_H */
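To make the folio helpers above concrete, here is a minimal sketch of two hypothetical callers (the example_* names and their surrounding context are invented, not part of the kernel): the first drains a range of a file folio with memcpy_from_file_folio(), looping because that helper may copy fewer bytes than requested on HIGHMEM configurations; the second populates a folio from an inline buffer with folio_fill_tail(), which also zeroes the tail and flushes the dcache.

#include <linux/highmem.h>
#include <linux/pagemap.h>

/*
 * Copy @len bytes at file position @pos out of @folio into @buf.
 * The caller must ensure [pos, pos + len) lies within @folio.
 */
static void example_copy_out(char *buf, struct folio *folio,
			     loff_t pos, size_t len)
{
	while (len) {
		/* May return less than @len when the folio is in HIGHMEM. */
		size_t n = memcpy_from_file_folio(buf, folio, pos, len);

		buf += n;
		pos += n;
		len -= n;
	}
}

/* Fill @folio with @inline_len bytes of inline data and zero the rest. */
static void example_fill_from_inline(struct folio *folio,
				     const char *inline_data, size_t inline_len)
{
	folio_fill_tail(folio, 0, inline_data, inline_len);
	/* Typical follow-up in a read path once the folio contents are valid. */
	folio_mark_uptodate(folio);
}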
// SPDX-License-Identifier: GPL-2.0 /* * device.h - generic, centralized driver model * * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2008-2009 Novell Inc. * * See Documentation/driver-api/driver-model/ for more information. */ #ifndef _DEVICE_H_ #define _DEVICE_H_ #include <linux/dev_printk.h> #include <linux/energy_model.h> #include <linux/ioport.h> #include <linux/kobject.h> #include <linux/klist.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/compiler.h> #include <linux/types.h> #include <linux/mutex.h> #include <linux/pm.h> #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/gfp.h> #include <linux/device/bus.h> #include <linux/device/class.h> #include <linux/device/devres.h> #include <linux/device/driver.h> #include <linux/cleanup.h> #include <asm/device.h> struct device; struct device_private; struct device_driver; struct driver_private; struct module; struct class; struct subsys_private; struct device_node; struct fwnode_handle; struct iommu_group; struct dev_pin_info; struct dev_iommu; struct msi_device_data; /** * struct subsys_interface - interfaces to device functions * @name: name of the device function * @subsys: subsystem of the devices to attach to * @node: the list of functions registered at the subsystem * @add_dev: device hookup to device function handler * @remove_dev: device hookup to device function handler * * Simple interfaces attached to a subsystem. Multiple interfaces can * attach to a subsystem and its devices. Unlike drivers, they do not * exclusively claim or control devices. Interfaces usually represent * a specific functionality of a subsystem/class of devices.
*/ struct subsys_interface { const char *name; const struct bus_type *subsys; struct list_head node; int (*add_dev)(struct device *dev, struct subsys_interface *sif); void (*remove_dev)(struct device *dev, struct subsys_interface *sif); }; int subsys_interface_register(struct subsys_interface *sif); void subsys_interface_unregister(struct subsys_interface *sif); int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups); int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups); /* * The type of device, "struct device" is embedded in. A class * or bus can contain devices of different types * like "partitions" and "disks", "mouse" and "event". * This identifies the device type and carries type-specific * information, equivalent to the kobj_type of a kobject. * If "name" is specified, the uevent will contain it in * the DEVTYPE variable. */ struct device_type { const char *name; const struct attribute_group **groups; int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid); void (*release)(struct device *dev); const struct dev_pm_ops *pm; }; /** * struct device_attribute - Interface for exporting device attributes. * @attr: sysfs attribute definition. * @show: Show handler. * @store: Store handler. */ struct device_attribute { struct attribute attr; ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); ssize_t (*store)(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); }; /** * struct dev_ext_attribute - Exported device attribute with extra context. * @attr: Exported device attribute. * @var: Pointer to context. */ struct dev_ext_attribute { struct device_attribute attr; void *var; }; ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t device_show_int(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_int(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t device_show_string(struct device *dev, struct device_attribute *attr, char *buf); /** * DEVICE_ATTR - Define a device attribute. * @_name: Attribute name. * @_mode: File mode. * @_show: Show handler. Optional, but mandatory if attribute is readable. * @_store: Store handler. Optional, but mandatory if attribute is writable. * * Convenience macro for defining a struct device_attribute. * * For example, ``DEVICE_ATTR(foo, 0644, foo_show, foo_store);`` expands to: * * .. code-block:: c * * struct device_attribute dev_attr_foo = { * .attr = { .name = "foo", .mode = 0644 }, * .show = foo_show, * .store = foo_store, * }; */ #define DEVICE_ATTR(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) /** * DEVICE_ATTR_PREALLOC - Define a preallocated device attribute. * @_name: Attribute name. * @_mode: File mode. * @_show: Show handler. Optional, but mandatory if attribute is readable. * @_store: Store handler. Optional, but mandatory if attribute is writable. * * Like DEVICE_ATTR(), but ``SYSFS_PREALLOC`` is set on @_mode. 
*/ #define DEVICE_ATTR_PREALLOC(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ __ATTR_PREALLOC(_name, _mode, _show, _store) /** * DEVICE_ATTR_RW - Define a read-write device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR(), but @_mode is 0644, @_show is <_name>_show, * and @_store is <_name>_store. */ #define DEVICE_ATTR_RW(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RW(_name) /** * DEVICE_ATTR_ADMIN_RW - Define an admin-only read-write device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR_RW(), but @_mode is 0600. */ #define DEVICE_ATTR_ADMIN_RW(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RW_MODE(_name, 0600) /** * DEVICE_ATTR_RO - Define a readable device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR(), but @_mode is 0444 and @_show is <_name>_show. */ #define DEVICE_ATTR_RO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RO(_name) /** * DEVICE_ATTR_ADMIN_RO - Define an admin-only readable device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR_RO(), but @_mode is 0400. */ #define DEVICE_ATTR_ADMIN_RO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RO_MODE(_name, 0400) /** * DEVICE_ATTR_WO - Define an admin-only writable device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR(), but @_mode is 0200 and @_store is <_name>_store. */ #define DEVICE_ATTR_WO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_WO(_name) /** * DEVICE_ULONG_ATTR - Define a device attribute backed by an unsigned long. * @_name: Attribute name. * @_mode: File mode. * @_var: Identifier of unsigned long. * * Like DEVICE_ATTR(), but @_show and @_store are automatically provided * such that reads and writes to the attribute from userspace affect @_var. */ #define DEVICE_ULONG_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } /** * DEVICE_INT_ATTR - Define a device attribute backed by an int. * @_name: Attribute name. * @_mode: File mode. * @_var: Identifier of int. * * Like DEVICE_ULONG_ATTR(), but @_var is an int. */ #define DEVICE_INT_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_int, device_store_int), &(_var) } /** * DEVICE_BOOL_ATTR - Define a device attribute backed by a bool. * @_name: Attribute name. * @_mode: File mode. * @_var: Identifier of bool. * * Like DEVICE_ULONG_ATTR(), but @_var is a bool. */ #define DEVICE_BOOL_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_bool, device_store_bool), &(_var) } /** * DEVICE_STRING_ATTR_RO - Define a device attribute backed by a r/o string. * @_name: Attribute name. * @_mode: File mode. * @_var: Identifier of string. * * Like DEVICE_ULONG_ATTR(), but @_var is a string. Because the length of the * string allocation is unknown, the attribute must be read-only. 
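 *
 * For example (illustrative only; the names are made up), a driver exporting
 * a firmware revision string might write:
 *
 * .. code-block:: c
 *
 *	static char example_fw_rev[] = "unknown";
 *	static DEVICE_STRING_ATTR_RO(fw_rev, 0444, example_fw_rev);
 *
 * The resulting dev_attr_fw_rev is a struct dev_ext_attribute, so it is
 * registered like any other attribute via its embedded .attr member, for
 * instance with device_create_file(dev, &dev_attr_fw_rev.attr).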
*/ #define DEVICE_STRING_ATTR_RO(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, (_mode) & ~0222, device_show_string, NULL), (_var) } #define DEVICE_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) int device_create_file(struct device *device, const struct device_attribute *entry); void device_remove_file(struct device *dev, const struct device_attribute *attr); bool device_remove_file_self(struct device *dev, const struct device_attribute *attr); int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); /* allows to add/remove a custom action to devres stack */ int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); /** * devm_remove_action() - removes previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Removes instance of @action previously added by devm_add_action(). * Both action and data should match one of the existing entries. */ static inline void devm_remove_action(struct device *dev, void (*action)(void *), void *data) { WARN_ON(devm_remove_action_nowarn(dev, action, data)); } void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); #define devm_add_action(dev, action, data) \ __devm_add_action(dev, action, data, #action) static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data, const char *name) { int ret; ret = __devm_add_action(dev, action, data, name); if (ret) action(data); return ret; } #define devm_add_action_or_reset(dev, action, data) \ __devm_add_action_or_reset(dev, action, data, #action) /** * devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for * @type: Type to allocate per-cpu memory for * * Managed alloc_percpu. Per-cpu memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Pointer to allocated memory on success, NULL on failure. */ #define devm_alloc_percpu(dev, type) \ ((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), \ __alignof__(type))) void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, size_t align); void devm_free_percpu(struct device *dev, void __percpu *pdata); struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about * sg limitations. */ unsigned int max_segment_size; unsigned int min_align_mask; unsigned long segment_boundary_mask; }; /** * enum device_link_state - Device link states. * @DL_STATE_NONE: The presence of the drivers is not being tracked. * @DL_STATE_DORMANT: None of the supplier/consumer drivers is present. * @DL_STATE_AVAILABLE: The supplier driver is present, but the consumer is not. * @DL_STATE_CONSUMER_PROBE: The consumer is probing (supplier driver present). * @DL_STATE_ACTIVE: Both the supplier and consumer drivers are present. * @DL_STATE_SUPPLIER_UNBIND: The supplier driver is unbinding. */ enum device_link_state { DL_STATE_NONE = -1, DL_STATE_DORMANT = 0, DL_STATE_AVAILABLE, DL_STATE_CONSUMER_PROBE, DL_STATE_ACTIVE, DL_STATE_SUPPLIER_UNBIND, }; /* * Device link flags. 
* * STATELESS: The core will not remove this link automatically. * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind. * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds. * MANAGED: The core tracks presence of supplier/consumer drivers (internal). * SYNC_STATE_ONLY: Link only affects sync_state() behavior. * INFERRED: Inferred from data (eg: firmware) and not from driver actions. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) #define DL_FLAG_PM_RUNTIME BIT(2) #define DL_FLAG_RPM_ACTIVE BIT(3) #define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) #define DL_FLAG_AUTOPROBE_CONSUMER BIT(5) #define DL_FLAG_MANAGED BIT(6) #define DL_FLAG_SYNC_STATE_ONLY BIT(7) #define DL_FLAG_INFERRED BIT(8) #define DL_FLAG_CYCLE BIT(9) /** * enum dl_dev_state - Device driver presence tracking information. * @DL_DEV_NO_DRIVER: There is no driver attached to the device. * @DL_DEV_PROBING: A driver is probing. * @DL_DEV_DRIVER_BOUND: The driver has been bound to the device. * @DL_DEV_UNBINDING: The driver is unbinding from the device. */ enum dl_dev_state { DL_DEV_NO_DRIVER = 0, DL_DEV_PROBING, DL_DEV_DRIVER_BOUND, DL_DEV_UNBINDING, }; /** * enum device_removable - Whether the device is removable. The criteria for a * device to be classified as removable is determined by its subsystem or bus. * @DEVICE_REMOVABLE_NOT_SUPPORTED: This attribute is not supported for this * device (default). * @DEVICE_REMOVABLE_UNKNOWN: Device location is Unknown. * @DEVICE_FIXED: Device is not removable by the user. * @DEVICE_REMOVABLE: Device is removable by the user. */ enum device_removable { DEVICE_REMOVABLE_NOT_SUPPORTED = 0, /* must be 0 */ DEVICE_REMOVABLE_UNKNOWN, DEVICE_FIXED, DEVICE_REMOVABLE, }; /** * struct dev_links_info - Device data related to device links. * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @defer_sync: Hook to global list of devices that have deferred sync_state. * @status: Driver status information. */ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head defer_sync; enum dl_dev_state status; }; /** * struct dev_msi_info - Device data related to MSI * @domain: The MSI interrupt domain associated to the device * @data: Pointer to MSI device data */ struct dev_msi_info { #ifdef CONFIG_GENERIC_MSI_IRQ struct irq_domain *domain; struct msi_device_data *data; #endif }; /** * enum device_physical_location_panel - Describes which panel surface of the * system's housing the device connection point resides on. * @DEVICE_PANEL_TOP: Device connection point is on the top panel. * @DEVICE_PANEL_BOTTOM: Device connection point is on the bottom panel. * @DEVICE_PANEL_LEFT: Device connection point is on the left panel. * @DEVICE_PANEL_RIGHT: Device connection point is on the right panel. * @DEVICE_PANEL_FRONT: Device connection point is on the front panel. * @DEVICE_PANEL_BACK: Device connection point is on the back panel. * @DEVICE_PANEL_UNKNOWN: The panel with device connection point is unknown. 
*/ enum device_physical_location_panel { DEVICE_PANEL_TOP, DEVICE_PANEL_BOTTOM, DEVICE_PANEL_LEFT, DEVICE_PANEL_RIGHT, DEVICE_PANEL_FRONT, DEVICE_PANEL_BACK, DEVICE_PANEL_UNKNOWN, }; /** * enum device_physical_location_vertical_position - Describes vertical * position of the device connection point on the panel surface. * @DEVICE_VERT_POS_UPPER: Device connection point is at upper part of panel. * @DEVICE_VERT_POS_CENTER: Device connection point is at center part of panel. * @DEVICE_VERT_POS_LOWER: Device connection point is at lower part of panel. */ enum device_physical_location_vertical_position { DEVICE_VERT_POS_UPPER, DEVICE_VERT_POS_CENTER, DEVICE_VERT_POS_LOWER, }; /** * enum device_physical_location_horizontal_position - Describes horizontal * position of the device connection point on the panel surface. * @DEVICE_HORI_POS_LEFT: Device connection point is at left part of panel. * @DEVICE_HORI_POS_CENTER: Device connection point is at center part of panel. * @DEVICE_HORI_POS_RIGHT: Device connection point is at right part of panel. */ enum device_physical_location_horizontal_position { DEVICE_HORI_POS_LEFT, DEVICE_HORI_POS_CENTER, DEVICE_HORI_POS_RIGHT, }; /** * struct device_physical_location - Device data related to physical location * of the device connection point. * @panel: Panel surface of the system's housing that the device connection * point resides on. * @vertical_position: Vertical position of the device connection point within * the panel. * @horizontal_position: Horizontal position of the device connection point * within the panel. * @dock: Set if the device connection point resides in a docking station or * port replicator. * @lid: Set if this device connection point resides on the lid of laptop * system. */ struct device_physical_location { enum device_physical_location_panel panel; enum device_physical_location_vertical_position vertical_position; enum device_physical_location_horizontal_position horizontal_position; bool dock; bool lid; }; /** * struct device - The basic device structure * @parent: The device's "parent" device, the device to which it is attached. * In most cases, a parent device is some sort of bus or host * controller. If parent is NULL, the device, is a top-level device, * which is not usually what you want. * @p: Holds the private data of the driver core portions of the device. * See the comment of the struct device_private for detail. * @kobj: A top-level, abstract class from which other classes are derived. * @init_name: Initial name of the device. * @type: The type of device. * This identifies the device type and carries type-specific * information. * @mutex: Mutex to synchronize calls to its driver. * @bus: Type of bus device is on. * @driver: Which driver has allocated this * @platform_data: Platform data specific to the device. * Example: For devices on custom boards, as typical of embedded * and SOC based hardware, Linux often uses platform_data to point * to board-specific structures describing devices and how they * are wired. That can include what ports are available, chip * variants, which GPIO pins act in what additional roles, and so * on. This shrinks the "Board Support Packages" (BSPs) and * minimizes board-specific #ifdefs in drivers. * @driver_data: Private pointer for driver specific info. * @links: Links to suppliers and consumers of this device. * @power: For device power management. * See Documentation/driver-api/pm/devices.rst for details. 
* @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. * @msi: MSI related data * @numa_node: NUMA node this device is close to. * @dma_ops: DMA mapping operations for this device. * @dma_mask: Dma mask (if dma'ble device). * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all * hardware supports 64-bit addresses for consistent allocations * such descriptors. * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller * DMA limit than the device itself supports. * @dma_range_map: map for DMA memory ranges relative to that of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. * @dma_pools: Dma pools (if dma'ble device). * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. * @dma_io_tlb_pools: List of transient swiotlb memory pools. * @dma_io_tlb_lock: Protects changes to the list of active pools. * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. * @devt: For creating the sysfs "dev". * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. * @class: The class of the device. * @groups: Optional attribute groups. * @release: Callback to free the device after all references have * gone away. This should be set by the allocator of the * device (i.e. the bus driver that discovered the device). * @iommu_group: IOMMU group the device belongs to. * @iommu: Per device generic IOMMU runtime data * @physical_location: Describes physical location of the device connection * point in the system housing. * @removable: Whether the device can be removed from the system. This * should be set by the subsystem / bus driver that discovered * the device. * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). * @of_node_reused: Set if the device-tree node is shared with an ancestor * device. * @state_synced: The hardware state of this device has been synced to match * the software state of this device by calling the driver/bus * sync_state() callback. * @can_match: The device has matched with a driver at least once or it is in * a bus (like AMBA) which can't check for matching drivers until * other devices probe successfully. * @dma_coherent: this particular device is dma coherent, even if the * architecture supports non-coherent devices. * @dma_ops_bypass: If set to %true then the dma_ops are bypassed for the * streaming DMA operations (->map_* / ->unmap_* / ->sync_*), * and optionall (if the coherent mask is large enough) also * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. * @dma_iommu: Device is using default IOMMU implementation for DMA and * doesn't rely on dma_ops structure. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. 
The device structure contains the information * that the device model core needs to model the system. Most subsystems, * however, track additional information about the devices they host. As a * result, it is rare for devices to be represented by bare device structures; * instead, that structure, like kobject structures, is usually embedded within * a higher-level representation of the device. */ struct device { struct kobject kobj; struct device *parent; struct device_private *p; const char *init_name; /* initial name of the device */ const struct device_type *type; const struct bus_type *bus; /* type of bus device is on */ struct device_driver *driver; /* which driver has allocated this device */ void *platform_data; /* Platform specific data, device core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ struct mutex mutex; /* mutex to synchronize calls to * its driver. */ struct dev_links_info links; struct dev_pm_info power; struct dev_pm_domain *pm_domain; #ifdef CONFIG_ENERGY_MODEL struct em_perf_domain *em_pd; #endif #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif struct dev_msi_info msi; #ifdef CONFIG_ARCH_HAS_DMA_OPS const struct dma_map_ops *dma_ops; #endif u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as not all hardware supports 64 bit addresses for consistent allocations such descriptors. */ u64 bus_dma_limit; /* upstream dma constraint */ const struct bus_dma_region *dma_range_map; struct device_dma_parameters *dma_parms; struct list_head dma_pools; /* dma pools (if dma'ble) */ #ifdef CONFIG_DMA_DECLARE_COHERENT struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ #endif #ifdef CONFIG_DMA_CMA struct cma *cma_area; /* contiguous memory area for dma allocations */ #endif #ifdef CONFIG_SWIOTLB struct io_tlb_mem *dma_io_tlb_mem; #endif #ifdef CONFIG_SWIOTLB_DYNAMIC struct list_head dma_io_tlb_pools; spinlock_t dma_io_tlb_lock; bool dma_uses_io_tlb; #endif /* arch specific additions */ struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ struct fwnode_handle *fwnode; /* firmware device node */ #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ #endif dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ spinlock_t devres_lock; struct list_head devres_head; const struct class *class; const struct attribute_group **groups; /* optional groups */ void (*release)(struct device *dev); struct iommu_group *iommu_group; struct dev_iommu *iommu; struct device_physical_location *physical_location; enum device_removable removable; bool offline_disabled:1; bool offline:1; bool of_node_reused:1; bool state_synced:1; bool can_match:1; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) bool dma_coherent:1; #endif #ifdef CONFIG_DMA_OPS_BYPASS bool dma_ops_bypass : 1; #endif #ifdef CONFIG_DMA_NEED_SYNC bool dma_skip_sync:1; #endif #ifdef CONFIG_IOMMU_DMA bool dma_iommu:1; #endif }; /** * struct device_link - Device link representation. * @supplier: The device on the supplier end of the link. * @s_node: Hook to the supplier device's list of links to consumers. * @consumer: The device on the consumer end of the link. * @c_node: Hook to the consumer device's list of links to suppliers. 
* @link_dev: device used to expose link details in sysfs * @status: The state of the link (with respect to the presence of drivers). * @flags: Link flags. * @rpm_active: Whether or not the consumer device is runtime-PM-active. * @kref: Count repeated addition of the same link. * @rm_work: Work structure used for removing the link. * @supplier_preactivated: Supplier has been made active before consumer probe. */ struct device_link { struct device *supplier; struct list_head s_node; struct device *consumer; struct list_head c_node; struct device link_dev; enum device_link_state status; u32 flags; refcount_t rpm_active; struct kref kref; struct work_struct rm_work; bool supplier_preactivated; /* Owned by consumer probe. */ }; #define kobj_to_dev(__kobj) container_of_const(__kobj, struct device, kobj) /** * device_iommu_mapped - Returns true when the device DMA is translated * by an IOMMU * @dev: Device to perform the check on */ static inline bool device_iommu_mapped(struct device *dev) { return (dev->iommu_group != NULL); } /* Get the wakeup routines, which depend on struct device */ #include <linux/pm_wakeup.h> /** * dev_name - Return a device's name. * @dev: Device with name to get. * Return: The kobject name of the device, or its initial name if unavailable. */ static inline const char *dev_name(const struct device *dev) { /* Use the init name until the kobject becomes available */ if (dev->init_name) return dev->init_name; return kobject_name(&dev->kobj); } /** * dev_bus_name - Return a device's bus/class name, if at all possible * @dev: struct device to get the bus/class name of * * Will return the name of the bus/class the device is attached to. If it is * not attached to a bus/class, an empty string will be returned. */ static inline const char *dev_bus_name(const struct device *dev) { return dev->bus ? dev->bus->name : (dev->class ? dev->class->name : ""); } __printf(2, 3) int dev_set_name(struct device *dev, const char *name, ...); #ifdef CONFIG_NUMA static inline int dev_to_node(struct device *dev) { return dev->numa_node; } static inline void set_dev_node(struct device *dev, int node) { dev->numa_node = node; } #else static inline int dev_to_node(struct device *dev) { return NUMA_NO_NODE; } static inline void set_dev_node(struct device *dev, int node) { } #endif static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) { #ifdef CONFIG_GENERIC_MSI_IRQ return dev->msi.domain; #else return NULL; #endif } static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) { #ifdef CONFIG_GENERIC_MSI_IRQ dev->msi.domain = d; #endif } static inline void *dev_get_drvdata(const struct device *dev) { return dev->driver_data; } static inline void dev_set_drvdata(struct device *dev, void *data) { dev->driver_data = data; } static inline struct pm_subsys_data *dev_to_psd(struct device *dev) { return dev ? 
dev->power.subsys_data : NULL; } static inline unsigned int dev_get_uevent_suppress(const struct device *dev) { return dev->kobj.uevent_suppress; } static inline void dev_set_uevent_suppress(struct device *dev, int val) { dev->kobj.uevent_suppress = val; } static inline int device_is_registered(struct device *dev) { return dev->kobj.state_in_sysfs; } static inline void device_enable_async_suspend(struct device *dev) { if (!dev->power.is_prepared) dev->power.async_suspend = true; } static inline void device_disable_async_suspend(struct device *dev) { if (!dev->power.is_prepared) dev->power.async_suspend = false; } static inline bool device_async_suspend_enabled(struct device *dev) { return !!dev->power.async_suspend; } static inline bool device_pm_not_required(struct device *dev) { return dev->power.no_pm; } static inline void device_set_pm_not_required(struct device *dev) { dev->power.no_pm = true; } static inline void dev_pm_syscore_device(struct device *dev, bool val) { #ifdef CONFIG_PM_SLEEP dev->power.syscore = val; #endif } static inline void dev_pm_set_driver_flags(struct device *dev, u32 flags) { dev->power.driver_flags = flags; } static inline bool dev_pm_test_driver_flags(struct device *dev, u32 flags) { return !!(dev->power.driver_flags & flags); } static inline bool dev_pm_smart_suspend(struct device *dev) { #ifdef CONFIG_PM_SLEEP return dev->power.smart_suspend; #else return false; #endif } static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); } static inline int device_lock_interruptible(struct device *dev) { return mutex_lock_interruptible(&dev->mutex); } static inline int device_trylock(struct device *dev) { return mutex_trylock(&dev->mutex); } static inline void device_unlock(struct device *dev) { mutex_unlock(&dev->mutex); } DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T)) static inline void device_lock_assert(struct device *dev) { lockdep_assert_held(&dev->mutex); } static inline bool dev_has_sync_state(struct device *dev) { if (!dev) return false; if (dev->driver && dev->driver->sync_state) return true; if (dev->bus && dev->bus->sync_state) return true; return false; } static inline void dev_set_removable(struct device *dev, enum device_removable removable) { dev->removable = removable; } static inline bool dev_is_removable(struct device *dev) { return dev->removable == DEVICE_REMOVABLE; } static inline bool dev_removable_is_valid(struct device *dev) { return dev->removable != DEVICE_REMOVABLE_NOT_SUPPORTED; } /* * High level routines for use by the bus drivers */ int __must_check device_register(struct device *dev); void device_unregister(struct device *dev); void device_initialize(struct device *dev); int __must_check device_add(struct device *dev); void device_del(struct device *dev); DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) int device_for_each_child(struct device *parent, void *data, device_iter_t fn); int device_for_each_child_reverse(struct device *parent, void *data, device_iter_t fn); int device_for_each_child_reverse_from(struct device *parent, struct device *from, void *data, device_iter_t fn); struct device *device_find_child(struct device *parent, const void *data, device_match_t match); /** * device_find_child_by_name - device iterator for locating a child device. * @parent: parent struct device * @name: name of the child device * * This is similar to the device_find_child() function above, but it * returns a reference to a device that has the name @name. 
* * NOTE: you will need to drop the reference with put_device() after use. */ static inline struct device *device_find_child_by_name(struct device *parent, const char *name) { return device_find_child(parent, name, device_match_name); } /** * device_find_any_child - device iterator for locating a child device, if any. * @parent: parent struct device * * This is similar to the device_find_child() function above, but it * returns a reference to a child device, if any. * * NOTE: you will need to drop the reference with put_device() after use. */ static inline struct device *device_find_any_child(struct device *parent) { return device_find_child(parent, NULL, device_match_any); } int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); static inline bool device_supports_offline(struct device *dev) { return dev->bus && dev->bus->offline && dev->bus->online; } #define __device_lock_set_class(dev, name, key) \ do { \ struct device *__d2 __maybe_unused = dev; \ lock_set_class(&__d2->mutex.dep_map, name, key, 0, _THIS_IP_); \ } while (0) /** * device_lock_set_class - Specify a temporary lock class while a device * is attached to a driver * @dev: device to modify * @key: lock class key data * * This must be called with the device_lock() already held, for example * from driver ->probe(). Take care to only override the default * lockdep_no_validate class. */ #ifdef CONFIG_LOCKDEP #define device_lock_set_class(dev, key) \ do { \ struct device *__d = dev; \ dev_WARN_ONCE(__d, !lockdep_match_class(&__d->mutex, \ &__lockdep_no_validate__), \ "overriding existing custom lock class\n"); \ __device_lock_set_class(__d, #key, key); \ } while (0) #else #define device_lock_set_class(dev, key) __device_lock_set_class(dev, #key, key) #endif /** * device_lock_reset_class - Return a device to the default lockdep novalidate state * @dev: device to modify * * This must be called with the device_lock() already held, for example * from driver ->remove(). 
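 *
 * Paired with device_lock_set_class(), a driver might use it like this
 * (illustrative only; example_lock_key is a made-up identifier):
 *
 * .. code-block:: c
 *
 *	static struct lock_class_key example_lock_key;
 *
 *	// in ->probe(), where the driver core already holds the device lock
 *	device_lock_set_class(dev, &example_lock_key);
 *
 *	// in ->remove(), again with the device lock held
 *	device_lock_reset_class(dev);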
*/ #define device_lock_reset_class(dev) \ do { \ struct device *__d __maybe_unused = dev; \ lock_set_novalidate_class(&__d->mutex.dep_map, "&dev->mutex", \ _THIS_IP_); \ } while (0) void lock_device_hotplug(void); void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); int device_offline(struct device *dev); int device_online(struct device *dev); void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void device_set_node(struct device *dev, struct fwnode_handle *fwnode); int device_add_of_node(struct device *dev, struct device_node *of_node); void device_remove_of_node(struct device *dev); void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); static inline struct device_node *dev_of_node(struct device *dev) { if (!IS_ENABLED(CONFIG_OF) || !dev) return NULL; return dev->of_node; } static inline int dev_num_vf(struct device *dev) { if (dev->bus && dev->bus->num_vf) return dev->bus->num_vf(dev); return 0; } /* * Root device objects for grouping under /sys/devices */ struct device *__root_device_register(const char *name, struct module *owner); /* This is a macro to avoid include problems with THIS_MODULE */ #define root_device_register(name) \ __root_device_register(name, THIS_MODULE) void root_device_unregister(struct device *root); static inline void *dev_get_platdata(const struct device *dev) { return dev->platform_data; } /* * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ int __must_check device_driver_attach(const struct device_driver *drv, struct device *dev); int __must_check device_bind_driver(struct device *dev); void device_release_driver(struct device *dev); int __must_check device_attach(struct device *dev); int __must_check driver_attach(const struct device_driver *drv); void device_initial_probe(struct device *dev); int __must_check device_reprobe(struct device *dev); bool device_is_bound(struct device *dev); /* * Easy functions for dynamically creating devices on the fly */ __printf(5, 6) struct device * device_create(const struct class *cls, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...); __printf(6, 7) struct device * device_create_with_groups(const struct class *cls, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); void device_destroy(const struct class *cls, dev_t devt); int __must_check device_add_groups(struct device *dev, const struct attribute_group **groups); void device_remove_groups(struct device *dev, const struct attribute_group **groups); static inline int __must_check device_add_group(struct device *dev, const struct attribute_group *grp) { const struct attribute_group *groups[] = { grp, NULL }; return device_add_groups(dev, groups); } static inline void device_remove_group(struct device *dev, const struct attribute_group *grp) { const struct attribute_group *groups[] = { grp, NULL }; device_remove_groups(dev, groups); } int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); /* * get_device - atomically increment the reference count for the device. 
* */ struct device *get_device(struct device *dev); void put_device(struct device *dev); DEFINE_FREE(put_device, struct device *, if (_T) put_device(_T)) bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS int devtmpfs_mount(void); #else static inline int devtmpfs_mount(void) { return 0; } #endif /* drivers/base/power/shutdown.c */ void device_shutdown(void); /* debugging and troubleshooting/diagnostic helpers. */ const char *dev_driver_string(const struct device *dev); /* Device links interface. */ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags); void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_CHARDEV_MAJOR(major) \ MODULE_ALIAS("char-major-" __stringify(major) "-*") #endif /* _DEVICE_H_ */
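Several of the interfaces documented above compose naturally in a driver's setup path. The sketch below is a hypothetical illustration (the example_* names are invented and error handling is trimmed): it publishes a read-only sysfs attribute defined with DEVICE_ATTR_RO(), keeps per-device state via dev_set_drvdata()/dev_get_drvdata(), and registers a devres action with devm_add_action_or_reset() so the attribute is removed on unbind, or immediately if registration fails. A production driver would more often declare the attribute in an attribute group supplied at device creation; plain device_create_file() is used here only to exercise the calls documented above.

#include <linux/device.h>
#include <linux/sysfs.h>

struct example_priv {
	int counter;
};

/* "counter" show handler; DEVICE_ATTR_RO() below wires it up by name. */
static ssize_t counter_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct example_priv *priv = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%d\n", priv->counter);
}
static DEVICE_ATTR_RO(counter);

/* devres action: runs on driver detach, or straight away on setup failure. */
static void example_remove_counter(void *data)
{
	struct device *dev = data;

	device_remove_file(dev, &dev_attr_counter);
}

/* Called from a hypothetical ->probe() with @priv already allocated. */
static int example_setup(struct device *dev, struct example_priv *priv)
{
	int ret;

	dev_set_drvdata(dev, priv);

	ret = device_create_file(dev, &dev_attr_counter);
	if (ret)
		return ret;

	return devm_add_action_or_reset(dev, example_remove_counter, dev);
}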
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_cgroup.c Control Group Classifier * * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> #include <net/cls_cgroup.h> #include <net/tc_wrapper.h> struct cls_cgroup_head { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; struct rcu_work rwork; }; TC_INDIRECT_SCOPE int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid = task_get_classid(skb); if (unlikely(!head)) return -1; if (!classid) return -1; if (!tcf_em_tree_match(skb, &head->ematches, NULL)) return -1; res->classid = classid; res->class = 0; return tcf_exts_exec(skb, &head->exts, res); } static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle) { return NULL; } static int cls_cgroup_init(struct tcf_proto *tp) { return 0; } static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; static void __cls_cgroup_destroy(struct cls_cgroup_head *head) { tcf_exts_destroy(&head->exts); tcf_em_tree_destroy(&head->ematches); tcf_exts_put_net(&head->exts); kfree(head); } static void cls_cgroup_destroy_work(struct work_struct *work) { struct cls_cgroup_head *head = container_of(to_rcu_work(work), struct cls_cgroup_head, rwork); rtnl_lock(); __cls_cgroup_destroy(head); rtnl_unlock(); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct cls_cgroup_head *new; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; if (!head && !handle) return -EINVAL; if (head && handle != head->handle) return -ENOENT; new = kzalloc(sizeof(*head), GFP_KERNEL); if (!new) return -ENOBUFS; err = tcf_exts_init(&new->exts, net, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); if (err < 0) goto errout; new->handle = handle; new->tp = tp; err = nla_parse_nested_deprecated(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], cgroup_policy, NULL); if (err < 0) goto errout; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags, extack); if (err < 0) goto errout; err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches); if (err < 0) goto errout; rcu_assign_pointer(tp->root, new); if (head) { tcf_exts_get_net(&head->exts); tcf_queue_work(&head->rwork, cls_cgroup_destroy_work);
} return 0; errout: tcf_exts_destroy(&new->exts); kfree(new); return err; } static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ if (head) { if (tcf_exts_get_net(&head->exts)) tcf_queue_work(&head->rwork, cls_cgroup_destroy_work); else __cls_cgroup_destroy(head); } } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); if (arg->count < arg->skip) goto skip; if (!head) return; if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; } skip: arg->count++; } static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct nlattr *nest; t->tcm_handle = head->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (tcf_exts_dump(skb, &head->exts) < 0 || tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &head->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { .kind = "cgroup", .init = cls_cgroup_init, .change = cls_cgroup_change, .classify = cls_cgroup_classify, .destroy = cls_cgroup_destroy, .get = cls_cgroup_get, .delete = cls_cgroup_delete, .walk = cls_cgroup_walk, .dump = cls_cgroup_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_CLS("cgroup"); static int __init init_cgroup_cls(void) { return register_tcf_proto_ops(&cls_cgroup_ops); } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); } module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL");
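The update path above is a textbook RCU publish/retire sequence: cls_cgroup_classify() reads tp->root under the softirq-side RCU read lock with rcu_dereference_bh(), cls_cgroup_change() publishes a fully initialized replacement head with rcu_assign_pointer(), and the old head is only destroyed after a grace period via tcf_queue_work(), because tcf_exts_destroy() must run in a sleepable context under RTNL. The sketch below restates the same pattern with generic primitives; the example_* names and the use of queue_rcu_work() on system_wq in place of the tc-specific tcf_queue_work() are illustrative assumptions, not part of this file.

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical object managed the same way as cls_cgroup_head. */
struct example_cfg {
	int value;
	struct rcu_work rwork;
};

static DEFINE_MUTEX(example_lock);		/* serializes updaters */
static struct example_cfg __rcu *example_cfg_ptr;

/* Reader: cheap and lockless, analogous to cls_cgroup_classify(). */
static int example_read(void)
{
	struct example_cfg *cfg;
	int value = -1;

	rcu_read_lock();
	cfg = rcu_dereference(example_cfg_ptr);
	if (cfg)
		value = cfg->value;
	rcu_read_unlock();
	return value;
}

/* Deferred destructor, run only after a grace period has elapsed. */
static void example_free_work(struct work_struct *work)
{
	struct example_cfg *cfg = container_of(to_rcu_work(work),
					       struct example_cfg, rwork);

	kfree(cfg);
}

/* Updater: publish a new object, then retire the old one. */
static int example_update(int value)
{
	struct example_cfg *new, *old;

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	new->value = value;

	mutex_lock(&example_lock);
	old = rcu_dereference_protected(example_cfg_ptr,
					lockdep_is_held(&example_lock));
	rcu_assign_pointer(example_cfg_ptr, new);
	mutex_unlock(&example_lock);

	if (old) {
		INIT_RCU_WORK(&old->rwork, example_free_work);
		queue_rcu_work(system_wq, &old->rwork);
	}
	return 0;
}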
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_MQ_H #define BLK_MQ_H #include <linux/blkdev.h> #include <linux/sbitmap.h> #include <linux/lockdep.h> #include <linux/scatterlist.h> #include <linux/prefetch.h> #include <linux/srcu.h> #include <linux/rw_hint.h> struct blk_mq_tags; struct blk_flush_queue; #define BLKDEV_MIN_RQ 4 #define BLKDEV_DEFAULT_RQ 128 enum rq_end_io_ret { RQ_END_IO_NONE, RQ_END_IO_FREE, }; typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); /* * request flags */ typedef __u32 __bitwise req_flags_t; /* Keep rqf_name[] in sync with the definitions below */ enum rqf_flags { /* drive already may have started this one */ __RQF_STARTED, /* request for flush sequence */ __RQF_FLUSH_SEQ, /* merge of different types, fail separately */ __RQF_MIXED_MERGE, /* don't call prep for this one */ __RQF_DONTPREP, /* use hctx->sched_tags */ __RQF_SCHED_TAGS, /* use an I/O scheduler for this request */ __RQF_USE_SCHED, /* vaguely specified driver internal error. Ignored by block layer */ __RQF_FAILED, /* don't warn about errors */ __RQF_QUIET, /* account into disk and partition IO statistics */ __RQF_IO_STAT, /* runtime pm request */ __RQF_PM, /* on IO scheduler merge hash */ __RQF_HASHED, /* track IO completion time */ __RQF_STATS, /* Look at ->special_vec for the actual data payload instead of the bio chain. */ __RQF_SPECIAL_PAYLOAD, /* request completion needs to be signaled to zone write plugging.
*/ __RQF_ZONE_WRITE_PLUGGING, /* ->timeout has been called, don't expire again */ __RQF_TIMED_OUT, __RQF_RESV, __RQF_BITS }; #define RQF_STARTED ((__force req_flags_t)(1 << __RQF_STARTED)) #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << __RQF_FLUSH_SEQ)) #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << __RQF_MIXED_MERGE)) #define RQF_DONTPREP ((__force req_flags_t)(1 << __RQF_DONTPREP)) #define RQF_SCHED_TAGS ((__force req_flags_t)(1 << __RQF_SCHED_TAGS)) #define RQF_USE_SCHED ((__force req_flags_t)(1 << __RQF_USE_SCHED)) #define RQF_FAILED ((__force req_flags_t)(1 << __RQF_FAILED)) #define RQF_QUIET ((__force req_flags_t)(1 << __RQF_QUIET)) #define RQF_IO_STAT ((__force req_flags_t)(1 << __RQF_IO_STAT)) #define RQF_PM ((__force req_flags_t)(1 << __RQF_PM)) #define RQF_HASHED ((__force req_flags_t)(1 << __RQF_HASHED)) #define RQF_STATS ((__force req_flags_t)(1 << __RQF_STATS)) #define RQF_SPECIAL_PAYLOAD \ ((__force req_flags_t)(1 << __RQF_SPECIAL_PAYLOAD)) #define RQF_ZONE_WRITE_PLUGGING \ ((__force req_flags_t)(1 << __RQF_ZONE_WRITE_PLUGGING)) #define RQF_TIMED_OUT ((__force req_flags_t)(1 << __RQF_TIMED_OUT)) #define RQF_RESV ((__force req_flags_t)(1 << __RQF_RESV)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) enum mq_rq_state { MQ_RQ_IDLE = 0, MQ_RQ_IN_FLIGHT = 1, MQ_RQ_COMPLETE = 2, }; /* * Try to put the fields that are referenced together in the same cacheline. * * If you modify this structure, make sure to update blk_rq_init() and * especially blk_mq_rq_ctx_init() to take care of the added fields. */ struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; struct blk_mq_hw_ctx *mq_hctx; blk_opf_t cmd_flags; /* op and common flags */ req_flags_t rq_flags; int tag; int internal_tag; unsigned int timeout; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ sector_t __sector; /* sector cursor */ struct bio *bio; struct bio *biotail; union { struct list_head queuelist; struct request *rq_next; }; struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ u64 alloc_time_ns; #endif /* Time that this request was allocated for this IO. */ u64 start_time_ns; /* Time that I/O was submitted to the device. */ u64 io_start_time_ns; #ifdef CONFIG_BLK_WBT unsigned short wbt_flags; #endif /* * rq sectors used for blk stats. It has the same value * with blk_rq_sectors(rq), except that it never be zeroed * by completion. */ unsigned short stats_sectors; /* * Number of scatter-gather DMA addr+len pairs after * physical address coalescing is performed. */ unsigned short nr_phys_segments; unsigned short nr_integrity_segments; #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *crypt_ctx; struct blk_crypto_keyslot *crypt_keyslot; #endif enum mq_rq_state state; atomic_t ref; unsigned long deadline; /* * The hash is used inside the scheduler, and killed once the * request reaches the dispatch list. The ipi_list is only used * to queue the request for softirq completion, which is long * after the request has been unhashed (and even removed from * the dispatch list). */ union { struct hlist_node hash; /* merge hash */ struct llist_node ipi_list; }; /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. special_vec must * only be used if RQF_SPECIAL_PAYLOAD is set, and those cannot be * insert into an IO scheduler. 
*/ union { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; }; /* * Three pointers are available for the IO schedulers, if they need * more they have to dynamically allocate it. */ struct { struct io_cq *icq; void *priv[2]; } elv; struct { unsigned int seq; rq_end_io_fn *saved_end_io; } flush; u64 fifo_time; /* * completion callback. */ rq_end_io_fn *end_io; void *end_io_data; }; static inline enum req_op req_op(const struct request *req) { return req->cmd_flags & REQ_OP_MASK; } static inline bool blk_rq_is_passthrough(struct request *rq) { return blk_op_is_passthrough(rq->cmd_flags); } static inline unsigned short req_get_ioprio(struct request *req) { if (req->bio) return req->bio->bi_ioprio; return 0; } #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) static inline int rq_list_empty(const struct rq_list *rl) { return rl->head == NULL; } static inline void rq_list_init(struct rq_list *rl) { rl->head = NULL; rl->tail = NULL; } static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq) { rq->rq_next = NULL; if (rl->tail) rl->tail->rq_next = rq; else rl->head = rq; rl->tail = rq; } static inline void rq_list_add_head(struct rq_list *rl, struct request *rq) { rq->rq_next = rl->head; rl->head = rq; if (!rl->tail) rl->tail = rq; } static inline struct request *rq_list_pop(struct rq_list *rl) { struct request *rq = rl->head; if (rq) { rl->head = rl->head->rq_next; if (!rl->head) rl->tail = NULL; rq->rq_next = NULL; } return rq; } static inline struct request *rq_list_peek(struct rq_list *rl) { return rl->head; } #define rq_list_for_each(rl, pos) \ for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next) #define rq_list_for_each_safe(rl, pos, nxt) \ for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \ pos; pos = nxt, nxt = pos ? pos->rq_next : NULL) /** * enum blk_eh_timer_return - How the timeout handler should proceed * @BLK_EH_DONE: The block driver completed the command or will complete it at * a later time. * @BLK_EH_RESET_TIMER: Reset the request timer and continue waiting for the * request to complete. */ enum blk_eh_timer_return { BLK_EH_DONE, BLK_EH_RESET_TIMER, }; /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device */ struct blk_mq_hw_ctx { struct { /** @lock: Protects the dispatch list. */ spinlock_t lock; /** * @dispatch: Used for requests that are ready to be * dispatched to the hardware but for some reason (e.g. lack of * resources) could not be sent to the hardware. As soon as the * driver can send new requests, requests at this list will * be sent first for a fairer dispatch. */ struct list_head dispatch; /** * @state: BLK_MQ_S_* flags. Defines the state of the hw * queue (active, scheduled to restart, stopped). */ unsigned long state; } ____cacheline_aligned_in_smp; /** * @run_work: Used for scheduling a hardware queue run at a later time. */ struct delayed_work run_work; /** @cpumask: Map of available CPUs where this hctx can run. */ cpumask_var_t cpumask; /** * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU * selection from @cpumask. */ int next_cpu; /** * @next_cpu_batch: Counter of how many works left in the batch before * changing to the next CPU. */ int next_cpu_batch; /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */ unsigned long flags; /** * @sched_data: Pointer owned by the IO scheduler attached to a request * queue. 
It's up to the IO scheduler how to use this pointer. */ void *sched_data; /** * @queue: Pointer to the request queue that owns this hardware context. */ struct request_queue *queue; /** @fq: Queue of requests that need to perform a flush operation. */ struct blk_flush_queue *fq; /** * @driver_data: Pointer to data owned by the block driver that created * this hctx */ void *driver_data; /** * @ctx_map: Bitmap for each software queue. If bit is on, there is a * pending request in that software queue. */ struct sbitmap ctx_map; /** * @dispatch_from: Software queue to be used when no scheduler was * selected. */ struct blk_mq_ctx *dispatch_from; /** * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to * decide if the hw_queue is busy using Exponential Weighted Moving * Average algorithm. */ unsigned int dispatch_busy; /** @type: HCTX_TYPE_* flags. Type of hardware queue. */ unsigned short type; /** @nr_ctx: Number of software queues. */ unsigned short nr_ctx; /** @ctxs: Array of software queues. */ struct blk_mq_ctx **ctxs; /** @dispatch_wait_lock: Lock for dispatch_wait queue. */ spinlock_t dispatch_wait_lock; /** * @dispatch_wait: Waitqueue to put requests when there is no tag * available at the moment, to wait for another try in the future. */ wait_queue_entry_t dispatch_wait; /** * @wait_index: Index of next available dispatch_wait queue to insert * requests. */ atomic_t wait_index; /** * @tags: Tags owned by the block driver. A tag at this set is only * assigned when a request is dispatched from a hardware queue. */ struct blk_mq_tags *tags; /** * @sched_tags: Tags owned by I/O scheduler. If there is an I/O * scheduler associated with a request queue, a tag is assigned when * that request is allocated. Else, this member is not used. */ struct blk_mq_tags *sched_tags; /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; /** * @nr_active: Number of active requests. Only used when a tag set is * shared across request queues. */ atomic_t nr_active; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; /** @cpuhp_dead: List to store request if some CPU die. */ struct hlist_node cpuhp_dead; /** @kobj: Kernel object for sysfs. */ struct kobject kobj; #ifdef CONFIG_BLK_DEBUG_FS /** * @debugfs_dir: debugfs directory for this hardware queue. Named * as cpu<cpu_number>. */ struct dentry *debugfs_dir; /** @sched_debugfs_dir: debugfs directory for the scheduler. */ struct dentry *sched_debugfs_dir; #endif /** * @hctx_list: if this hctx is not in use, this is an entry in * q->unused_hctx_list. */ struct list_head hctx_list; }; /** * struct blk_mq_queue_map - Map software queues to hardware queues * @mq_map: CPU ID to hardware queue index map. This is an array * with nr_cpu_ids elements. Each element has a value in the range * [@queue_offset, @queue_offset + @nr_queues). * @nr_queues: Number of hardware queues to map CPU IDs onto. * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe * driver to map each hardware queue type (enum hctx_type) onto a distinct * set of hardware queues. */ struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; unsigned int queue_offset; }; /** * enum hctx_type - Type of hardware queue * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for. * @HCTX_TYPE_READ: Just for READ I/O. * @HCTX_TYPE_POLL: Polled I/O of any kind. * @HCTX_MAX_TYPES: Number of types of hctx. 
*/ enum hctx_type { HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL, HCTX_MAX_TYPES, }; /** * struct blk_mq_tag_set - tag set that can be shared between request queues * @ops: Pointers to functions that implement block driver behavior. * @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the * same size, and it's perfectly legal to share maps between * types. * @nr_maps: Number of elements in the @map array. A number in the range * [1, HCTX_MAX_TYPES]. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag * allocations. * @cmd_size: Number of additional bytes to allocate per request. The block * driver owns these additional bytes. * @numa_node: NUMA node the storage adapter has been connected to. * @timeout: Request processing timeout in jiffies. * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. * @shared_tags: * Shared set of tags. Has @nr_hw_queues elements. If set, * shared by all @tags. * @tag_list_lock: Serializes tag_list accesses. * @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { const struct blk_mq_ops *ops; struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; unsigned int cmd_size; int numa_node; unsigned int timeout; unsigned int flags; void *driver_data; struct blk_mq_tags **tags; struct blk_mq_tags *shared_tags; struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; }; /** * struct blk_mq_queue_data - Data about a request inserted in a queue * * @rq: Request pointer. * @last: If it is the last request in the queue. */ struct blk_mq_queue_data { struct request *rq; bool last; }; typedef bool (busy_tag_iter_fn)(struct request *, void *); /** * struct blk_mq_ops - Callback functions that implements block driver * behaviour. */ struct blk_mq_ops { /** * @queue_rq: Queue a new request from block IO. */ blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); /** * @commit_rqs: If a driver uses bd->last to judge when to submit * requests to hardware, it must define this function. In case of errors * that make us stop issuing further requests, this hook serves the * purpose of kicking the hardware (which the last request otherwise * would have done). */ void (*commit_rqs)(struct blk_mq_hw_ctx *); /** * @queue_rqs: Queue a list of new requests. Driver is guaranteed * that each request belongs to the same queue. If the driver doesn't * empty the @rqlist completely, then the rest will be queued * individually by the block layer upon return. */ void (*queue_rqs)(struct rq_list *rqlist); /** * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. 
*/ int (*get_budget)(struct request_queue *); /** * @put_budget: Release the reserved budget. */ void (*put_budget)(struct request_queue *, int); /** * @set_rq_budget_token: store rq's budget token */ void (*set_rq_budget_token)(struct request *, int); /** * @get_rq_budget_token: retrieve rq's budget token */ int (*get_rq_budget_token)(struct request *); /** * @timeout: Called on request timeout. */ enum blk_eh_timer_return (*timeout)(struct request *); /** * @poll: Called to poll for completion of a specific tag. */ int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); /** * @complete: Mark the request as complete. */ void (*complete)(struct request *); /** * @init_hctx: Called when the block layer side of a hardware queue has * been set up, allowing the driver to allocate/init matching * structures. */ int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int); /** * @exit_hctx: Ditto for exit/teardown. */ void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); /** * @init_request: Called for every command allocated by the block layer * to allow the driver to set up driver specific data. * * Tag greater than or equal to queue_depth is for setting up * flush request. */ int (*init_request)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); /** * @exit_request: Ditto for exit/teardown. */ void (*exit_request)(struct blk_mq_tag_set *set, struct request *, unsigned int); /** * @cleanup_rq: Called before freeing one request which isn't completed * yet, and usually for freeing the driver private data. */ void (*cleanup_rq)(struct request *); /** * @busy: If set, returns whether or not this queue currently is busy. */ bool (*busy)(struct request_queue *); /** * @map_queues: This allows drivers specify their own queue mapping by * overriding the setup-time function that builds the mq_map. */ void (*map_queues)(struct blk_mq_tag_set *set); #ifdef CONFIG_BLK_DEBUG_FS /** * @show_rq: Used by the debugfs implementation to show driver-specific * information about a request. */ void (*show_rq)(struct seq_file *m, struct request *rq); #endif }; /* Keep hctx_flag_name[] in sync with the definitions below */ enum { BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: */ BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 4, /* * Alloc tags on a round-robin base instead of the first available one. */ BLK_MQ_F_TAG_RR = 1 << 5, /* * Select 'none' during queue registration in case of a single hwq * or shared hwqs instead of 'mq-deadline'. 
*/ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6, BLK_MQ_F_MAX = 1 << 7, }; #define BLK_MQ_MAX_DEPTH (10240) #define BLK_MQ_NO_HCTX_IDX (-1U) enum { /* Keep hctx_state_name[] in sync with the definitions below */ BLK_MQ_S_STOPPED, BLK_MQ_S_TAG_ACTIVE, BLK_MQ_S_SCHED_RESTART, /* hw queue is inactive after all its CPUs become offline */ BLK_MQ_S_INACTIVE, BLK_MQ_S_MAX }; struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); #define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int queue_depth, unsigned int set_flags); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_request(struct request *rq); int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, unsigned int poll_flags); bool blk_mq_queue_inflight(struct request_queue *q); enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), /* set RQF_PM */ BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx); /* * Tag address space map. */ struct blk_mq_tags { unsigned int nr_tags; unsigned int nr_reserved_tags; unsigned int active_queues; struct sbitmap_queue bitmap_tags; struct sbitmap_queue breserved_tags; struct request **rqs; struct request **static_rqs; struct list_head page_list; /* * used to clear request reference in rqs[] before freeing one * request pool */ spinlock_t lock; }; static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { prefetch(tags->rqs[tag]); return tags->rqs[tag]; } return NULL; } enum { BLK_MQ_UNIQUE_TAG_BITS = 16, BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1, }; u32 blk_mq_unique_tag(struct request *rq); static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag) { return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS; } static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) { return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } /** * blk_mq_rq_state() - read the current MQ_RQ_* state of a request * @rq: target request. */ static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) { return READ_ONCE(rq->state); } static inline int blk_mq_request_started(struct request *rq) { return blk_mq_rq_state(rq) != MQ_RQ_IDLE; } static inline int blk_mq_request_completed(struct request *rq) { return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; } /* * * Set the state to complete when completing a request from inside ->queue_rq. * This is used by drivers that want to ensure special complete actions that * need access to the request are called on failure, e.g. by nvme for * multipathing. 
*/ static inline void blk_mq_set_request_complete(struct request *rq) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); } /* * Complete the request directly instead of deferring it to softirq or * completing it another CPU. Useful in preemptible instead of an interrupt. */ static inline void blk_mq_complete_request_direct(struct request *rq, void (*complete)(struct request *rq)) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); complete(rq); } void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_end_request_batch(struct io_comp_batch *ib); /* * Only need start/end time stamping if we have iostat or * blk stats enabled, or using an IO scheduler. */ static inline bool blk_mq_need_time_stamp(struct request *rq) { return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } static inline bool blk_mq_is_reserved_rq(struct request *rq) { return rq->rq_flags & RQF_RESV; } /** * blk_mq_add_to_batch() - add a request to the completion batch * @req: The request to add to batch * @iob: The batch to add the request * @is_error: Specify true if the request failed with an error * @complete: The completaion handler for the request * * Batched completions only work when there is no I/O error and no special * ->end_io handler. * * Return: true when the request was added to the batch, otherwise false */ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, bool is_error, void (*complete)(struct io_comp_batch *)) { /* * Check various conditions that exclude batch processing: * 1) No batch container * 2) Has scheduler data attached * 3) Not a passthrough request and end_io set * 4) Not a passthrough request and failed with an error */ if (!iob) return false; if (req->rq_flags & RQF_SCHED_TAGS) return false; if (!blk_rq_is_passthrough(req)) { if (req->end_io) return false; if (is_error) return false; } if (!iob->complete) iob->complete = complete; else if (iob->complete != complete) return false; iob->need_ts |= blk_mq_need_time_stamp(req); rq_list_add_tail(&iob->req_list, req); return true; } void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set); void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void 
blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); void blk_mq_freeze_queue_nomemsave(struct request_queue *q); void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q); static inline unsigned int __must_check blk_mq_freeze_queue(struct request_queue *q) { unsigned int memflags = memalloc_noio_save(); blk_mq_freeze_queue_nomemsave(q); return memflags; } static inline void blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) { blk_mq_unfreeze_queue_nomemrestore(q); memalloc_noio_restore(memflags); } void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); void blk_freeze_queue_start_non_owner(struct request_queue *q); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, struct device *dev, unsigned int offset); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); bool __blk_should_fake_timeout(struct request_queue *q); static inline bool blk_should_fake_timeout(struct request_queue *q) { if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) && test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) return __blk_should_fake_timeout(q); return false; } /** * blk_mq_rq_from_pdu - cast a PDU to a request * @pdu: the PDU (Protocol Data Unit) to be casted * * Return: request * * Driver command data is immediately after the request. So subtract request * size to get back to the original request. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { return pdu - sizeof(struct request); } /** * blk_mq_rq_to_pdu - cast a request to a PDU * @rq: the request to be casted * * Return: pointer to the PDU * * Driver command data is immediately after the request. So add request to get * the PDU. 
*/ static inline void *blk_mq_rq_to_pdu(struct request *rq) { return rq + 1; } #define queue_for_each_hw_ctx(q, hctx, i) \ xa_for_each(&(q)->hctx_table, (i), (hctx)) #define hctx_for_each_ctx(hctx, ctx, i) \ for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) rq->q->mq_ops->cleanup_rq(rq); } void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); static inline bool rq_is_sync(struct request *rq) { return op_is_sync(rq->cmd_flags); } void blk_rq_init(struct request_queue *q, struct request *rq); int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); void blk_rq_unprep_clone(struct request *rq); blk_status_t blk_insert_cloned_request(struct request *rq); struct rq_map_data { struct page **pages; unsigned long offset; unsigned short page_order; unsigned short nr_entries; bool null_mapped; bool from_user; }; int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); int blk_rq_map_user_io(struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t, bool, int, bool, int); int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); bool blk_rq_is_poll(struct request *rq); struct req_iterator { struct bvec_iter iter; struct bio *bio; }; #define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) #define rq_for_each_segment(bvl, _rq, _iter) \ __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_segment(bvl, _iter.bio, _iter.iter) #define rq_for_each_bvec(bvl, _rq, _iter) \ __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_bvec(bvl, _iter.bio, _iter.iter) #define rq_iter_last(bvec, _iter) \ (_iter.bio->bi_next == NULL && \ bio_iter_last(bvec, _iter.iter)) /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request * blk_rq_cur_bytes() : bytes left in the current segment * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment * blk_rq_stats_sectors() : sectors of the entire request used for stats */ static inline sector_t blk_rq_pos(const struct request *rq) { return rq->__sector; } static inline unsigned int blk_rq_bytes(const struct request *rq) { return rq->__data_len; } static inline int blk_rq_cur_bytes(const struct request *rq) { if (!rq->bio) return 0; if (!bio_has_data(rq->bio)) /* dataless requests such as discard */ return rq->bio->bi_iter.bi_size; return bio_iovec(rq->bio).bv_len; } static inline unsigned int blk_rq_sectors(const struct request *rq) { return blk_rq_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_stats_sectors(const struct request *rq) { return rq->stats_sectors; } /* * Some commands like WRITE SAME have a payload or data transfer size which * is different from the size of the request. 
Any driver that supports such * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to * calculate the data transfer size. */ static inline unsigned int blk_rq_payload_bytes(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return rq->special_vec.bv_len; return blk_rq_bytes(rq); } /* * Return the first full biovec in the request. The caller needs to check that * there are any bvecs before calling this helper. */ static inline struct bio_vec req_bvec(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return rq->special_vec; return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); } static inline unsigned int blk_rq_count_bios(struct request *rq) { unsigned int nr_bios = 0; struct bio *bio; __rq_for_each_bio(bio, rq) nr_bios++; return nr_bios; } void blk_steal_bios(struct bio_list *list, struct request *rq); /* * Request completion related functions. * * blk_update_request() completes given number of bytes and updates * the request without completing it. */ bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); void blk_abort_request(struct request *); /* * Number of physical segments as sent to the device. * * Normally this is the number of discontiguous data segments sent by the * submitter. But for data-less command like discard we might have no * actual data segments submitted, but the driver might have to add it's * own special payload. In that case we still return 1 here so that this * special payload will be mapped. */ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return 1; return rq->nr_phys_segments; } /* * Number of discard segments (or ranges) the driver needs to fill in. * Each discard bio merged into a request is counted as one segment. */ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) { return max_t(unsigned short, rq->nr_phys_segments, 1); } int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, struct scatterlist **last_sg); static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist) { struct scatterlist *last_sg = NULL; return __blk_rq_map_sg(rq, sglist, &last_sg); } void blk_dump_rq_flags(struct request *, char *); #endif /* BLK_MQ_H */
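The kernel-doc in this header spells out the PDU convention: when @cmd_size is set in the tag set, driver per-command data is placed immediately after struct request and is reached with blk_mq_rq_to_pdu()/blk_mq_rq_from_pdu(). Below is a minimal sketch of that convention, assuming a hypothetical driver; struct my_cmd, my_queue_rq and my_tag_set are illustration names, not part of this header.

/*
 * Sketch only: illustrates the cmd_size / blk_mq_rq_to_pdu() convention
 * described above. "struct my_cmd", "my_queue_rq" and "my_tag_set" are
 * hypothetical names.
 */
#include <linux/blk-mq.h>
#include <linux/numa.h>

struct my_cmd {
        u32 opcode;             /* driver-private per-command data (PDU) */
        u64 lba;
};

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        /* The PDU lives directly behind the request, see blk_mq_rq_to_pdu(). */
        struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

        cmd->lba = blk_rq_pos(rq);

        blk_mq_start_request(rq);
        /* A real driver would issue cmd to hardware here; this sketch
         * completes the request immediately. */
        blk_mq_end_request(rq, BLK_STS_OK);
        return BLK_STS_OK;
}

static const struct blk_mq_ops my_mq_ops = {
        .queue_rq       = my_queue_rq,
};

/* cmd_size reserves room for one struct my_cmd behind every request. */
static struct blk_mq_tag_set my_tag_set = {
        .ops            = &my_mq_ops,
        .nr_hw_queues   = 1,
        .queue_depth    = 64,
        .cmd_size       = sizeof(struct my_cmd),
        .numa_node      = NUMA_NO_NODE,
};

A real driver would then register the set with blk_mq_alloc_tag_set() and create the disk with blk_mq_alloc_disk(), both declared above.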
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cryptographic API.
 *
 * Single-block cipher operations.
 *
 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
 * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <crypto/algapi.h>
#include <crypto/internal/cipher.h>
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
#include "internal.h"

static int setkey_unaligned(struct crypto_cipher *tfm, const u8 *key,
                            unsigned int keylen)
{
        struct cipher_alg *cia = crypto_cipher_alg(tfm);
        unsigned long alignmask = crypto_cipher_alignmask(tfm);
        int ret;
        u8 *buffer, *alignbuffer;
        unsigned long absize;

        absize = keylen + alignmask;
        buffer = kmalloc(absize, GFP_ATOMIC);
        if (!buffer)
                return -ENOMEM;

        alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
        memcpy(alignbuffer, key, keylen);
        ret = cia->cia_setkey(crypto_cipher_tfm(tfm), alignbuffer, keylen);
        kfree_sensitive(buffer);
        return ret;
}

int crypto_cipher_setkey(struct crypto_cipher *tfm,
                         const u8 *key, unsigned int keylen)
{
        struct cipher_alg *cia = crypto_cipher_alg(tfm);
        unsigned long alignmask = crypto_cipher_alignmask(tfm);

        if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize)
                return -EINVAL;

        if ((unsigned long)key & alignmask)
                return setkey_unaligned(tfm, key, keylen);

        return cia->cia_setkey(crypto_cipher_tfm(tfm), key, keylen);
}
EXPORT_SYMBOL_NS_GPL(crypto_cipher_setkey, "CRYPTO_INTERNAL");

static inline void cipher_crypt_one(struct crypto_cipher *tfm,
                                    u8 *dst, const u8 *src, bool enc)
{
        unsigned long alignmask = crypto_cipher_alignmask(tfm);
        struct cipher_alg *cia = crypto_cipher_alg(tfm);
        void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
                enc ? cia->cia_encrypt : cia->cia_decrypt;

        if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) {
                unsigned int bs = crypto_cipher_blocksize(tfm);
                u8 buffer[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
                u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);

                memcpy(tmp, src, bs);
                fn(crypto_cipher_tfm(tfm), tmp, tmp);
                memcpy(dst, tmp, bs);
        } else {
                fn(crypto_cipher_tfm(tfm), dst, src);
        }
}

void crypto_cipher_encrypt_one(struct crypto_cipher *tfm,
                               u8 *dst, const u8 *src)
{
        cipher_crypt_one(tfm, dst, src, true);
}
EXPORT_SYMBOL_NS_GPL(crypto_cipher_encrypt_one, "CRYPTO_INTERNAL");

void crypto_cipher_decrypt_one(struct crypto_cipher *tfm,
                               u8 *dst, const u8 *src)
{
        cipher_crypt_one(tfm, dst, src, false);
}
EXPORT_SYMBOL_NS_GPL(crypto_cipher_decrypt_one, "CRYPTO_INTERNAL");

struct crypto_cipher *crypto_clone_cipher(struct crypto_cipher *cipher)
{
        struct crypto_tfm *tfm = crypto_cipher_tfm(cipher);
        struct crypto_alg *alg = tfm->__crt_alg;
        struct crypto_cipher *ncipher;
        struct crypto_tfm *ntfm;

        if (alg->cra_init)
                return ERR_PTR(-ENOSYS);

        if (unlikely(!crypto_mod_get(alg)))
                return ERR_PTR(-ESTALE);

        ntfm = __crypto_alloc_tfmgfp(alg, CRYPTO_ALG_TYPE_CIPHER,
                                     CRYPTO_ALG_TYPE_MASK, GFP_ATOMIC);
        if (IS_ERR(ntfm)) {
                crypto_mod_put(alg);
                return ERR_CAST(ntfm);
        }

        ntfm->crt_flags = tfm->crt_flags;

        ncipher = __crypto_cipher_cast(ntfm);

        return ncipher;
}
EXPORT_SYMBOL_GPL(crypto_clone_cipher);
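As a usage illustration for the helpers above, the sketch below allocates an "aes" single-block cipher, sets a key and encrypts one 16-byte block. It is a sketch only: my_aes_encrypt_block is a hypothetical caller, and because crypto_cipher_setkey()/crypto_cipher_encrypt_one() are exported in the CRYPTO_INTERNAL namespace, such a caller would normally live inside the crypto subsystem and import that namespace.

/*
 * Sketch only: typical single-block cipher usage. "my_aes_encrypt_block"
 * is hypothetical; the namespace import mirrors the EXPORT_SYMBOL_NS_GPL()
 * markings above.
 */
#include <crypto/internal/cipher.h>
#include <linux/err.h>
#include <linux/module.h>

MODULE_IMPORT_NS("CRYPTO_INTERNAL");

static int my_aes_encrypt_block(const u8 key[16], const u8 in[16], u8 out[16])
{
        struct crypto_cipher *tfm;
        int ret;

        tfm = crypto_alloc_cipher("aes", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        ret = crypto_cipher_setkey(tfm, key, 16);
        if (!ret)
                /* One block in, one block out; alignment handled internally. */
                crypto_cipher_encrypt_one(tfm, out, in);

        crypto_free_cipher(tfm);
        return ret;
}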
231 230 230 15 76 258 258 257 15 258 258 46 96 32 125 26 13 5 14 165 166 27 143 23 141 103 127 140 141 32 140 13 13 63 83 32 82 410 87 87 87 159 318 110 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 // SPDX-License-Identifier: GPL-2.0 /* * Tty buffer allocation management */ #include <linux/types.h> #include <linux/errno.h> #include <linux/minmax.h> #include <linux/tty.h> #include <linux/tty_buffer.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/ratelimit.h> #include "tty.h" #define MIN_TTYB_SIZE 256 #define TTYB_ALIGN_MASK 0xff /* * Byte threshold to limit memory consumption for flip buffers. * The actual memory limit is > 2x this amount. */ #define TTYB_DEFAULT_MEM_LIMIT (640 * 1024UL) /* * We default to dicing tty buffer allocations to this many characters * in order to avoid multiple page allocations. We know the size of * tty_buffer itself but it must also be taken into account that the * buffer is 256 byte aligned. 
See tty_buffer_find for the allocation * logic this must match. */ #define TTY_BUFFER_PAGE (((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~TTYB_ALIGN_MASK) /** * tty_buffer_lock_exclusive - gain exclusive access to buffer * @port: tty port owning the flip buffer * * Guarantees safe use of the &tty_ldisc_ops.receive_buf() method by excluding * the buffer work and any pending flush from using the flip buffer. Data can * continue to be added concurrently to the flip buffer from the driver side. * * See also tty_buffer_unlock_exclusive(). */ void tty_buffer_lock_exclusive(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; atomic_inc(&buf->priority); mutex_lock(&buf->lock); } EXPORT_SYMBOL_GPL(tty_buffer_lock_exclusive); /** * tty_buffer_unlock_exclusive - release exclusive access * @port: tty port owning the flip buffer * * The buffer work is restarted if there is data in the flip buffer. * * See also tty_buffer_lock_exclusive(). */ void tty_buffer_unlock_exclusive(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; bool restart = buf->head->commit != buf->head->read; atomic_dec(&buf->priority); mutex_unlock(&buf->lock); if (restart) queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL_GPL(tty_buffer_unlock_exclusive); /** * tty_buffer_space_avail - return unused buffer space * @port: tty port owning the flip buffer * * Returns: the # of bytes which can be written by the driver without reaching * the buffer limit. * * Note: this does not guarantee that memory is available to write the returned * # of bytes (use tty_prepare_flip_string() to pre-allocate if memory * guarantee is required). */ unsigned int tty_buffer_space_avail(struct tty_port *port) { int space = port->buf.mem_limit - atomic_read(&port->buf.mem_used); return max(space, 0); } EXPORT_SYMBOL_GPL(tty_buffer_space_avail); static void tty_buffer_reset(struct tty_buffer *p, size_t size) { p->used = 0; p->size = size; p->next = NULL; p->commit = 0; p->lookahead = 0; p->read = 0; p->flags = true; } /** * tty_buffer_free_all - free buffers used by a tty * @port: tty port to free from * * Remove all the buffers pending on a tty whether queued with data or in the * free ring. Must be called when the tty is no longer in use. */ void tty_buffer_free_all(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; struct tty_buffer *p, *next; struct llist_node *llist; unsigned int freed = 0; int still_used; while ((p = buf->head) != NULL) { buf->head = p->next; freed += p->size; if (p->size > 0) kfree(p); } llist = llist_del_all(&buf->free); llist_for_each_entry_safe(p, next, llist, free) kfree(p); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; still_used = atomic_xchg(&buf->mem_used, 0); WARN(still_used != freed, "we still have not freed %d bytes!", still_used - freed); } /** * tty_buffer_alloc - allocate a tty buffer * @port: tty port * @size: desired size (characters) * * Allocate a new tty buffer to hold the desired number of characters. We * round our buffers off in 256 character chunks to get better allocation * behaviour. * * Returns: %NULL if out of memory or the allocation would exceed the per * device queue. 
*/ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size) { struct llist_node *free; struct tty_buffer *p; /* Round the buffer size out */ size = __ALIGN_MASK(size, TTYB_ALIGN_MASK); if (size <= MIN_TTYB_SIZE) { free = llist_del_first(&port->buf.free); if (free) { p = llist_entry(free, struct tty_buffer, free); goto found; } } /* Should possibly check if this fails for the largest buffer we * have queued and recycle that ? */ if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit) return NULL; p = kmalloc(struct_size(p, data, 2 * size), GFP_ATOMIC | __GFP_NOWARN); if (p == NULL) return NULL; found: tty_buffer_reset(p, size); atomic_add(size, &port->buf.mem_used); return p; } /** * tty_buffer_free - free a tty buffer * @port: tty port owning the buffer * @b: the buffer to free * * Free a tty buffer, or add it to the free list according to our internal * strategy. */ static void tty_buffer_free(struct tty_port *port, struct tty_buffer *b) { struct tty_bufhead *buf = &port->buf; /* Dumb strategy for now - should keep some stats */ WARN_ON(atomic_sub_return(b->size, &buf->mem_used) < 0); if (b->size > MIN_TTYB_SIZE) kfree(b); else if (b->size > 0) llist_add(&b->free, &buf->free); } /** * tty_buffer_flush - flush full tty buffers * @tty: tty to flush * @ld: optional ldisc ptr (must be referenced) * * Flush all the buffers containing receive data. If @ld != %NULL, flush the * ldisc input buffer. * * Locking: takes buffer lock to ensure single-threaded flip buffer 'consumer'. */ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld) { struct tty_port *port = tty->port; struct tty_bufhead *buf = &port->buf; struct tty_buffer *next; atomic_inc(&buf->priority); mutex_lock(&buf->lock); /* paired w/ release in __tty_buffer_request_room; ensures there are * no pending memory accesses to the freed buffer */ while ((next = smp_load_acquire(&buf->head->next)) != NULL) { tty_buffer_free(port, buf->head); buf->head = next; } buf->head->read = buf->head->commit; buf->head->lookahead = buf->head->read; if (ld && ld->ops->flush_buffer) ld->ops->flush_buffer(tty); atomic_dec(&buf->priority); mutex_unlock(&buf->lock); } /** * __tty_buffer_request_room - grow tty buffer if needed * @port: tty port * @size: size desired * @flags: buffer has to store flags along character data * * Make at least @size bytes of linear space available for the tty buffer. * * Will change over to a new buffer if the current buffer is encoded as * %TTY_NORMAL (so has no flags buffer) and the new buffer requires a flags * buffer. * * Returns: the size we managed to find. */ static int __tty_buffer_request_room(struct tty_port *port, size_t size, bool flags) { struct tty_bufhead *buf = &port->buf; struct tty_buffer *n, *b = buf->tail; size_t left = (b->flags ? 1 : 2) * b->size - b->used; bool change = !b->flags && flags; if (!change && left >= size) return size; /* This is the slow path - looking for new buffers to use */ n = tty_buffer_alloc(port, size); if (n == NULL) return change ? 0 : left; n->flags = flags; buf->tail = n; /* * Paired w/ acquire in flush_to_ldisc() and lookahead_bufs() * ensures they see all buffer data. */ smp_store_release(&b->commit, b->used); /* * Paired w/ acquire in flush_to_ldisc() and lookahead_bufs() * ensures the latest commit value can be read before the head * is advanced to the next buffer. 
*/ smp_store_release(&b->next, n); return size; } int tty_buffer_request_room(struct tty_port *port, size_t size) { return __tty_buffer_request_room(port, size, true); } EXPORT_SYMBOL_GPL(tty_buffer_request_room); size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, const u8 *flags, bool mutable_flags, size_t size) { bool need_flags = mutable_flags || flags[0] != TTY_NORMAL; size_t copied = 0; do { size_t goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE); size_t space = __tty_buffer_request_room(port, goal, need_flags); struct tty_buffer *tb = port->buf.tail; if (unlikely(space == 0)) break; memcpy(char_buf_ptr(tb, tb->used), chars, space); if (mutable_flags) { memcpy(flag_buf_ptr(tb, tb->used), flags, space); flags += space; } else if (tb->flags) { memset(flag_buf_ptr(tb, tb->used), flags[0], space); } else { /* tb->flags should be available once requested */ WARN_ON_ONCE(need_flags); } tb->used += space; copied += space; chars += space; /* There is a small chance that we need to split the data over * several buffers. If this is the case we must loop. */ } while (unlikely(size > copied)); return copied; } EXPORT_SYMBOL(__tty_insert_flip_string_flags); /** * tty_prepare_flip_string - make room for characters * @port: tty port * @chars: return pointer for character write area * @size: desired size * * Prepare a block of space in the buffer for data. * * This is used for drivers that need their own block copy routines into the * buffer. There is no guarantee the buffer is a DMA target! * * Returns: the length available and buffer pointer (@chars) to the space which * is now allocated and accounted for as ready for normal characters. */ size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size) { size_t space = __tty_buffer_request_room(port, size, false); if (likely(space)) { struct tty_buffer *tb = port->buf.tail; *chars = char_buf_ptr(tb, tb->used); if (tb->flags) memset(flag_buf_ptr(tb, tb->used), TTY_NORMAL, space); tb->used += space; } return space; } EXPORT_SYMBOL_GPL(tty_prepare_flip_string); /** * tty_ldisc_receive_buf - forward data to line discipline * @ld: line discipline to process input * @p: char buffer * @f: %TTY_NORMAL, %TTY_BREAK, etc. flags buffer * @count: number of bytes to process * * Callers other than flush_to_ldisc() need to exclude the kworker from * concurrent use of the line discipline, see paste_selection(). * * Returns: the number of bytes processed. */ size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f, size_t count) { if (ld->ops->receive_buf2) count = ld->ops->receive_buf2(ld->tty, p, f, count); else { count = min_t(size_t, count, ld->tty->receive_room); if (count && ld->ops->receive_buf) ld->ops->receive_buf(ld->tty, p, f, count); } return count; } EXPORT_SYMBOL_GPL(tty_ldisc_receive_buf); static void lookahead_bufs(struct tty_port *port, struct tty_buffer *head) { head->lookahead = max(head->lookahead, head->read); while (head) { struct tty_buffer *next; unsigned int count; /* * Paired w/ release in __tty_buffer_request_room(); * ensures commit value read is not stale if the head * is advancing to the next buffer. */ next = smp_load_acquire(&head->next); /* * Paired w/ release in __tty_buffer_request_room() or in * tty_buffer_flush(); ensures we see the committed buffer data. 
*/ count = smp_load_acquire(&head->commit) - head->lookahead; if (!count) { head = next; continue; } if (port->client_ops->lookahead_buf) { u8 *p, *f = NULL; p = char_buf_ptr(head, head->lookahead); if (head->flags) f = flag_buf_ptr(head, head->lookahead); port->client_ops->lookahead_buf(port, p, f, count); } head->lookahead += count; } } static size_t receive_buf(struct tty_port *port, struct tty_buffer *head, size_t count) { u8 *p = char_buf_ptr(head, head->read); const u8 *f = NULL; size_t n; if (head->flags) f = flag_buf_ptr(head, head->read); n = port->client_ops->receive_buf(port, p, f, count); if (n > 0) memset(p, 0, n); return n; } /** * flush_to_ldisc - flush data from buffer to ldisc * @work: tty structure passed from work queue. * * This routine is called out of the software interrupt to flush data from the * buffer chain to the line discipline. * * The receive_buf() method is single threaded for each tty instance. * * Locking: takes buffer lock to ensure single-threaded flip buffer 'consumer'. */ static void flush_to_ldisc(struct work_struct *work) { struct tty_port *port = container_of(work, struct tty_port, buf.work); struct tty_bufhead *buf = &port->buf; mutex_lock(&buf->lock); while (1) { struct tty_buffer *head = buf->head; struct tty_buffer *next; size_t count, rcvd; /* Ldisc or user is trying to gain exclusive access */ if (atomic_read(&buf->priority)) break; /* paired w/ release in __tty_buffer_request_room(); * ensures commit value read is not stale if the head * is advancing to the next buffer */ next = smp_load_acquire(&head->next); /* paired w/ release in __tty_buffer_request_room() or in * tty_buffer_flush(); ensures we see the committed buffer data */ count = smp_load_acquire(&head->commit) - head->read; if (!count) { if (next == NULL) break; buf->head = next; tty_buffer_free(port, head); continue; } rcvd = receive_buf(port, head, count); head->read += rcvd; if (rcvd < count) lookahead_bufs(port, head); if (!rcvd) break; if (need_resched()) cond_resched(); } mutex_unlock(&buf->lock); } static inline void tty_flip_buffer_commit(struct tty_buffer *tail) { /* * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees * buffer data. */ smp_store_release(&tail->commit, tail->used); } /** * tty_flip_buffer_push - push terminal buffers * @port: tty port to push * * Queue a push of the terminal flip buffers to the line discipline. Can be * called from IRQ/atomic context. * * In the event of the queue being busy for flipping the work will be held off * and retried later. */ void tty_flip_buffer_push(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; tty_flip_buffer_commit(buf->tail); queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); /** * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and * push * @port: tty port * @chars: characters * @size: size * * The function combines tty_insert_flip_string() and tty_flip_buffer_push() * with the exception of properly holding the @port->lock. * * To be used only internally (by pty currently). * * Returns: the number added. 
*/ int tty_insert_flip_string_and_push_buffer(struct tty_port *port, const u8 *chars, size_t size) { struct tty_bufhead *buf = &port->buf; unsigned long flags; spin_lock_irqsave(&port->lock, flags); size = tty_insert_flip_string(port, chars, size); if (size) tty_flip_buffer_commit(buf->tail); spin_unlock_irqrestore(&port->lock, flags); queue_work(system_unbound_wq, &buf->work); return size; } /** * tty_buffer_init - prepare a tty buffer structure * @port: tty port to initialise * * Set up the initial state of the buffer management for a tty device. Must be * called before the other tty buffer functions are used. */ void tty_buffer_init(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; mutex_init(&buf->lock); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; init_llist_head(&buf->free); atomic_set(&buf->mem_used, 0); atomic_set(&buf->priority, 0); INIT_WORK(&buf->work, flush_to_ldisc); buf->mem_limit = TTYB_DEFAULT_MEM_LIMIT; } /** * tty_buffer_set_limit - change the tty buffer memory limit * @port: tty port to change * @limit: memory limit to set * * Change the tty buffer memory limit. * * Must be called before the other tty buffer functions are used. */ int tty_buffer_set_limit(struct tty_port *port, int limit) { if (limit < MIN_TTYB_SIZE) return -EINVAL; port->buf.mem_limit = limit; return 0; } EXPORT_SYMBOL_GPL(tty_buffer_set_limit); /* slave ptys can claim nested buffer lock when handling BRK and INTR */ void tty_buffer_set_lock_subclass(struct tty_port *port) { lockdep_set_subclass(&port->buf.lock, TTY_LOCK_SLAVE); } bool tty_buffer_restart_work(struct tty_port *port) { return queue_work(system_unbound_wq, &port->buf.work); } bool tty_buffer_cancel_work(struct tty_port *port) { return cancel_work_sync(&port->buf.work); } void tty_buffer_flush_work(struct tty_port *port) { flush_work(&port->buf.work); }
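To show how the flip-buffer API implemented in this file is consumed, the sketch below is a minimal driver receive path; my_rx is a hypothetical function and port is assumed to be an initialised struct tty_port. tty_insert_flip_string() fills the tail buffer (growing it via __tty_buffer_request_room()) and tty_flip_buffer_push() commits the data and queues flush_to_ldisc().

/*
 * Sketch only: a hypothetical driver RX path feeding the flip buffer.
 */
#include <linux/tty_flip.h>
#include <linux/printk.h>

static void my_rx(struct tty_port *port, const u8 *data, size_t len)
{
        /* Copy into the tail buffer; may return less under memory-limit pressure. */
        size_t copied = tty_insert_flip_string(port, data, len);

        if (copied < len)
                pr_warn_ratelimited("tty rx overrun, dropped %zu bytes\n",
                                    len - copied);

        /* Commit the data and schedule flush_to_ldisc() on system_unbound_wq. */
        tty_flip_buffer_push(port);
}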
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * drivers/net/team/team.c - Network team device driver
 * Copyright (c) 201